using System;
using System.Text;
using System.Collections.Generic;
using System.Runtime.Serialization.Formatters.Binary;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using miew.ReadOnly;
using miew.Enumerable;
using miew.Tokenization;
namespace agree
{
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///
///
///
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
public partial class Grammar : ISysObj
{
// Byte signature ("gee-grammar-binary" followed by byte 0x1A) that Save writes at the start of a binary
// grammar file and that TryLoadBinary checks to decide whether a file is in binary format.
static readonly byte[] binary_signature = Encoding.ASCII.GetBytes("gee-grammar-binary\x1A");
// Configuration supplied to the constructor (or a default Config); replaced by the deserialized
// configuration when a binary grammar is loaded.
public Config config;
// Type manager; created while loading (both the script and the binary load paths assign it).
public TypeMgr tm;
// Lexicon; created while loading (both the script and the binary load paths assign it).
public Lexicon lex;
// Object passed to the constructor; exposed through SysObj and SysObjParent.
readonly SysObj so;
public SysObj SysObj { get { return so; } }
// Name passed to the constructor.
readonly String s_name;
public String SysObjName { get { return s_name; } }
// Set when loading from a script file set (falls back to the grammar name there).
String s_description;
public string SysObjDescription { get { return s_description; } }
// Set when loading from a script file set (falls back to the empty string there).
String s_author;
public string Author { get { return s_author; } }
// Name resolver created by the constructor.
GrammarNameResolver nr;
// Node labeler; created at the end of Load(CommandToken, String).
public GrammarNodeLabeler nl;
// Parse submitter; created in Load(CommandToken, String). Null until then.
public Submitter sub;
//internal HashSet<ParseChart> parses = new HashSet<ParseChart>(); // beware of this GC-root
// Becomes true at the end of a successful Load(CommandToken, String).
bool f_loaded = false;
// Raised at the end of Load(CommandToken, String), after f_loaded has been set.
internal event Action<CommandToken, Grammar> LoadedEvent = null;
public bool IsLoaded { get { return f_loaded; } }
// Rule arrays populated by AnalyzeRuleCompatibility: all rules, then the grammar-rule and
// lexical-rule subsets of that array.
public Rule[] _rules;
public GrammarRule[] _grammar_rules;
public LexicalRule[] _lexical_rules;
// Start symbols whose names appear in config.grammar.start_symbols; populated by Load(CommandToken, String).
public StartSymbol[] StartSymbols;
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary>
/// Creates an unloaded grammar. Nothing is read from disk here; call Load to populate the grammar.
/// </summary>
/// <param name="so">Object returned by the SysObj and SysObjParent properties.</param>
/// <param name="name">Name returned by SysObjName.</param>
/// <param name="config">Configuration to use; when null a default-constructed Config is used.</param>
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
public Grammar(SysObj so, String name, Config config)
{
#if DEBUG
    _singleton = this;
#endif
    // identity first, then configuration
    this.so = so;
    this.s_name = name;
    this.config = config ?? new Config();

    // the resolver only stores the back-reference to this grammar
    this.nr = new GrammarNameResolver(this);
}
//public Grammar(CommandToken tx, String name, String filename)
// : this(tx.SystemInstance, name)
//{
// this.Load(tx, filename);
//}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary>
/// Examine the specified file. If the signature of a binary grammar file is found, load it. Otherwise, assume
/// it's a script file that lists the component configuration and TDL files. After the grammar data is loaded,
/// the submitter, node labeler and start symbols are initialized and LoadedEvent is raised.
/// </summary>
/// <param name="tx">Command token that receives status messages and is handed to LoadedEvent subscribers.</param>
/// <param name="filename">
/// Path of the grammar file. If no file exists at that path, the same path with its extension changed to
/// ".gee" is tried instead.
/// </param>
/// <exception cref="InvalidOperationException">The grammar has already been loaded.</exception>
/// <exception cref="FileNotFoundException">Neither the given path nor its ".gee" variant exists.</exception>
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
public void Load(CommandToken tx, String filename)
{
    if (f_loaded)
        throw new InvalidOperationException("grammar already loaded");

    // Keep the path the caller asked for so that a failure reports it, not the ".gee" fallback.
    String requested = Path.GetFullPath(filename);
    filename = requested;
    if (!File.Exists(filename))
    {
        filename = Path.ChangeExtension(requested, ".gee");
        if (!File.Exists(filename))
        {
            String msg = String.Format("The file '{0}' could not be found.", requested);
            throw new FileNotFoundException(msg, requested);
        }
    }

    Stopwatch stopw = Stopwatch.StartNew();
    tx.TransactionStatus("Begin loading '<span style='color:#008000;'>{0}</span>'.", filename);

    if (!TryLoadBinary(tx, filename))
    {
        // not a binary grammar: treat the file as a script listing configuration and TDL files
        GrammarFileSet tdlg = new GrammarFileSet(this.config, filename);
        this.Load(tx, tdlg);
    }

    // the following needs the lexicon to initialize the tokenizer
    this.sub = new Submitter(config, this);

    // Initialize the parsing component for this grammar. This parser will use this Grammar's TypeManager,
    // GrammarRules, Lexicon, and the StartSymbols from tm.AllEntries.
    tx.TransactionStatus("Initialize parser.");
    nl = new GrammarNodeLabeler(
        this,
        config.nodeLabels,
        tm.AllEntries.OfType<NodeLabelTemplate>(),
        tm.AllEntries.OfType<NodeMetaTemplate>());

    StartSymbols = tm.AllEntries
        .OfType<StartSymbol>()
        .Where(ss => config.grammar.start_symbols.Contains(ss.Name))
        .ToArray();

    // Expand start symbols: the property is read only for its effect, the value is discarded.
    foreach (StartSymbol ss in StartSymbols)
    {
        Tfs te_ss = ss.Expanded;
    }

    f_loaded = true;

    // Invoke through the local copy: the event field may be changed by another thread between the
    // null check and the call.
    Action<CommandToken, Grammar> ev = LoadedEvent;
    if (ev != null)
        ev(tx, this);

    tx.TransactionStatus("Loaded '<span style='color:#008000;'>{0}</span>' in {1:N4} s.", filename, stopw.Elapsed.TotalSeconds);
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary>
/// Load the grammar from a GrammarFileSet. This is the path taken by Load(CommandToken, String) when the file
/// is not a binary grammar. On return, tm, lex and the rule arrays have been populated.
/// </summary>
/// <param name="tx">Command token that receives status messages and is passed on to the loading steps.</param>
/// <param name="tdlg">File set supplying the description, author, TDL content and irregular forms.</param>
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
void Load(CommandToken tx, GrammarFileSet tdlg)
{
// Description falls back to the grammar name, author to the empty string.
s_description = tdlg.description ?? s_name;
s_author = tdlg.author ?? "";
// Create a type hierarchy
tm = new TypeMgr(this);
// Populate type hierarchy from the specified tokens
tm.LoadTypeHierarchyFromTdl(tx, tdlg);
config.parser.CompileGrammarPaths(tm);
// Create and persist definition TFSs for all types
tx.TransactionStatus("Parse type definitions.");
ParseTypeDefinitions(tx);
// Gather and group the various entry types
tm.LoadEntriesFromTdl(tx, tdlg);
// Create and persist TFSs for authored definitions
tx.TransactionStatus("Parse entry definitions.");
ParseEntryDefinitions(tx);
// No more strings to add. Freeze the type hierarchy.
tm.Petrify();
#if FCTC_STATS
tm.TypePatternReport();
#endif
// Expand every type now. Original note: "gp loading needs some. todo: make Task<T>".
ExpandTypeDefinitions(tx);
#if FCTC_STATS
tm.TypePatternReport();
#endif
// Optional diagnostics, controlled by the static f_garbage / regress_file switches; output goes to the console.
if (f_garbage)
Console.WriteLine(GarbageReport());
if (regress_file != null)
tm.RegressionTest(Console.Out, regress_file, false);
tx.TransactionStatus("Check rule compatibility.");
AnalyzeRuleCompatibility();
// Lexicon progress is written directly to the console rather than through tx.
Console.Write("scan lexicon...");
// Create and initialize the lexicon.
lex = new Lexicon(this, tm.AllEntries.OfType<LexicalEntry>(), tdlg.irregs);
Console.WriteLine("done.");
}
/// <summary>
/// When non-null, loading from a file set runs tm.RegressionTest with this value, writing to Console.Out.
/// </summary>
public static String regress_file = null;
/// <summary>
/// When true, loading from a file set writes GarbageReport() to the console.
/// </summary>
public static bool f_garbage = false;
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary>
/// Collects every Rule entry from the type manager into _rules, splits them into _grammar_rules and
/// _lexical_rules, and then asks each rule to analyze its compatibility with the others. Lexical rules are
/// analyzed against all rules; grammar rules against the grammar rules, with all rules passed as the second
/// argument. The work runs in parallel when config.system.MultiThreading is set. The whole step is wrapped
/// in a TimingReport writing to Console.Out.
/// </summary>
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
void AnalyzeRuleCompatibility()
{
    using (var timing = new miew.Debugging.TimingReport(Console.Out, "Check rule compatibility."))
    {
        _rules = tm.AllEntries.OfType<Rule>().ToArray();
        _grammar_rules = _rules.OfType<GrammarRule>().ToArray();
        _lexical_rules = _rules.OfType<LexicalRule>().ToArray();

        if (config.system.MultiThreading)
        {
            // one pass over all rules, dispatching on the runtime kind of each rule
            Parallel.ForEach(_rules, rule =>
            {
                if (rule is LexicalRule)
                {
                    rule.AnalyzeRuleCompatibility(_rules, null);
                }
                else if (rule is GrammarRule)
                {
                    rule.AnalyzeRuleCompatibility(_grammar_rules, _rules);
                }
            });
        }
        else
        {
            // sequential: lexical rules first, then grammar rules (an empty array simply yields no iterations)
            foreach (LexicalRule lexical in _lexical_rules)
            {
                lexical.AnalyzeRuleCompatibility(_rules, null);
            }

            foreach (GrammarRule grammatical in _grammar_rules)
            {
                grammatical.AnalyzeRuleCompatibility(_grammar_rules, _rules);
            }
        }
    }
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary>
/// Load a grammar that was saved in binary format. The file must start with the binary grammar signature;
/// if it does not, nothing is modified and false is returned so the caller can try another format.
/// </summary>
/// <param name="tx">Command token passed on to the type manager while loading.</param>
/// <param name="filename">Path of an existing file to examine.</param>
/// <returns>true if the file carried the binary signature and was loaded; otherwise false.</returns>
/// <remarks>
/// On success this replaces config, tm and lex and marks all types as expanded. The description and author
/// fields are not assigned on this path.
/// </remarks>
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
bool TryLoadBinary(CommandToken tx, String filename)
{
    using (FileStream str = new FileStream(filename, FileMode.Open, FileAccess.Read, FileShare.Read))
    using (BinaryReader br = new BinaryReader(str))
    {
        // ReadBytes keeps reading until the requested count has been read or the stream ends, so a short
        // read can only mean the file is shorter than the signature (in which case the comparison fails).
        byte[] sig_check = br.ReadBytes(binary_signature.Length);
        if (!sig_check.SequenceEqual(binary_signature))
            return false;

        // NOTE(security): BinaryFormatter deserialization is unsafe on untrusted input. Only load binary
        // grammar files that come from a trusted source.
        BinaryFormatter bf = new BinaryFormatter();

        // read configuration options
        config = (Config)bf.Deserialize(br.BaseStream);

        // Create a type hierarchy
        tm = new TypeMgr(this);

        //todo:
        config.parser.CompileGrammarPaths(tm);

        // Load compiled grammar
        tm.LoadBinary(tx, br);
        f_expanded_all = true;

        // read lexicon
        // NOTE(review): Save writes the lexicon with BinaryFormatter; confirm Lexicon.Load reads that format.
        lex = Lexicon.Load(this, tm.AllEntries.OfType<LexicalEntry>(), br);
    }
    return true;
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary>
/// Save this grammar in binary format: the binary signature, the configuration, the type manager data and
/// the lexicon, in that order. An existing file at <paramref name="filename"/> is overwritten.
/// </summary>
/// <param name="tx">Command token that receives status messages and is passed on to the type manager.</param>
/// <param name="filename">Path of the file to create or overwrite.</param>
/// <exception cref="InvalidOperationException">The grammar has no type manager or lexicon yet.</exception>
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
public void Save(CommandToken tx, String filename)
{
    // Fail before touching the file system: FileMode.Create truncates an existing file, so discovering
    // a missing type manager or lexicon afterwards would destroy a previously saved grammar.
    if (tm == null || lex == null)
        throw new InvalidOperationException("The grammar must be loaded before it can be saved.");

    Stopwatch stopw = Stopwatch.StartNew();
    tx.TransactionStatus("Begin writing binary format '\ue099-008000{0}\ue099'.", filename);

    // Expand all types prior to saving. Done before the file is opened so that a failure during
    // expansion does not leave a truncated output file behind.
    ExpandTypeDefinitions(tx);

    using (Stream str = File.Open(filename, FileMode.Create, FileAccess.Write, FileShare.None))
    using (BinaryWriter bw = new BinaryWriter(str))
    {
        // write grammar signature
        bw.Write(binary_signature);

        // write configuration options
        BinaryFormatter bf = new BinaryFormatter();
        bf.Serialize(bw.BaseStream, config);

        // write type manager
        tm.Save(tx, bw);

        // write lexicon
        bf.Serialize(bw.BaseStream, lex);
    }

    tx.TransactionStatus("Wrote '\ue099-008000{0}\ue099' in {1:N4} s.", filename, stopw.Elapsed.TotalSeconds);
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary>
/// Builds an HTML fragment summarizing this grammar: the number of authored, GLB and total types, followed
/// by the number of grammar rules, lexical rules, inflection rules, start symbols and node labels.
/// </summary>
/// <returns>The HTML text.</returns>
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
public String GetInfo()
{
    StringBuilder sb = new StringBuilder();

    // "#,0" rather than "#,#": with the custom "#" placeholder alone a value of zero is rendered as an
    // empty string, which made zero counts show up blank. Non-zero values format identically.
    Action<String, Object> appendCount = (label, count) =>
        sb.AppendFormat("{0}: <span style='color:#008000;'>{1:#,0}</span><br />", label, count);

    sb.AppendFormat("<b>Information for '{0}':</b><br />", SysObjName);
    sb.Append("<br />");

    appendCount("Number of authored types", tm.code_size);
    appendCount("Number of GLB types", tm.type_arr.Length - tm.code_size);
    appendCount("Total types", tm.type_arr.Length);
    sb.Append("<br />");

    appendCount("Number of grammar rules", this.tm.AllEntries.Count(e => e is GrammarRule));
    // exact-type match: entries of types derived from LexicalRule are not counted on this line
    appendCount("Number of lexical rules", this.tm.AllEntries.Count(e => e.GetType() == typeof(LexicalRule)));
    appendCount("Number of inflection rules", this.tm.AllEntries.Count(e => e is MorphologicalRule));
    appendCount("Number of start symbols", this.tm.AllEntries.Count(e => e is StartSymbol));
    appendCount("Number of node labels", this.tm.AllEntries.Count(e => e is NodeLabel));
    //appendCount("Number of lexicon entries", this.lex.Count);
    sb.Append("<br />");
    //appendCount("Number of edges", loadtray.PoolMarkCount);

    return sb.ToString();
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary>
/// Reads the Definition property of every type known to the type manager, one after another. The values
/// are discarded; the property is touched only for whatever work its getter performs.
/// </summary>
/// <param name="tx">Command token that receives the status message.</param>
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
public void ParseTypeDefinitions(CommandToken tx)
{
    tx.TransactionStatus("Loading definitions for all Types");

    foreach (Instance inst in tm.AllTypes)
    {
        // value intentionally unused
        Tfs discarded = inst.Definition;
    }
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary>
/// Reads the Definition property of every entry known to the type manager, in parallel when
/// config.system.MultiThreading is set and sequentially otherwise. The values are discarded.
/// </summary>
/// <param name="tx">Command token that receives the status message.</param>
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
public void ParseEntryDefinitions(CommandToken tx)
{
    tx.TransactionStatus("loading definitions for all Entries");

    if (config.system.MultiThreading)
    {
        Parallel.ForEach(tm.AllEntries, entry =>
        {
            // value intentionally unused
            Tfs discarded = entry.Definition;
        });
    }
    else
    {
        foreach (Instance entry in tm.AllEntries)
        {
            // value intentionally unused
            Tfs discarded = entry.Definition;
        }
    }
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary>
/// Set once every type has been expanded (or when a binary grammar, which is stored expanded, is loaded).
/// </summary>
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
public bool f_expanded_all = false;

/// <summary>
/// Reads the Expanded property of every type known to the type manager, in parallel when
/// config.system.MultiThreading is set and sequentially otherwise, then records that expansion is complete.
/// Does nothing if expansion has already been completed.
/// </summary>
/// <param name="tx">Command token that receives the status message when expansion actually runs.</param>
public void ExpandTypeDefinitions(CommandToken tx)
{
    // Check first, so that no "Expanding..." status is reported when there is nothing to do.
    if (f_expanded_all)
        return;

    tx.TransactionStatus("Expanding all type definitions.");

    if (config.system.MultiThreading)
    {
        Parallel.ForEach(tm.AllTypes, t => { Tfs tfs = t.Expanded; });
    }
    else
    {
        foreach (Type t in tm.AllTypes)
        {
            Tfs tfs = t.Expanded;
        }
    }

    f_expanded_all = true;
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary>
/// Submits a sentence for parsing. While no submitter exists (the grammar has not been loaded), an already
/// completed task whose result is null is returned instead.
/// </summary>
/// <param name="sent">Text to parse.</param>
/// <returns>The task produced by the submitter, or a completed task with a null result.</returns>
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
public Task<ParseControl> Parse(String sent)
{
    if (sub != null)
        return sub.Parse(sent);

    // no submitter yet: hand back a finished task carrying null
    var completed = new TaskCompletionSource<ParseControl>();
    completed.SetResult(null);
    return completed.Task;
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary>
/// Submits a token set for parsing. While no submitter exists (the grammar has not been loaded), an already
/// completed task whose result is null is returned, matching the behavior of the String overload.
/// </summary>
/// <param name="ts">Tokens to parse.</param>
/// <returns>The task produced by the submitter, or a completed task with a null result.</returns>
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
public Task<ParseControl> Parse(TokenSet ts)
{
    if (sub == null)
    {
        // Previously this dereferenced a null submitter; behave like Parse(String) instead.
        var tcs = new TaskCompletionSource<ParseControl>();
        tcs.SetResult(null);
        return tcs.Task;
    }
    return sub.Parse(ts);
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///
/// chart_size is not necessarily the same as the number of tokens, so don't try to combine them into an ICollection...
/// TokenSet can be used for a single representation.
///
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//public Task<ParseChart> Parse(String source_text, int chart_size, IEnumerable<IParseObj> tokens)
//{
// return gp.Parse(source_text, chart_size, tokens);
//}
/// <summary>
/// Read-only, name-keyed view over this grammar's types and entries. The resolver holds no state besides
/// the reference to this grammar, so the instance created by the constructor is reused instead of
/// allocating a new one on every access.
/// </summary>
public IReadOnlyDictionary<String, ISysObj> SysObjChildren
{
    get
    {
        if (nr == null)
            nr = new GrammarNameResolver(this);
        return nr;
    }
}
/// <summary>
/// The object that was passed to the constructor as this grammar's owner.
/// </summary>
public ISysObj SysObjParent
{
    get
    {
        return this.so;
    }
}
/// <summary>
/// Read-only dictionary view over a grammar's types and entries, keyed by name. Lookups consult the type
/// manager's type dictionary first and its entry dictionary second; enumeration yields all types followed
/// by all entries. Nothing is cached: every member reads the grammar's current type manager.
/// </summary>
class GrammarNameResolver : IReadOnlyDictionary<String, ISysObj>
{
    readonly Grammar g;

    /// <param name="g">Grammar whose types and entries are exposed.</param>
    public GrammarNameResolver(Grammar g)
    {
        this.g = g;
    }

    public bool IsReadOnly { get { return true; } }

    /// <summary>True if a type or entry is registered under <paramref name="key"/>.</summary>
    public bool ContainsKey(String key)
    {
        ISysObj o;
        return TryGetValue(key, out o);
    }

    /// <summary>
    /// Snapshot of the names produced by enumerating this dictionary.
    /// </summary>
    public ICollection<String> Keys
    {
        get
        {
            // Built from the enumerator. The previous implementation wrapped this.Keys, i.e. the getter
            // called itself and recursed until the stack overflowed.
            ICollection<String> names = new List<String>(Count);
            foreach (KeyValuePair<String, ISysObj> kvp in this)
                names.Add(kvp.Key);
            return new ReadOnlyCollection<String>(names);
        }
    }

    /// <summary>
    /// Snapshot of the objects produced by enumerating this dictionary.
    /// </summary>
    public ICollection<ISysObj> Values
    {
        get
        {
            // Built from the enumerator; the previous implementation recursed into itself (see Keys).
            ICollection<ISysObj> objects = new List<ISysObj>(Count);
            foreach (KeyValuePair<String, ISysObj> kvp in this)
                objects.Add(kvp.Value);
            return new ReadOnlyCollection<ISysObj>(objects);
        }
    }

    /// <summary>Gets the type or entry registered under <paramref name="key"/>.</summary>
    /// <exception cref="KeyNotFoundException">No type or entry has that name.</exception>
    public ISysObj this[String key]
    {
        get
        {
            ISysObj so;
            if (!TryGetValue(key, out so))
                throw new KeyNotFoundException();
            return so;
        }
    }

    /// <summary>Number of types plus number of entries.</summary>
    public int Count
    {
        get
        {
            int c = 0;
            c += g.tm.type_dict.Count;
            c += g.tm.entry_dict.Count;
            //c += g.parses.Count;
            return c;
        }
    }

    /// <summary>
    /// Looks up <paramref name="key"/> among the types first and among the entries second.
    /// </summary>
    /// <returns>true and the object found, or false and null.</returns>
    public bool TryGetValue(String key, out ISysObj value)
    {
        Type t;
        if (g.tm.type_dict.TryGetValue(key, out t))
        {
            value = t;
            return true;
        }
        Entry e;
        if (g.tm.entry_dict.TryGetValue(key, out e))
        {
            value = e;
            return true;
        }
        //ParseChart pc = g.parses.FirstOrDefault(c => c.SysObjName == key);
        //if (pc != null)
        //{
        //    value = pc;
        //    return true;
        //}
        value = null;
        return false;
    }

    /// <summary>
    /// Enumerates all types, then all entries, each paired with its SysObjName.
    /// </summary>
    public IEnumerator<KeyValuePair<String, ISysObj>> GetEnumerator()
    {
        foreach (Type t in g.tm.type_dict.Values)
            yield return new KeyValuePair<String, ISysObj>(t.SysObjName, t);
        foreach (Entry e in g.tm.entry_dict.Values)
            yield return new KeyValuePair<String, ISysObj>(e.SysObjName, e);
        //foreach (ParseChart pc in g.parses)
        //    yield return new KeyValuePair<String, ISysObj>(pc.SysObjName, pc);
    }

    /// <summary>
    /// True if enumeration yields a pair with the same key and the same object reference.
    /// </summary>
    public bool Contains(KeyValuePair<String, ISysObj> item)
    {
        return this.Any(kvp => kvp.Key == item.Key && kvp.Value == item.Value);
    }

    /// <summary>
    /// Copies every pair, in enumeration order, into <paramref name="array"/> starting at
    /// <paramref name="arrayIndex"/>.
    /// </summary>
    public void CopyTo(KeyValuePair<String, ISysObj>[] array, int arrayIndex)
    {
        foreach (var kvp in this)
            array[arrayIndex++] = kvp;
    }

    System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator()
    {
        return GetEnumerator();
    }
};
};
}