using System;
using System.IO;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Threading.Tasks;
using System.Threading;
using miew.Debugging;
using miew.Enumerable;
using miew.String;
using miew.Unsafe;
using miew.Tokenization;
using miew.ReadOnly;
using miew.Concurrency;
namespace agree
{
using unif_stats = ParseControl.Stats._unification;
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary>
///
/// </summary>
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
public class ParseChart : SubscriptionChart
{
public ParseChart(ParseControl ctrl)
: base(ctrl)
{
ctrl.chart = this;
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary>
/// Generate new activities for the specified IParseObj
/// Try to unify the given chart edge against the KEY position of each compatible rule. If successful, proceed as
/// follows:
/// - Unary rules are now finished up immediately here, creating a passive edge (without creating an ActiveEdge)
/// and entering it into the parse chart.
/// - Otherwise, an ActiveEdge that expands leftwards or rightwards is created, as appropriate. Active edges are
/// always seeded with a positioned passive edge, but note that neither this partial passive edge (nor the
/// ActiveEdge) are ever entered into the parse chart. Instead, this function uses a subscription mechanism
/// to register the active edge's interest in a particular chart position. This subscription is what keeps the
/// ActiveEdge (object) alive.
/// </summary>
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
protected override void GenerateActiveEdges(IParseObj ce)
{
if (!ce.IsActive())
return;
/// this being a new task, we start a new set of statistics which will be aggregated later, in order
/// to avoid syncrhonization expenses
unif_stats u_stats = new unif_stats(ParseControl.Stats.Visibility.Private);
Tfs new_tfs = ce.Tfs;
int i_start = ce.TokenSpan.StartIndex;
int i_end = ce.TokenSpan.EndIndex;
bool f_entire_span = ce.TokenSpan.Equals(EntireSpan);
bool f_packing = ctrl.parser_config.packing != Config.Parser.PackingOpts.None;
/// Rule pre-check filter "A Bag of Useful Technique for Efficient and Robust Parsing" (Kiefer et al. 1999)
IEnumerable<GrammarRule> rules_to_use;
Rule new_daughter = ce.License as Rule;
if (new_daughter != null)
rules_to_use = new_daughter.CompatibleKeyMothers.OfType<GrammarRule>();
else
rules_to_use = ctrl.g._grammar_rules;
PassiveEdge.ParseObjs po = new PassiveEdge.ParseObjs(ce);
foreach (GrammarRule r in rules_to_use)
{
TfsSection[] rd = r.RuleDaughters;
TfsSection daughter;
int i_key = 0;
if (rd.Length == 1)
{
daughter = rd[0];
Debug.Assert(!r.IsSpanOnly);
}
else
{
i_key = r.KeyDaughterIndex;
// if the edge will not fit, don't bother creating the active edge.
if (i_start < i_key)
continue;
if (i_end > ColumnCount - (rd.Length - i_key))
continue;
/// We only have to try out one of the rule's ARGS positions and it doesn't matter which one it is,
/// as long as that choice is consistently used for that rule. This allows us to choose the KEY arg
/// without the risk of doing double work. Usage of this new edge in the other alignments may be
/// handled later, when/if the rule succeeds in a neighboring chart position.
if (i_key == 0)
{
/// Since active edges only grow in one direction, there's no point in creating a span-only
/// active edge which does not abut the beginning (or end) of the chart.
if (r.IsSpanOnly && i_start > 0)
continue;
daughter = rd[0];
}
else
{
if (r.IsSpanOnly && i_end < ColumnCount - 1)
continue;
daughter = rd[rd.Length - 1];
}
}
/// Quick-check:
/// Robert Malouf, John Carroll, Ann Copestake. 2000. Efficient feature structure operations without compliation.
/// Natural Language Engineering 6 (1): 29-46. Cambridge: Cambridge University Press
/// todo: track which qc rules failed for this rule daughter and propagate to the next rule..?
if (ctrl.QuickCheckParse(new_tfs, daughter))
continue;
MotherDaughterTfs mdtfs = ctrl.UnifySection(
daughter,
new_tfs,
rd.Length == 1,
f_packing,
ref u_stats);
if (mdtfs != null)
{
if (rd.Length > 2)
Nop.CodeCoverage();
if (rd.Length == 1)
FinishMatchedEdge(r, mdtfs, ref po, ref u_stats);
else if (i_key == 0)
Subscribe(RIGHT, new ActiveRightEdge(this, r, mdtfs, ref po), ref u_stats);
else
Subscribe(LEFT, new ActiveLeftEdge(this, r, mdtfs, ref po), ref u_stats);
}
}
ctrl.stats.Parsing.Unification.Add(u_stats);
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary>
/// When a rule has successfully completed building all of its structure
/// </summary>
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
public unsafe void FinishMatchedEdge(GrammarRule rule, Tfs tfs, ref PassiveEdge.ParseObjs po, ref unif_stats u_stats)
{
PassiveEdge.Derived nce = null;
TextWriter tw = ctrl.config.system.tw_debug;
/// try to match a start symbol
bool f_entire_span = po.Span.Equals(EntireSpan);
if (f_entire_span)
{
StartSymbol[] rgss = ctrl.g.StartSymbols;
bool* rgss_map = stackalloc bool[rgss.Length];
int c_match = 0;
int i_max_ix = -1;
for (int i_ss = 0; i_ss < rgss.Length; i_ss++)
{
StartSymbol start_sym = rgss[i_ss];
u_stats.c_attempted++;
if (ctrl.UnifyCheck(start_sym.Expanded, tfs))
{
u_stats.c_succeeded++;
if (tw != null)
tw.WriteLineColor("$darkgreen {0}$ matched start symbol $magenta {1}", nce, start_sym);
/// If not packing, stop checking start symbols after a first match.
#if !CHECK_ALL_SS
if (ctrl.parser_config.packing == Config.Parser.PackingOpts.None)
{
nce = new PassiveEdge.Completed(ctrl, rule, tfs, i_ss, ref po);
break;
}
#endif
/// If packing, check all start symbols and create a map of any matches.
rgss_map[i_ss] = true;
if (i_ss > i_max_ix)
i_max_ix = i_ss;
c_match++;
}
}
/// If packing, tag the spanning restricted edge as 'Completed' if it matched at least one
/// start symbol. Only such edges--and their packed contents--need to be examined in the
/// unpacking phase
if (c_match == 1)
nce = new PassiveEdge.Completed(ctrl, rule, tfs, i_max_ix, ref po);
else if (c_match > 1)
nce = new PassiveEdge.Completed(ctrl, rule, tfs, Ext.ToArray(rgss_map, i_max_ix + 1), ref po);
}
nce = nce ?? new PassiveEdge.Derived(ctrl, rule, tfs, ref po);
ctrl.StartAttachedChildTask(AddParseObj, nce);
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary>
/// Check for a subsumption relationship between a newly derived edge and any of the existing co-spanning edges.
/// Proactive packing occurs when the existing edge subsumes the new edge, or when they subsume each other (which
/// means they are identical). Retroactive packing occurs when the new edge subsumes the existing edge. There are
/// two other possibilities: that there is no overall subsumption relationship between otherwise compatible edges,
/// or that the two are incompatible (do not unify). Fortunately, no packing is indicated in either case, because
/// if we had to distinguish between these two cases, the subsumption test would be much less efficient (it would
/// have to resolve coreference chains).
/// </summary>
/// <reference>
/// Stephan Oepen, John Carroll. 2000. "Ambiguity Packing in Constraint-based Parsing--Practical Results."
/// In Proceedings of the 1st North American chapter of the Association for Computational Linguistics
/// conference. Morgan Kaufmann Publishers Inc., San Francisco, CA, USA, 162-169.
/// </reference>
/// <remarks>
/// derived parents of packed edges automatically become defunct (not 'active'). Since only packed
/// edges can be in the 'hold' state, any edge that has a daughter in the 'hold' state can be
/// ignored; this is achieved by incrementing a global generation count which
/// immediately invalidates all of these states, which are cached, causing any still 'active' chart edge
/// to re-evaluate this condition regarding itself, on demand, at such time as it's queried. Now since we
/// are in the parsing phase, the fact that we just put an object in the 'hold' state doesn't affect any
/// of these parent/daughter results ('hold' does not entail 'defunct') but what does make a difference
/// is the transfer of the packed edges from the old edge to the new, as the _daughters_ of the old edge
/// may now--or at such future time as they become defunct--cause the deactivation of their altered
/// derivation list.
/// </remarks>
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
protected override bool SubsumptionPacking(IParseObj po)
{
PassiveEdge new_edge = po as PassiveEdge;
if (new_edge == null)
return false;
int i_seq = new_edge.SequenceId;
Config.Parser pcfg = ctrl.parser_config;
sbyte opt = (sbyte)pcfg.packing;
bool f_exact = (opt & (sbyte)Config.Parser.PackingOpts.Only) > 0;
if (f_exact)
opt &= (sbyte)~Config.Parser.PackingOpts.Only;
bool f_entire_span = new_edge.IsEntireSpan;
List<PassiveEdge> deferred_retro = null;
for (var e = new ChartCell._enum(this[new_edge.TokenSpan]); e.MoveNext(); )
{
PassiveEdge existing = e.Current as PassiveEdge;
/// packing of morphology edges is currently not implemented
if (existing == null || existing == new_edge)
continue;
if ((existing.State & (SequenceControl.stb_Hold | SequenceControl.stb_Remove)) > 0)
continue;
/// Don't permit a spanning edge which didn't match a start symbol (i.e. PassiveEdge.Derived)
/// to be packed into one that did (i.e. PassiveEdge.Completed), nor vice-versa.
if (f_entire_span && new_edge.GetType() != existing.GetType())
continue;
if (!existing.SeqIdBelow(i_seq))
continue;
/// evaluate the subsumption relationship between two TFSen
sbyte b = new Subsumption(existing, new_edge).Check();
if ((f_exact && b != opt) || (b & opt) == 0)
continue;
/// proactive packing (the existing edge subsumes or is equivalent to the new edge)
if ((b & Subsumption.FirstSubsumesSecond) > 0)
{
/// ???? DoSkippedItems() ????
if (deferred_retro != null)
{
// Console.Write("m");
DoSkippedItems(new_edge, deferred_retro);
deferred_retro = null;
}
/// for proactive, this is likely the last thing to do, so we should wait for it
if (existing.BeginTransact())
{
/// put the new, unpublished edge on HOLD before being added to the packing list, so
/// that it can be never witnessed as active in any context
//Interlocked.Increment(ref g_dseq);
new_edge.SetStateRaw(SequenceControl.HOLD);
existing.HoistPackingFrom(new_edge);
existing.EndTransact(SequenceControl.ACTIVE);
ctrl.stats.Parsing.Packing.RecordEvent(b);
return true;
}
continue;
}
/// retroactive packing (the new edge subsumes the existing edge)
if (b == Subsumption.SecondSubsumesFirst)
{
int _tmp = existing.TryTransact();
if (_tmp == SequenceControl.ACTIVE)
{
/// our claim is secure; release lock on 'old' as quickly as possible
existing.SetStateRaw(SequenceControl.HOLD);
new_edge.HoistPackingFrom(existing);
/// see notes above
//Interlocked.Increment(ref g_dseq);
//existing.EndTransact(SequenceControl.HOLD);
ctrl.stats.Parsing.Packing.RecordEvent(b);
}
else if ((_tmp & SequenceControl.stb_Blocked) > 0)
{
/// if a retroactive edge is busy, we'll just make a note of it to try again later
(deferred_retro = deferred_retro ?? new List<PassiveEdge>(7)).Add(existing);
}
}
}
if (deferred_retro != null)
DoSkippedItems(new_edge, deferred_retro);
return false;
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary>
/// Try to complete the retroactive packing for edges that were blocked earlier
/// </summary>
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
void DoSkippedItems(PassiveEdge new_edge, List<PassiveEdge> skipped)
{
int c = skipped.Count;
int ix = -1;
do
{
PassiveEdge existing;
do
if (++ix == skipped.Count)
ix = 0;
while ((existing = skipped[ix]) == null);
one_left:
int _tmp = existing.TryTransact();
if (_tmp == SequenceControl.ACTIVE)
{
existing.SetStateRaw(SequenceControl.HOLD);
new_edge.HoistPackingFrom(existing);
ctrl.stats.Parsing.Packing.RecordEvent(Subsumption.SecondSubsumesFirst);
}
else if ((_tmp & SequenceControl.stb_Blocked) > 0)
{
if (c == 1)
{
new SpinWait().SpinOnce();
goto one_left;
}
continue;
}
skipped[ix] = null;
}
while (--c > 0);
#if false
while (skipped.Count > 0)
{
PassiveEdge existing = skipped[ix = (ix + 1) % skipped.Count];
int _tmp = existing.TryTransact();
if (_tmp == SequenceControl.ACTIVE)
{
new_edge.HoistPackingFrom(existing);
/// see notes above
//Interlocked.Increment(ref g_dseq);
existing.EndTransact(SequenceControl.HOLD);
ctrl.stats.Parsing.Packing.RecordEvent(Subsumption.SecondSubsumesFirst);
}
else if ((_tmp & SequenceControl.stb_Blocked) > 0)
{
if (skipped.Count == 1)
new SpinWait().SpinOnce();
continue;
}
skipped.RemoveAt(ix);
}
#endif
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary>
/// Exhaustive unpacking of all completed parses in the chart
/// </summary>
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
public ICollection<IDerivation> AllDerivations
{
get
{
ParseControl.Stats stats = ctrl.stats;
if (ctrl.parser_config.packing == Config.Parser.PackingOpts.None)
{
Debug.Assert(stats.Parsing.Chart.c_root_edges == _completed_edges.Count);
return new DeferredCollection<IDerivation>(
stats.Parsing.Chart.c_root_edges,
_completed_edges.Select(po => po.Derivations[0]));
}
else
{
ConcurrentList<IDerivation> ild = new ConcurrentList<IDerivation>();
foreach (var po in _completed_edges)
{
Debug.Assert(po.IsRestricted);
if (!po.IsActive())
continue;
if (ctrl.TryEnterMultithread())
{
Parallel.ForEach(po.Derivations, drv =>
{
if (drv.UnpackedTfs != null)
ild.Add(drv);
else
Interlocked.Increment(ref stats.Parsing.Unpacking.c_rejected_derivations);
});
ctrl.LeaveMultithread();
}
else
foreach (var drv in po.Derivations)
{
if (drv.UnpackedTfs != null)
ild.Add(drv);
else
stats.Parsing.Unpacking.c_rejected_derivations++;
}
}
stats.Parsing.Unpacking.c_derivations = ild.Count;
return ild;
}
}
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary>
/// Exhaustive unpacking of all completed parses in the chart
/// </summary>
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
public int DerivationCountOnly
{
get
{
ParseControl.Stats stats = ctrl.stats;
if (ctrl.parser_config.packing == Config.Parser.PackingOpts.None)
{
Debug.Assert(stats.Parsing.Chart.c_root_edges == _completed_edges.Count);
return stats.Parsing.Chart.c_root_edges;
}
else
{
foreach (var po in _completed_edges)
{
Debug.Assert(po.IsRestricted);
if (!po.IsActive())
continue;
if (ctrl.TryEnterMultithread())
{
Parallel.ForEach(po.Derivations, drv =>
{
if (drv.UnpackedTfs == null)
Interlocked.Increment(ref stats.Parsing.Unpacking.c_rejected_derivations);
else
Interlocked.Increment(ref stats.Parsing.Unpacking.c_derivations);
});
ctrl.LeaveMultithread();
}
else
foreach (var drv in po.Derivations)
{
if (drv.UnpackedTfs == null)
stats.Parsing.Unpacking.c_rejected_derivations++;
else
stats.Parsing.Unpacking.c_derivations++;
}
}
return stats.Parsing.Unpacking.c_derivations;
}
}
}
};
}