123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401 |
- // Copyright (c) Microsoft. All rights reserved.
- // Licensed under the MIT license. See LICENSE file in the project root for full license information.
- using System;
- using System.Collections.Generic;
- using System.Linq;
- using System.Text;
- using System.Xml.Linq;
- using DocumentFormat.OpenXml.Packaging;
- namespace OpenXmlPowerTools
- {
- public static partial class WmlComparer
- {
- #region CreateComparisonUnitAtomList
/// <summary>
/// Prepares <paramref name="contentParent"/> and flattens it into an array of comparison unit atoms.
/// </summary>
/// <param name="part">The part that owns the content; handed on to the atom-building code.</param>
/// <param name="contentParent">Root element whose content is validated, adjusted in place and then flattened.</param>
/// <param name="settings">Comparer settings; handed on to the atom-building code.</param>
/// <returns>The atoms produced by <c>CreateComparisonUnitAtomListInternal</c>, as an array.</returns>
/// <exception cref="NotSupportedException">Raised by <c>VerifyNoInvalidContent</c> when an invalid element is present.</exception>
internal static ComparisonUnitAtom[] CreateComparisonUnitAtomList(
    OpenXmlPart part,
    XElement contentParent,
    WmlComparerSettings settings)
{
    // Preparation steps; the order is kept exactly as in the original implementation.
    VerifyNoInvalidContent(contentParent);
    AssignUnidToAllElements(contentParent); // add the Guid id to every element
    MoveLastSectPrIntoLastParagraph(contentParent);

    List<ComparisonUnitAtom> atoms = CreateComparisonUnitAtomListInternal(part, contentParent, settings);
    ComparisonUnitAtom[] atomArray = atoms.ToArray();

    // Diagnostic dump, guarded by the class-level False flag.
    if (False)
    {
        var dump = new StringBuilder();
        foreach (ComparisonUnitAtom atom in atomArray)
        {
            dump.Append(atom + Environment.NewLine);
        }

        string dumpText = dump.ToString();
        TestUtil.NotePad(dumpText);
    }

    return atomArray;
}
/// <summary>
/// Rejects content that contains any element whose name is listed in <c>InvalidElements</c>.
/// </summary>
/// <param name="contentParent">Element whose descendants are inspected (the element itself is not).</param>
/// <exception cref="NotSupportedException">
/// Thrown for the first offending descendant in document order; the message carries its local name.
/// </exception>
private static void VerifyNoInvalidContent(XElement contentParent)
{
    foreach (XElement descendant in contentParent.Descendants())
    {
        if (InvalidElements.Contains(descendant.Name))
        {
            throw new NotSupportedException("Document contains " + descendant.Name.LocalName);
        }
    }
}
/// <summary>
/// Moves the single <c>w:sectPr</c> child of <paramref name="contentParent"/> (if there is one)
/// into the <c>w:pPr</c> of the last <c>w:p</c> child, creating that <c>w:pPr</c> when it is missing.
/// </summary>
/// <param name="contentParent">Element whose direct children are inspected and modified in place.</param>
/// <exception cref="OpenXmlPowerToolsException">
/// Thrown when there is more than one direct <c>w:sectPr</c> child, or when a <c>w:sectPr</c>
/// exists but there is no direct <c>w:p</c> child to receive it.
/// </exception>
private static void MoveLastSectPrIntoLastParagraph(XElement contentParent)
{
    List<XElement> lastSectPrList = contentParent.Elements(W.sectPr).ToList();
    if (lastSectPrList.Count > 1)
    {
        throw new OpenXmlPowerToolsException("Invalid document");
    }

    XElement lastSectPr = lastSectPrList.FirstOrDefault();
    if (lastSectPr == null)
    {
        // Nothing to move.
        return;
    }

    XElement lastParagraph = contentParent.Elements(W.p).LastOrDefault();
    if (lastParagraph == null)
    {
        throw new OpenXmlPowerToolsException("Invalid document");
    }

    XElement pPr = lastParagraph.Element(W.pPr);
    if (pPr == null)
    {
        pPr = new XElement(W.pPr);

        // Attach the new element itself. The previous code passed the XName (W.pPr), which
        // LINQ to XML adds as text content, leaving pPr detached so the sectPr was lost.
        lastParagraph.AddFirst(pPr);
    }

    // lastSectPr is still parented at this point, so Add stores a copy of it; the original
    // is then removed from contentParent.
    pPr.Add(lastSectPr);
    contentParent.Elements(W.sectPr).Remove();
}
/// <summary>
/// Allocates a fresh atom list and fills it by walking <paramref name="contentParent"/>
/// with <c>CreateComparisonUnitAtomListRecurse</c>.
/// </summary>
/// <param name="part">Passed through to the recursive walk.</param>
/// <param name="contentParent">Element at which the recursive walk starts.</param>
/// <param name="settings">Passed through to the recursive walk.</param>
/// <returns>The list populated by the walk.</returns>
private static List<ComparisonUnitAtom> CreateComparisonUnitAtomListInternal(
    OpenXmlPart part,
    XElement contentParent,
    WmlComparerSettings settings)
{
    var atoms = new List<ComparisonUnitAtom>();
    CreateComparisonUnitAtomListRecurse(part, contentParent, atoms, settings);
    return atoms;
}
/// <summary>
/// Walks <paramref name="element"/> and appends the atoms it yields to
/// <paramref name="comparisonUnitAtomList"/>, dispatching on the element name.
/// </summary>
/// <remarks>
/// Dispatch order (first match wins):
/// body / footnote / endnote: recurse into every child;
/// p: recurse into every child except pPr, then append one atom for the pPr (an empty pPr if absent);
/// r: recurse into every child except rPr;
/// t / delText: append one atom per character of the element's value;
/// names in <c>AllowableRunChildren</c> or <c>W._object</c>: append the element as a single atom;
/// names with an entry in <c>RecursionElements</c>: recurse, skipping that entry's property children;
/// names in <c>ElementsToThrowAway</c>: ignored;
/// anything else: recurse into every child.
/// </remarks>
/// <param name="part">Passed to every <c>ComparisonUnitAtom</c> that is created.</param>
/// <param name="element">Element currently being visited.</param>
/// <param name="comparisonUnitAtomList">Accumulator that receives the atoms in visit order.</param>
/// <param name="settings">Passed to every <c>ComparisonUnitAtom</c> that is created.</param>
private static void CreateComparisonUnitAtomListRecurse(
    OpenXmlPart part,
    XElement element,
    List<ComparisonUnitAtom> comparisonUnitAtomList,
    WmlComparerSettings settings)
{
    // Builds the ancestor chain for an atom: the element and its ancestors up to, but not
    // including, the first body / footnotes / endnotes element, ordered outermost first.
    // Invoked once per atom so that every atom receives its own array instance.
    Func<XElement[]> buildAncestorChain = () => element
        .AncestorsAndSelf()
        .TakeWhile(a => a.Name != W.body && a.Name != W.footnotes && a.Name != W.endnotes)
        .Reverse()
        .ToArray();

    XName name = element.Name;

    if (name == W.body || name == W.footnote || name == W.endnote)
    {
        foreach (XElement child in element.Elements())
        {
            CreateComparisonUnitAtomListRecurse(part, child, comparisonUnitAtomList, settings);
        }
    }
    else if (name == W.p)
    {
        // Paragraph content first; the paragraph properties atom closes the paragraph.
        foreach (XElement child in element.Elements().Where(e => e.Name != W.pPr))
        {
            CreateComparisonUnitAtomListRecurse(part, child, comparisonUnitAtomList, settings);
        }

        XElement paragraphProperties = element.Element(W.pPr) ?? new XElement(W.pPr);
        var paragraphAtom = new ComparisonUnitAtom(
            paragraphProperties,
            buildAncestorChain(),
            part,
            settings);
        comparisonUnitAtomList.Add(paragraphAtom);
    }
    else if (name == W.r)
    {
        foreach (XElement child in element.Elements().Where(e => e.Name != W.rPr))
        {
            CreateComparisonUnitAtomListRecurse(part, child, comparisonUnitAtomList, settings);
        }
    }
    else if (name == W.t || name == W.delText)
    {
        // One atom per character, each wrapped in a new element with the same name.
        string text = element.Value;
        foreach (char ch in text)
        {
            var characterAtom = new ComparisonUnitAtom(
                new XElement(element.Name, ch),
                buildAncestorChain(),
                part,
                settings);
            comparisonUnitAtomList.Add(characterAtom);
        }
    }
    else if (AllowableRunChildren.Contains(name) || name == W._object)
    {
        var elementAtom = new ComparisonUnitAtom(
            element,
            buildAncestorChain(),
            part,
            settings);
        comparisonUnitAtomList.Add(elementAtom);
    }
    else
    {
        RecursionInfo recursionInfo = RecursionElements.FirstOrDefault(z => z.ElementName == element.Name);
        if (recursionInfo != null)
        {
            AnnotateElementWithProps(
                part,
                element,
                comparisonUnitAtomList,
                recursionInfo.ChildElementPropertyNames,
                settings);
        }
        else if (!ElementsToThrowAway.Contains(name))
        {
            AnnotateElementWithProps(part, element, comparisonUnitAtomList, null, settings);
        }
    }
}
/// <summary>
/// Recurses into the children of <paramref name="element"/>, skipping any child whose name
/// appears in <paramref name="childElementPropertyNames"/>.
/// </summary>
/// <param name="part">Passed through to the recursive walk.</param>
/// <param name="element">Element whose children are visited.</param>
/// <param name="comparisonUnitAtomList">Accumulator passed through to the recursive walk.</param>
/// <param name="childElementPropertyNames">
/// Names of children to skip; <c>null</c> means every child is visited.
/// </param>
/// <param name="settings">Passed through to the recursive walk.</param>
private static void AnnotateElementWithProps(
    OpenXmlPart part,
    XElement element,
    List<ComparisonUnitAtom> comparisonUnitAtomList,
    XName[] childElementPropertyNames,
    WmlComparerSettings settings)
{
    IEnumerable<XElement> childrenToVisit = childElementPropertyNames == null
        ? element.Elements()
        : element.Elements().Where(e => !childElementPropertyNames.Contains(e.Name));

    foreach (XElement child in childrenToVisit)
    {
        CreateComparisonUnitAtomListRecurse(part, child, comparisonUnitAtomList, settings);
    }
}
- #endregion CreateComparisonUnitAtomList
- #region GetComparisonUnitList
// The following method must be made internal if we ever turn this part of the partial class
// into its own class.

/// <summary>
/// Groups a flat array of atoms into words and then into a hierarchy of comparison units.
/// </summary>
/// <remarks>
/// Step 1 assigns every atom an integer key so that adjacent atoms with the same key form one word.
/// A <c>w:t</c> atom holding a character from <c>settings.WordSeparators</c>, or an atom whose
/// element name is in <c>WordBreakElements</c>, gets a key of its own. A '.' or ',' gets a key of
/// its own unless the atom immediately before or after it is a <c>w:t</c> atom starting with a digit.
/// Step 2 groups adjacent atoms by key. Step 3 builds a <c>ComparisonUnitWord</c> per group and
/// records, from the group's first atom, the ancestors listed in <c>ComparisonGroupingElements</c>
/// as "localName:unid" strings. Step 4 hands those to <c>GetHierarchicalComparisonUnits</c>.
/// </remarks>
/// <param name="comparisonUnitAtomList">Atoms in document order.</param>
/// <param name="settings">Supplies <c>WordSeparators</c>.</param>
/// <returns>The comparison units produced by <c>GetHierarchicalComparisonUnits</c> at level 0.</returns>
private static ComparisonUnit[] GetComparisonUnitList(
    ComparisonUnitAtom[] comparisonUnitAtomList,
    WmlComparerSettings settings)
{
    // True when the atom at the given index is a w:t atom whose first character is a digit.
    Func<int, bool> isDigitTextAtom = index =>
    {
        ComparisonUnitAtom candidate = comparisonUnitAtomList[index];
        return candidate.ContentElement.Name == W.t && char.IsDigit(candidate.ContentElement.Value[0]);
    };

    var initial = new Atgbw
    {
        Key = null,
        ComparisonUnitAtomMember = null,
        NextIndex = 0
    };

    IEnumerable<Atgbw> keyedAtoms = comparisonUnitAtomList
        .Rollup(initial, (atom, previous, position) =>
        {
            int counter = previous.NextIndex;

            // Decide whether this atom must form a group of its own.
            bool standsAlone;
            if (atom.ContentElement.Name == W.t)
            {
                string text = atom.ContentElement.Value;
                char first = text[0];
                if (first == '.' || first == ',')
                {
                    // Both neighbours are always inspected, as in the original implementation.
                    bool digitBefore = position > 0 && isDigitTextAtom(position - 1);
                    bool digitAfter = position < comparisonUnitAtomList.Length - 1 && isDigitTextAtom(position + 1);
                    standsAlone = !(digitBefore || digitAfter);
                }
                else
                {
                    standsAlone = settings.WordSeparators.Contains(first);
                }
            }
            else
            {
                standsAlone = WordBreakElements.Contains(atom.ContentElement.Name);
            }

            int? key;
            if (standsAlone)
            {
                // Bump before and after so neither neighbour can share this key.
                counter++;
                key = counter;
                counter++;
            }
            else
            {
                key = counter;
            }

            return new Atgbw
            {
                Key = key,
                ComparisonUnitAtomMember = atom,
                NextIndex = counter
            };
        })
        .ToArray();

    if (False)
    {
        var dump = new StringBuilder();
        foreach (Atgbw entry in keyedAtoms)
        {
            dump.Append(entry.Key + Environment.NewLine);
            dump.Append(" " + entry.ComparisonUnitAtomMember.ToString(0) + Environment.NewLine);
        }

        string dumpText = dump.ToString();
        TestUtil.NotePad(dumpText);
    }

    IEnumerable<IGrouping<int?, Atgbw>> wordRuns = keyedAtoms
        .GroupAdjacent(entry => entry.Key)
        .ToArray();

    if (False)
    {
        var dump = new StringBuilder();
        foreach (IGrouping<int?, Atgbw> wordRun in wordRuns)
        {
            dump.Append("Group ===== " + wordRun.Key + Environment.NewLine);
            foreach (Atgbw entry in wordRun)
            {
                dump.Append(" " + entry.ComparisonUnitAtomMember.ToString(0) + Environment.NewLine);
            }
        }

        string dumpText = dump.ToString();
        TestUtil.NotePad(dumpText);
    }

    WithHierarchicalGroupingKey[] keyedWords = wordRuns
        .Select(wordRun =>
        {
            // The grouping path is taken from the first atom of the word.
            ComparisonUnitAtom leadAtom = wordRun.First().ComparisonUnitAtomMember;
            string[] groupingPath = leadAtom
                .AncestorElements
                .Where(ancestor => ComparisonGroupingElements.Contains(ancestor.Name))
                .Select(ancestor => ancestor.Name.LocalName + ":" + (string) ancestor.Attribute(PtOpenXml.Unid))
                .ToArray();

            return new WithHierarchicalGroupingKey
            {
                ComparisonUnitWord = new ComparisonUnitWord(wordRun.Select(entry => entry.ComparisonUnitAtomMember)),
                HierarchicalGroupingArray = groupingPath
            };
        })
        .ToArray();

    if (False)
    {
        var dump = new StringBuilder();
        foreach (WithHierarchicalGroupingKey keyedWord in keyedWords)
        {
            dump.Append("Grouping Array: " +
                        keyedWord.HierarchicalGroupingArray.Select(gam => gam + " - ").StringConcatenate() +
                        Environment.NewLine);
            foreach (ComparisonUnit unit in keyedWord.ComparisonUnitWord.Contents)
            {
                dump.Append(" " + unit.ToString(0) + Environment.NewLine);
            }
        }

        string dumpText = dump.ToString();
        TestUtil.NotePad(dumpText);
    }

    ComparisonUnit[] comparisonUnits = GetHierarchicalComparisonUnits(keyedWords, 0).ToArray();

    if (False)
    {
        string listing = ComparisonUnit.ComparisonUnitListToString(comparisonUnits);
        TestUtil.NotePad(listing);
    }

    return comparisonUnits;
}
/// <summary>
/// Turns words tagged with a grouping path into nested comparison units for one level of the path.
/// </summary>
/// <remarks>
/// Adjacent items are grouped by the path entry at <paramref name="level"/>; items whose path is
/// shorter than that use the empty string as key. An empty-key run contributes its words
/// directly. Any other run becomes one <c>ComparisonUnitGroup</c> whose type is derived from the
/// text before the ':' in the key and whose contents come from recursing one level deeper.
/// </remarks>
/// <param name="input">Words with their grouping paths, in document order.</param>
/// <param name="level">Index into each item's <c>HierarchicalGroupingArray</c>.</param>
/// <returns>The comparison units for this level, in input order.</returns>
private static IEnumerable<ComparisonUnit> GetHierarchicalComparisonUnits(
    IEnumerable<WithHierarchicalGroupingKey> input,
    int level)
{
    IEnumerable<IGrouping<string, WithHierarchicalGroupingKey>> runs = input
        .GroupAdjacent(
            item => level >= item.HierarchicalGroupingArray.Length ? "" : item.HierarchicalGroupingArray[level]);

    var units = new List<ComparisonUnit>();
    foreach (IGrouping<string, WithHierarchicalGroupingKey> run in runs)
    {
        if (run.Key == "")
        {
            // No grouping element at this depth: the words are emitted as they are.
            foreach (WithHierarchicalGroupingKey item in run)
            {
                units.Add(item.ComparisonUnitWord);
            }

            continue;
        }

        string[] keyParts = run.Key.Split(':');
        ComparisonUnitGroupType groupType = WmlComparerUtil.ComparisonUnitGroupTypeFromLocalName(keyParts[0]);
        IEnumerable<ComparisonUnit> children = GetHierarchicalComparisonUnits(run, level + 1);
        units.Add(new ComparisonUnitGroup(children, groupType, level));
    }

    return units;
}
- #endregion GetComparisonUnitList
- }
- }
|