WmlComparer.Internal.Methods.ComparisonUnits.cs 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403
  1. 
  2. // Copyright (c) Microsoft. All rights reserved.
  3. // Licensed under the MIT license. See LICENSE file in the project root for full license information.
  4. using System;
  5. using System.Collections.Generic;
  6. using System.Linq;
  7. using System.Text;
  8. using System.Xml.Linq;
  9. using DocumentFormat.OpenXml.Packaging;
  10. using OpenXmlPowerTools.Previous;
  11. namespace OpenXmlPowerTools
  12. {
  13. public static partial class WmlComparer
  14. {
  15. #region CreateComparisonUnitAtomList
  /// <summary>
  /// Produces the flat array of <see cref="ComparisonUnitAtom"/> instances for the content
  /// beneath <paramref name="contentParent"/>.
  /// </summary>
  /// <remarks>
  /// Before the atoms are collected, <paramref name="contentParent"/> is passed, in this order, to
  /// <c>VerifyNoInvalidContent</c>, <c>AssignUnidToAllElements</c> and
  /// <c>MoveLastSectPrIntoLastParagraph</c>; the latter two modify the tree in place.
  /// </remarks>
  /// <param name="part">The part that is handed to every atom that gets created.</param>
  /// <param name="contentParent">Root element whose content is turned into atoms.</param>
  /// <param name="settings">Comparer settings that are handed to every atom that gets created.</param>
  /// <returns>The atoms in the order in which the recursive walk produced them.</returns>
  /// <exception cref="NotSupportedException">Thrown by <c>VerifyNoInvalidContent</c>.</exception>
  internal static ComparisonUnitAtom[] CreateComparisonUnitAtomList(
      OpenXmlPart part,
      XElement contentParent,
      WmlComparerSettings settings)
  {
      VerifyNoInvalidContent(contentParent);
      AssignUnidToAllElements(contentParent); // add the Guid id to every element
      MoveLastSectPrIntoLastParagraph(contentParent);

      List<ComparisonUnitAtom> collectedAtoms = CreateComparisonUnitAtomListInternal(part, contentParent, settings);
      ComparisonUnitAtom[] atomArray = collectedAtoms.ToArray();

      // Diagnostic dump; only runs when the class-level False switch is true.
      if (False)
      {
          var dump = new StringBuilder();
          foreach (ComparisonUnitAtom atom in atomArray)
          {
              dump.Append(atom + Environment.NewLine);
          }

          string dumpText = dump.ToString();
          TestUtil.NotePad(dumpText);
      }

      return atomArray;
  }
  /// <summary>
  /// Rejects content that contains any element whose name is listed in <c>InvalidElements</c>.
  /// </summary>
  /// <param name="contentParent">Root element whose descendants are inspected.</param>
  /// <exception cref="NotSupportedException">
  /// A descendant with a name in <c>InvalidElements</c> exists; the message names the first one
  /// found in document order.
  /// </exception>
  private static void VerifyNoInvalidContent(XElement contentParent)
  {
      XElement offender = contentParent
          .Descendants()
          .FirstOrDefault(d => InvalidElements.Contains(d.Name));

      if (offender != null)
      {
          throw new NotSupportedException("Document contains " + offender.Name.LocalName);
      }
  }
  /// <summary>
  /// Moves a <c>w:sectPr</c> that is a direct child of <paramref name="contentParent"/> into the
  /// <c>w:pPr</c> of the last <c>w:p</c> child of <paramref name="contentParent"/>.
  /// </summary>
  /// <remarks>
  /// Does nothing when <paramref name="contentParent"/> has no direct <c>w:sectPr</c> child.
  /// When the last paragraph has no <c>w:pPr</c>, one is created and inserted as the paragraph's
  /// first child so that the section properties end up inside the document tree.
  /// </remarks>
  /// <param name="contentParent">Element whose direct children are inspected and modified in place.</param>
  /// <exception cref="OpenXmlPowerToolsException">
  /// More than one direct <c>w:sectPr</c> child exists, or a <c>w:sectPr</c> exists but there is
  /// no direct <c>w:p</c> child to receive it.
  /// </exception>
  private static void MoveLastSectPrIntoLastParagraph(XElement contentParent)
  {
      List<XElement> lastSectPrList = contentParent.Elements(W.sectPr).ToList();
      if (lastSectPrList.Count > 1)
      {
          throw new OpenXmlPowerToolsException("Invalid document");
      }

      XElement lastSectPr = lastSectPrList.FirstOrDefault();
      if (lastSectPr == null)
      {
          return;
      }

      XElement lastParagraph = contentParent.Elements(W.p).LastOrDefault();
      if (lastParagraph == null)
      {
          throw new OpenXmlPowerToolsException("Invalid document");
      }

      XElement pPr = lastParagraph.Element(W.pPr);
      if (pPr == null)
      {
          pPr = new XElement(W.pPr);

          // Attach the element itself. Passing the XName (W.pPr) here would add it as text
          // content and leave the new pPr detached, losing the section properties.
          lastParagraph.AddFirst(pPr);
      }

      // lastSectPr still has a parent at this point, so Add stores a copy inside pPr;
      // the original body-level element is removed afterwards.
      pPr.Add(lastSectPr);
      contentParent.Elements(W.sectPr).Remove();
  }
  /// <summary>
  /// Allocates an empty atom list, lets <c>CreateComparisonUnitAtomListRecurse</c> fill it
  /// starting at <paramref name="contentParent"/>, and returns it.
  /// </summary>
  /// <param name="part">Passed through unchanged to the recursive walk.</param>
  /// <param name="contentParent">Element at which the recursive walk starts.</param>
  /// <param name="settings">Passed through unchanged to the recursive walk.</param>
  /// <returns>The list populated by the recursive walk.</returns>
  private static List<ComparisonUnitAtom> CreateComparisonUnitAtomListInternal(
      OpenXmlPart part,
      XElement contentParent,
      WmlComparerSettings settings)
  {
      var atoms = new List<ComparisonUnitAtom>();

      CreateComparisonUnitAtomListRecurse(part, contentParent, atoms, settings);

      return atoms;
  }
  /// <summary>
  /// Walks <paramref name="element"/> and appends the atoms it yields to
  /// <paramref name="comparisonUnitAtomList"/>.
  /// </summary>
  /// <remarks>
  /// Handling by element name, checked in this order:
  /// <list type="bullet">
  /// <item><c>w:body</c>, <c>w:footnote</c>, <c>w:endnote</c>: recurse into every child.</item>
  /// <item><c>w:p</c>: recurse into every child except <c>w:pPr</c>, then append one atom for the
  /// paragraph's <c>w:pPr</c> (a new empty <c>w:pPr</c> when the paragraph has none).</item>
  /// <item><c>w:r</c>: recurse into every child except <c>w:rPr</c>.</item>
  /// <item><c>w:t</c>, <c>w:delText</c>: append one atom per <c>char</c> of the element's value.</item>
  /// <item>Names in <c>AllowableRunChildren</c>, or <c>w:object</c>: append one atom for the element.</item>
  /// <item>Names matched by an entry of <c>RecursionElements</c>: recurse into the children that are
  /// not listed in that entry's <c>ChildElementPropertyNames</c>.</item>
  /// <item>Names in <c>ElementsToThrowAway</c>: ignored.</item>
  /// <item>Anything else: recurse into every child.</item>
  /// </list>
  /// </remarks>
  /// <param name="part">Handed to every atom that is created.</param>
  /// <param name="element">Element currently being visited.</param>
  /// <param name="comparisonUnitAtomList">Output list; atoms are appended in visiting order.</param>
  /// <param name="settings">Handed to every atom that is created.</param>
  private static void CreateComparisonUnitAtomListRecurse(
      OpenXmlPart part,
      XElement element,
      List<ComparisonUnitAtom> comparisonUnitAtomList,
      WmlComparerSettings settings)
  {
      XName name = element.Name;

      // Builds the ancestor chain handed to each atom: the element and its ancestors up to, but
      // excluding, the first w:body / w:footnotes / w:endnotes, ordered outermost first.
      // A fresh array is produced on every call, so atoms never share an array instance.
      Func<XElement[]> buildAncestorChain = () => element
          .AncestorsAndSelf()
          .TakeWhile(a => a.Name != W.body && a.Name != W.footnotes && a.Name != W.endnotes)
          .Reverse()
          .ToArray();

      if (name == W.body || name == W.footnote || name == W.endnote)
      {
          foreach (XElement child in element.Elements())
          {
              CreateComparisonUnitAtomListRecurse(part, child, comparisonUnitAtomList, settings);
          }

          return;
      }

      if (name == W.p)
      {
          foreach (XElement child in element.Elements().Where(e => e.Name != W.pPr))
          {
              CreateComparisonUnitAtomListRecurse(part, child, comparisonUnitAtomList, settings);
          }

          // The paragraph-properties atom is appended after the paragraph's content atoms.
          XElement paragraphProperties = element.Element(W.pPr) ?? new XElement(W.pPr);
          comparisonUnitAtomList.Add(
              new ComparisonUnitAtom(paragraphProperties, buildAncestorChain(), part, settings));
          return;
      }

      if (name == W.r)
      {
          foreach (XElement child in element.Elements().Where(e => e.Name != W.rPr))
          {
              CreateComparisonUnitAtomListRecurse(part, child, comparisonUnitAtomList, settings);
          }

          return;
      }

      if (name == W.t || name == W.delText)
      {
          string text = element.Value;
          foreach (char ch in text)
          {
              // Each character becomes its own single-character element of the same name.
              var characterElement = new XElement(name, ch);
              comparisonUnitAtomList.Add(
                  new ComparisonUnitAtom(characterElement, buildAncestorChain(), part, settings));
          }

          return;
      }

      if (AllowableRunChildren.Contains(name) || name == W._object)
      {
          comparisonUnitAtomList.Add(
              new ComparisonUnitAtom(element, buildAncestorChain(), part, settings));
          return;
      }

      RecursionInfo recursionInfo = RecursionElements.FirstOrDefault(z => z.ElementName == name);
      if (recursionInfo != null)
      {
          AnnotateElementWithProps(
              part,
              element,
              comparisonUnitAtomList,
              recursionInfo.ChildElementPropertyNames,
              settings);
          return;
      }

      if (!ElementsToThrowAway.Contains(name))
      {
          AnnotateElementWithProps(part, element, comparisonUnitAtomList, null, settings);
      }
  }
  /// <summary>
  /// Recurses into the children of <paramref name="element"/>, skipping any child whose name
  /// appears in <paramref name="childElementPropertyNames"/>.
  /// </summary>
  /// <param name="part">Passed through unchanged to the recursive walk.</param>
  /// <param name="element">Element whose children are visited.</param>
  /// <param name="comparisonUnitAtomList">Output list that the recursive walk appends to.</param>
  /// <param name="childElementPropertyNames">
  /// Names of child elements to skip, or <c>null</c> to visit every child.
  /// </param>
  /// <param name="settings">Passed through unchanged to the recursive walk.</param>
  private static void AnnotateElementWithProps(
      OpenXmlPart part,
      XElement element,
      List<ComparisonUnitAtom> comparisonUnitAtomList,
      XName[] childElementPropertyNames,
      WmlComparerSettings settings)
  {
      IEnumerable<XElement> childrenToVisit = element.Elements();

      if (childElementPropertyNames != null)
      {
          childrenToVisit = childrenToVisit.Where(e => !childElementPropertyNames.Contains(e.Name));
      }

      foreach (XElement child in childrenToVisit)
      {
          CreateComparisonUnitAtomListRecurse(part, child, comparisonUnitAtomList, settings);
      }
  }
  179. #endregion CreateComparisonUnitAtomList
  180. #region GetComparisonUnitList
  // The following method must be made internal if we ever turn this part of the partial class
  // into its own class.

  /// <summary>
  /// Groups the given atoms into words and then into hierarchical comparison units.
  /// </summary>
  /// <remarks>
  /// Steps:
  /// <list type="number">
  /// <item>Every atom receives an integer key; adjacent atoms with equal keys form one word.</item>
  /// <item>Each word is paired with the "localName:Unid" strings of those ancestors of its first
  /// atom whose names are in <c>ComparisonGroupingElements</c>.</item>
  /// <item><c>GetHierarchicalComparisonUnits</c> turns that flat sequence into the final units.</item>
  /// </list>
  /// The <c>if (False)</c> sections are diagnostic dumps that only run when the class-level
  /// <c>False</c> switch is true.
  /// </remarks>
  /// <param name="comparisonUnitAtomList">Atoms in document order.</param>
  /// <param name="settings">Supplies <c>WordSeparators</c>, the characters that form a word of their own.</param>
  /// <returns>The top-level comparison units.</returns>
  private static ComparisonUnit[] GetComparisonUnitList(
      ComparisonUnitAtom[] comparisonUnitAtomList,
      WmlComparerSettings settings)
  {
      var seed = new Atgbw
      {
          Key = null,
          ComparisonUnitAtomMember = null,
          NextIndex = 0
      };

      // True when the atom is a w:t atom whose first character is a digit.
      Func<ComparisonUnitAtom, bool> isDigitText = candidate =>
          candidate.ContentElement.Name == W.t && char.IsDigit(candidate.ContentElement.Value[0]);

      IEnumerable<Atgbw> groupingKey = comparisonUnitAtomList
          .Rollup(seed, (atom, previous, index) =>
          {
              // Decide whether this atom must form a group of its own.
              bool standsAlone;
              if (atom.ContentElement.Name == W.t)
              {
                  string text = atom.ContentElement.Value;
                  char ch = text[0];
                  if (ch == '.' || ch == ',')
                  {
                      // A period or comma next to a digit stays with its neighbors
                      // (presumably so that numbers are not split); otherwise it stands alone.
                      bool digitBefore = index > 0 && isDigitText(comparisonUnitAtomList[index - 1]);
                      bool digitAfter = index < comparisonUnitAtomList.Length - 1
                          && isDigitText(comparisonUnitAtomList[index + 1]);
                      standsAlone = !(digitBefore || digitAfter);
                  }
                  else
                  {
                      standsAlone = settings.WordSeparators.Contains(ch);
                  }
              }
              else
              {
                  standsAlone = WordBreakElements.Contains(atom.ContentElement.Name);
              }

              int nextIndex = previous.NextIndex;
              int? key;
              if (standsAlone)
              {
                  // Use the next index as this atom's key and step past it, so that neither the
                  // preceding nor the following atom can share the key.
                  key = nextIndex + 1;
                  nextIndex += 2;
              }
              else
              {
                  key = nextIndex;
              }

              return new Atgbw
              {
                  Key = key,
                  ComparisonUnitAtomMember = atom,
                  NextIndex = nextIndex
              };
          })
          .ToArray();

      if (False)
      {
          var dump = new StringBuilder();
          foreach (Atgbw keyed in groupingKey)
          {
              dump.Append(keyed.Key + Environment.NewLine);
              dump.Append(" " + keyed.ComparisonUnitAtomMember.ToString(0) + Environment.NewLine);
          }

          string dumpText = dump.ToString();
          TestUtil.NotePad(dumpText);
      }

      IEnumerable<IGrouping<int?, Atgbw>> groupedByWords = groupingKey
          .GroupAdjacent(keyed => keyed.Key)
          .ToArray();

      if (False)
      {
          var dump = new StringBuilder();
          foreach (IGrouping<int?, Atgbw> wordGroup in groupedByWords)
          {
              dump.Append("Group ===== " + wordGroup.Key + Environment.NewLine);
              foreach (Atgbw keyed in wordGroup)
              {
                  dump.Append(" " + keyed.ComparisonUnitAtomMember.ToString(0) + Environment.NewLine);
              }
          }

          string dumpText = dump.ToString();
          TestUtil.NotePad(dumpText);
      }

      WithHierarchicalGroupingKey[] withHierarchicalGroupingKey = groupedByWords
          .Select(wordGroup =>
          {
              // The hierarchy of a word is taken from the ancestors of its first atom.
              ComparisonUnitAtom firstAtom = wordGroup.First().ComparisonUnitAtomMember;
              string[] hierarchy = firstAtom
                  .AncestorElements
                  .Where(a => ComparisonGroupingElements.Contains(a.Name))
                  .Select(a => a.Name.LocalName + ":" + (string) a.Attribute(PtOpenXml.Unid))
                  .ToArray();

              var word = new ComparisonUnitWord(wordGroup.Select(keyed => keyed.ComparisonUnitAtomMember));

              return new WithHierarchicalGroupingKey
              {
                  ComparisonUnitWord = word,
                  HierarchicalGroupingArray = hierarchy
              };
          })
          .ToArray();

      if (False)
      {
          var dump = new StringBuilder();
          foreach (WithHierarchicalGroupingKey entry in withHierarchicalGroupingKey)
          {
              dump.Append("Grouping Array: " +
                          entry.HierarchicalGroupingArray.Select(gam => gam + " - ").StringConcatenate() +
                          Environment.NewLine);
              foreach (ComparisonUnit content in entry.ComparisonUnitWord.Contents)
              {
                  dump.Append(" " + content.ToString(0) + Environment.NewLine);
              }
          }

          string dumpText = dump.ToString();
          TestUtil.NotePad(dumpText);
      }

      ComparisonUnit[] cul = GetHierarchicalComparisonUnits(withHierarchicalGroupingKey, 0).ToArray();

      if (False)
      {
          string listText = ComparisonUnit.ComparisonUnitListToString(cul);
          TestUtil.NotePad(listText);
      }

      return cul;
  }
  /// <summary>
  /// Converts a flat sequence of words into nested comparison units, one hierarchy level per
  /// recursion step.
  /// </summary>
  /// <remarks>
  /// Adjacent entries are grouped by their <c>HierarchicalGroupingArray</c> value at
  /// <paramref name="level"/>; entries whose array has no value at that level use the empty string.
  /// A group with the empty key contributes its words directly. Any other group becomes a
  /// <see cref="ComparisonUnitGroup"/> whose type is derived from the part of the key before the
  /// first ':' and whose contents come from recursing at <paramref name="level"/> + 1.
  /// </remarks>
  /// <param name="input">Words together with their hierarchical grouping keys.</param>
  /// <param name="level">Zero-based index into each entry's <c>HierarchicalGroupingArray</c>.</param>
  /// <returns>The comparison units for this level, in input order.</returns>
  private static IEnumerable<ComparisonUnit> GetHierarchicalComparisonUnits(
      IEnumerable<WithHierarchicalGroupingKey> input,
      int level)
  {
      IEnumerable<IGrouping<string, WithHierarchicalGroupingKey>> groupedAtLevel = input
          .GroupAdjacent(entry =>
              level >= entry.HierarchicalGroupingArray.Length
                  ? ""
                  : entry.HierarchicalGroupingArray[level]);

      var units = new List<ComparisonUnit>();

      foreach (IGrouping<string, WithHierarchicalGroupingKey> levelGroup in groupedAtLevel)
      {
          if (levelGroup.Key == "")
          {
              // No grouping element at this depth: the words themselves are the units.
              units.AddRange(levelGroup.Select(entry => entry.ComparisonUnitWord).ToList());
              continue;
          }

          string[] keyParts = levelGroup.Key.Split(':');
          ComparisonUnitGroupType groupType = WmlComparerUtil.ComparisonUnitGroupTypeFromLocalName(keyParts[0]);
          IEnumerable<ComparisonUnit> children = GetHierarchicalComparisonUnits(levelGroup, level + 1);
          units.Add(new ComparisonUnitGroup(children, groupType, level));
      }

      return units;
  }
  352. #endregion GetComparisonUnitList
  353. }
  354. }