WmlComparer.Internal.Methods.ComparisonUnits.cs 16 KB


  1. // Copyright (c) Microsoft. All rights reserved.
  2. // Licensed under the MIT license. See LICENSE file in the project root for full license information.
  3. using System;
  4. using System.Collections.Generic;
  5. using System.Linq;
  6. using System.Text;
  7. using System.Xml.Linq;
  8. using DocumentFormat.OpenXml.Packaging;
  9. namespace OpenXmlPowerTools
  10. {
  11. public static partial class WmlComparer
  12. {
  13. #region CreateComparisonUnitAtomList
  /// <summary>
  /// Prepares <paramref name="contentParent"/> and flattens it into an array of comparison unit atoms.
  /// </summary>
  /// <param name="part">Part passed through to every atom that is created.</param>
  /// <param name="contentParent">Root element whose content is converted; it is modified by the preparation passes.</param>
  /// <param name="settings">Comparer settings passed through to every atom that is created.</param>
  /// <returns>The atoms produced for <paramref name="contentParent"/>, in document order of traversal.</returns>
  /// <exception cref="NotSupportedException">Raised by the validation pass when an invalid element is present.</exception>
  internal static ComparisonUnitAtom[] CreateComparisonUnitAtomList(
      OpenXmlPart part,
      XElement contentParent,
      WmlComparerSettings settings)
  {
      // Preparation passes run before any atom is built.
      VerifyNoInvalidContent(contentParent);
      AssignUnidToAllElements(contentParent); // add the Guid id to every element
      MoveLastSectPrIntoLastParagraph(contentParent);

      List<ComparisonUnitAtom> atoms = CreateComparisonUnitAtomListInternal(part, contentParent, settings);
      ComparisonUnitAtom[] atomArray = atoms.ToArray();

      // Diagnostic dump, guarded by the class-level False switch (declared outside this block).
      if (False)
      {
          var dump = new StringBuilder();
          foreach (ComparisonUnitAtom atom in atomArray)
          {
              dump.Append(atom + Environment.NewLine);
          }

          TestUtil.NotePad(dump.ToString());
      }

      return atomArray;
  }
  /// <summary>
  /// Throws if any descendant of <paramref name="contentParent"/> has a name listed in InvalidElements.
  /// </summary>
  /// <param name="contentParent">Element whose descendants are inspected.</param>
  /// <exception cref="NotSupportedException">
  /// The first offending descendant was found; the message names its local name.
  /// </exception>
  private static void VerifyNoInvalidContent(XElement contentParent)
  {
      XElement offender = contentParent
          .Descendants()
          .FirstOrDefault(d => InvalidElements.Contains(d.Name));

      if (offender != null)
      {
          throw new NotSupportedException("Document contains " + offender.Name.LocalName);
      }
  }
  /// <summary>
  /// Moves a w:sectPr that is a direct child of <paramref name="contentParent"/> into the w:pPr of
  /// the last direct-child paragraph, creating that w:pPr when the paragraph has none.
  /// </summary>
  /// <param name="contentParent">Element that may directly contain one w:sectPr; modified in place.</param>
  /// <exception cref="OpenXmlPowerToolsException">
  /// More than one direct-child w:sectPr exists, or a w:sectPr exists but there is no direct-child paragraph.
  /// </exception>
  private static void MoveLastSectPrIntoLastParagraph(XElement contentParent)
  {
      List<XElement> lastSectPrList = contentParent.Elements(W.sectPr).ToList();
      if (lastSectPrList.Count > 1)
          throw new OpenXmlPowerToolsException("Invalid document");

      XElement lastSectPr = lastSectPrList.FirstOrDefault();
      if (lastSectPr == null)
          return; // nothing to move

      XElement lastParagraph = contentParent.Elements(W.p).LastOrDefault();
      if (lastParagraph == null)
          throw new OpenXmlPowerToolsException("Invalid document");

      XElement pPr = lastParagraph.Element(W.pPr);
      if (pPr == null)
      {
          pPr = new XElement(W.pPr);

          // Attach the element itself. Passing the XName (W.pPr) here would be stored as a text
          // node, leaving the new pPr detached and silently dropping the section properties.
          lastParagraph.AddFirst(pPr);
      }

      // lastSectPr still has a parent at this point, so Add stores a copy of it;
      // the original is then removed from contentParent.
      pPr.Add(lastSectPr);
      contentParent.Elements(W.sectPr).Remove();
  }
  /// <summary>
  /// Allocates the result list and fills it by walking <paramref name="contentParent"/> recursively.
  /// </summary>
  /// <param name="part">Part passed through to the recursive walk.</param>
  /// <param name="contentParent">Element at which the recursive walk starts.</param>
  /// <param name="settings">Comparer settings passed through to the recursive walk.</param>
  /// <returns>A new list holding every atom appended during the walk.</returns>
  private static List<ComparisonUnitAtom> CreateComparisonUnitAtomListInternal(
      OpenXmlPart part,
      XElement contentParent,
      WmlComparerSettings settings)
  {
      var atoms = new List<ComparisonUnitAtom>();

      // The recursive worker appends to the list it is handed.
      CreateComparisonUnitAtomListRecurse(part, contentParent, atoms, settings);

      return atoms;
  }
  /// <summary>
  /// Recursively walks <paramref name="element"/> and appends the atoms it produces to
  /// <paramref name="comparisonUnitAtomList"/>.
  /// </summary>
  /// <remarks>
  /// Handling by element name:
  /// body / footnote / endnote - recurse into every child element;
  /// p - recurse into all children except pPr, then append one atom for the pPr (an empty pPr if absent);
  /// r - recurse into all children except rPr;
  /// t / delText - append one atom per character of the element's value;
  /// names in AllowableRunChildren, or w:object - append the element itself as a single atom;
  /// names in RecursionElements - recurse into children, skipping the listed property children;
  /// names in ElementsToThrowAway - ignored;
  /// anything else - recurse into all children.
  /// </remarks>
  /// <param name="part">Part handed to every atom that is created.</param>
  /// <param name="element">Element currently being visited.</param>
  /// <param name="comparisonUnitAtomList">List that receives the created atoms.</param>
  /// <param name="settings">Comparer settings handed to every atom that is created.</param>
  private static void CreateComparisonUnitAtomListRecurse(
      OpenXmlPart part,
      XElement element,
      List<ComparisonUnitAtom> comparisonUnitAtomList,
      WmlComparerSettings settings)
  {
      XName name = element.Name;

      // Ancestor chain for an atom: outermost first, this element last, cut off at the nearest
      // body / footnotes / endnotes ancestor. Evaluated on demand so every atom gets its own array.
      Func<XElement[]> ancestorChain = () => element
          .AncestorsAndSelf()
          .TakeWhile(a => a.Name != W.body && a.Name != W.footnotes && a.Name != W.endnotes)
          .Reverse()
          .ToArray();

      // Pure containers: just descend.
      bool isContainer = name == W.body || name == W.footnote || name == W.endnote;
      if (isContainer)
      {
          foreach (XElement child in element.Elements())
          {
              CreateComparisonUnitAtomListRecurse(part, child, comparisonUnitAtomList, settings);
          }

          return;
      }

      // Paragraph: content first, then a single atom representing the paragraph properties.
      if (name == W.p)
      {
          foreach (XElement child in element.Elements().Where(e => e.Name != W.pPr))
          {
              CreateComparisonUnitAtomListRecurse(part, child, comparisonUnitAtomList, settings);
          }

          // A paragraph without pPr is represented by a fresh, empty pPr element.
          XElement paraProps = element.Element(W.pPr) ?? new XElement(W.pPr);
          comparisonUnitAtomList.Add(new ComparisonUnitAtom(paraProps, ancestorChain(), part, settings));
          return;
      }

      // Run: descend into everything except the run properties.
      if (name == W.r)
      {
          foreach (XElement child in element.Elements().Where(e => e.Name != W.rPr))
          {
              CreateComparisonUnitAtomListRecurse(part, child, comparisonUnitAtomList, settings);
          }

          return;
      }

      // Text: one atom per character, each wrapped in a new element with the same name.
      if (name == W.t || name == W.delText)
      {
          foreach (char ch in element.Value)
          {
              var charAtom = new ComparisonUnitAtom(
                  new XElement(name, ch),
                  ancestorChain(),
                  part,
                  settings);
              comparisonUnitAtomList.Add(charAtom);
          }

          return;
      }

      // Elements that are compared as a whole.
      if (AllowableRunChildren.Contains(name) || name == W._object)
      {
          comparisonUnitAtomList.Add(new ComparisonUnitAtom(element, ancestorChain(), part, settings));
          return;
      }

      // Elements with a registered list of property children to skip while descending.
      RecursionInfo recursion = RecursionElements.FirstOrDefault(z => z.ElementName == name);
      if (recursion != null)
      {
          AnnotateElementWithProps(
              part, element, comparisonUnitAtomList, recursion.ChildElementPropertyNames, settings);
          return;
      }

      // Everything else is descended into, unless it is explicitly discarded.
      if (!ElementsToThrowAway.Contains(name))
      {
          AnnotateElementWithProps(part, element, comparisonUnitAtomList, null, settings);
      }
  }
  /// <summary>
  /// Recurses into the child elements of <paramref name="element"/>, skipping any child whose name
  /// appears in <paramref name="childElementPropertyNames"/>.
  /// </summary>
  /// <param name="part">Part passed through to the recursive walk.</param>
  /// <param name="element">Element whose children are visited.</param>
  /// <param name="comparisonUnitAtomList">List that receives the created atoms.</param>
  /// <param name="childElementPropertyNames">
  /// Names of child elements to skip, or <c>null</c> to visit every child.
  /// </param>
  /// <param name="settings">Comparer settings passed through to the recursive walk.</param>
  private static void AnnotateElementWithProps(
      OpenXmlPart part,
      XElement element,
      List<ComparisonUnitAtom> comparisonUnitAtomList,
      XName[] childElementPropertyNames,
      WmlComparerSettings settings)
  {
      IEnumerable<XElement> children = childElementPropertyNames == null
          ? element.Elements()
          : element.Elements().Where(e => !childElementPropertyNames.Contains(e.Name));

      foreach (XElement child in children)
      {
          CreateComparisonUnitAtomListRecurse(part, child, comparisonUnitAtomList, settings);
      }
  }
  177. #endregion CreateComparisonUnitAtomList
  178. #region GetComparisonUnitList
  179. // The following method must be made internal if we ever turn this part of the partial class
  180. // into its own class.
  /// <summary>
  /// Groups atoms into words and then nests those words into a hierarchy of comparison units.
  /// </summary>
  /// <remarks>
  /// Each atom is given a running integer key and adjacent atoms with equal keys form one word.
  /// An atom gets a key of its own (different from both neighbours) when it is:
  /// a w:t atom whose character is '.' or ',' with no digit w:t atom directly before or after it;
  /// a w:t atom whose character is any other member of <c>settings.WordSeparators</c>;
  /// or a non-w:t atom whose element name is in WordBreakElements.
  /// Every other atom shares the current key.
  /// </remarks>
  /// <param name="comparisonUnitAtomList">Atoms to group, in order.</param>
  /// <param name="settings">Comparer settings; only <c>WordSeparators</c> is read here.</param>
  /// <returns>The top-level comparison units built from the atoms.</returns>
  private static ComparisonUnit[] GetComparisonUnitList(
      ComparisonUnitAtom[] comparisonUnitAtomList,
      WmlComparerSettings settings)
  {
      // True when an atom exists at the index, is a w:t atom, and its first character is a digit.
      Func<int, bool> isDigitTextAt = index =>
      {
          if (index < 0 || index >= comparisonUnitAtomList.Length)
              return false;

          ComparisonUnitAtom neighbour = comparisonUnitAtomList[index];
          return neighbour.ContentElement.Name == W.t && char.IsDigit(neighbour.ContentElement.Value[0]);
      };

      var initial = new Atgbw
      {
          Key = null,
          ComparisonUnitAtomMember = null,
          NextIndex = 0
      };

      IEnumerable<Atgbw> keyedAtoms = comparisonUnitAtomList
          .Rollup(initial, (sr, prevAtgbw, i) =>
          {
              int nextIndex = prevAtgbw.NextIndex;

              // Decide whether this atom must be isolated in a group of its own.
              bool standsAlone;
              if (sr.ContentElement.Name == W.t)
              {
                  string text = sr.ContentElement.Value;
                  char ch = text[0];
                  if (ch == '.' || ch == ',')
                  {
                      // '.' and ',' stay attached when they touch a digit (e.g. inside a number).
                      bool beforeIsDigit = isDigitTextAt(i - 1);
                      bool afterIsDigit = isDigitTextAt(i + 1);
                      standsAlone = !(beforeIsDigit || afterIsDigit);
                  }
                  else
                  {
                      standsAlone = settings.WordSeparators.Contains(ch);
                  }
              }
              else
              {
                  standsAlone = WordBreakElements.Contains(sr.ContentElement.Name);
              }

              int? key;
              if (standsAlone)
              {
                  // Use the next key for this atom and advance once more so the following atom differs.
                  key = nextIndex + 1;
                  nextIndex += 2;
              }
              else
              {
                  key = nextIndex;
              }

              return new Atgbw
              {
                  Key = key,
                  ComparisonUnitAtomMember = sr,
                  NextIndex = nextIndex
              };
          })
          .ToArray();

      // Diagnostic dump of keys, guarded by the class-level False switch (declared outside this block).
      if (False)
      {
          var dump = new StringBuilder();
          foreach (Atgbw keyed in keyedAtoms)
          {
              dump.Append(keyed.Key + Environment.NewLine);
              dump.Append(" " + keyed.ComparisonUnitAtomMember.ToString(0) + Environment.NewLine);
          }

          TestUtil.NotePad(dump.ToString());
      }

      IEnumerable<IGrouping<int?, Atgbw>> wordGroups = keyedAtoms
          .GroupAdjacent(gc => gc.Key)
          .ToArray();

      // Diagnostic dump of word groups.
      if (False)
      {
          var dump = new StringBuilder();
          foreach (IGrouping<int?, Atgbw> wordGroup in wordGroups)
          {
              dump.Append("Group ===== " + wordGroup.Key + Environment.NewLine);
              foreach (Atgbw member in wordGroup)
              {
                  dump.Append(" " + member.ComparisonUnitAtomMember.ToString(0) + Environment.NewLine);
              }
          }

          TestUtil.NotePad(dump.ToString());
      }

      // Pair each word with the "localName:unid" path of its first atom's grouping ancestors.
      WithHierarchicalGroupingKey[] keyedWords = wordGroups
          .Select(g =>
          {
              ComparisonUnitAtom firstAtom = g.First().ComparisonUnitAtomMember;
              string[] groupingPath = firstAtom
                  .AncestorElements
                  .Where(a => ComparisonGroupingElements.Contains(a.Name))
                  .Select(a => a.Name.LocalName + ":" + (string) a.Attribute(PtOpenXml.Unid))
                  .ToArray();

              var word = new ComparisonUnitWord(g.Select(gc => gc.ComparisonUnitAtomMember));
              return new WithHierarchicalGroupingKey
              {
                  ComparisonUnitWord = word,
                  HierarchicalGroupingArray = groupingPath
              };
          })
          .ToArray();

      // Diagnostic dump of words with their grouping paths.
      if (False)
      {
          var dump = new StringBuilder();
          foreach (WithHierarchicalGroupingKey keyedWord in keyedWords)
          {
              string path = keyedWord.HierarchicalGroupingArray.Select(gam => gam + " - ").StringConcatenate();
              dump.Append("Grouping Array: " + path + Environment.NewLine);
              foreach (ComparisonUnit unit in keyedWord.ComparisonUnitWord.Contents)
              {
                  dump.Append(" " + unit.ToString(0) + Environment.NewLine);
              }
          }

          TestUtil.NotePad(dump.ToString());
      }

      ComparisonUnit[] units = GetHierarchicalComparisonUnits(keyedWords, 0).ToArray();

      // Diagnostic dump of the final hierarchy.
      if (False)
      {
          TestUtil.NotePad(ComparisonUnit.ComparisonUnitListToString(units));
      }

      return units;
  }
  /// <summary>
  /// Builds the comparison units for one nesting level by grouping adjacent words that share the
  /// same grouping-path entry at <paramref name="level"/>.
  /// </summary>
  /// <remarks>
  /// Words whose grouping path has no entry at this level are emitted directly. Every other run of
  /// adjacent words becomes one ComparisonUnitGroup whose children are built by recursing with
  /// <paramref name="level"/> + 1. The group type is derived from the part of the key before ':'.
  /// </remarks>
  /// <param name="input">Words with their grouping paths, in order.</param>
  /// <param name="level">Zero-based index into each word's grouping path.</param>
  /// <returns>A list of the comparison units for this level, in input order.</returns>
  private static IEnumerable<ComparisonUnit> GetHierarchicalComparisonUnits(
      IEnumerable<WithHierarchicalGroupingKey> input,
      int level)
  {
      IEnumerable<IGrouping<string, WithHierarchicalGroupingKey>> runs = input
          .GroupAdjacent(whgk =>
              level < whgk.HierarchicalGroupingArray.Length
                  ? whgk.HierarchicalGroupingArray[level]
                  : "");

      var units = new List<ComparisonUnit>();
      foreach (IGrouping<string, WithHierarchicalGroupingKey> run in runs)
      {
          // An empty key marks words that are not nested any deeper: emit them as they are.
          if (run.Key == "")
          {
              List<ComparisonUnitWord> words = run.Select(whgk => whgk.ComparisonUnitWord).ToList();
              units.AddRange(words);
              continue;
          }

          // Key format is "localName:unid"; the local name selects the group type.
          string[] keyParts = run.Key.Split(':');
          ComparisonUnitGroupType groupType = WmlComparerUtil.ComparisonUnitGroupTypeFromLocalName(keyParts[0]);
          IEnumerable<ComparisonUnit> children = GetHierarchicalComparisonUnits(run, level + 1);
          units.Add(new ComparisonUnitGroup(children, groupType, level));
      }

      return units;
  }
  350. #endregion GetComparisonUnitList
  351. }
  352. }