WmlComparer.cs 360 KB


  1. // Copyright (c) Microsoft. All rights reserved.
  2. // Licensed under the MIT license. See LICENSE file in the project root for full license information.
  3. using System;
  4. using System.Collections.Generic;
  5. using System.Linq;
  6. using System.Globalization;
  7. using System.IO;
  8. using System.IO.Packaging;
  9. using System.Text;
  10. using System.Xml.Linq;
  11. using DocumentFormat.OpenXml.Packaging;
  12. using System.Drawing;
  13. using System.Security.Cryptography;
  14. using OpenXmlPowerTools;
  15. // It is possible to optimize DescendantContentAtoms
  16. ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  17. /// Currently, the unid is set at the beginning of the algorithm. It is used by the code that establishes correlation based on first rejecting
  18. /// tracked revisions, then correlating paragraphs/tables. It is required for this algorithm - after finding a correlated sequence in the document with rejected
  19. /// revisions, it uses the unid to find the same paragraph in the document without rejected revisions, then sets the correlated sha1 hash in that document.
  20. ///
  21. /// But then when accepting tracked revisions, for certain paragraphs (where there are deleted paragraph marks) it is going to lose the unids. But this isn't a
  22. /// problem because when paragraph marks are deleted, the correlation is definitely no longer possible. Any paragraphs that are in a range of paragraphs that
  23. /// are coalesced can't be correlated to paragraphs in the other document via their hash. At that point we no longer care what their unids are.
  24. ///
  25. /// But after that it is only used to reconstruct the tree. It is also used in the debugging code that
  26. /// prints the various correlated sequences and comparison units - this is display for debugging purposes only.
  27. ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  28. ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  29. /// The key idea here is that a given paragraph will always have the same ancestors, and it doesn't matter whether the content was deleted from the old document,
  30. /// inserted into the new document, or set as equal. At this point, we identify a paragraph as a sequential list of content atoms, terminated by a paragraph mark.
  31. /// This entire list will form a single paragraph, regardless of whether the paragraph is a child of the body, or if the paragraph is in a cell in a table, or if
  32. /// the paragraph is in a text box. The list of ancestors, from the paragraph to the root of the XML tree will be the same for all content atoms in the paragraph.
  33. ///
  34. /// Therefore:
  35. ///
  36. /// Iterate through the list of content atoms backwards. When the loop sees a paragraph mark, it gets the ancestor unids from the paragraph mark to the top of the
  37. /// tree, and sets this as the same for all content atoms in the paragraph. For descendants of the paragraph mark, it doesn't really matter if content is put into
  38. /// separate runs or what not. We don't need to be concerned about what the unids are for descendants of the paragraph.
  39. ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  40. namespace OpenXmlPowerTools.Previous
  41. {
  /// <summary>
  /// Options that control a WmlComparer run. All members are plain public fields with the defaults shown.
  /// </summary>
  public class WmlComparerSettings
  {
      /// <summary>
      /// Characters treated as word separators.
      /// Note that , and . are processed explicitly to handle cases where they are in a number or word.
      /// </summary>
      public char[] WordSeparators = { ' ', '-', ')', '(', ';', ',' }; // todo need to fix this for complete list

      /// <summary>Author name for generated revisions. Defaults to "Open-Xml-PowerTools".</summary>
      public string AuthorForRevisions = "Open-Xml-PowerTools";

      /// <summary>Timestamp for generated revisions, in round-trip ("o") format, captured when this object is constructed.</summary>
      public string DateTimeForRevisions = DateTime.Now.ToString("o");

      /// <summary>Defaults to 0.15. NOTE(review): the consuming code is not visible here - confirm the exact meaning.</summary>
      public double DetailThreshold = 0.15;

      /// <summary>Defaults to false. NOTE(review): presumably makes text comparison ignore case - confirm against the consumer.</summary>
      public bool CaseInsensitive;

      /// <summary>Defaults to null. NOTE(review): presumably the culture used for case-insensitive comparison - confirm.</summary>
      public CultureInfo CultureInfo;

      /// <summary>Optional callback receiving log messages. Defaults to null.</summary>
      public Action<string> LogCallback;

      /// <summary>
      /// Base value from which footnote/endnote id ranges are derived (the comparer adds 1000 for the first
      /// document and 2000 for the second). Consolidate overwrites this value.
      /// </summary>
      public int StartingIdForFootnotesEndnotes = 1;

      /// <summary>
      /// Directory that receives intermediate documents when WmlComparer.s_SaveIntermediateFilesForDebugging is set.
      /// Null (the default) disables saving.
      /// </summary>
      public DirectoryInfo DebugTempFileDi;
  }
  /// <summary>
  /// Options that apply only to WmlComparer.Consolidate.
  /// </summary>
  public class WmlComparerConsolidateSettings
  {
      /// <summary>
      /// Defaults to true. NOTE(review): presumably selects the table-based presentation of
      /// consolidated revisions - confirm where this flag is consumed.
      /// </summary>
      public bool ConsolidateWithTable;

      /// <summary>Creates settings with ConsolidateWithTable switched on.</summary>
      public WmlComparerConsolidateSettings()
      {
          ConsolidateWithTable = true;
      }
  }
  /// <summary>
  /// Describes one revised document handed to WmlComparer.Consolidate.
  /// </summary>
  public class WmlRevisedDocumentInfo
  {
      /// <summary>The revised document that is compared against the original.</summary>
      public WmlDocument RevisedDocument;

      /// <summary>Name of the person who produced <see cref="RevisedDocument"/>.</summary>
      public string Revisor;

      /// <summary>Color associated with this revisor's changes in the consolidated output.</summary>
      public Color Color;
  }
  69. public static class WmlComparer
  70. {
  71. public static bool s_False = false;
  72. public static bool s_True = true;
  73. public static bool s_SaveIntermediateFilesForDebugging = false;
  /// <summary>
  /// Compares two documents and returns a document in which the differences are expressed as tracked revisions.
  /// Both documents are pre-processed before comparison.
  /// </summary>
  /// <param name="source1">The original document.</param>
  /// <param name="source2">The revised document.</param>
  /// <param name="settings">Comparison settings.</param>
  /// <returns>The produced document containing tracked revisions.</returns>
  /// <exception cref="ArgumentNullException">Any argument is null.</exception>
  public static WmlDocument Compare(WmlDocument source1, WmlDocument source2, WmlComparerSettings settings)
  {
      // Fail fast at the public boundary; otherwise a null argument only shows up as a
      // NullReferenceException somewhere inside the comparison pipeline.
      if (source1 == null)
      {
          throw new ArgumentNullException("source1");
      }
      if (source2 == null)
      {
          throw new ArgumentNullException("source2");
      }
      if (settings == null)
      {
          throw new ArgumentNullException("settings");
      }

      return CompareInternal(source1, source2, settings, true);
  }
  /// <summary>
  /// Comparison pipeline shared by Compare and Consolidate: pre-process, correlate block-level content by
  /// hash, accept revisions in both documents, then produce the document with tracked revisions.
  /// </summary>
  /// <param name="source1">The original document.</param>
  /// <param name="source2">The revised document.</param>
  /// <param name="settings">Comparison settings; must not be null.</param>
  /// <param name="preProcessMarkupInOriginal">
  /// When false, <paramref name="source1"/> is used as given (Consolidate pre-processes the original itself).
  /// <paramref name="source2"/> is always pre-processed.
  /// </param>
  /// <returns>The produced document containing tracked revisions.</returns>
  private static WmlDocument CompareInternal(WmlDocument source1, WmlDocument source2, WmlComparerSettings settings,
      bool preProcessMarkupInOriginal)
  {
      // Only the original is conditional; the revised document is pre-processed unconditionally.
      if (preProcessMarkupInOriginal)
      {
          source1 = PreProcessMarkup(source1, settings.StartingIdForFootnotesEndnotes + 1000);
      }
      source2 = PreProcessMarkup(source2, settings.StartingIdForFootnotesEndnotes + 2000);

      if (s_SaveIntermediateFilesForDebugging && settings.DebugTempFileDi != null)
      {
          SaveIntermediateFileForDebugging(settings, source1, "Source1-Step1-PreProcess.docx");
          SaveIntermediateFileForDebugging(settings, source2, "Source2-Step1-PreProcess.docx");
      }

      // At this point, both source1 and source2 have a unid on every element. These are the values that will
      // enable reassembly of the XML tree. But we need other values.
      //
      // In source1:
      // - accept tracked revisions
      // - determine the hash code for every block-level element
      // - save it as an attribute on every element
      // - accepting / rejecting tracked revisions leaves the unids alone, where possible
      // - after accepting and calculating the hash, the unids are used to find the right block-level element
      //   in the unmodified source1, and the hash is installed there
      //
      // In source2:
      // - reject tracked revisions
      // - determine the hash code for every block-level element
      // - save it as an attribute on every element
      // - after rejecting and calculating the hash, the unids are used to find the right block-level element
      //   in the unmodified source2, and the hash is installed there
      // - sometimes, after accepting or rejecting tracked revisions, several paragraphs get coalesced into a
      //   single paragraph because paragraph marks were inserted / deleted
      // - in that case some paragraphs will not get a hash injected onto them
      // - a paragraph without a hash will never correspond to another paragraph; such cases are resolved in
      //   the normal execution of the LCS algorithm
      // - note that the unid is propagated through for the first paragraph
      //
      // Establish correlation between the two:
      // - find the longest common sequence of block-level elements whose hash codes are the same; sometimes
      //   that is every block-level element in the document, sometimes just a fair number of them
      // - at the start of the LCS algorithm the matched content is put into corresponding unknown correlated
      //   comparison units; those paragraphs will only ever be matched to their corresponding paragraph
      // - then the algorithm proceeds as usual
      //
      // ChangeFootnoteEndnoteReferencesToUniqueRange must be called before creating the wmlResult document, so
      // that the same GUID ids are used for footnote and endnote references in both the 'after' document and
      // the result document.
      var acceptedSource1 = RevisionProcessor.AcceptRevisions(source1);
      var rejectedSource2 = RevisionProcessor.RejectRevisions(source2);

      if (s_SaveIntermediateFilesForDebugging && settings.DebugTempFileDi != null)
      {
          SaveIntermediateFileForDebugging(settings, acceptedSource1, "Source1-Step2-AfterAccepting.docx");
          SaveIntermediateFileForDebugging(settings, rejectedSource2, "Source2-Step2-AfterRejecting.docx");
      }

      // This creates the correlated hash codes that enable us to match up ranges of paragraphs based on
      // accepting in source1, rejecting in source2.
      source1 = HashBlockLevelContent(source1, acceptedSource1, settings);
      source2 = HashBlockLevelContent(source2, rejectedSource2, settings);

      if (s_SaveIntermediateFilesForDebugging && settings.DebugTempFileDi != null)
      {
          SaveIntermediateFileForDebugging(settings, source1, "Source1-Step3-AfterHashing.docx");
          SaveIntermediateFileForDebugging(settings, source2, "Source2-Step3-AfterHashing.docx");
      }

      // Accept revisions in before, and after.
      source1 = RevisionProcessor.AcceptRevisions(source1);
      source2 = RevisionProcessor.AcceptRevisions(source2);

      if (s_SaveIntermediateFilesForDebugging && settings.DebugTempFileDi != null)
      {
          SaveIntermediateFileForDebugging(settings, source1, "Source1-Step4-AfterAccepting.docx");
          SaveIntermediateFileForDebugging(settings, source2, "Source2-Step4-AfterAccepting.docx");
      }

      // After accepting revisions, some unids may have been removed by the revision accepter, along with the
      // correlatedSHA1Hash codes; this is as it should be. But we need to go back in and add guids to
      // paragraphs that have had them removed.
      // NOTE(review): the stream modified here is never read back into source2, so the unids added by this
      // block are discarded when the stream is disposed. Kept as is to preserve existing behavior - confirm
      // whether source2 was meant to be rebuilt from this stream.
      using (var unidStream = new MemoryStream())
      {
          unidStream.Write(source2.DocumentByteArray, 0, source2.DocumentByteArray.Length);
          using (var unidDoc = WordprocessingDocument.Open(unidStream, true))
          {
              AddUnidsToMarkupInContentParts(unidDoc);
          }
      }

      var wmlResult = new WmlDocument(source1);
      using (var stream1 = new MemoryStream())
      using (var stream2 = new MemoryStream())
      {
          stream1.Write(source1.DocumentByteArray, 0, source1.DocumentByteArray.Length);
          stream2.Write(source2.DocumentByteArray, 0, source2.DocumentByteArray.Length);

          WmlDocument producedDocument;
          using (var wDoc1 = WordprocessingDocument.Open(stream1, true))
          using (var wDoc2 = WordprocessingDocument.Open(stream2, true))
          {
              producedDocument = ProduceDocumentWithTrackedRevisions(settings, wmlResult, wDoc1, wDoc2);
          }

          // The two dumps below are gated on s_False, i.e. switched off unless that flag is changed.
          if (s_False && settings.DebugTempFileDi != null)
          {
              SaveIntermediateFileForDebugging(settings, new WmlDocument("after1.docx", stream1.ToArray()),
                  "Source1-Step5-AfterProducingDocWithRevTrk.docx");
              SaveIntermediateFileForDebugging(settings, new WmlDocument("after2.docx", stream2.ToArray()),
                  "Source2-Step5-AfterProducingDocWithRevTrk.docx");
          }

          if (s_False && settings.DebugTempFileDi != null)
          {
              SaveIntermediateFileForDebugging(settings, CleanPowerToolsAndRsid(source1), "Cleaned-Source.docx");
              SaveIntermediateFileForDebugging(settings, CleanPowerToolsAndRsid(producedDocument), "Cleaned-Produced.docx");
          }

          return producedDocument;
      }
  }

  /// <summary>
  /// Writes <paramref name="document"/> to <paramref name="fileName"/> inside settings.DebugTempFileDi.
  /// Callers are responsible for checking that debugging output is enabled and the directory is set.
  /// </summary>
  private static void SaveIntermediateFileForDebugging(WmlComparerSettings settings, WmlDocument document, string fileName)
  {
      var target = new FileInfo(Path.Combine(settings.DebugTempFileDi.FullName, fileName));
      document.SaveAs(target.FullName);
  }
  /// <summary>
  /// Returns a copy of <paramref name="producedDocument"/>, named "cleaned.docx", in which the root of every
  /// content part has been replaced by the result of CleanPartTransform. The input document is not modified.
  /// </summary>
  private static WmlDocument CleanPowerToolsAndRsid(WmlDocument producedDocument)
  {
      using (var buffer = new MemoryStream())
      {
          // Work on an in-memory copy of the package.
          buffer.Write(producedDocument.DocumentByteArray, 0, producedDocument.DocumentByteArray.Length);

          using (var package = WordprocessingDocument.Open(buffer, true))
          {
              foreach (var contentPart in package.ContentParts())
              {
                  var partXDoc = contentPart.GetXDocument();
                  var cleanedRoot = CleanPartTransform(partXDoc.Root);
                  partXDoc.Root.ReplaceWith(cleanedRoot);
                  contentPart.PutXDocument();
              }
          }

          // The package must be closed before the bytes are taken, so that the changes are flushed.
          return new WmlDocument("cleaned.docx", buffer.ToArray());
      }
  }
  /// <summary>
  /// Computes a SHA1 hash for every paragraph, table and row in the main part of
  /// <paramref name="source1afterProcessingRevTracking"/> and stores it as a CorrelatedSHA1Hash attribute on
  /// the element with the same unid in <paramref name="source"/>.
  /// </summary>
  /// <param name="source">Document that receives the hash attributes (a modified copy is returned).</param>
  /// <param name="source1afterProcessingRevTracking">
  /// The same document after its tracked revisions were accepted or rejected; hashes are computed from it.
  /// </param>
  /// <param name="settings">Comparison settings, passed through to the hashing clone.</param>
  /// <returns>A new document, with the file name of <paramref name="source"/>, carrying the hash attributes.</returns>
  private static WmlDocument HashBlockLevelContent(WmlDocument source, WmlDocument source1afterProcessingRevTracking, WmlComparerSettings settings)
  {
      Func<XElement, bool> isBlockLevel = e => e.Name == W.p || e.Name == W.tbl || e.Name == W.tr;

      using (var sourceStream = new MemoryStream())
      using (var processedStream = new MemoryStream())
      {
          sourceStream.Write(source.DocumentByteArray, 0, source.DocumentByteArray.Length);
          processedStream.Write(source1afterProcessingRevTracking.DocumentByteArray, 0, source1afterProcessingRevTracking.DocumentByteArray.Length);

          using (var sourceDoc = WordprocessingDocument.Open(sourceStream, true))
          using (var processedDoc = WordprocessingDocument.Open(processedStream, true))
          {
              // Index the block-level elements of the source by unid.
              var sourceBlocksByUnid = sourceDoc
                  .MainDocumentPart
                  .GetXDocument()
                  .Root
                  .Descendants()
                  .Where(isBlockLevel)
                  .ToDictionary(e => (string)e.Attribute(PtOpenXml.Unid));

              var processedBlocks = processedDoc
                  .MainDocumentPart
                  .GetXDocument()
                  .Root
                  .Descendants()
                  .Where(isBlockLevel);

              foreach (var processedBlock in processedBlocks)
              {
                  var hashable = (XElement)CloneBlockLevelContentForHashing(processedDoc.MainDocumentPart, processedBlock, true, settings);
                  var hashInput = hashable
                      .ToString(SaveOptions.DisableFormatting)
                      .Replace(" xmlns=\"http://schemas.openxmlformats.org/wordprocessingml/2006/main\"", "");
                  var sha1Hash = WmlComparerUtil.SHA1HashStringForUTF8String(hashInput);

                  // Blocks that lost their unid cannot be correlated back to the source.
                  var unid = (string)processedBlock.Attribute(PtOpenXml.Unid);
                  if (unid == null)
                  {
                      continue;
                  }

                  XElement sourceBlock;
                  if (sourceBlocksByUnid.TryGetValue(unid, out sourceBlock))
                  {
                      sourceBlock.Add(new XAttribute(PtOpenXml.CorrelatedSHA1Hash, sha1Hash));
                  }
              }

              sourceDoc.MainDocumentPart.PutXDocument();
          }

          return new WmlDocument(source.FileName, sourceStream.ToArray());
      }
  }
  /// <summary>
  /// Prepares a document for comparison in two passes over in-memory copies: the first pass opens and closes
  /// the package so that markup-compatibility (MC) content is processed; the second validates the content,
  /// simplifies the markup, renumbers footnote/endnote references and assigns unids.
  /// </summary>
  /// <param name="source">The document to prepare; it is not modified.</param>
  /// <param name="startingIdForFootnotesEndnotes">Passed to ChangeFootnoteEndnoteReferencesToUniqueRange.</param>
  /// <returns>A new, pre-processed document with the same file name as <paramref name="source"/>.</returns>
  private static WmlDocument PreProcessMarkup(WmlDocument source, int startingIdForFootnotesEndnotes)
  {
      // Each pass gets its own OpenSettings instance that requests MC processing for all parts.
      Func<OpenSettings> createOpenSettings = () => new OpenSettings
      {
          MarkupCompatibilityProcessSettings = new MarkupCompatibilityProcessSettings(
              MarkupCompatibilityProcessMode.ProcessAllParts,
              DocumentFormat.OpenXml.FileFormatVersions.Office2007)
      };

      // Pass 1: open and close to get rid of MC content.
      using (var firstPass = new MemoryStream())
      {
          firstPass.Write(source.DocumentByteArray, 0, source.DocumentByteArray.Length);
          using (var wDoc = WordprocessingDocument.Open(firstPass, true, createOpenSettings()))
          {
              // Contrary to what you might think, looking at the API, it is necessary to access the root
              // element of each part to cause the SDK to process MC markup. The values themselves are unused.
              var mainRoot = wDoc.MainDocumentPart.RootElement;
              if (wDoc.MainDocumentPart.FootnotesPart != null)
              {
                  var footnotesRoot = wDoc.MainDocumentPart.FootnotesPart.RootElement;
              }
              if (wDoc.MainDocumentPart.EndnotesPart != null)
              {
                  var endnotesRoot = wDoc.MainDocumentPart.EndnotesPart.RootElement;
              }
          }
          source = new WmlDocument(source.FileName, firstPass.ToArray());
      }

      // Pass 2: the actual clean-up, on the result of pass 1.
      using (var secondPass = new MemoryStream())
      {
          secondPass.Write(source.DocumentByteArray, 0, source.DocumentByteArray.Length);
          using (var wDoc = WordprocessingDocument.Open(secondPass, true, createOpenSettings()))
          {
              TestForInvalidContent(wDoc);
              RemoveExistingPowerToolsMarkup(wDoc);

              // Strip everything that is irrelevant to the comparison, but keep tracked revisions.
              var simplifySettings = new SimplifyMarkupSettings
              {
                  RemoveBookmarks = true,
                  AcceptRevisions = false,
                  RemoveComments = true,
                  RemoveContentControls = true,
                  RemoveFieldCodes = true,
                  RemoveGoBackBookmark = true,
                  RemoveLastRenderedPageBreak = true,
                  RemovePermissions = true,
                  RemoveProof = true,
                  RemoveSmartTags = true,
                  RemoveSoftHyphens = true,
                  RemoveHyperlinks = true
              };
              MarkupSimplifier.SimplifyMarkup(wDoc, simplifySettings);

              ChangeFootnoteEndnoteReferencesToUniqueRange(wDoc, startingIdForFootnotesEndnotes);
              AddUnidsToMarkupInContentParts(wDoc);
              AddFootnotesEndnotesParts(wDoc);
              FillInEmptyFootnotesEndnotes(wDoc);
          }
          return new WmlDocument(source.FileName, secondPass.ToArray());
      }
  }
  /// <summary>
  /// Inserts a placeholder paragraph into every footnote and endnote that has no child elements at all.
  /// </summary>
  /// <remarks>
  /// Somehow, sometimes a footnote or endnote contains absolutely nothing - no paragraph - nothing. This
  /// messes up the algorithm, so in this case an empty paragraph is inserted. It is unknown how such markup
  /// comes into existence, but this is an innocuous fix.
  /// </remarks>
  private static void FillInEmptyFootnotesEndnotes(WordprocessingDocument wDoc)
  {
      var footnotesPart = wDoc.MainDocumentPart.FootnotesPart;
      var endnotesPart = wDoc.MainDocumentPart.EndnotesPart;

      XElement footnotePlaceholder = XElement.Parse(
  @"<w:p xmlns:w='http://schemas.openxmlformats.org/wordprocessingml/2006/main'>
    <w:pPr>
      <w:pStyle w:val='FootnoteText'/>
    </w:pPr>
    <w:r>
      <w:rPr>
        <w:rStyle w:val='FootnoteReference'/>
      </w:rPr>
      <w:footnoteRef/>
    </w:r>
  </w:p>");

      XElement endnotePlaceholder = XElement.Parse(
  @"<w:p xmlns:w='http://schemas.openxmlformats.org/wordprocessingml/2006/main'>
    <w:pPr>
      <w:pStyle w:val='EndnoteText'/>
    </w:pPr>
    <w:r>
      <w:rPr>
        <w:rStyle w:val='EndnoteReference'/>
      </w:rPr>
      <w:endnoteRef/>
    </w:r>
  </w:p>");

      if (footnotesPart != null)
      {
          var footnotesXDoc = footnotesPart.GetXDocument();
          foreach (var emptyNote in footnotesXDoc.Root.Elements(W.footnote).Where(note => !note.HasElements))
          {
              // Adding an element that already has a parent adds a copy, so the placeholder can be reused.
              emptyNote.Add(footnotePlaceholder);
          }
          footnotesPart.PutXDocument();
      }

      if (endnotesPart != null)
      {
          var endnotesXDoc = endnotesPart.GetXDocument();
          foreach (var emptyNote in endnotesXDoc.Root.Elements(W.endnote).Where(note => !note.HasElements))
          {
              emptyNote.Add(endnotePlaceholder);
          }
          endnotesPart.PutXDocument();
      }
  }
  /// <summary>
  /// Determines whether <paramref name="element"/> references a footnote or endnote whose content contains
  /// tracked insertions (w:ins) or deletions (w:del).
  /// </summary>
  /// <param name="element">The content to search for footnote / endnote references.</param>
  /// <param name="wDocDelta">The document whose footnotes and endnotes parts hold the referenced notes.</param>
  /// <returns>
  /// True if at least one referenced note contains a revision. References that cannot be resolved (missing
  /// part, missing id, or no note with that id) are treated as having no revisions.
  /// </returns>
  private static bool ContentContainsFootnoteEndnoteReferencesThatHaveRevisions(XElement element, WordprocessingDocument wDocDelta)
  {
      // Materialize once; the query would otherwise be re-run for every enumeration.
      var noteReferences = element
          .Descendants()
          .Where(d => d.Name == W.footnoteReference || d.Name == W.endnoteReference)
          .ToList();
      if (noteReferences.Count == 0)
          return false;

      // Either notes part may be absent; only dereference the ones that exist.
      var footnotesPart = wDocDelta.MainDocumentPart.FootnotesPart;
      var endnotesPart = wDocDelta.MainDocumentPart.EndnotesPart;
      XDocument footnoteXDoc = footnotesPart != null ? footnotesPart.GetXDocument() : null;
      XDocument endnoteXDoc = endnotesPart != null ? endnotesPart.GetXDocument() : null;

      foreach (var reference in noteReferences)
      {
          bool isFootnote = reference.Name == W.footnoteReference;
          XDocument notesXDoc = isFootnote ? footnoteXDoc : endnoteXDoc;
          if (notesXDoc == null || notesXDoc.Root == null)
              continue;

          var id = (int?)reference.Attribute(W.id);
          if (id == null)
              continue;

          XElement note = notesXDoc
              .Root
              .Elements(isFootnote ? W.footnote : W.endnote)
              .FirstOrDefault(n => (int?)n.Attribute(W.id) == id);

          // A dangling reference has no note content, hence no revisions.
          if (note == null)
              continue;

          if (note.Descendants().Any(d => d.Name == W.ins || d.Name == W.del))
              return true;
      }
      return false;
  }
  /// <summary>
  /// Runs AssignUnidToAllElements and IgnorePt14Namespace over the root of the main document part and, when
  /// they exist, the footnotes and endnotes parts, writing each part back afterwards.
  /// </summary>
  private static void AddUnidsToMarkupInContentParts(WordprocessingDocument wDoc)
  {
      // The main part is mandatory and always handled first.
      var mainPart = wDoc.MainDocumentPart;
      var mainXDoc = mainPart.GetXDocument();
      AssignUnidToAllElements(mainXDoc.Root);
      IgnorePt14Namespace(mainXDoc.Root);
      mainPart.PutXDocument();

      // The notes parts are optional; footnotes are handled before endnotes.
      var noteParts = new OpenXmlPart[] { mainPart.FootnotesPart, mainPart.EndnotesPart };
      foreach (var notePart in noteParts)
      {
          if (notePart == null)
              continue;

          var noteXDoc = notePart.GetXDocument();
          AssignUnidToAllElements(noteXDoc.Root);
          IgnorePt14Namespace(noteXDoc.Root);
          notePart.PutXDocument();
      }
  }
  /// <summary>
  /// One revision collected by Consolidate, together with where it came from and where it should be
  /// placed relative to the element it is attached to.
  /// </summary>
  private class ConsolidationInfo
  {
      /// <summary>Revisor of the document the revision came from.</summary>
      public string Revisor;

      /// <summary>Color assigned to that revisor.</summary>
      public Color Color;

      /// <summary>The block-level element (paragraph or table) that carries the revision.</summary>
      public XElement RevisionElement;

      /// <summary>True when the revision goes before the anchor element instead of after it. Defaults to false.</summary>
      public bool InsertBefore;

      /// <summary>Hash of the revision. NOTE(review): the code that computes it is not visible here.</summary>
      public string RevisionHash;

      /// <summary>Footnote elements referenced from <see cref="RevisionElement"/>.</summary>
      public XElement[] Footnotes;

      /// <summary>Endnote elements referenced from <see cref="RevisionElement"/>.</summary>
      public XElement[] Endnotes;

      /// <summary>For debugging purposes only.</summary>
      public string RevisionString;
  }
  442. private static string nl = Environment.NewLine;
  443. /*****************************************************************************************************************/
  444. // Consolidate processes footnotes and endnotes in a particular fashion - if the unmodified document has a footnote
  445. // reference, and a delta has a footnote reference, we end up with two footnotes - one is unmodified, and is referred to
  446. // from the unmodified content. The footnote reference in the delta refers to the modified footnote. This is as it
  447. // should be.
  448. /*****************************************************************************************************************/
  /// <summary>
  /// Consolidates the revisions of several revised documents into the original, using default
  /// <see cref="WmlComparerConsolidateSettings"/>.
  /// </summary>
  /// <param name="original">The document all revised documents are compared against.</param>
  /// <param name="revisedDocumentInfoList">The revised documents with their revisor and color.</param>
  /// <param name="settings">
  /// Comparison settings. Note that consolidation overwrites StartingIdForFootnotesEndnotes on this object.
  /// </param>
  /// <returns>The consolidated document.</returns>
  public static WmlDocument Consolidate(WmlDocument original,
      List<WmlRevisedDocumentInfo> revisedDocumentInfoList,
      WmlComparerSettings settings)
  {
      return Consolidate(original, revisedDocumentInfoList, settings, new WmlComparerConsolidateSettings());
  }
  456. public static WmlDocument Consolidate(WmlDocument original,
  457. List<WmlRevisedDocumentInfo> revisedDocumentInfoList,
  458. WmlComparerSettings settings, WmlComparerConsolidateSettings consolidateSettings)
  459. {
  460. // pre-process the original, so that it already has unids for all elements
  461. // then when comparing all documents to the original, each one will have the unid as appropriate
  462. // for all revision block-level content
  463. // set unid to look for
  464. // while true
  465. // determine where to insert
  466. // get the unid for the revision
  467. // look it up in the original. if find it, then insert after that element
  468. // if not in the original
  469. // look backwards in revised document, set unid to look for, do the loop again
  470. // if get to the beginning of the document
  471. // insert at beginning of document
  472. settings.StartingIdForFootnotesEndnotes = 3000;
  473. var originalWithUnids = PreProcessMarkup(original, settings.StartingIdForFootnotesEndnotes);
  474. WmlDocument consolidated = new WmlDocument(originalWithUnids);
  475. if (s_SaveIntermediateFilesForDebugging && settings.DebugTempFileDi != null)
  476. {
  477. var name1 = "Original-with-Unids.docx";
  478. var preProcFi1 = new FileInfo(Path.Combine(settings.DebugTempFileDi.FullName, name1));
  479. originalWithUnids.SaveAs(preProcFi1.FullName);
  480. }
  481. var revisedDocumentInfoListCount = revisedDocumentInfoList.Count();
  482. using (MemoryStream consolidatedMs = new MemoryStream())
  483. {
  484. consolidatedMs.Write(consolidated.DocumentByteArray, 0, consolidated.DocumentByteArray.Length);
  485. using (WordprocessingDocument consolidatedWDoc = WordprocessingDocument.Open(consolidatedMs, true))
  486. {
  487. var consolidatedMainDocPart = consolidatedWDoc.MainDocumentPart;
  488. var consolidatedMainDocPartXDoc = consolidatedMainDocPart.GetXDocument();
  489. // save away last sectPr
  490. XElement savedSectPr = consolidatedMainDocPartXDoc
  491. .Root
  492. .Element(W.body)
  493. .Elements(W.sectPr)
  494. .LastOrDefault();
  495. consolidatedMainDocPartXDoc
  496. .Root
  497. .Element(W.body)
  498. .Elements(W.sectPr)
  499. .Remove();
  500. var consolidatedByUnid = consolidatedMainDocPartXDoc
  501. .Descendants()
  502. .Where(d => (d.Name == W.p || d.Name == W.tbl) && d.Attribute(PtOpenXml.Unid) != null)
  503. .ToDictionary(d => (string)d.Attribute(PtOpenXml.Unid));
  504. int deltaNbr = 1;
  505. foreach (var revisedDocumentInfo in revisedDocumentInfoList)
  506. {
  507. settings.StartingIdForFootnotesEndnotes = (deltaNbr * 2000) + 3000;
  508. var delta = WmlComparer.CompareInternal(originalWithUnids, revisedDocumentInfo.RevisedDocument, settings, false);
  509. if (s_SaveIntermediateFilesForDebugging && settings.DebugTempFileDi != null)
  510. {
  511. var name1 = string.Format("Delta-{0}.docx", deltaNbr++);
  512. var deltaFi = new FileInfo(Path.Combine(settings.DebugTempFileDi.FullName, name1));
  513. delta.SaveAs(deltaFi.FullName);
  514. }
  515. var colorRgb = revisedDocumentInfo.Color.ToArgb();
  516. var colorString = colorRgb.ToString("X");
  517. if (colorString.Length == 8)
  518. colorString = colorString.Substring(2);
  519. using (MemoryStream msOriginalWithUnids = new MemoryStream())
  520. using (MemoryStream msDelta = new MemoryStream())
  521. {
  522. msOriginalWithUnids.Write(originalWithUnids.DocumentByteArray, 0, originalWithUnids.DocumentByteArray.Length);
  523. msDelta.Write(delta.DocumentByteArray, 0, delta.DocumentByteArray.Length);
  524. using (WordprocessingDocument wDocOriginalWithUnids = WordprocessingDocument.Open(msOriginalWithUnids, true))
  525. using (WordprocessingDocument wDocDelta = WordprocessingDocument.Open(msDelta, true))
  526. {
  527. var modMainDocPart = wDocDelta.MainDocumentPart;
  528. var modMainDocPartXDoc = modMainDocPart.GetXDocument();
  529. var blockLevelContentToMove = modMainDocPartXDoc
  530. .Root
  531. .DescendantsTrimmed(d => d.Name == W.txbxContent || d.Name == W.tr)
  532. .Where(d => d.Name == W.p || d.Name == W.tbl)
  533. .Where(d => d.Descendants().Any(z => z.Name == W.ins || z.Name == W.del) ||
  534. ContentContainsFootnoteEndnoteReferencesThatHaveRevisions(d, wDocDelta))
  535. .ToList();
  536. foreach (var revision in blockLevelContentToMove)
  537. {
  538. var elementLookingAt = revision;
  539. while (true)
  540. {
  541. var unid = (string)elementLookingAt.Attribute(PtOpenXml.Unid);
  542. if (unid == null)
  543. throw new OpenXmlPowerToolsException("Internal error");
  544. XElement elementToInsertAfter = null;
  545. if (consolidatedByUnid.ContainsKey(unid))
  546. elementToInsertAfter = consolidatedByUnid[unid];
  547. if (elementToInsertAfter != null)
  548. {
  549. ConsolidationInfo ci = new ConsolidationInfo();
  550. ci.Revisor = revisedDocumentInfo.Revisor;
  551. ci.Color = revisedDocumentInfo.Color;
  552. ci.RevisionElement = revision;
  553. ci.Footnotes = revision
  554. .Descendants(W.footnoteReference)
  555. .Select(fr =>
  556. {
  557. var id = (int)fr.Attribute(W.id);
  558. var fnXDoc = wDocDelta.MainDocumentPart.FootnotesPart.GetXDocument();
  559. var footnote = fnXDoc.Root.Elements(W.footnote).FirstOrDefault(fn => (int)fn.Attribute(W.id) == id);
  560. if (footnote == null)
  561. throw new OpenXmlPowerToolsException("Internal Error");
  562. return footnote;
  563. })
  564. .ToArray();
  565. ci.Endnotes = revision
  566. .Descendants(W.endnoteReference)
  567. .Select(er =>
  568. {
  569. var id = (int)er.Attribute(W.id);
  570. var enXDoc = wDocDelta.MainDocumentPart.EndnotesPart.GetXDocument();
  571. var endnote = enXDoc.Root.Elements(W.endnote).FirstOrDefault(en => (int)en.Attribute(W.id) == id);
  572. if (endnote == null)
  573. throw new OpenXmlPowerToolsException("Internal Error");
  574. return endnote;
  575. })
  576. .ToArray();
  577. AddToAnnotation(
  578. wDocDelta,
  579. consolidatedWDoc,
  580. elementToInsertAfter,
  581. ci,
  582. settings);
  583. break;
  584. }
  585. else
  586. {
  587. // find an element to insert after
  588. var elementBeforeRevision = elementLookingAt
  589. .SiblingsBeforeSelfReverseDocumentOrder()
  590. .FirstOrDefault(e => e.Attribute(PtOpenXml.Unid) != null);
  591. if (elementBeforeRevision == null)
  592. {
  593. var firstElement = consolidatedMainDocPartXDoc
  594. .Root
  595. .Element(W.body)
  596. .Elements()
  597. .FirstOrDefault(e => e.Name == W.p || e.Name == W.tbl);
  598. ConsolidationInfo ci = new ConsolidationInfo();
  599. ci.Revisor = revisedDocumentInfo.Revisor;
  600. ci.Color = revisedDocumentInfo.Color;
  601. ci.RevisionElement = revision;
  602. ci.InsertBefore = true;
  603. ci.Footnotes = revision
  604. .Descendants(W.footnoteReference)
  605. .Select(fr =>
  606. {
  607. var id = (int)fr.Attribute(W.id);
  608. var fnXDoc = wDocDelta.MainDocumentPart.FootnotesPart.GetXDocument();
  609. var footnote = fnXDoc.Root.Elements(W.footnote).FirstOrDefault(fn => (int)fn.Attribute(W.id) == id);
  610. if (footnote == null)
  611. throw new OpenXmlPowerToolsException("Internal Error");
  612. return footnote;
  613. })
  614. .ToArray();
  615. ci.Endnotes = revision
  616. .Descendants(W.endnoteReference)
  617. .Select(er =>
  618. {
  619. var id = (int)er.Attribute(W.id);
  620. var enXDoc = wDocDelta.MainDocumentPart.EndnotesPart.GetXDocument();
  621. var endnote = enXDoc.Root.Elements(W.endnote).FirstOrDefault(en => (int)en.Attribute(W.id) == id);
  622. if (endnote == null)
  623. throw new OpenXmlPowerToolsException("Internal Error");
  624. return endnote;
  625. })
  626. .ToArray();
  627. AddToAnnotation(
  628. wDocDelta,
  629. consolidatedWDoc,
  630. firstElement,
  631. ci,
  632. settings);
  633. break;
  634. }
  635. else
  636. {
  637. elementLookingAt = elementBeforeRevision;
  638. continue;
  639. }
  640. }
  641. }
  642. }
  643. CopyMissingStylesFromOneDocToAnother(wDocDelta, consolidatedWDoc);
  644. }
  645. }
  646. }
  647. // at this point, everything is added as an annotation, from all documents to be merged.
  648. // so now the process is to go through and add the annotations to the document
  649. var elementsToProcess = consolidatedMainDocPartXDoc
  650. .Root
  651. .Descendants()
  652. .Where(d => d.Annotation<List<ConsolidationInfo>>() != null)
  653. .ToList();
  654. var emptyParagraph = new XElement(W.p,
  655. new XElement(W.pPr,
  656. new XElement(W.spacing,
  657. new XAttribute(W.after, "0"),
  658. new XAttribute(W.line, "240"),
  659. new XAttribute(W.lineRule, "auto"))));
  660. foreach (var ele in elementsToProcess)
  661. {
  662. var lci = ele.Annotation<List<ConsolidationInfo>>();
  663. // process before
  664. var contentToAddBefore = lci
  665. .Where(ci => ci.InsertBefore == true)
  666. .GroupAdjacent(ci => ci.Revisor + ci.Color.ToString())
  667. .Select((groupedCi, idx) => AssembledConjoinedRevisionContent(emptyParagraph, groupedCi, idx, consolidatedWDoc, consolidateSettings));
  668. ele.AddBeforeSelf(contentToAddBefore);
  669. // process after
  670. // if all revisions from all revisors are exactly the same, then instead of adding multiple tables after
  671. // that contains the revisions, then simply replace the paragraph with the one with the revisions.
  672. // RC004 documents contain the test data to exercise this.
  673. var lciCount = lci.Where(ci => ci.InsertBefore == false).Count();
  674. if (lciCount > 1 && lciCount == revisedDocumentInfoListCount)
  675. {
  676. /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  677. // This is the code that determines if revisions should be consolidated into one.
  678. var uniqueRevisions = lci
  679. .Where(ci => ci.InsertBefore == false)
  680. .GroupBy(ci =>
  681. {
  682. // Get a hash after first accepting revisions and compressing the text.
  683. var acceptedRevisionElement = RevisionProcessor.AcceptRevisionsForElement(ci.RevisionElement);
  684. var sha1Hash = WmlComparerUtil.SHA1HashStringForUTF8String(acceptedRevisionElement.Value.Replace(" ", "").Replace(" ", "").Replace(" ", "").Replace("\n", "").Replace(".", "").Replace(",", "").ToUpper());
  685. return sha1Hash;
  686. })
  687. .OrderByDescending(g => g.Count())
  688. .ToList();
  689. var uniqueRevisionCount = uniqueRevisions.Count();
  690. /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  691. if (uniqueRevisionCount == 1)
  692. {
  693. MoveFootnotesEndnotesForConsolidatedRevisions(lci.First(), consolidatedWDoc);
  694. var dummyElement = new XElement("dummy", lci.First().RevisionElement);
  695. foreach (var rev in dummyElement.Descendants().Where(d => d.Attribute(W.author) != null))
  696. {
  697. var aut = rev.Attribute(W.author);
  698. aut.Value = "ITU";
  699. }
  700. ele.ReplaceWith(dummyElement.Elements());
  701. continue;
  702. }
  703. // this is the location where we have determined that there are the same number of revisions for this paragraph as there are revision documents.
  704. // however, the hash for all of them were not the same.
  705. // therefore, they would be added to the consolidated document as separate revisions.
  706. // create a log that shows what is different, in detail.
  707. if (settings.LogCallback != null)
  708. {
  709. StringBuilder sb = new StringBuilder();
  710. sb.Append("====================================================================================================" + nl);
  711. sb.Append("Non-Consolidated Revision" + nl);
  712. sb.Append("====================================================================================================" + nl);
  713. foreach (var urList in uniqueRevisions)
  714. {
  715. var revisorList = urList.Select(ur => ur.Revisor + " : ").StringConcatenate().TrimEnd(' ', ':');
  716. sb.Append("Revisors: " + revisorList + nl);
  717. var str = RevisionToLogFormTransform(urList.First().RevisionElement, 0, false);
  718. sb.Append(str);
  719. sb.Append("=========================" + nl);
  720. }
  721. sb.Append(nl);
  722. settings.LogCallback(sb.ToString());
  723. }
  724. }
  725. var contentToAddAfter = lci
  726. .Where(ci => ci.InsertBefore == false)
  727. .GroupAdjacent(ci => ci.Revisor + ci.Color.ToString())
  728. .Select((groupedCi, idx) => AssembledConjoinedRevisionContent(emptyParagraph, groupedCi, idx, consolidatedWDoc, consolidateSettings));
  729. ele.AddAfterSelf(contentToAddAfter);
  730. }
  731. #if false
  732. // old code
  733. foreach (var ele in elementsToProcess)
  734. {
  735. var lci = ele.Annotation<List<ConsolidationInfo>>();
  736. // if all revisions from all revisors are exactly the same, then instead of adding multiple tables after
  737. // that contains the revisions, then simply replace the paragraph with the one with the revisions.
  738. // RC004 documents contain the test data to exercise this.
  739. var lciCount = lci.Count();
  740. if (lci.Count() > 1 && lciCount == revisedDocumentInfoListCount)
  741. {
  742. /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  743. // This is the code that determines if revisions should be consolidated into one.
  744. var uniqueRevisions = lci
  745. .GroupBy(ci =>
  746. {
  747. // Get a hash after first accepting revisions and compressing the text.
  748. var ciz = ci;
  749. var acceptedRevisionElement = RevisionProcessor.AcceptRevisionsForElement(ci.RevisionElement);
  750. var text = acceptedRevisionElement.Value
  751. .Replace(" ", "")
  752. .Replace(" ", "")
  753. .Replace(" ", "")
  754. .Replace("\n", "");
  755. var sha1Hash = WmlComparerUtil.SHA1HashStringForUTF8String(text);
  756. return ci.InsertBefore.ToString() + sha1Hash;
  757. })
  758. .OrderByDescending(g => g.Count())
  759. .ToList();
  760. var uniqueRevisionCount = uniqueRevisions.Count();
  761. /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  762. if (uniqueRevisionCount == 1)
  763. {
  764. MoveFootnotesEndnotesForConsolidatedRevisions(lci.First(), consolidatedWDoc);
  765. var dummyElement = new XElement("dummy", lci.First().RevisionElement);
  766. foreach(var rev in dummyElement.Descendants().Where(d => d.Attribute(W.author) != null))
  767. {
  768. var aut = rev.Attribute(W.author);
  769. aut.Value = "ITU";
  770. }
  771. ele.ReplaceWith(dummyElement.Elements());
  772. continue;
  773. }
  774. // this is the location where we have determined that there are the same number of revisions for this paragraph as there are revision documents.
  775. // however, the hash for all of them were not the same.
  776. // therefore, they would be added to the consolidated document as separate revisions.
  777. // create a log that shows what is different, in detail.
  778. if (settings.LogCallback != null)
  779. {
  780. StringBuilder sb = new StringBuilder();
  781. sb.Append("====================================================================================================" + nl);
  782. sb.Append("Non-Consolidated Revision" + nl);
  783. sb.Append("====================================================================================================" + nl);
  784. foreach (var urList in uniqueRevisions)
  785. {
  786. var revisorList = urList.Select(ur => ur.Revisor + " : ").StringConcatenate().TrimEnd(' ', ':');
  787. sb.Append("Revisors: " + revisorList + nl);
  788. var str = RevisionToLogFormTransform(urList.First().RevisionElement, 0, false);
  789. sb.Append(str);
  790. sb.Append("=========================" + nl);
  791. }
  792. sb.Append(nl);
  793. settings.LogCallback(sb.ToString());
  794. }
  795. }
  796. var contentToAddBefore = lci
  797. .Where(ci => ci.InsertBefore == true)
  798. .GroupAdjacent(ci => ci.Revisor + ci.Color.ToString())
  799. .Select((groupedCi, idx) => AssembledConjoinedRevisionContent(emptyParagraph, groupedCi, idx, consolidatedWDoc, consolidateSettings));
  800. var contentToAddAfter = lci
  801. .Where(ci => ci.InsertBefore == false)
  802. .GroupAdjacent(ci => ci.Revisor + ci.Color.ToString())
  803. .Select((groupedCi, idx) => AssembledConjoinedRevisionContent(emptyParagraph, groupedCi, idx, consolidatedWDoc, consolidateSettings));
  804. ele.AddBeforeSelf(contentToAddBefore);
  805. ele.AddAfterSelf(contentToAddAfter);
  806. }
  807. #endif
  808. consolidatedMainDocPartXDoc
  809. .Root
  810. .Element(W.body)
  811. .Add(savedSectPr);
  812. AddTableGridStyleToStylesPart(consolidatedWDoc.MainDocumentPart.StyleDefinitionsPart);
  813. FixUpRevisionIds(consolidatedWDoc, consolidatedMainDocPartXDoc);
  814. IgnorePt14NamespaceForFootnotesEndnotes(consolidatedWDoc);
  815. FixUpDocPrIds(consolidatedWDoc);
  816. FixUpShapeIds(consolidatedWDoc);
  817. FixUpGroupIds(consolidatedWDoc);
  818. FixUpShapeTypeIds(consolidatedWDoc);
  819. WmlComparer.IgnorePt14Namespace(consolidatedMainDocPartXDoc.Root);
  820. consolidatedWDoc.MainDocumentPart.PutXDocument();
  821. AddFootnotesEndnotesStyles(consolidatedWDoc);
  822. }
  823. var newConsolidatedDocument = new WmlDocument("consolidated.docx", consolidatedMs.ToArray());
  824. return newConsolidatedDocument;
  825. }
  826. }
  /// <summary>
  /// Copies the footnotes and endnotes referenced by a revision into the consolidated document,
  /// giving every copied note, and the reference that points at it, a fresh id.
  /// </summary>
  /// <param name="ci">
  /// Revision to process. References inside <c>ci.RevisionElement</c> are renumbered in place; the
  /// note bodies are looked up by id in <c>ci.Footnotes</c> and <c>ci.Endnotes</c>.
  /// </param>
  /// <param name="wDocConsolidated">
  /// Destination document. Its footnotes and endnotes parts must already exist; clones of the
  /// referenced notes are appended to them.
  /// </param>
  /// <exception cref="OpenXmlPowerToolsException">
  /// A reference in the revision has no matching note in <paramref name="ci"/>.
  /// </exception>
  private static void MoveFootnotesEndnotesForConsolidatedRevisions(ConsolidationInfo ci, WordprocessingDocument wDocConsolidated)
  {
      var mainPart = wDocConsolidated.MainDocumentPart;
      var footnoteXDoc = mainPart.FootnotesPart.GetXDocument();
      var endnoteXDoc = mainPart.EndnotesPart.GetXDocument();

      // Highest id currently in use; 1 when the part holds no notes, so the first id handed out is 2.
      int maxFootnoteId = footnoteXDoc.Root.Elements(W.footnote)
          .Select(e => (int)e.Attribute(W.id))
          .DefaultIfEmpty(1)
          .Max();
      int maxEndnoteId = endnoteXDoc.Root.Elements(W.endnote)
          .Select(e => (int)e.Attribute(W.id))
          .DefaultIfEmpty(1)
          .Max();

      // At this point, content might contain a footnote or endnote reference.
      // The referenced note has to be added to the consolidated document under a new id, and the
      // reference has to be updated to match.
      var footnoteReferences = ci.RevisionElement.Descendants(W.footnoteReference).ToList();
      if (footnoteReferences.Count > 0)
      {
          foreach (var footnoteReference in footnoteReferences)
          {
              var id = (int)footnoteReference.Attribute(W.id);
              var footnote = ci.Footnotes.FirstOrDefault(fn => (int)fn.Attribute(W.id) == id);
              if (footnote == null)
                  throw new OpenXmlPowerToolsException("Internal Error");

              maxFootnoteId++;
              var newId = maxFootnoteId.ToString();
              footnoteReference.Attribute(W.id).Value = newId;

              // Clone so the note held by ci keeps its original id.
              var clonedFootnote = new XElement(footnote);
              clonedFootnote.Attribute(W.id).Value = newId;
              footnoteXDoc.Root.Add(clonedFootnote);
          }
          mainPart.FootnotesPart.PutXDocument();
      }

      var endnoteReferences = ci.RevisionElement.Descendants(W.endnoteReference).ToList();
      if (endnoteReferences.Count > 0)
      {
          foreach (var endnoteReference in endnoteReferences)
          {
              var id = (int)endnoteReference.Attribute(W.id);
              var endnote = ci.Endnotes.FirstOrDefault(en => (int)en.Attribute(W.id) == id);
              if (endnote == null)
                  throw new OpenXmlPowerToolsException("Internal Error");

              maxEndnoteId++;
              var newId = maxEndnoteId.ToString();
              endnoteReference.Attribute(W.id).Value = newId;

              var clonedEndnote = new XElement(endnote);
              clonedEndnote.Attribute(W.id).Value = newId;
              endnoteXDoc.Root.Add(clonedEndnote);
          }
          mainPart.EndnotesPart.PutXDocument();
      }
  }
  /// <summary>
  /// Recursively rebuilds an XML tree, dropping every attribute that is in the
  /// <c>PtOpenXml.pt</c> namespace or whose local name contains "rsid" in any casing.
  /// </summary>
  /// <param name="node">Node to transform.</param>
  /// <returns>
  /// A new <see cref="XElement"/> for element nodes; any other node is returned unchanged.
  /// </returns>
  private static object CleanPartTransform(XNode node)
  {
      var element = node as XElement;
      if (element == null)
          return node;

      return new XElement(element.Name,
          element.Attributes().Where(a =>
              a.Name.Namespace != PtOpenXml.pt &&
              // Ordinal, case-insensitive match: the result must not depend on the current culture
              // (a culture-sensitive ToLower() maps 'I' differently under e.g. Turkish).
              a.Name.LocalName.IndexOf("rsid", StringComparison.OrdinalIgnoreCase) < 0),
          element.Nodes().Select(n => CleanPartTransform(n)));
  }
  /// <summary>
  /// Renders a revision element as indented, line-oriented text for the consolidation log.
  /// </summary>
  /// <param name="element">Element to render; unrecognised elements render as an empty string.</param>
  /// <param name="depth">Number of spaces placed in front of each text line.</param>
  /// <param name="inserting">True when <paramref name="element"/> sits inside a <c>w:ins</c>.</param>
  /// <returns>The log text for the element and its descendants.</returns>
  private static string RevisionToLogFormTransform(XElement element, int depth, bool inserting)
  {
      var name = element.Name;

      // Paragraph: a heading line followed by its children, indented two further columns.
      if (name == W.p)
      {
          var children = element.Elements()
              .Select(child => RevisionToLogFormTransform(child, depth + 2, false))
              .StringConcatenate();
          return "Paragraph" + nl + children;
      }

      // Property elements carry no loggable text.
      if (name == W.pPr || name == W.rPr)
          return "";

      // Containers only forward to their children: w:ins switches the "inserting" flag on,
      // w:del switches it off, and a run passes on whatever it was given.
      if (name == W.r || name == W.ins || name == W.del)
      {
          bool childInserting = name == W.ins || (name == W.r && inserting);
          return element.Elements()
              .Select(child => RevisionToLogFormTransform(child, depth, childInserting))
              .StringConcatenate();
      }

      // Text leaves.
      string label = null;
      if (name == W.t)
          label = inserting ? "Inserted Text:" : "Text:";
      else if (name == W.delText)
          label = "Deleted Text:";

      if (label == null)
          return "";

      return "".PadRight(depth) + label + QuoteIt((string)element) + nl;
  }
  /// <summary>
  /// Wraps a string in double quotes, or in single quotes when the string itself contains a
  /// double quote.
  /// </summary>
  /// <param name="str">Text to quote.</param>
  /// <returns>The text surrounded by the chosen quote character.</returns>
  private static string QuoteIt(string str)
  {
      var delimiter = str.Contains('\"') ? "\'" : "\"";
      return delimiter + str + delimiter;
  }
  /// <summary>
  /// Runs <c>WmlComparer.IgnorePt14Namespace</c> over the root of the footnotes and endnotes
  /// parts (whichever exist) and then writes those parts back.
  /// </summary>
  /// <param name="wDoc">Document whose note parts are updated.</param>
  private static void IgnorePt14NamespaceForFootnotesEndnotes(WordprocessingDocument wDoc)
  {
      var mainPart = wDoc.MainDocumentPart;
      var fnPart = mainPart.FootnotesPart;
      var enPart = mainPart.EndnotesPart;
      bool hasFootnotes = fnPart != null;
      bool hasEndnotes = enPart != null;

      // Update both in-memory documents first ...
      if (hasFootnotes)
          WmlComparer.IgnorePt14Namespace(fnPart.GetXDocument().Root);
      if (hasEndnotes)
          WmlComparer.IgnorePt14Namespace(enPart.GetXDocument().Root);

      // ... then persist them.
      if (hasFootnotes)
          fnPart.PutXDocument();
      if (hasEndnotes)
          enPart.PutXDocument();
  }
  /// <summary>
  /// Builds the content that represents one revisor's adjacent revisions in the consolidated
  /// document, copying any footnotes / endnotes they reference into the consolidated document.
  /// </summary>
  /// <param name="emptyParagraph">Spacer paragraph placed around tables.</param>
  /// <param name="groupedCi">Adjacent revisions that share a revisor and color.</param>
  /// <param name="idx">Index of this group; only group 0 gets a leading spacer in table mode.</param>
  /// <param name="wDocConsolidated">Document whose footnotes / endnotes parts receive copied notes.</param>
  /// <param name="consolidateSettings">
  /// When <c>ConsolidateWithTable</c> is set, the revisions are wrapped in a one-cell table shaded
  /// with the revisor's color and headed by the revisor's name. Otherwise the revisions are returned
  /// bare, with every <c>w:author</c> attribute set to the revisor.
  /// </param>
  /// <returns>
  /// Table mode: spacer (or null when <paramref name="idx"/> is not 0), table, spacer.
  /// Otherwise: the revision elements, each table followed by a spacer.
  /// </returns>
  /// <exception cref="OpenXmlPowerToolsException">
  /// A note reference has no matching note in its <see cref="ConsolidationInfo"/>.
  /// </exception>
  private static XElement[] AssembledConjoinedRevisionContent(XElement emptyParagraph, IGrouping<string, ConsolidationInfo> groupedCi, int idx, WordprocessingDocument wDocConsolidated,
      WmlComparerConsolidateSettings consolidateSettings)
  {
      var consolidatedFootnoteXDoc = wDocConsolidated.MainDocumentPart.FootnotesPart.GetXDocument();
      var consolidatedEndnoteXDoc = wDocConsolidated.MainDocumentPart.EndnotesPart.GetXDocument();

      // Highest note ids currently in use; 1 when a part holds no notes.
      int maxFootnoteId = 1;
      if (consolidatedFootnoteXDoc.Root.Elements(W.footnote).Any())
          maxFootnoteId = consolidatedFootnoteXDoc.Root.Elements(W.footnote).Select(e => (int)e.Attribute(W.id)).Max();
      int maxEndnoteId = 1;
      if (consolidatedEndnoteXDoc.Root.Elements(W.endnote).Any())
          maxEndnoteId = consolidatedEndnoteXDoc.Root.Elements(W.endnote).Select(e => (int)e.Attribute(W.id)).Max();

      var revisor = groupedCi.First().Revisor;

      if (consolidateSettings.ConsolidateWithTable)
      {
          var captionParagraph = new XElement(W.p,
              new XElement(W.pPr,
                  new XElement(W.jc, new XAttribute(W.val, "both")),
                  new XElement(W.rPr,
                      new XElement(W.b),
                      new XElement(W.bCs))),
              new XElement(W.r,
                  new XElement(W.rPr,
                      new XElement(W.b),
                      new XElement(W.bCs)),
                  new XElement(W.t, revisor)));

          // Shading fill is a six-digit RRGGBB hex value: drop the alpha byte and zero-pad, so that
          // colors with a small or zero alpha component still produce exactly six digits.
          var colorString = (groupedCi.First().Color.ToArgb() & 0xFFFFFF).ToString("X6");

          var table = new XElement(W.tbl,
              new XElement(W.tblPr,
                  new XElement(W.tblStyle, new XAttribute(W.val, "TableGridForRevisions")),
                  new XElement(W.tblW,
                      new XAttribute(W._w, "0"),
                      new XAttribute(W.type, "auto")),
                  new XElement(W.shd,
                      new XAttribute(W.val, "clear"),
                      new XAttribute(W.color, "auto"),
                      new XAttribute(W.fill, colorString)),
                  new XElement(W.tblLook,
                      new XAttribute(W.firstRow, "0"),
                      new XAttribute(W.lastRow, "0"),
                      new XAttribute(W.firstColumn, "0"),
                      new XAttribute(W.lastColumn, "0"),
                      new XAttribute(W.noHBand, "0"),
                      new XAttribute(W.noVBand, "0"))),
              new XElement(W.tblGrid,
                  new XElement(W.gridCol, new XAttribute(W._w, "9576"))),
              new XElement(W.tr,
                  new XElement(W.tc,
                      new XElement(W.tcPr,
                          new XElement(W.shd,
                              new XAttribute(W.val, "clear"),
                              new XAttribute(W.color, "auto"),
                              new XAttribute(W.fill, colorString))),
                      captionParagraph,
                      groupedCi.Select(ci => BuildRevisionContentWithRenumberedNotes(
                          ci, emptyParagraph, wDocConsolidated, ref maxFootnoteId, ref maxEndnoteId)))));

          // if the last paragraph has a deleted paragraph mark, then remove the deletion from the paragraph mark.  This is to prevent Word from misbehaving.
          // the last paragraph in a cell must not have a deleted paragraph mark.
          var theCell = table
              .Descendants(W.tc)
              .FirstOrDefault();
          var lastPara = theCell
              .Elements(W.p)
              .LastOrDefault();
          if (lastPara != null)
          {
              // Remove() on an empty sequence is a no-op, so no separate existence check is needed.
              lastPara
                  .Elements(W.pPr)
                  .Elements(W.rPr)
                  .Elements(W.del)
                  .Remove();
          }

          return new[] {
              idx == 0 ? emptyParagraph : null,
              table,
              emptyParagraph,
          };
      }

      var content = groupedCi.Select(ci => BuildRevisionContentWithRenumberedNotes(
          ci, emptyParagraph, wDocConsolidated, ref maxFootnoteId, ref maxEndnoteId));

      // Collect under a throw-away parent so every tracked change can be re-attributed in one pass.
      var dummyElement = new XElement("dummy",
          content.SelectMany(m => m));
      foreach (var rev in dummyElement.Descendants().Where(d => d.Attribute(W.author) != null))
          rev.Attribute(W.author).Value = revisor;
      return dummyElement.Elements().ToArray();
  }

  /// <summary>
  /// Copies the footnotes / endnotes referenced by one revision into the consolidated document
  /// under fresh ids (updating the references in place) and returns the revision element followed
  /// by a spacer paragraph when the revision is a table, or by null otherwise.
  /// </summary>
  private static XElement[] BuildRevisionContentWithRenumberedNotes(
      ConsolidationInfo ci,
      XElement emptyParagraph,
      WordprocessingDocument wDocConsolidated,
      ref int maxFootnoteId,
      ref int maxEndnoteId)
  {
      var mainPart = wDocConsolidated.MainDocumentPart;

      // At this point, content might contain a footnote or endnote reference.
      // The referenced note has to be added to the consolidated document under a new id, and the
      // reference has to be updated to match.
      if (ci.RevisionElement.Descendants(W.footnoteReference).Any())
      {
          var footnoteXDoc = mainPart.FootnotesPart.GetXDocument();
          foreach (var footnoteReference in ci.RevisionElement.Descendants(W.footnoteReference))
          {
              var id = (int)footnoteReference.Attribute(W.id);
              var footnote = ci.Footnotes.FirstOrDefault(fn => (int)fn.Attribute(W.id) == id);
              if (footnote == null)
                  throw new OpenXmlPowerToolsException("Internal Error");

              maxFootnoteId++;
              var newId = maxFootnoteId.ToString();
              footnoteReference.Attribute(W.id).Value = newId;
              var clonedFootnote = new XElement(footnote);
              clonedFootnote.Attribute(W.id).Value = newId;
              footnoteXDoc.Root.Add(clonedFootnote);
          }
          mainPart.FootnotesPart.PutXDocument();
      }

      if (ci.RevisionElement.Descendants(W.endnoteReference).Any())
      {
          var endnoteXDoc = mainPart.EndnotesPart.GetXDocument();
          foreach (var endnoteReference in ci.RevisionElement.Descendants(W.endnoteReference))
          {
              var id = (int)endnoteReference.Attribute(W.id);
              var endnote = ci.Endnotes.FirstOrDefault(en => (int)en.Attribute(W.id) == id);
              if (endnote == null)
                  throw new OpenXmlPowerToolsException("Internal Error");

              maxEndnoteId++;
              var newId = maxEndnoteId.ToString();
              endnoteReference.Attribute(W.id).Value = newId;
              var clonedEndnote = new XElement(endnote);
              clonedEndnote.Attribute(W.id).Value = newId;
              endnoteXDoc.Root.Add(clonedEndnote);
          }
          mainPart.EndnotesPart.PutXDocument();
      }

      return new[] {
          ci.RevisionElement,
          ci.RevisionElement.Name == W.tbl ? emptyParagraph : null,
      };
  }
  /// <summary>
  /// Prepares a revision for insertion into the consolidated document and queues it on the
  /// anchor element's <c>List&lt;ConsolidationInfo&gt;</c> annotation.
  /// </summary>
  /// <param name="wDocDelta">Document the revision comes from.</param>
  /// <param name="consolidatedWDoc">Document the revision will be added to.</param>
  /// <param name="elementToInsertAfter">Anchor element that carries the annotation list.</param>
  /// <param name="consolidationInfo">
  /// Revision description; its <c>RevisionElement</c>, <c>RevisionString</c> and
  /// <c>RevisionHash</c> are (re)assigned here.
  /// </param>
  /// <param name="settings">Comparer settings forwarded to the hashing clone.</param>
  private static void AddToAnnotation(
      WordprocessingDocument wDocDelta,
      WordprocessingDocument consolidatedWDoc,
      XElement elementToInsertAfter,
      ConsolidationInfo consolidationInfo,
      WmlComparerSettings settings)
  {
      var sourceMainPart = wDocDelta.MainDocumentPart;
      var targetMainPart = consolidatedWDoc.MainDocumentPart;
      PackagePart sourcePackagePart = sourceMainPart.OpenXmlPackage.Package.GetPart(sourceMainPart.Uri);
      PackagePart targetPackagePart = targetMainPart.OpenXmlPackage.Package.GetPart(targetMainPart.Uri);

      // Replace the revision element with the one returned for the destination part.
      consolidationInfo.RevisionElement = MoveRelatedPartsToDestination(
          sourcePackagePart,
          targetPackagePart,
          consolidationInfo.RevisionElement);

      // Hash a clone with the w:id attributes of w:ins / w:del stripped, so those ids do not
      // influence the hash.
      var hashingClone = (XElement)CloneBlockLevelContentForHashing(targetMainPart, consolidationInfo.RevisionElement, false, settings);
      hashingClone
          .Descendants()
          .Where(d => d.Name == W.ins || d.Name == W.del)
          .Attributes(W.id)
          .Remove();

      var serialized = hashingClone
          .ToString(SaveOptions.DisableFormatting)
          .Replace(" xmlns=\"http://schemas.openxmlformats.org/wordprocessingml/2006/main\"", "");
      consolidationInfo.RevisionString = serialized;
      consolidationInfo.RevisionHash = WmlComparerUtil.SHA1HashStringForUTF8String(serialized);

      // Create the annotation list on first use, then append.
      var pending = elementToInsertAfter.Annotation<List<ConsolidationInfo>>();
      if (pending == null)
      {
          pending = new List<ConsolidationInfo>();
          elementToInsertAfter.AddAnnotation(pending);
      }
      pending.Add(consolidationInfo);
  }
  /// <summary>
  /// Makes sure the style definitions part contains the "TableGridForRevisions" table style and
  /// the "TableNormal" style it is based on, appending whichever is missing, then writes the part back.
  /// </summary>
  /// <param name="styleDefinitionsPart">Styles part to update.</param>
  private static void AddTableGridStyleToStylesPart(StyleDefinitionsPart styleDefinitionsPart)
  {
      var stylesXDoc = styleDefinitionsPart.GetXDocument();

      bool hasRevisionTableStyle = stylesXDoc
          .Root
          .Elements(W.style)
          .Any(s => (string)s.Attribute(W.styleId) == "TableGridForRevisions");
      if (!hasRevisionTableStyle)
      {
          // Whitespace between the tags below is dropped by XElement.Parse.
          var tableGridForRevisionsStyleMarkup =
@"<w:style w:type=""table""
         w:styleId=""TableGridForRevisions""
         xmlns:w=""http://schemas.openxmlformats.org/wordprocessingml/2006/main"">
  <w:name w:val=""Table Grid For Revisions""/>
  <w:basedOn w:val=""TableNormal""/>
  <w:rsid w:val=""0092121A""/>
  <w:rPr>
    <w:rFonts w:asciiTheme=""minorHAnsi""
              w:eastAsiaTheme=""minorEastAsia""
              w:hAnsiTheme=""minorHAnsi""
              w:cstheme=""minorBidi""/>
    <w:sz w:val=""22""/>
    <w:szCs w:val=""22""/>
  </w:rPr>
  <w:tblPr>
    <w:tblBorders>
      <w:top w:val=""single""
             w:sz=""4""
             w:space=""0""
             w:color=""auto""/>
      <w:left w:val=""single""
              w:sz=""4""
              w:space=""0""
              w:color=""auto""/>
      <w:bottom w:val=""single""
                w:sz=""4""
                w:space=""0""
                w:color=""auto""/>
      <w:right w:val=""single""
               w:sz=""4""
               w:space=""0""
               w:color=""auto""/>
      <w:insideH w:val=""single""
                 w:sz=""4""
                 w:space=""0""
                 w:color=""auto""/>
      <w:insideV w:val=""single""
                 w:sz=""4""
                 w:space=""0""
                 w:color=""auto""/>
    </w:tblBorders>
  </w:tblPr>
</w:style>";
          stylesXDoc.Root.Add(XElement.Parse(tableGridForRevisionsStyleMarkup));
      }

      bool hasTableNormalStyle = stylesXDoc
          .Root
          .Elements(W.style)
          .Any(s => (string)s.Attribute(W.styleId) == "TableNormal");
      if (!hasTableNormalStyle)
      {
          var tableNormalStyleMarkup =
@"<w:style w:type=""table""
         w:default=""1""
         w:styleId=""TableNormal""
         xmlns:w=""http://schemas.openxmlformats.org/wordprocessingml/2006/main"">
  <w:name w:val=""Normal Table""/>
  <w:uiPriority w:val=""99""/>
  <w:semiHidden/>
  <w:unhideWhenUsed/>
  <w:tblPr>
    <w:tblInd w:w=""0""
              w:type=""dxa""/>
    <w:tblCellMar>
      <w:top w:w=""0""
             w:type=""dxa""/>
      <w:left w:w=""108""
              w:type=""dxa""/>
      <w:bottom w:w=""0""
                w:type=""dxa""/>
      <w:right w:w=""108""
               w:type=""dxa""/>
    </w:tblCellMar>
  </w:tblPr>
</w:style>";
          stylesXDoc.Root.Add(XElement.Parse(tableNormalStyleMarkup));
      }

      styleDefinitionsPart.PutXDocument();
  }
  // Namespace prefix declarations, followed by mc:Ignorable, that AddFootnotesEndnotesParts puts
  // on the root element of a newly created footnotes or endnotes part.
  private static XAttribute[] NamespaceAttributes = new XAttribute[]
  {
      new XAttribute(XNamespace.Xmlns + "wpc", WPC.wpc),
      new XAttribute(XNamespace.Xmlns + "mc", MC.mc),
      new XAttribute(XNamespace.Xmlns + "o", O.o),
      new XAttribute(XNamespace.Xmlns + "r", R.r),
      new XAttribute(XNamespace.Xmlns + "m", M.m),
      new XAttribute(XNamespace.Xmlns + "v", VML.vml),
      new XAttribute(XNamespace.Xmlns + "wp14", WP14.wp14),
      new XAttribute(XNamespace.Xmlns + "wp", WP.wp),
      new XAttribute(XNamespace.Xmlns + "w10", W10.w10),
      new XAttribute(XNamespace.Xmlns + "w", W.w),
      new XAttribute(XNamespace.Xmlns + "w14", W14.w14),
      new XAttribute(XNamespace.Xmlns + "wpg", WPG.wpg),
      new XAttribute(XNamespace.Xmlns + "wpi", WPI.wpi),
      new XAttribute(XNamespace.Xmlns + "wne", WNE.wne),
      new XAttribute(XNamespace.Xmlns + "wps", WPS.wps),
      // Prefixes listed here are declared ignorable via mc:Ignorable.
      new XAttribute(MC.Ignorable, "w14 wp14"),
  };
  /// <summary>
  /// Adds a footnotes part and/or an endnotes part to the document when it has none, giving each
  /// new part a root element that carries the shared namespace declarations.
  /// </summary>
  /// <param name="wDoc">Document to complete; parts that already exist are left alone.</param>
  private static void AddFootnotesEndnotesParts(WordprocessingDocument wDoc)
  {
      var mainPart = wDoc.MainDocumentPart;

      if (mainPart.FootnotesPart == null)
      {
          mainPart.AddNewPart<FootnotesPart>();
          var footnotesXDoc = mainPart.FootnotesPart.GetXDocument();
          var footnotesDeclaration = footnotesXDoc.Declaration;
          footnotesDeclaration.Standalone = "yes";
          footnotesDeclaration.Encoding = "UTF-8";
          footnotesXDoc.Add(new XElement(W.footnotes, NamespaceAttributes));
          mainPart.FootnotesPart.PutXDocument();
      }

      if (mainPart.EndnotesPart == null)
      {
          mainPart.AddNewPart<EndnotesPart>();
          var endnotesXDoc = mainPart.EndnotesPart.GetXDocument();
          var endnotesDeclaration = endnotesXDoc.Declaration;
          endnotesDeclaration.Standalone = "yes";
          endnotesDeclaration.Encoding = "UTF-8";
          endnotesXDoc.Add(new XElement(W.endnotes, NamespaceAttributes));
          mainPart.EndnotesPart.PutXDocument();
      }
  }
  /// <summary>
  /// Renumbers every footnote and endnote reference in the main document part, in document order,
  /// with consecutive ids starting at <paramref name="startingIdForFootnotesEndnotes"/>, and gives
  /// the referenced note the same new id. Footnotes and endnotes draw from one shared counter.
  /// </summary>
  /// <param name="wDoc">Document to renumber; the main, footnotes and endnotes parts are written back.</param>
  /// <param name="startingIdForFootnotesEndnotes">First id to hand out.</param>
  /// <exception cref="OpenXmlPowerToolsException">
  /// A reference points at a note that does not exist, or the part that should hold it is missing.
  /// </exception>
  private static void ChangeFootnoteEndnoteReferencesToUniqueRange(WordprocessingDocument wDoc, int startingIdForFootnotesEndnotes)
  {
      var mainDocPart = wDoc.MainDocumentPart;
      var footnotesPart = mainDocPart.FootnotesPart;
      var endnotesPart = mainDocPart.EndnotesPart;

      var mainDocumentXDoc = mainDocPart.GetXDocument();
      XDocument footnotesPartXDoc = footnotesPart != null ? footnotesPart.GetXDocument() : null;
      XDocument endnotesPartXDoc = endnotesPart != null ? endnotesPart.GetXDocument() : null;

      var references = mainDocumentXDoc
          .Root
          .Descendants()
          .Where(d => d.Name == W.footnoteReference || d.Name == W.endnoteReference);

      // NOTE(review): notes are located by their current id, so the new id range is assumed not to
      // overlap ids already present in the document - confirm against the caller.
      foreach (var r in references)
      {
          var oldId = (string)r.Attribute(W.id);
          var newId = startingIdForFootnotesEndnotes.ToString();
          startingIdForFootnotesEndnotes++;
          r.Attribute(W.id).Value = newId;

          // A reference without the part that should hold its note is an invalid document,
          // not a reason to fail with a null dereference.
          var notesXDoc = r.Name == W.footnoteReference ? footnotesPartXDoc : endnotesPartXDoc;
          if (notesXDoc == null)
              throw new OpenXmlPowerToolsException("Invalid document");

          var note = notesXDoc
              .Root
              .Elements()
              .FirstOrDefault(e => (string)e.Attribute(W.id) == oldId);
          if (note == null)
              throw new OpenXmlPowerToolsException("Invalid document");
          note.Attribute(W.id).Value = newId;
      }

      mainDocPart.PutXDocument();
      if (footnotesPart != null)
          footnotesPart.PutXDocument();
      if (endnotesPart != null)
          endnotesPart.PutXDocument();
  }
  // Correlates the body content of wDoc1 and wDoc2, flattens the correlation into a list of
  // ComparisonUnitAtoms marked Equal / Inserted / Deleted, regenerates the main document markup from
  // that list inside a copy of wmlResult, and returns that copy as a new WmlDocument.
  // The blocks guarded by s_False only dump intermediate state through TestUtil.NotePad.
  private static WmlDocument ProduceDocumentWithTrackedRevisions(WmlComparerSettings settings, WmlDocument wmlResult, WordprocessingDocument wDoc1, WordprocessingDocument wDoc2)
  {
      var mainPart1 = wDoc1.MainDocumentPart;
      var mainPart2 = wDoc2.MainDocumentPart;

      // Hold on to the body-level sectPr of the first document; a trimmed copy is put back at the end.
      var savedSectPr = mainPart1.GetXDocument().Root.Element(W.body).Element(W.sectPr);

      var body1 = mainPart1.GetXDocument().Root.Element(W.body);
      AddSha1HashToBlockLevelContent(mainPart1, body1, settings);

      var body2 = mainPart2.GetXDocument().Root.Element(W.body);
      AddSha1HashToBlockLevelContent(mainPart2, body2, settings);

      var cal1 = WmlComparer.CreateComparisonUnitAtomList(mainPart1, mainPart1.GetXDocument().Root.Element(W.body), settings);

      if (s_False)
      {
          var dump = new StringBuilder();
          foreach (var atom in cal1)
              dump.AppendLine(atom.ToString());
          TestUtil.NotePad(dump.ToString());
      }

      var cus1 = GetComparisonUnitList(cal1, settings);

      if (s_False)
      {
          TestUtil.NotePad(ComparisonUnit.ComparisonUnitListToString(cus1));
      }

      var cal2 = WmlComparer.CreateComparisonUnitAtomList(mainPart2, mainPart2.GetXDocument().Root.Element(W.body), settings);

      if (s_False)
      {
          var dump = new StringBuilder();
          foreach (var atom in cal2)
              dump.AppendLine(atom.ToString());
          TestUtil.NotePad(dump.ToString());
      }

      var cus2 = GetComparisonUnitList(cal2, settings);

      if (s_False)
      {
          TestUtil.NotePad(ComparisonUnit.ComparisonUnitListToString(cus2));
      }

      if (s_False)
      {
          var dump = new StringBuilder();
          dump.AppendLine("ComparisonUnitList 1 =====").AppendLine();
          dump.Append(ComparisonUnit.ComparisonUnitListToString(cus1));
          dump.AppendLine();
          dump.AppendLine("ComparisonUnitList 2 =====").AppendLine();
          dump.Append(ComparisonUnit.ComparisonUnitListToString(cus2));
          TestUtil.NotePad(dump.ToString());
      }

      var correlatedSequence = Lcs(cus1, cus2, settings);

      if (s_False)
      {
          var dump = new StringBuilder();
          foreach (var sequence in correlatedSequence)
              dump.AppendLine(sequence.ToString());
          TestUtil.NotePad(dump.ToString());
      }

      // For deleted or inserted rows, w:ins / w:del is added to the row's w:trPr, so that the generated
      // document shows the row as deleted or inserted.
      MarkRowsAsDeletedOrInserted(settings, correlatedSequence);

      // Flattened list of ComparisonUnitAtoms, each carrying its status: Deleted, Inserted, or Equal.
      var listOfComparisonUnitAtoms = FlattenToComparisonUnitAtomList(correlatedSequence, settings);

      if (s_False)
      {
          var dump = new StringBuilder();
          foreach (var atom in listOfComparisonUnitAtoms)
              dump.AppendLine(atom.ToString());
          TestUtil.NotePad(dump.ToString());
      }

      // The id "hack" must wait until every group has been flattened; at this point there is only a plain
      // list of atoms marked equal, inserted, or deleted.
      //   - the table id is hacked in the normal course of events.
      //   - a deleted row needs no hack: the deleted row id will do.
      //   - an inserted row needs no hack: the inserted row id will do as well.
      AssembleAncestorUnidsInOrderToRebuildXmlTreeProperly(listOfComparisonUnitAtoms);

      if (s_False)
      {
          var dump = new StringBuilder();
          foreach (var atom in listOfComparisonUnitAtoms)
              dump.AppendLine(atom.ToStringAncestorUnids());
          TestUtil.NotePad(dump.ToString());
      }

      // Finally, generate the document with revisions in an in-memory copy of wmlResult.
      using (MemoryStream resultStream = new MemoryStream())
      {
          resultStream.Write(wmlResult.DocumentByteArray, 0, wmlResult.DocumentByteArray.Length);
          using (WordprocessingDocument wDocWithRevisions = WordprocessingDocument.Open(resultStream, true))
          {
              var revisedMainPart = wDocWithRevisions.MainDocumentPart;
              var xDoc = revisedMainPart.GetXDocument();

              // Namespace declarations and mc: attributes of the existing root are carried over to the new root.
              var rootNamespaceAttributes = xDoc
                  .Root
                  .Attributes()
                  .Where(a => a.IsNamespaceDeclaration || a.Name.Namespace == MC.mc)
                  .ToList();

              // ======================================
              // Produce new WordprocessingML markup from the list of atoms and wrap it in a fresh document.
              var newBodyChildren = ProduceNewWmlMarkupFromCorrelatedSequence(
                  revisedMainPart,
                  listOfComparisonUnitAtoms,
                  settings);

              var newXDoc = new XDocument(
                  new XElement(W.document,
                      rootNamespaceAttributes,
                      new XElement(W.body, newBodyChildren)));

              MarkContentAsDeletedOrInserted(newXDoc, settings);
              CoalesceAdjacentRunsWithIdenticalFormatting(newXDoc);
              IgnorePt14Namespace(newXDoc.Root);

              ProcessFootnoteEndnote(settings, listOfComparisonUnitAtoms, mainPart1, mainPart2, newXDoc);
              RectifyFootnoteEndnoteIds(mainPart1, mainPart2, revisedMainPart, newXDoc, settings);
              ConjoinDeletedInsertedParagraphMarks(revisedMainPart, newXDoc);
              FixUpRevisionIds(wDocWithRevisions, newXDoc);

              // A little bit of cleanup before the new root replaces the old one.
              MoveLastSectPrToChildOfBody(newXDoc);
              var orderedRoot = (XElement)WordprocessingMLUtil.WmlOrderElementsPerStandard(newXDoc.Root);
              xDoc.Root.ReplaceWith(orderedRoot);

              /**********************************************************************************************/
              // Temporary code to remove sections. Without it, some ITU documents produce validation errors.
              xDoc.Root.Descendants(W.sectPr).Remove();

              // Move w:sectPr from the source document into the newly generated document.
              if (savedSectPr != null)
              {
                  var xd = revisedMainPart.GetXDocument();
                  // Everything but headers/footers.
                  var clonedSectPr = new XElement(W.sectPr,
                      savedSectPr.Attributes(),
                      savedSectPr.Element(W.type),
                      savedSectPr.Element(W.pgSz),
                      savedSectPr.Element(W.pgMar),
                      savedSectPr.Element(W.cols),
                      savedSectPr.Element(W.titlePg));
                  xd.Root.Element(W.body).Add(clonedSectPr);
              }
              /**********************************************************************************************/

              revisedMainPart.PutXDocument();

              FixUpFootnotesEndnotesWithCustomMarkers(wDocWithRevisions);
              FixUpRevMarkIds(wDocWithRevisions);
              FixUpDocPrIds(wDocWithRevisions);
              FixUpShapeIds(wDocWithRevisions);
              FixUpShapeTypeIds(wDocWithRevisions);
              AddFootnotesEndnotesStyles(wDocWithRevisions);
              CopyMissingStylesFromOneDocToAnother(wDoc2, wDocWithRevisions);
              DeleteFootnotePropertiesInSettings(wDocWithRevisions);
          }

          // Write back the cached XDocuments of both source documents.
          foreach (var part in wDoc1.ContentParts())
              part.PutXDocument();
          foreach (var part in wDoc2.ContentParts())
              part.PutXDocument();

          // The stream is read only after the revised document has been disposed.
          return new WmlDocument("Dummy.docx", resultStream.ToArray());
      }
  }
  // Removes the w:footnotePr and w:endnotePr children from the root of the document settings part,
  // when the document has a settings part.
  private static void DeleteFootnotePropertiesInSettings(WordprocessingDocument wDocWithRevisions)
  {
      var settingsPart = wDocWithRevisions.MainDocumentPart.DocumentSettingsPart;
      if (settingsPart == null)
          return;

      var settingsXDoc = settingsPart.GetXDocument();
      settingsXDoc
          .Root
          .Elements()
          .Where(child => child.Name == W.footnotePr || child.Name == W.endnotePr)
          .Remove();
      settingsPart.PutXDocument();
  }
  // A bug in Word prevents display of a document if the delText element does not immediately follow
  // the footnoteReference element, in the same run. Markup such as the following needs to change
  // (run properties and pt14:Unid attributes elided):
  //
  //   <w:del w:author="Open-Xml-PowerTools" w:id="7" w:date="...">
  //     <w:r>
  //       <w:rPr>...</w:rPr>
  //       <w:footnoteReference w:customMarkFollows="1" w:id="1" pt14:Status="Deleted" />
  //     </w:r>
  //   </w:del>
  //   <w:del w:author="Open-Xml-PowerTools" w:id="8" w:date="...">
  //     <w:r>
  //       <w:rPr>...</w:rPr>
  //       <w:delText>1</w:delText>
  //     </w:r>
  //   </w:del>
  //
  // to this:
  //
  //   <w:del w:author="Open-Xml-PowerTools" w:id="7" w:date="...">
  //     <w:r>
  //       <w:rPr>...</w:rPr>
  //       <w:footnoteReference w:customMarkFollows="1" w:id="1" pt14:Status="Deleted" />
  //       <w:delText>1</w:delText>
  //     </w:r>
  //   </w:del>
  //
  // The rewrite is done by FootnoteEndnoteReferenceCleanupTransform over the main document root.
  private static void FixUpFootnotesEndnotesWithCustomMarkers(WordprocessingDocument wDocWithRevisions)
  {
      var mainPart = wDocWithRevisions.MainDocumentPart;
      var mainXDoc = mainPart.GetXDocument();
      var cleanedRoot = (XElement)FootnoteEndnoteReferenceCleanupTransform(mainXDoc.Root);
      mainXDoc.Root.ReplaceWith(cleanedRoot);
      mainPart.PutXDocument();
  }
  // Recursive transform that returns a copy of the tree in which, for every footnote/endnote reference
  // carrying w:customMarkFollows inside a w:del or w:ins run, the mark text held by the runs of the
  // immediately following sibling element is moved into the run that holds the reference
  // (w:delText for w:del, w:t for w:ins).
  //
  // node:    the node to transform.
  // returns: a new XElement for element nodes; the node itself for any other node type.
  private static object FootnoteEndnoteReferenceCleanupTransform(XNode node)
  {
      var element = node as XElement;
      if (element == null)
          return node;

      // Every element is rebuilt from its transformed children. Previously an element that had a
      // w:del / w:ins child but no custom-mark reference was returned as-is, so nothing below it was
      // ever transformed.
      var clone = new XElement(element.Name,
          element.Attributes(),
          element.Nodes().Select(n => FootnoteEndnoteReferenceCleanupTransform(n)));

      // Small optimization to eliminate the work for most elements.
      if (element.Element(W.del) == null && element.Element(W.ins) == null)
          return clone;

      var hasFootnoteEndnoteReferencesThatNeedCleanedUp = element
          .Elements()
          .Where(e => e.Name == W.del || e.Name == W.ins)
          .Elements(W.r)
          .Elements()
          .Where(e => e.Name == W.footnoteReference || e.Name == W.endnoteReference)
          .Attributes(W.customMarkFollows)
          .Any();
      if (!hasFootnoteEndnoteReferencesThatNeedCleanedUp)
          return clone;

      // Materialized up front: the loop below edits the tree that this query walks.
      var footnoteEndnoteReferencesToAdjust = clone
          .Descendants()
          .Where(d => d.Name == W.footnoteReference || d.Name == W.endnoteReference)
          .Where(d => d.Attribute(W.customMarkFollows) != null)
          .ToList();

      foreach (var fnenr in footnoteEndnoteReferencesToAdjust)
      {
          var run = fnenr.Parent;
          var revision = run.Parent; // null when the reference's parent is the (parentless) clone itself
          if (run.Name != W.r || revision == null)
              continue;

          // Deleted references are followed by w:delText, inserted ones by w:t.
          XName markTextName;
          if (revision.Name == W.del)
              markTextName = W.delText;
          else if (revision.Name == W.ins)
              markTextName = W.t;
          else
              continue;

          // Nothing to do when the run already carries its mark text.
          if (run.Element(markTextName) != null)
              continue;

          var afterRevision = revision.ElementsAfterSelf().FirstOrDefault();
          if (afterRevision == null)
              continue;

          var markText = afterRevision.Elements(W.r).Elements(markTextName).ToList();
          if (markText.Count == 0)
              continue;

          run.Add(markText);  // the elements still have a parent, so copies are added to the reference run
          markText.Remove();  // this leaves an empty run, does not matter.
      }
      return clone;
  }
  // Copies into wDocTo every w:style of wDocFrom for which wDocTo has no style with the same
  // (w:type, w:styleId) pair. The w:default attribute is dropped from each copied style.
  //
  // wDocFrom: document whose styles are the source; nothing is done when it has no styles part.
  // wDocTo:   document that receives the missing styles; its styles part is written back afterwards.
  private static void CopyMissingStylesFromOneDocToAnother(WordprocessingDocument wDocFrom, WordprocessingDocument wDocTo)
  {
      var sourceStylesPart = wDocFrom.MainDocumentPart.StyleDefinitionsPart;
      if (sourceStylesPart == null)
          return; // no styles to copy

      var revisionsStylesXDoc = wDocTo.MainDocumentPart.StyleDefinitionsPart.GetXDocument();
      var afterStylesXDoc = sourceStylesPart.GetXDocument();

      // Index the (type, styleId) pairs already present so each source style costs one hash lookup
      // instead of a scan over all target styles. Tuple equality treats missing (null) attributes as equal.
      var knownStyles = new HashSet<Tuple<string, string>>(
          revisionsStylesXDoc
              .Root
              .Elements(W.style)
              .Select(st => Tuple.Create((string)st.Attribute(W.type), (string)st.Attribute(W.styleId))));

      foreach (var style in afterStylesXDoc.Root.Elements(W.style))
      {
          var key = Tuple.Create((string)style.Attribute(W.type), (string)style.Attribute(W.styleId));

          // Add returns false when the pair is already known; this covers styles copied earlier in this loop too.
          if (!knownStyles.Add(key))
              continue;

          var cloned = new XElement(style);
          var defaultAttribute = cloned.Attribute(W._default);
          if (defaultAttribute != null)
              defaultAttribute.Remove();
          revisionsStylesXDoc.Root.Add(cloned);
      }
      wDocTo.MainDocumentPart.StyleDefinitionsPart.PutXDocument();
  }
  // Markup templates for the three styles that go with a note kind.
  // {0} is the note kind as used in style ids ("Footnote" / "Endnote"), {1} is its lower-case form.
  private const string NoteTextStyleTemplate =
      @"<w:style w:type=""paragraph""
                 w:styleId=""{0}Text""
                 xmlns:w=""http://schemas.openxmlformats.org/wordprocessingml/2006/main"">
          <w:name w:val=""{1} text""/>
          <w:basedOn w:val=""Normal""/>
          <w:link w:val=""{0}TextChar""/>
          <w:uiPriority w:val=""99""/>
          <w:semiHidden/>
          <w:unhideWhenUsed/>
          <w:pPr>
            <w:spacing w:after=""0""
                       w:line=""240""
                       w:lineRule=""auto""/>
          </w:pPr>
          <w:rPr>
            <w:sz w:val=""20""/>
            <w:szCs w:val=""20""/>
          </w:rPr>
        </w:style>";

  private const string NoteTextCharStyleTemplate =
      @"<w:style w:type=""character""
                 w:customStyle=""1""
                 w:styleId=""{0}TextChar""
                 xmlns:w=""http://schemas.openxmlformats.org/wordprocessingml/2006/main"">
          <w:name w:val=""{0} Text Char""/>
          <w:basedOn w:val=""DefaultParagraphFont""/>
          <w:link w:val=""{0}Text""/>
          <w:uiPriority w:val=""99""/>
          <w:semiHidden/>
          <w:rPr>
            <w:sz w:val=""20""/>
            <w:szCs w:val=""20""/>
          </w:rPr>
        </w:style>";

  private const string NoteReferenceStyleTemplate =
      @"<w:style w:type=""character""
                 w:styleId=""{0}Reference""
                 xmlns:w=""http://schemas.openxmlformats.org/wordprocessingml/2006/main"">
          <w:name w:val=""{1} reference""/>
          <w:basedOn w:val=""DefaultParagraphFont""/>
          <w:uiPriority w:val=""99""/>
          <w:semiHidden/>
          <w:unhideWhenUsed/>
          <w:rPr>
            <w:vertAlign w:val=""superscript""/>
          </w:rPr>
        </w:style>";

  // Makes sure the styles part defines the text, text-char and reference styles for footnotes and/or
  // endnotes, depending on which kinds of reference occur in the main document. Styles that already
  // exist (matched by w:styleId) are left alone. The styles part is written back when the main document
  // contains either kind of reference.
  private static void AddFootnotesEndnotesStyles(WordprocessingDocument wDocWithRevisions)
  {
      var mainXDoc = wDocWithRevisions.MainDocumentPart.GetXDocument();
      var hasFootnotes = mainXDoc.Descendants(W.footnoteReference).Any();
      var hasEndnotes = mainXDoc.Descendants(W.endnoteReference).Any();
      var styleDefinitionsPart = wDocWithRevisions.MainDocumentPart.StyleDefinitionsPart;
      var sXDoc = styleDefinitionsPart.GetXDocument();

      if (hasFootnotes)
          AddNoteStylesIfMissing(sXDoc, "Footnote");
      if (hasEndnotes)
          AddNoteStylesIfMissing(sXDoc, "Endnote");

      if (hasFootnotes || hasEndnotes)
          styleDefinitionsPart.PutXDocument();
  }

  // Adds the <noteKind>Text, <noteKind>TextChar and <noteKind>Reference styles, in that order, to the
  // root of sXDoc for each of them that has no w:style with that w:styleId yet.
  private static void AddNoteStylesIfMissing(XDocument sXDoc, string noteKind)
  {
      var lowerNoteKind = noteKind.ToLowerInvariant();
      var requiredStyles = new[]
      {
          new KeyValuePair<string, string>(noteKind + "Text", NoteTextStyleTemplate),
          new KeyValuePair<string, string>(noteKind + "TextChar", NoteTextCharStyleTemplate),
          new KeyValuePair<string, string>(noteKind + "Reference", NoteReferenceStyleTemplate),
      };

      foreach (var requiredStyle in requiredStyles)
      {
          var styleId = requiredStyle.Key;
          var alreadyDefined = sXDoc
              .Root
              .Elements(W.style)
              .Any(s => (string)s.Attribute(W.styleId) == styleId);
          if (alreadyDefined)
              continue;

          var markup = string.Format(CultureInfo.InvariantCulture, requiredStyle.Value, noteKind, lowerNoteKind);
          sXDoc.Root.Add(XElement.Parse(markup));
      }
  }
  // Per the algorithm, it is possible to find that the paragraph mark of a single paragraph has been both
  // inserted and deleted. If the algorithm set such marks to equal, it would sometimes equate paragraph
  // marks that should not be equated.
  //
  // Runs ConjoinMultipleParagraphMarks over newXDoc, then over the footnotes part and the endnotes part
  // of mainDocumentPart (each only if present), writing each note part back after it is processed.
  private static void ConjoinDeletedInsertedParagraphMarks(MainDocumentPart mainDocumentPart, XDocument newXDoc)
  {
      ConjoinMultipleParagraphMarks(newXDoc);

      var noteParts = new OpenXmlPart[]
      {
          mainDocumentPart.FootnotesPart,
          mainDocumentPart.EndnotesPart,
      };
      foreach (var notePart in noteParts)
      {
          if (notePart == null)
              continue;

          ConjoinMultipleParagraphMarks(notePart.GetXDocument());
          notePart.PutXDocument();
      }
  }
  // Replaces the root of xDoc with the result of running ConjoinTransform over it.
  private static void ConjoinMultipleParagraphMarks(XDocument xDoc)
  {
      var currentRoot = xDoc.Root;
      currentRoot.ReplaceWith(ConjoinTransform(currentRoot));
  }
  // Recursive copy of the tree in which every w:p that has two or more w:pPr children is rebuilt with a
  // single w:pPr: a copy of the first one, with w:ins / w:del removed from its w:rPr and its
  // PtOpenXml.Status attribute removed. The other children of such a paragraph are carried over as they
  // are. Non-element nodes are returned unchanged.
  private static object ConjoinTransform(XNode node)
  {
      var element = node as XElement;
      if (element == null)
          return node;

      var hasSeveralParagraphProperties =
          element.Name == W.p && element.Elements(W.pPr).Skip(1).Any();

      if (!hasSeveralParagraphProperties)
      {
          return new XElement(element.Name,
              element.Attributes(),
              element.Nodes().Select(child => ConjoinTransform(child)));
      }

      var mergedPPr = new XElement(element.Element(W.pPr));
      mergedPPr
          .Elements(W.rPr)
          .Elements()
          .Where(mark => mark.Name == W.ins || mark.Name == W.del)
          .Remove();
      mergedPPr.Attributes(PtOpenXml.Status).Remove();

      return new XElement(W.p,
          element.Attributes(),
          mergedPPr,
          element.Elements().Where(child => child.Name != W.pPr));
  }
  // Replaces the root of newXDoc with the result of MarkContentAsDeletedOrInsertedTransform.
  private static void MarkContentAsDeletedOrInserted(XDocument newXDoc, WmlComparerSettings settings)
  {
      var currentRoot = newXDoc.Root;
      currentRoot.ReplaceWith(MarkContentAsDeletedOrInsertedTransform(currentRoot, settings));
  }
  // Recursive copy of the tree that turns PtOpenXml.Status markers into revision markup:
  //   - a w:r whose content is marked "Deleted" / "Inserted" is wrapped in w:del / w:ins;
  //   - a w:pPr marked "Deleted" / "Inserted" gets an empty w:del / w:ins added to its w:rPr.
  // Each revision element carries the author and date from settings and the next value of s_MaxId.
  // Throws OpenXmlPowerToolsException when a run mixes statuses or a w:pPr has an unknown status.
  private static object MarkContentAsDeletedOrInsertedTransform(XNode node, WmlComparerSettings settings)
  {
      var element = node as XElement;
      if (element == null)
          return node;

      if (element.Name == W.r)
      {
          // Distinct status values found on the run's content (text box content is not looked into).
          var statuses = element
              .DescendantsTrimmed(W.txbxContent)
              .Where(d => d.Name == W.t || d.Name == W.delText || AllowableRunChildren.Contains(d.Name))
              .Attributes(PtOpenXml.Status)
              .Select(a => (string)a)
              .Distinct()
              .ToList();

          if (statuses.Count > 1)
              throw new OpenXmlPowerToolsException("Internal error - have both deleted and inserted text elements in the same run.");

          if (statuses.Count == 0)
          {
              return new XElement(W.r,
                  element.Attributes(),
                  element.Nodes().Select(n => MarkContentAsDeletedOrInsertedTransform(n, settings)));
          }

          XName wrapperName = null;
          if (statuses[0] == "Deleted")
              wrapperName = W.del;
          else if (statuses[0] == "Inserted")
              wrapperName = W.ins;

          if (wrapperName != null)
          {
              // The id is taken before the run's children are transformed, as the arguments are
              // evaluated left to right.
              return new XElement(wrapperName,
                  new XAttribute(W.author, settings.AuthorForRevisions),
                  new XAttribute(W.id, s_MaxId++),
                  new XAttribute(W.date, settings.DateTimeForRevisions),
                  new XElement(W.r,
                      element.Attributes(),
                      element.Nodes().Select(n => MarkContentAsDeletedOrInsertedTransform(n, settings))));
          }
          // Any other single status value: the run is copied by the generic case at the bottom.
      }

      if (element.Name == W.pPr)
      {
          var status = (string)element.Attribute(PtOpenXml.Status);
          if (status == null)
          {
              return new XElement(W.pPr,
                  element.Attributes(),
                  element.Nodes().Select(n => MarkContentAsDeletedOrInsertedTransform(n, settings)));
          }

          if (status != "Deleted" && status != "Inserted")
              throw new OpenXmlPowerToolsException("Internal error");

          // Work on a deep copy; the revision mark goes into the paragraph mark's run properties.
          var markedPPr = new XElement(element);
          var existingRPr = markedPPr.Element(W.rPr);
          var rPr = existingRPr ?? new XElement(W.rPr);

          rPr.Add(new XElement(status == "Deleted" ? W.del : W.ins,
              new XAttribute(W.author, settings.AuthorForRevisions),
              new XAttribute(W.id, s_MaxId++),
              new XAttribute(W.date, settings.DateTimeForRevisions)));

          // An existing w:rPr was edited in place; a new one becomes the first child of w:pPr.
          if (existingRPr == null)
              markedPPr.AddFirst(rPr);

          return markedPPr;
      }

      return new XElement(element.Name,
          element.Attributes(),
          element.Nodes().Select(n => MarkContentAsDeletedOrInsertedTransform(n, settings)));
  }
  // Renumbers the w:id of every w:ins / w:del element 1, 2, 3, ... going through newXDoc first, then the
  // footnotes part, then the endnotes part of wDocWithRevisions. The two note parts are written back
  // when present; newXDoc is only modified in memory.
  //
  // A revision element that has no w:id attribute gets one, instead of failing with a
  // NullReferenceException.
  private static void FixUpRevisionIds(WordprocessingDocument wDocWithRevisions, XDocument newXDoc)
  {
      var footnotesPart = wDocWithRevisions.MainDocumentPart.FootnotesPart;
      var endnotesPart = wDocWithRevisions.MainDocumentPart.EndnotesPart;

      IEnumerable<XElement> footnoteRevisions = Enumerable.Empty<XElement>();
      if (footnotesPart != null)
      {
          footnoteRevisions = footnotesPart
              .GetXDocument()
              .Descendants()
              .Where(d => d.Name == W.ins || d.Name == W.del);
      }

      IEnumerable<XElement> endnoteRevisions = Enumerable.Empty<XElement>();
      if (endnotesPart != null)
      {
          endnoteRevisions = endnotesPart
              .GetXDocument()
              .Descendants()
              .Where(d => d.Name == W.ins || d.Name == W.del);
      }

      var mainRevisions = newXDoc
          .Descendants()
          .Where(d => d.Name == W.ins || d.Name == W.del);

      var allRevisions = mainRevisions
          .Concat(footnoteRevisions)
          .Concat(endnoteRevisions);

      var nextId = 1;
      foreach (var revision in allRevisions)
      {
          // SetAttributeValue updates an existing w:id and creates a missing one.
          revision.SetAttributeValue(W.id, nextId.ToString(CultureInfo.InvariantCulture));
          nextId++;
      }

      if (footnotesPart != null)
          footnotesPart.PutXDocument();
      if (endnotesPart != null)
          endnotesPart.PutXDocument();
  }
  // Makes sure the root element declares the "pt14" namespace prefix and lists "pt14" in its
  // mc:Ignorable attribute, adding the declaration and/or the attribute entry when missing.
  private static void IgnorePt14Namespace(XElement root)
  {
      if (root.Attribute(XNamespace.Xmlns + "pt14") == null)
      {
          root.Add(new XAttribute(XNamespace.Xmlns + "pt14", PtOpenXml.pt.NamespaceName));
      }

      var ignorableAttribute = root.Attribute(MC.Ignorable);
      if (ignorableAttribute == null)
      {
          root.Add(new XAttribute(MC.Ignorable, "pt14"));
          return;
      }

      // The attribute is a whitespace-separated list of prefixes; split on any whitespace (a null
      // separator array) so that tab- or newline-separated lists are recognised as well.
      var ignorable = ignorableAttribute.Value;
      var prefixes = ignorable.Split((char[])null, StringSplitOptions.RemoveEmptyEntries);
      if (prefixes.Contains("pt14"))
          return;

      // Avoid a leading space when the existing value is empty or blank.
      ignorableAttribute.Value = prefixes.Length == 0 ? "pt14" : ignorable + " pt14";
  }
  // For each w:p yielded by DescendantsTrimmed(W.txbxContent) on the root, replaces the paragraph's
  // nodes with those of the paragraph returned by
  // WordprocessingMLUtil.CoalesceAdjacentRunsWithIdenticalFormatting.
  // The query is deliberately left lazy, exactly as enumerated while paragraphs are being rewritten.
  private static void CoalesceAdjacentRunsWithIdenticalFormatting(XDocument xDoc)
  {
      var paragraphs = xDoc
          .Root
          .DescendantsTrimmed(W.txbxContent)
          .Where(candidate => candidate.Name == W.p);

      foreach (var paragraph in paragraphs)
      {
          var coalesced = WordprocessingMLUtil.CoalesceAdjacentRunsWithIdenticalFormatting(paragraph);
          paragraph.ReplaceNodes(coalesced.Nodes());
      }
  }
  2057. private static void ProcessFootnoteEndnote(
  2058. WmlComparerSettings settings,
  2059. List<ComparisonUnitAtom> listOfComparisonUnitAtoms,
  2060. MainDocumentPart mainDocumentPartBefore,
  2061. MainDocumentPart mainDocumentPartAfter,
  2062. XDocument mainDocumentXDoc)
  2063. {
  2064. var footnotesPartBefore = mainDocumentPartBefore.FootnotesPart;
  2065. var endnotesPartBefore = mainDocumentPartBefore.EndnotesPart;
  2066. var footnotesPartAfter = mainDocumentPartAfter.FootnotesPart;
  2067. var endnotesPartAfter = mainDocumentPartAfter.EndnotesPart;
  2068. XDocument footnotesPartBeforeXDoc = null;
  2069. if (footnotesPartBefore != null)
  2070. footnotesPartBeforeXDoc = footnotesPartBefore.GetXDocument();
  2071. XDocument footnotesPartAfterXDoc = null;
  2072. if (footnotesPartAfter != null)
  2073. footnotesPartAfterXDoc = footnotesPartAfter.GetXDocument();
  2074. XDocument endnotesPartBeforeXDoc = null;
  2075. if (endnotesPartBefore != null)
  2076. endnotesPartBeforeXDoc = endnotesPartBefore.GetXDocument();
  2077. XDocument endnotesPartAfterXDoc = null;
  2078. if (endnotesPartAfter != null)
  2079. endnotesPartAfterXDoc = endnotesPartAfter.GetXDocument();
  2080. var possiblyModifiedFootnotesEndNotes = listOfComparisonUnitAtoms
  2081. .Where(cua =>
  2082. cua.ContentElement.Name == W.footnoteReference ||
  2083. cua.ContentElement.Name == W.endnoteReference)
  2084. .ToList();
  2085. foreach (var fn in possiblyModifiedFootnotesEndNotes)
  2086. {
  2087. string beforeId = null;
  2088. if (fn.ContentElementBefore != null)
  2089. beforeId = (string)fn.ContentElementBefore.Attribute(W.id);
  2090. var afterId = (string)fn.ContentElement.Attribute(W.id);
  2091. XElement footnoteEndnoteBefore = null;
  2092. XElement footnoteEndnoteAfter = null;
  2093. OpenXmlPart partToUseBefore = null;
  2094. OpenXmlPart partToUseAfter = null;
  2095. XDocument partToUseBeforeXDoc = null;
  2096. XDocument partToUseAfterXDoc = null;
  2097. if (fn.CorrelationStatus == CorrelationStatus.Equal)
  2098. {
  2099. if (fn.ContentElement.Name == W.footnoteReference)
  2100. {
  2101. footnoteEndnoteBefore = footnotesPartBeforeXDoc
  2102. .Root
  2103. .Elements()
  2104. .FirstOrDefault(fnn => (string)fnn.Attribute(W.id) == beforeId);
  2105. footnoteEndnoteAfter = footnotesPartAfterXDoc
  2106. .Root
  2107. .Elements()
  2108. .FirstOrDefault(fnn => (string)fnn.Attribute(W.id) == afterId);
  2109. partToUseBefore = footnotesPartBefore;
  2110. partToUseAfter = footnotesPartAfter;
  2111. partToUseBeforeXDoc = footnotesPartBeforeXDoc;
  2112. partToUseAfterXDoc = footnotesPartAfterXDoc;
  2113. }
  2114. else
  2115. {
  2116. footnoteEndnoteBefore = endnotesPartBeforeXDoc
  2117. .Root
  2118. .Elements()
  2119. .FirstOrDefault(fnn => (string)fnn.Attribute(W.id) == beforeId);
  2120. footnoteEndnoteAfter = endnotesPartAfterXDoc
  2121. .Root
  2122. .Elements()
  2123. .FirstOrDefault(fnn => (string)fnn.Attribute(W.id) == afterId);
  2124. partToUseBefore = endnotesPartBefore;
  2125. partToUseAfter = endnotesPartAfter;
  2126. partToUseBeforeXDoc = endnotesPartBeforeXDoc;
  2127. partToUseAfterXDoc = endnotesPartAfterXDoc;
  2128. }
  2129. AddSha1HashToBlockLevelContent(partToUseBefore, footnoteEndnoteBefore, settings);
  2130. AddSha1HashToBlockLevelContent(partToUseAfter, footnoteEndnoteAfter, settings);
  2131. var fncal1 = WmlComparer.CreateComparisonUnitAtomList(partToUseBefore, footnoteEndnoteBefore, settings);
  2132. var fncus1 = GetComparisonUnitList(fncal1, settings);
  2133. var fncal2 = WmlComparer.CreateComparisonUnitAtomList(partToUseAfter, footnoteEndnoteAfter, settings);
  2134. var fncus2 = GetComparisonUnitList(fncal2, settings);
  2135. if (!(fncus1.Length == 0 && fncus2.Length == 0))
  2136. {
  2137. var fnCorrelatedSequence = Lcs(fncus1, fncus2, settings);
  2138. if (s_False)
  2139. {
  2140. var sb = new StringBuilder();
  2141. foreach (var item in fnCorrelatedSequence)
  2142. sb.Append(item.ToString()).Append(Environment.NewLine);
  2143. var sbs = sb.ToString();
  2144. TestUtil.NotePad(sbs);
  2145. }
  2146. // for any deleted or inserted rows, we go into the w:trPr properties, and add the appropriate w:ins or w:del element, and therefore
  2147. // when generating the document, the appropriate row will be marked as deleted or inserted.
  2148. MarkRowsAsDeletedOrInserted(settings, fnCorrelatedSequence);
  2149. // the following gets a flattened list of ComparisonUnitAtoms, with status indicated in each ComparisonUnitAtom: Deleted, Inserted, or Equal
  2150. var fnListOfComparisonUnitAtoms = FlattenToComparisonUnitAtomList(fnCorrelatedSequence, settings);
  2151. if (s_False)
  2152. {
  2153. var sb = new StringBuilder();
  2154. foreach (var item in fnListOfComparisonUnitAtoms)
  2155. sb.Append(item.ToString() + Environment.NewLine);
  2156. var sbs = sb.ToString();
  2157. TestUtil.NotePad(sbs);
  2158. }
  2159. // hack = set the guid ID of the table, row, or cell from the 'before' document to be equal to the 'after' document.
  2160. // note - we don't want to do the hack until after flattening all of the groups. At the end of the flattening, we should simply
  2161. // have a list of ComparisonUnitAtoms, appropriately marked as equal, inserted, or deleted.
  2162. // the table id will be hacked in the normal course of events.
  2163. // in the case where a row is deleted, not necessary to hack - the deleted row ID will do.
  2164. // in the case where a row is inserted, not necessary to hack - the inserted row ID will do as well.
  2165. AssembleAncestorUnidsInOrderToRebuildXmlTreeProperly(fnListOfComparisonUnitAtoms);
  2166. var newFootnoteEndnoteChildren = ProduceNewWmlMarkupFromCorrelatedSequence(partToUseAfter, fnListOfComparisonUnitAtoms, settings);
  2167. var tempElement = new XElement(W.body, newFootnoteEndnoteChildren);
  2168. var hasFootnoteReference = tempElement.Descendants(W.r).Any(r =>
  2169. {
  2170. var b = false;
  2171. if ((string)r.Elements(W.rPr).Elements(W.rStyle).Attributes(W.val).FirstOrDefault() == "FootnoteReference")
  2172. b = true;
  2173. if (r.Descendants(W.footnoteRef).Any())
  2174. b = true;
  2175. return b;
  2176. });
  2177. if (!hasFootnoteReference)
  2178. {
  2179. var firstPara = tempElement.Descendants(W.p).FirstOrDefault();
  2180. if (firstPara != null)
  2181. {
  2182. var firstRun = firstPara.Element(W.r);
  2183. if (firstRun != null)
  2184. {
  2185. if (fn.ContentElement.Name == W.footnoteReference)
  2186. firstRun.AddBeforeSelf(
  2187. new XElement(W.r,
  2188. new XElement(W.rPr,
  2189. new XElement(W.rStyle,
  2190. new XAttribute(W.val, "FootnoteReference"))),
  2191. new XElement(W.footnoteRef)));
  2192. else
  2193. firstRun.AddBeforeSelf(
  2194. new XElement(W.r,
  2195. new XElement(W.rPr,
  2196. new XElement(W.rStyle,
  2197. new XAttribute(W.val, "EndnoteReference"))),
  2198. new XElement(W.endnoteRef)));
  2199. }
  2200. }
  2201. }
  2202. XElement newTempElement = (XElement)WordprocessingMLUtil.WmlOrderElementsPerStandard(tempElement);
  2203. var newContentElement = newTempElement.Descendants().FirstOrDefault(d => d.Name == W.footnote || d.Name == W.endnote);
  2204. if (newContentElement == null)
  2205. throw new OpenXmlPowerToolsException("Internal error");
  2206. footnoteEndnoteAfter.ReplaceNodes(newContentElement.Nodes());
  2207. }
  2208. }
  2209. else if (fn.CorrelationStatus == CorrelationStatus.Inserted)
  2210. {
  2211. if (fn.ContentElement.Name == W.footnoteReference)
  2212. {
  2213. footnoteEndnoteAfter = footnotesPartAfterXDoc
  2214. .Root
  2215. .Elements()
  2216. .FirstOrDefault(fnn => (string)fnn.Attribute(W.id) == afterId);
  2217. partToUseAfter = footnotesPartAfter;
  2218. partToUseAfterXDoc = footnotesPartAfterXDoc;
  2219. }
  2220. else
  2221. {
  2222. footnoteEndnoteAfter = endnotesPartAfterXDoc
  2223. .Root
  2224. .Elements()
  2225. .FirstOrDefault(fnn => (string)fnn.Attribute(W.id) == afterId);
  2226. partToUseAfter = endnotesPartAfter;
  2227. partToUseAfterXDoc = endnotesPartAfterXDoc;
  2228. }
  2229. AddSha1HashToBlockLevelContent(partToUseAfter, footnoteEndnoteAfter, settings);
  2230. var fncal2 = WmlComparer.CreateComparisonUnitAtomList(partToUseAfter, footnoteEndnoteAfter, settings);
  2231. var fncus2 = GetComparisonUnitList(fncal2, settings);
  2232. var insertedCorrSequ = new List<CorrelatedSequence>() {
  2233. new CorrelatedSequence()
  2234. {
  2235. ComparisonUnitArray1 = null,
  2236. ComparisonUnitArray2 = fncus2,
  2237. CorrelationStatus = CorrelationStatus.Inserted,
  2238. },
  2239. };
  2240. if (s_False)
  2241. {
  2242. var sb = new StringBuilder();
  2243. foreach (var item in insertedCorrSequ)
  2244. sb.Append(item.ToString()).Append(Environment.NewLine);
  2245. var sbs = sb.ToString();
  2246. TestUtil.NotePad(sbs);
  2247. }
  2248. MarkRowsAsDeletedOrInserted(settings, insertedCorrSequ);
  2249. var fnListOfComparisonUnitAtoms = FlattenToComparisonUnitAtomList(insertedCorrSequ, settings);
  2250. AssembleAncestorUnidsInOrderToRebuildXmlTreeProperly(fnListOfComparisonUnitAtoms);
  2251. var newFootnoteEndnoteChildren = ProduceNewWmlMarkupFromCorrelatedSequence(partToUseAfter,
  2252. fnListOfComparisonUnitAtoms, settings);
  2253. var tempElement = new XElement(W.body, newFootnoteEndnoteChildren);
  2254. var hasFootnoteReference = tempElement.Descendants(W.r).Any(r =>
  2255. {
  2256. var b = false;
  2257. if ((string)r.Elements(W.rPr).Elements(W.rStyle).Attributes(W.val).FirstOrDefault() == "FootnoteReference")
  2258. b = true;
  2259. if (r.Descendants(W.footnoteRef).Any())
  2260. b = true;
  2261. return b;
  2262. });
  2263. if (!hasFootnoteReference)
  2264. {
  2265. var firstPara = tempElement.Descendants(W.p).FirstOrDefault();
  2266. if (firstPara != null)
  2267. {
  2268. var firstRun = firstPara.Descendants(W.r).FirstOrDefault();
  2269. if (firstRun != null)
  2270. {
  2271. if (fn.ContentElement.Name == W.footnoteReference)
  2272. firstRun.AddBeforeSelf(
  2273. new XElement(W.r,
  2274. new XElement(W.rPr,
  2275. new XElement(W.rStyle,
  2276. new XAttribute(W.val, "FootnoteReference"))),
  2277. new XElement(W.footnoteRef)));
  2278. else
  2279. firstRun.AddBeforeSelf(
  2280. new XElement(W.r,
  2281. new XElement(W.rPr,
  2282. new XElement(W.rStyle,
  2283. new XAttribute(W.val, "EndnoteReference"))),
  2284. new XElement(W.endnoteRef)));
  2285. }
  2286. }
  2287. }
  2288. XElement newTempElement = (XElement)WordprocessingMLUtil.WmlOrderElementsPerStandard(tempElement);
  2289. var newContentElement = newTempElement
  2290. .Descendants()
  2291. .FirstOrDefault(d => d.Name == W.footnote || d.Name == W.endnote);
  2292. if (newContentElement == null)
  2293. throw new OpenXmlPowerToolsException("Internal error");
  2294. footnoteEndnoteAfter.ReplaceNodes(newContentElement.Nodes());
  2295. }
  2296. else if (fn.CorrelationStatus == CorrelationStatus.Deleted)
  2297. {
  2298. if (fn.ContentElement.Name == W.footnoteReference)
  2299. {
  2300. footnoteEndnoteBefore = footnotesPartBeforeXDoc
  2301. .Root
  2302. .Elements()
  2303. .FirstOrDefault(fnn => (string)fnn.Attribute(W.id) == afterId);
  2304. partToUseAfter = footnotesPartAfter;
  2305. partToUseAfterXDoc = footnotesPartAfterXDoc;
  2306. }
  2307. else
  2308. {
  2309. footnoteEndnoteBefore = endnotesPartBeforeXDoc
  2310. .Root
  2311. .Elements()
  2312. .FirstOrDefault(fnn => (string)fnn.Attribute(W.id) == afterId);
  2313. partToUseBefore = endnotesPartBefore;
  2314. partToUseBeforeXDoc = endnotesPartBeforeXDoc;
  2315. }
  2316. AddSha1HashToBlockLevelContent(partToUseBefore, footnoteEndnoteBefore, settings);
  2317. var fncal2 = WmlComparer.CreateComparisonUnitAtomList(partToUseBefore, footnoteEndnoteBefore, settings);
  2318. var fncus2 = GetComparisonUnitList(fncal2, settings);
  2319. var deletedCorrSequ = new List<CorrelatedSequence>() {
  2320. new CorrelatedSequence()
  2321. {
  2322. ComparisonUnitArray1 = fncus2,
  2323. ComparisonUnitArray2 = null,
  2324. CorrelationStatus = CorrelationStatus.Deleted,
  2325. },
  2326. };
  2327. if (s_False)
  2328. {
  2329. var sb = new StringBuilder();
  2330. foreach (var item in deletedCorrSequ)
  2331. sb.Append(item.ToString()).Append(Environment.NewLine);
  2332. var sbs = sb.ToString();
  2333. TestUtil.NotePad(sbs);
  2334. }
  2335. MarkRowsAsDeletedOrInserted(settings, deletedCorrSequ);
  2336. var fnListOfComparisonUnitAtoms = FlattenToComparisonUnitAtomList(deletedCorrSequ, settings);
  2337. if (fnListOfComparisonUnitAtoms.Any())
  2338. {
  2339. AssembleAncestorUnidsInOrderToRebuildXmlTreeProperly(fnListOfComparisonUnitAtoms);
  2340. var newFootnoteEndnoteChildren = ProduceNewWmlMarkupFromCorrelatedSequence(partToUseBefore,
  2341. fnListOfComparisonUnitAtoms, settings);
  2342. var tempElement = new XElement(W.body, newFootnoteEndnoteChildren);
  2343. var hasFootnoteReference = tempElement.Descendants(W.r).Any(r =>
  2344. {
  2345. var b = false;
  2346. if ((string)r.Elements(W.rPr).Elements(W.rStyle).Attributes(W.val).FirstOrDefault() == "FootnoteReference")
  2347. b = true;
  2348. if (r.Descendants(W.footnoteRef).Any())
  2349. b = true;
  2350. return b;
  2351. });
  2352. if (!hasFootnoteReference)
  2353. {
  2354. var firstPara = tempElement.Descendants(W.p).FirstOrDefault();
  2355. if (firstPara != null)
  2356. {
  2357. var firstRun = firstPara.Descendants(W.r).FirstOrDefault();
  2358. if (firstRun != null)
  2359. {
  2360. if (fn.ContentElement.Name == W.footnoteReference)
  2361. firstRun.AddBeforeSelf(
  2362. new XElement(W.r,
  2363. new XElement(W.rPr,
  2364. new XElement(W.rStyle,
  2365. new XAttribute(W.val, "FootnoteReference"))),
  2366. new XElement(W.footnoteRef)));
  2367. else
  2368. firstRun.AddBeforeSelf(
  2369. new XElement(W.r,
  2370. new XElement(W.rPr,
  2371. new XElement(W.rStyle,
  2372. new XAttribute(W.val, "EndnoteReference"))),
  2373. new XElement(W.endnoteRef)));
  2374. }
  2375. }
  2376. }
  2377. XElement newTempElement = (XElement)WordprocessingMLUtil.WmlOrderElementsPerStandard(tempElement);
  2378. var newContentElement = newTempElement.Descendants().FirstOrDefault(d => d.Name == W.footnote || d.Name == W.endnote);
  2379. if (newContentElement == null)
  2380. throw new OpenXmlPowerToolsException("Internal error");
  2381. footnoteEndnoteBefore.ReplaceNodes(newContentElement.Nodes());
  2382. }
  2383. }
  2384. else
  2385. throw new OpenXmlPowerToolsException("Internal error");
  2386. }
  2387. }
  /// <summary>
  /// Renumbers the footnote and endnote references of the result document and rebuilds the
  /// footnotes / endnotes parts of the document with revisions to match.
  /// </summary>
  /// <remarks>
  /// Steps, in order:
  /// <para>1. Every w:footnote / w:endnote whose w:id is neither "-1" nor "0" is removed from the parts of the
  /// document with revisions (NOTE(review): presumably "-1" and "0" are the separator notes - confirm).</para>
  /// <para>2. Each w:footnoteReference (then each w:endnoteReference) in <paramref name="mainDocumentXDoc"/> is
  /// given a new sequential id (1, 2, 3, ... in document order). The note it referred to is looked up by its old id,
  /// first in the "after" document and then in the "before" document, cloned, given the new id, and appended to
  /// the corresponding part of the document with revisions.</para>
  /// <para>3. Each rebuilt part is post-processed (revision marking, run coalescing, element ordering, pt14
  /// namespace handling) and written back with <c>PutXDocument</c>.</para>
  /// </remarks>
  /// <param name="mainDocumentPartBefore">Main part of the original document; its notes are the fallback lookup source.</param>
  /// <param name="mainDocumentPartAfter">Main part of the modified document; its notes are the primary lookup source.</param>
  /// <param name="mainDocumentPartWithRevisions">Main part of the result document whose notes parts are rebuilt.</param>
  /// <param name="mainDocumentXDoc">Main document XML whose note references are renumbered in place.</param>
  /// <param name="settings">Comparer settings, forwarded to <c>MarkContentAsDeletedOrInserted</c>.</param>
  /// <exception cref="OpenXmlPowerToolsException">
  /// A note reference has no w:id, the referenced note exists in neither source document, or the document with
  /// revisions has no part to receive the note.
  /// </exception>
  private static void RectifyFootnoteEndnoteIds(
      MainDocumentPart mainDocumentPartBefore,
      MainDocumentPart mainDocumentPartAfter,
      MainDocumentPart mainDocumentPartWithRevisions,
      XDocument mainDocumentXDoc,
      WmlComparerSettings settings)
  {
      var footnotesPartBefore = mainDocumentPartBefore.FootnotesPart;
      var endnotesPartBefore = mainDocumentPartBefore.EndnotesPart;
      var footnotesPartAfter = mainDocumentPartAfter.FootnotesPart;
      var endnotesPartAfter = mainDocumentPartAfter.EndnotesPart;
      var footnotesPartWithRevisions = mainDocumentPartWithRevisions.FootnotesPart;
      var endnotesPartWithRevisions = mainDocumentPartWithRevisions.EndnotesPart;

      // Any of the six parts may legitimately be absent; the corresponding XDocument is then null.
      XDocument footnotesPartBeforeXDoc = footnotesPartBefore != null ? footnotesPartBefore.GetXDocument() : null;
      XDocument footnotesPartAfterXDoc = footnotesPartAfter != null ? footnotesPartAfter.GetXDocument() : null;
      XDocument footnotesPartWithRevisionsXDoc = null;
      if (footnotesPartWithRevisions != null)
      {
          footnotesPartWithRevisionsXDoc = footnotesPartWithRevisions.GetXDocument();
          RemoveNumberedFootnotesEndnotes(footnotesPartWithRevisionsXDoc, W.footnote);
      }

      XDocument endnotesPartBeforeXDoc = endnotesPartBefore != null ? endnotesPartBefore.GetXDocument() : null;
      XDocument endnotesPartAfterXDoc = endnotesPartAfter != null ? endnotesPartAfter.GetXDocument() : null;
      XDocument endnotesPartWithRevisionsXDoc = null;
      if (endnotesPartWithRevisions != null)
      {
          endnotesPartWithRevisionsXDoc = endnotesPartWithRevisions.GetXDocument();
          RemoveNumberedFootnotesEndnotes(endnotesPartWithRevisionsXDoc, W.endnote);
      }

      RenumberFootnoteEndnoteReferences(
          mainDocumentXDoc,
          W.footnoteReference,
          footnotesPartAfterXDoc,
          footnotesPartBeforeXDoc,
          footnotesPartWithRevisionsXDoc);
      RenumberFootnoteEndnoteReferences(
          mainDocumentXDoc,
          W.endnoteReference,
          endnotesPartAfterXDoc,
          endnotesPartBeforeXDoc,
          endnotesPartWithRevisionsXDoc);

      FinalizeFootnoteEndnotePartWithRevisions(footnotesPartWithRevisions, footnotesPartWithRevisionsXDoc, settings);
      FinalizeFootnoteEndnotePartWithRevisions(endnotesPartWithRevisions, endnotesPartWithRevisionsXDoc, settings);
  }

  // Removes every note element named noteName from the part root, except those whose w:id is "-1" or "0".
  private static void RemoveNumberedFootnotesEndnotes(XDocument notesXDoc, XName noteName)
  {
      notesXDoc
          .Root
          .Elements(noteName)
          .Where(e => (string)e.Attribute(W.id) != "-1" && (string)e.Attribute(W.id) != "0")
          .Remove();
  }

  // Returns the child of the part root whose w:id equals id, or null when the part is absent or has no such note.
  private static XElement FindFootnoteEndnoteById(XDocument notesXDoc, string id)
  {
      if (notesXDoc == null || notesXDoc.Root == null)
          return null;
      return notesXDoc
          .Root
          .Elements()
          .FirstOrDefault(e => (string)e.Attribute(W.id) == id);
  }

  // Assigns sequential ids (starting at 1) to every reference named referenceName in the main document and
  // appends a renumbered clone of each referenced note to the notes part of the document with revisions.
  private static void RenumberFootnoteEndnoteReferences(
      XDocument mainDocumentXDoc,
      XName referenceName,
      XDocument notesAfterXDoc,
      XDocument notesBeforeXDoc,
      XDocument notesWithRevisionsXDoc)
  {
      var nextId = 1;
      foreach (var reference in mainDocumentXDoc.Descendants(referenceName))
      {
          var idAttribute = reference.Attribute(W.id);
          if (idAttribute == null)
              throw new OpenXmlPowerToolsException("Internal error: footnote / endnote reference has no w:id attribute");

          var oldId = idAttribute.Value;
          var newId = nextId.ToString(CultureInfo.InvariantCulture);
          nextId++;
          idAttribute.Value = newId;

          // Prefer the note from the "after" document; a reference that only exists in the "before" document
          // (e.g. a deleted note) is resolved there instead.
          var note = FindFootnoteEndnoteById(notesAfterXDoc, oldId)
              ?? FindFootnoteEndnoteById(notesBeforeXDoc, oldId);
          if (note == null)
              throw new OpenXmlPowerToolsException("Internal error");
          if (notesWithRevisionsXDoc == null || notesWithRevisionsXDoc.Root == null)
              throw new OpenXmlPowerToolsException("Internal error: document with revisions has no part for " + referenceName.LocalName);

          var cloned = new XElement(note);
          cloned.Attribute(W.id).Value = newId;
          notesWithRevisionsXDoc.Root.Add(cloned);
      }
  }

  // Post-processes a rebuilt notes part and writes it back; does nothing when the part does not exist.
  private static void FinalizeFootnoteEndnotePartWithRevisions(
      OpenXmlPart notesPartWithRevisions,
      XDocument notesWithRevisionsXDoc,
      WmlComparerSettings settings)
  {
      if (notesWithRevisionsXDoc == null)
          return;

      MarkContentAsDeletedOrInserted(notesWithRevisionsXDoc, settings);
      CoalesceAdjacentRunsWithIdenticalFormatting(notesWithRevisionsXDoc);
      XElement newXDocRoot = (XElement)WordprocessingMLUtil.WmlOrderElementsPerStandard(notesWithRevisionsXDoc.Root);
      notesWithRevisionsXDoc.Root.ReplaceWith(newXDocRoot);
      IgnorePt14Namespace(notesWithRevisionsXDoc.Root);
      notesPartWithRevisions.PutXDocument();
  }
  /// <summary>
  /// Computes the <c>AncestorUnids</c> array of every atom in <paramref name="comparisonUnitAtomList"/>, so that the
  /// flat list of atoms can afterwards be put back together into a correctly nested XML tree.
  /// </summary>
  /// <remarks>
  /// <para>Step 1 - for every paragraph-properties atom (w:pPr) that is either correlated as Equal or located inside
  /// a text box (w:txbxContent, regardless of status), the Unid attributes of the "before" ancestors are copied onto
  /// the matching "after" ancestors. This happens only when both ancestor chains have the same length and both
  /// elements already carry a Unid. Text boxes are included unconditionally because, per the original author's
  /// note, this module does not support inserting or deleting text boxes themselves.</para>
  /// <para>Step 2 - the list is walked backwards. Each w:pPr outside a text box establishes the ancestor unids of
  /// its paragraph; every atom met after it (that is, earlier in document order) receives those unids followed by
  /// the unids of its own deeper ancestors. In this pass text-box content is treated as content of the containing
  /// paragraph. When the outermost ancestor is a w:footnote or w:endnote, its Unid is forced into slot 0.</para>
  /// <para>Step 3 - the list is walked backwards a second time to give text-box content its own ancestor unids,
  /// taken from the w:pPr atoms that sit inside w:txbxContent.</para>
  /// <para>The unids are stored per atom rather than on the shared ancestor elements because two atoms that
  /// started out in the same paragraph may have to end up in different paragraphs of the reconstructed
  /// document.</para>
  /// <para>Assumptions stated by the original author: a text box is never nested inside a text box, and a
  /// footnote / endnote never contains a text box.</para>
  /// </remarks>
  /// <param name="comparisonUnitAtomList">Flattened atoms in document order; an empty list is a no-op.</param>
  /// <exception cref="OpenXmlPowerToolsException">
  /// An ancestor of a paragraph mark has no Unid, or content is encountered before any paragraph mark when walking
  /// backwards.
  /// </exception>
  private static void AssembleAncestorUnidsInOrderToRebuildXmlTreeProperly(List<ComparisonUnitAtom> comparisonUnitAtomList)
  {
      // Nothing to assemble; this also keeps First() below from throwing on an empty list.
      if (comparisonUnitAtomList.Count == 0)
          return;

      if (s_False)
      {
          var sb = new StringBuilder();
          foreach (var item in comparisonUnitAtomList)
              sb.Append(item.ToString()).Append(Environment.NewLine);
          var sbs = sb.ToString();
          TestUtil.NotePad(sbs);
      }

      // Step 1: make the "after" ancestors of equal (or text box) paragraph marks share the "before" unids.
      foreach (var cua in comparisonUnitAtomList)
      {
          if (cua.ContentElement.Name != W.pPr)
              continue;

          var inTextBox = cua.AncestorElements.Any(ae => ae.Name == W.txbxContent);
          if (!inTextBox && cua.CorrelationStatus != CorrelationStatus.Equal)
              continue;

          var cuaBefore = cua.ComparisonUnitAtomBefore;
          if (cuaBefore == null)
              continue;

          var ancestorsAfter = cua.AncestorElements;
          var ancestorsBefore = cuaBefore.AncestorElements;
          if (ancestorsAfter.Length != ancestorsBefore.Length)
              continue;

          for (var i = 0; i < ancestorsAfter.Length; i++)
          {
              var afterUnidAtt = ancestorsAfter[i].Attribute(PtOpenXml.Unid);
              var beforeUnidAtt = ancestorsBefore[i].Attribute(PtOpenXml.Unid);
              if (afterUnidAtt != null && beforeUnidAtt != null)
                  afterUnidAtt.Value = beforeUnidAtt.Value;
          }
      }

      if (s_False)
      {
          var sb = new StringBuilder();
          foreach (var item in comparisonUnitAtomList)
              sb.Append(item.ToString()).Append(Environment.NewLine);
          var sbs = sb.ToString();
          TestUtil.NotePad(sbs);
      }

      var rComparisonUnitAtomList = ((IEnumerable<ComparisonUnitAtom>)comparisonUnitAtomList).Reverse().ToList();

      // The list is non-empty (checked above); every atom is expected to have at least one ancestor.
      var outermostAncestor = rComparisonUnitAtomList.First().AncestorElements.First();
      var outermostAncestorName = outermostAncestor.Name;
      string footnoteEndnoteUnid = null;
      if (outermostAncestorName == W.footnote || outermostAncestorName == W.endnote)
          footnoteEndnoteUnid = (string)outermostAncestor.Attribute(PtOpenXml.Unid);

      // Ancestors of a paragraph mark must already carry a Unid.
      Func<XElement, string> getRequiredUnid = ae =>
      {
          var unid = (string)ae.Attribute(PtOpenXml.Unid);
          if (unid == null)
              throw new OpenXmlPowerToolsException("Internal error");
          return unid;
      };

      // Deeper ancestors fall back to a generated unid. The original code computed the GUID but discarded it,
      // leaving null in AncestorUnids. One fallback is kept per element so that all atoms sharing an ancestor
      // agree on its unid; the XML tree itself is not modified.
      var generatedUnids = new Dictionary<XElement, string>();
      Func<XElement, string> getOrCreateUnid = ae =>
      {
          var unid = (string)ae.Attribute(PtOpenXml.Unid);
          if (unid != null)
              return unid;
          string generated;
          if (!generatedUnids.TryGetValue(ae, out generated))
          {
              generated = Guid.NewGuid().ToString("N");
              generatedUnids.Add(ae, generated);
          }
          return generated;
      };

      // Step 2: walk backwards; a pPr found inside a text box is processed here as though it were ordinary
      // content of the containing paragraph, which leaves the text box atoms incomplete until step 3.
      string[] currentAncestorUnids = null;
      foreach (var cua in rComparisonUnitAtomList)
      {
          var isParagraphMarkOutsideTextBox =
              cua.ContentElement.Name == W.pPr &&
              !cua.AncestorElements.Any(ae => ae.Name == W.txbxContent);

          if (isParagraphMarkOutsideTextBox)
          {
              currentAncestorUnids = cua.AncestorElements.Select(getRequiredUnid).ToArray();
              cua.AncestorUnids = currentAncestorUnids;
              // AncestorUnids is the same array as currentAncestorUnids here, so the override below is also
              // inherited by the atoms that belong to this paragraph.
              if (footnoteEndnoteUnid != null)
                  cua.AncestorUnids[0] = footnoteEndnoteUnid;
              continue;
          }

          // Content that is not preceded (in reverse order) by a paragraph mark cannot be placed.
          if (currentAncestorUnids == null)
              throw new OpenXmlPowerToolsException("Internal error");

          cua.AncestorUnids = currentAncestorUnids
              .Concat(cua.AncestorElements.Skip(currentAncestorUnids.Length).Select(getOrCreateUnid))
              .ToArray();
          if (footnoteEndnoteUnid != null)
              cua.AncestorUnids[0] = footnoteEndnoteUnid;
      }

      if (s_False)
      {
          var sb = new StringBuilder();
          foreach (var item in comparisonUnitAtomList)
              sb.Append(item.ToString()).Append(Environment.NewLine);
          var sbs = sb.ToString();
          TestUtil.NotePad(sbs);
      }

      // Step 3: second backwards walk that processes the content of text boxes only.
      currentAncestorUnids = null;
      var skipUntilNextPpr = false;
      foreach (var cua in rComparisonUnitAtomList)
      {
          // An atom shallower than the current text box paragraph means we have left the text box.
          if (currentAncestorUnids != null && cua.AncestorElements.Length < currentAncestorUnids.Length)
          {
              skipUntilNextPpr = true;
              currentAncestorUnids = null;
              continue;
          }

          if (cua.ContentElement.Name == W.pPr)
          {
              var inTextBox = cua.AncestorElements.Any(ae => ae.Name == W.txbxContent);
              if (!inTextBox)
              {
                  skipUntilNextPpr = true;
                  currentAncestorUnids = null;
                  continue;
              }

              skipUntilNextPpr = false;
              currentAncestorUnids = cua.AncestorElements.Select(getRequiredUnid).ToArray();
              cua.AncestorUnids = currentAncestorUnids;
              continue;
          }

          if (skipUntilNextPpr || currentAncestorUnids == null)
              continue;

          cua.AncestorUnids = currentAncestorUnids
              .Concat(cua.AncestorElements.Skip(currentAncestorUnids.Length).Select(getOrCreateUnid))
              .ToArray();
      }

      if (s_False)
      {
          var sb = new StringBuilder();
          foreach (var item in comparisonUnitAtomList)
              sb.Append(item.ToStringAncestorUnids()).Append(Environment.NewLine);
          var sbs = sb.ToString();
          TestUtil.NotePad(sbs);
      }
  }
  /// <summary>
  /// Flattens a list of correlated sequences into a single list of <c>ComparisonUnitAtom</c> objects, each tagged
  /// with the status (Equal, Deleted or Inserted) of the sequence it came from.
  /// </summary>
  /// <param name="correlatedSequence">Correlated sequences to flatten, processed in order.</param>
  /// <param name="settings">Comparer settings passed to every newly created atom.</param>
  /// <returns>The atoms of all sequences, in sequence order.</returns>
  /// <exception cref="OpenXmlPowerToolsException">A sequence has a status other than Equal, Deleted or Inserted.</exception>
  private static List<ComparisonUnitAtom> FlattenToComparisonUnitAtomList(List<CorrelatedSequence> correlatedSequence, WmlComparerSettings settings)
  {
      var flattenedAtoms = new List<ComparisonUnitAtom>();

      foreach (var sequence in correlatedSequence)
      {
          var status = sequence.CorrelationStatus;

          if (status == CorrelationStatus.Equal)
          {
              // Pair the atoms of both sides positionally. The resulting atom is built from the "after" side and
              // remembers its "before" counterpart. Zip stops at the end of the shorter side.
              var atomsBefore = sequence.ComparisonUnitArray1.SelectMany(unit => unit.DescendantContentAtoms());
              var atomsAfter = sequence.ComparisonUnitArray2.SelectMany(unit => unit.DescendantContentAtoms());
              var equalAtoms = atomsBefore
                  .Zip(atomsAfter, (before, after) =>
                      new ComparisonUnitAtom(after.ContentElement, after.AncestorElements, after.Part, settings)
                      {
                          CorrelationStatus = CorrelationStatus.Equal,
                          ContentElementBefore = before.ContentElement,
                          ComparisonUnitAtomBefore = before,
                      })
                  .ToList();
              flattenedAtoms.AddRange(equalAtoms);
          }
          else if (status == CorrelationStatus.Deleted)
          {
              // Deleted content exists only on the "before" side.
              foreach (var atom in sequence.ComparisonUnitArray1.SelectMany(unit => unit.DescendantContentAtoms()))
              {
                  flattenedAtoms.Add(
                      new ComparisonUnitAtom(atom.ContentElement, atom.AncestorElements, atom.Part, settings)
                      {
                          CorrelationStatus = CorrelationStatus.Deleted,
                      });
              }
          }
          else if (status == CorrelationStatus.Inserted)
          {
              // Inserted content exists only on the "after" side.
              foreach (var atom in sequence.ComparisonUnitArray2.SelectMany(unit => unit.DescendantContentAtoms()))
              {
                  flattenedAtoms.Add(
                      new ComparisonUnitAtom(atom.ContentElement, atom.AncestorElements, atom.Part, settings)
                      {
                          CorrelationStatus = CorrelationStatus.Inserted,
                      });
              }
          }
          else
          {
              throw new OpenXmlPowerToolsException("Internal error");
          }
      }

      // Debugging aid only; s_False keeps it disabled.
      if (s_False)
      {
          var dump = new StringBuilder();
          foreach (var atom in flattenedAtoms)
              dump.Append(atom.ToString()).Append(Environment.NewLine);
          TestUtil.NotePad(dump.ToString());
      }

      return flattenedAtoms;
  }
  /// <summary>
  /// For every table row that belongs to a deleted or inserted correlated sequence, adds a w:del or w:ins element
  /// to the row's w:trPr, so that the generated document shows the row as a tracked deletion or insertion.
  /// </summary>
  /// <param name="settings">Supplies the author and date written onto the revision element.</param>
  /// <param name="correlatedSequence">Correlated sequences to inspect; sequences with any other status are ignored.</param>
  /// <exception cref="OpenXmlPowerToolsException">A row group has no content atom, or its atom has no w:tr ancestor.</exception>
  private static void MarkRowsAsDeletedOrInserted(WmlComparerSettings settings, List<CorrelatedSequence> correlatedSequence)
  {
      foreach (var sequence in correlatedSequence)
      {
          var isDeleted = sequence.CorrelationStatus == CorrelationStatus.Deleted;
          var isInserted = sequence.CorrelationStatus == CorrelationStatus.Inserted;
          if (!isDeleted && !isInserted)
              continue;

          // Deleted units live in the first array, inserted units in the second.
          var unitsToInspect = isInserted ? sequence.ComparisonUnitArray2 : sequence.ComparisonUnitArray1;
          var revisionElementName = isDeleted ? W.del : W.ins;

          foreach (var unit in unitsToInspect)
          {
              // Per the original author's note, only rows appear here, never whole tables, because tables are
              // always flattened in the LCS process; no recursion into children is needed.
              var rowGroup = unit as ComparisonUnitGroup;
              if (rowGroup == null || rowGroup.ComparisonUnitGroupType != ComparisonUnitGroupType.Row)
                  continue;

              // Any atom of the row leads to the row element through its ancestors; the first one is enough.
              var firstAtomInRow = rowGroup.DescendantContentAtoms().FirstOrDefault();
              if (firstAtomInRow == null)
                  throw new OpenXmlPowerToolsException("Internal error");

              // Innermost w:tr ancestor (the last one in the ancestor chain).
              var row = firstAtomInRow.AncestorElements.LastOrDefault(ancestor => ancestor.Name == W.tr);
              if (row == null)
                  throw new OpenXmlPowerToolsException("Internal error");

              var rowProperties = row.Element(W.trPr);
              if (rowProperties == null)
              {
                  rowProperties = new XElement(W.trPr);
                  row.AddFirst(rowProperties);
              }

              rowProperties.Add(
                  new XElement(revisionElementName,
                      new XAttribute(W.author, settings.AuthorForRevisions),
                      new XAttribute(W.id, s_MaxId++),
                      new XAttribute(W.date, settings.DateTimeForRevisions)));
          }
      }
  }
  /// <summary>
  /// Kind of tracked revision reported in a <see cref="WmlComparerRevision"/>.
  /// </summary>
  public enum WmlComparerRevisionType
  {
      /// <summary>The revision inserted content.</summary>
      Inserted = 0,

      /// <summary>The revision deleted content.</summary>
      Deleted = 1,
  }
  /// <summary>
  /// One tracked revision found by GetRevisions: a run of adjacent content atoms that share the same
  /// correlation status and the same revision-tracking element attributes.
  /// </summary>
  public class WmlComparerRevision
  {
      // ---- what kind of revision, and who/when ----

      /// <summary>Whether the revision inserted or deleted content.</summary>
      public WmlComparerRevisionType RevisionType;

      /// <summary>Value of the w:author attribute of the revision-tracking element.</summary>
      public string Author;

      /// <summary>Value of the w:date attribute of the revision-tracking element, as the raw attribute string.</summary>
      public string Date;

      // ---- the revised content ----

      /// <summary>
      /// Concatenated text of the revised content; a paragraph mark (w:pPr) contributes a newline.
      /// Left null when the first content element is one of the element kinds that carry no text.
      /// </summary>
      public string Text;

      /// <summary>Content element of the first atom in the revision.</summary>
      public XElement ContentXElement;

      /// <summary>Revision-tracking element of the first atom in the revision.</summary>
      public XElement RevisionXElement;

      // ---- where the revision was found ----

      /// <summary>URI of the part that contains the revision.</summary>
      public Uri PartUri;

      /// <summary>Content type of the part that contains the revision.</summary>
      public string PartContentType;
  }
  // Content elements for which a revision reports no Text (math and drawings): when the first content
  // element of a revision has one of these names, WmlComparerRevision.Text is left unset.
  private static XName[] RevElementsWithNoText = { M.oMath, M.oMathPara, W.drawing };
  /// <summary>
  /// Lists the tracked revisions (insertions and deletions) found in a document: first those of the main
  /// document part, then those of the footnotes part, then those of the endnotes part.
  /// </summary>
  /// <param name="source">Document to inspect. All work is done on an in-memory copy of its byte array.</param>
  /// <param name="settings">Comparer settings, passed through to the creation of the comparison-unit atoms.</param>
  /// <returns>
  /// One <see cref="WmlComparerRevision"/> for every run of adjacent atoms that share the same correlation
  /// status and the same revision-tracking element (ignoring its w:id and pt:Unid attributes).
  /// </returns>
  /// <exception cref="OpenXmlPowerToolsException">
  /// The document contains unsupported content (w:altChunk, w:subDoc or w:contentPart).
  /// </exception>
  public static List<WmlComparerRevision> GetRevisions(WmlDocument source, WmlComparerSettings settings)
  {
      using (MemoryStream ms = new MemoryStream())
      {
          ms.Write(source.DocumentByteArray, 0, source.DocumentByteArray.Length);
          using (WordprocessingDocument wDoc = WordprocessingDocument.Open(ms, true))
          {
              TestForInvalidContent(wDoc);
              RemoveExistingPowerToolsMarkup(wDoc);

              var contentParent = wDoc.MainDocumentPart.GetXDocument().Root.Element(W.body);
              var atomList = WmlComparer.CreateComparisonUnitAtomList(wDoc.MainDocumentPart, contentParent, settings).ToArray();

              // debugging aid only
              if (s_False)
              {
                  var sb = new StringBuilder();
                  foreach (var item in atomList)
                      sb.Append(item.ToString()).Append(Environment.NewLine);
                  var sbs = sb.ToString();
                  TestUtil.NotePad(sbs);
              }

              // Adjacent atoms belong to the same revision when they have the same status and the same
              // revision-tracking element, compared without the attributes that differ per element
              // (w:id and pt:Unid).
              var grouped = atomList
                  .GroupAdjacent(a =>
                  {
                      var key = a.CorrelationStatus.ToString();
                      if (a.CorrelationStatus != CorrelationStatus.Equal)
                      {
                          var rt = new XElement(a.RevTrackElement.Name,
                              new XAttribute(XNamespace.Xmlns + "w", "http://schemas.openxmlformats.org/wordprocessingml/2006/main"),
                              a.RevTrackElement.Attributes().Where(a2 => a2.Name != W.id && a2.Name != PtOpenXml.Unid));
                          key += rt.ToString(SaveOptions.DisableFormatting);
                      }
                      return key;
                  })
                  .ToList();

              var revisions = grouped
                  .Where(k => k.Key != "Equal")
                  .ToList();

              // debugging aid only
              if (s_False)
              {
                  var sb = new StringBuilder();
                  foreach (var item in revisions)
                      sb.Append(item.Key).Append(Environment.NewLine);
                  var sbs = sb.ToString();
                  TestUtil.NotePad(sbs);
              }

              var mainDocPartRevisionList = revisions
                  .Select(rg =>
                  {
                      var rev = new WmlComparerRevision();

                      // The key starts with the name of the correlation status. It is a machine-generated
                      // string, so it is compared ordinally rather than with the current culture.
                      if (rg.Key.StartsWith("Inserted", StringComparison.Ordinal))
                          rev.RevisionType = WmlComparerRevisionType.Inserted;
                      else if (rg.Key.StartsWith("Deleted", StringComparison.Ordinal))
                          rev.RevisionType = WmlComparerRevisionType.Deleted;

                      // the first atom of the group represents the whole revision
                      var firstAtom = rg.First();
                      var revTrackElement = firstAtom.RevTrackElement;
                      rev.RevisionXElement = revTrackElement;
                      rev.Author = (string)revTrackElement.Attribute(W.author);
                      rev.ContentXElement = firstAtom.ContentElement;
                      rev.Date = (string)revTrackElement.Attribute(W.date);
                      rev.PartUri = wDoc.MainDocumentPart.Uri;
                      rev.PartContentType = wDoc.MainDocumentPart.ContentType;

                      if (!RevElementsWithNoText.Contains(rev.ContentXElement.Name))
                      {
                          // a paragraph mark (w:pPr) is reported as a line break
                          rev.Text = rg
                              .Select(rgc =>
                              {
                                  if (rgc.ContentElement.Name == W.pPr)
                                      return Environment.NewLine;
                                  return rgc.ContentElement.Value;
                              })
                              .StringConcatenate();
                      }
                      return rev;
                  })
                  .ToList();

              var footnotesRevisionList = GetFootnoteEndnoteRevisionList(wDoc.MainDocumentPart.FootnotesPart, W.footnote, settings);
              var endnotesRevisionList = GetFootnoteEndnoteRevisionList(wDoc.MainDocumentPart.EndnotesPart, W.endnote, settings);
              var finalRevisionList = mainDocPartRevisionList.Concat(footnotesRevisionList).Concat(endnotesRevisionList).ToList();
              return finalRevisionList;
          }
      }
  }
  /// <summary>
  /// Collects the tracked revisions of every note (footnote or endnote) in a notes part, note by note in
  /// document order.
  /// </summary>
  /// <param name="footnotesEndnotesPart">The footnotes or endnotes part; may be null when the document has none.</param>
  /// <param name="footnoteEndnoteElementName">Name of the note elements under the part's root (w:footnote or w:endnote).</param>
  /// <param name="settings">Comparer settings, passed through to the creation of the comparison-unit atoms.</param>
  /// <returns>The revisions found in the part; empty when <paramref name="footnotesEndnotesPart"/> is null.</returns>
  private static IEnumerable<WmlComparerRevision> GetFootnoteEndnoteRevisionList(OpenXmlPart footnotesEndnotesPart,
      XName footnoteEndnoteElementName,
      WmlComparerSettings settings)
  {
      if (footnotesEndnotesPart == null)
          return Enumerable.Empty<WmlComparerRevision>();

      var xDoc = footnotesEndnotesPart.GetXDocument();
      var footnotesEndnotes = xDoc.Root.Elements(footnoteEndnoteElementName);
      List<WmlComparerRevision> revisionsForPart = new List<WmlComparerRevision>();
      foreach (var fn in footnotesEndnotes)
      {
          var atomList = WmlComparer.CreateComparisonUnitAtomList(footnotesEndnotesPart, fn, settings).ToArray();

          // debugging aid only
          if (s_False)
          {
              var sb = new StringBuilder();
              foreach (var item in atomList)
                  sb.Append(item.ToString()).Append(Environment.NewLine);
              var sbs = sb.ToString();
              TestUtil.NotePad(sbs);
          }

          // Adjacent atoms belong to the same revision when they have the same status and the same
          // revision-tracking element, compared without the attributes that differ per element
          // (w:id and pt:Unid).
          var grouped = atomList
              .GroupAdjacent(a =>
              {
                  var key = a.CorrelationStatus.ToString();
                  if (a.CorrelationStatus != CorrelationStatus.Equal)
                  {
                      var rt = new XElement(a.RevTrackElement.Name,
                          new XAttribute(XNamespace.Xmlns + "w", "http://schemas.openxmlformats.org/wordprocessingml/2006/main"),
                          a.RevTrackElement.Attributes().Where(a2 => a2.Name != W.id && a2.Name != PtOpenXml.Unid));
                      key += rt.ToString(SaveOptions.DisableFormatting);
                  }
                  return key;
              })
              .ToList();

          var revisions = grouped
              .Where(k => k.Key != "Equal")
              .ToList();

          var thisNoteRevisionList = revisions
              .Select(rg =>
              {
                  var rev = new WmlComparerRevision();

                  // The key starts with the name of the correlation status. It is a machine-generated
                  // string, so it is compared ordinally rather than with the current culture.
                  if (rg.Key.StartsWith("Inserted", StringComparison.Ordinal))
                      rev.RevisionType = WmlComparerRevisionType.Inserted;
                  else if (rg.Key.StartsWith("Deleted", StringComparison.Ordinal))
                      rev.RevisionType = WmlComparerRevisionType.Deleted;

                  // the first atom of the group represents the whole revision
                  var firstAtom = rg.First();
                  var revTrackElement = firstAtom.RevTrackElement;
                  rev.RevisionXElement = revTrackElement;
                  rev.Author = (string)revTrackElement.Attribute(W.author);
                  rev.ContentXElement = firstAtom.ContentElement;
                  rev.Date = (string)revTrackElement.Attribute(W.date);
                  rev.PartUri = footnotesEndnotesPart.Uri;
                  rev.PartContentType = footnotesEndnotesPart.ContentType;

                  if (!RevElementsWithNoText.Contains(rev.ContentXElement.Name))
                  {
                      // a paragraph mark (w:pPr) is reported as a line break
                      rev.Text = rg
                          .Select(rgc =>
                          {
                              if (rgc.ContentElement.Name == W.pPr)
                                  return Environment.NewLine;
                              return rgc.ContentElement.Value;
                          })
                          .StringConcatenate();
                  }
                  return rev;
              });

          revisionsForPart.AddRange(thisNoteRevisionList);
      }
      return revisionsForPart;
  }
  // Rejects documents that use constructs the comparer does not support. Every content part is scanned
  // and an OpenXmlPowerToolsException is thrown for the first of these elements found in it:
  // - altChunk
  // - subDoc
  // - contentPart
  private static void TestForInvalidContent(WordprocessingDocument wDoc)
  {
      // checked in this order for each part
      var prohibitedElements = new[]
      {
          new KeyValuePair<XName, string>(W.altChunk, "Unsupported document, contains w:altChunk"),
          new KeyValuePair<XName, string>(W.subDoc, "Unsupported document, contains w:subDoc"),
          new KeyValuePair<XName, string>(W.contentPart, "Unsupported document, contains w:contentPart"),
      };

      foreach (var part in wDoc.ContentParts())
      {
          var partXml = part.GetXDocument();
          foreach (var prohibited in prohibitedElements)
          {
              if (partXml.Descendants(prohibited.Key).Any())
                  throw new OpenXmlPowerToolsException(prohibited.Value);
          }
      }
  }
  // Strips the PowerTools attributes (everything in the PtOpenXml.pt namespace except PtOpenXml.Unid)
  // from the main document part and, when present, from the footnotes and endnotes parts, then writes
  // each cleaned part back with PutXDocument.
  private static void RemoveExistingPowerToolsMarkup(WordprocessingDocument wDoc)
  {
      var mainPart = wDoc.MainDocumentPart;

      // the main part is always processed; the two notes parts are optional
      var partsToClean = new OpenXmlPart[]
      {
          mainPart,
          mainPart.FootnotesPart,
          mainPart.EndnotesPart,
      };

      foreach (var part in partsToClean)
      {
          if (part == null)
              continue;

          part.GetXDocument()
              .Root
              .Descendants()
              .Attributes()
              .Where(a => a.Name.Namespace == PtOpenXml.pt && a.Name != PtOpenXml.Unid)
              .Remove();
          part.PutXDocument();
      }
  }
  // Annotates block-level content with hashes. Every descendant of contentParent whose name is listed in
  // ElementsToHaveSha1Hash gets a PtOpenXml.SHA1Hash attribute computed from its normalized clone (see
  // CloneBlockLevelContentForHashing). Tables and rows additionally get a PtOpenXml.StructureSHA1Hash
  // attribute computed from that clone with all text removed (see CloneForStructureHash).
  private static void AddSha1HashToBlockLevelContent(OpenXmlPart part, XElement contentParent, WmlComparerSettings settings)
  {
      // this declaration is removed from the serialized XML before hashing
      const string namespaceDeclaration = " xmlns=\"http://schemas.openxmlformats.org/wordprocessingml/2006/main\"";

      var hashTargets = contentParent
          .Descendants()
          .Where(d => ElementsToHaveSha1Hash.Contains(d.Name));

      foreach (var target in hashTargets)
      {
          var contentClone = (XElement)CloneBlockLevelContentForHashing(part, target, true, settings);
          var contentXml = contentClone
              .ToString(SaveOptions.DisableFormatting)
              .Replace(namespaceDeclaration, "");
          var contentHash = WmlComparerUtil.SHA1HashStringForUTF8String(contentXml);
          target.Add(new XAttribute(PtOpenXml.SHA1Hash, contentHash));

          bool isTableOrRow = target.Name == W.tbl || target.Name == W.tr;
          if (!isTableOrRow)
              continue;

          // This is a convenient place to look at why tables are being compared as different.
          var structureClone = (XElement)CloneForStructureHash(contentClone);
          var structureXml = structureClone
              .ToString(SaveOptions.DisableFormatting)
              .Replace(namespaceDeclaration, "");
          var structureHash = WmlComparerUtil.SHA1HashStringForUTF8String(structureXml);
          target.Add(new XAttribute(PtOpenXml.StructureSHA1Hash, structureHash));
      }
  }
  // Produces a copy of an element tree that keeps only its structure: element names, attributes and
  // child elements are copied recursively, while text nodes (and any other non-element nodes) are left
  // out. Returns null when the node passed in is not an element.
  private static object CloneForStructureHash(XNode node)
  {
      var element = node as XElement;
      if (element == null)
          return null;

      var structuralChildren = element
          .Elements()
          .Select(child => CloneForStructureHash(child));
      return new XElement(element.Name, element.Attributes(), structuralChildren);
  }
  // Attributes that CloneBlockLevelContentForHashing leaves out when it copies an element's attributes
  // on its relationship-bearing and default paths.
  private static XName[] AttributesToTrimWhenCloning =
  {
      WP14.anchorId,
      WP14.editId,
      "ObjectID",
      "ShapeID",
      "id",
      "type",
  };
  // Builds the normalized copy of a node that is serialized and hashed to decide whether two pieces of
  // block-level content are equal.
  //
  // mainDocumentPart     - part that owns the content; relationship ids are resolved against it.
  // node                 - node to clone.
  // includeRelatedParts  - when true, relationship-id attributes of elements listed in
  //                        ComparisonUnitWord.s_ElementsWithRelationshipIds are replaced by a value
  //                        describing the relationship target (see below).
  // settings             - CaseInsensitive / CultureInfo control upper-casing of text.
  //
  // The result is an XElement, a sequence of XElement (for w:r, one new run per child of the run), the
  // node itself (for non-element nodes), or null for content that does not take part in the hash.
  //
  // The order of the element tests below matters: the relationship-aware branch comes before the
  // VML.shape, O.OLEObject, W._object and WP.docPr branches, so it takes precedence for any element
  // that is also listed in s_ElementsWithRelationshipIds.
  private static object CloneBlockLevelContentForHashing(OpenXmlPart mainDocumentPart, XNode node, bool includeRelatedParts, WmlComparerSettings settings)
  {
      var element = node as XElement;
      if (element != null)
      {
          // bookmarks and paragraph/run properties do not take part in the hash
          if (element.Name == W.bookmarkStart ||
              element.Name == W.bookmarkEnd ||
              element.Name == W.pPr ||
              element.Name == W.rPr)
              return null;

          if (element.Name == W.p)
          {
              var clonedPara = new XElement(element.Name,
                  element.Attributes().Where(a => a.Name != W.rsid &&
                      a.Name != W.rsidDel &&
                      a.Name != W.rsidP &&
                      a.Name != W.rsidR &&
                      a.Name != W.rsidRDefault &&
                      a.Name != W.rsidRPr &&
                      a.Name != W.rsidSect &&
                      a.Name != W.rsidTr &&
                      a.Name.Namespace != PtOpenXml.pt),
                  element.Nodes().Select(n => CloneBlockLevelContentForHashing(mainDocumentPart, n, includeRelatedParts, settings)));

              // Adjacent runs that consist of a single w:t are merged into one run, so that the way text
              // is split into runs does not influence the hash.
              var groupedRuns = clonedPara
                  .Elements()
                  .GroupAdjacent(e => e.Name == W.r &&
                      e.Elements().Count() == 1 &&
                      e.Element(W.t) != null);

              // NOTE: the returned paragraph is rebuilt from the grouped children only, so it carries none
              // of the attributes copied onto clonedPara above.
              var clonedParaWithGroupedRuns = new XElement(element.Name,
                  groupedRuns.Select(g =>
                  {
                      if (g.Key)
                      {
                          var text = g.Select(t => t.Value).StringConcatenate();
                          if (settings.CaseInsensitive)
                              text = text.ToUpper(settings.CultureInfo);
                          var newRun = (object)new XElement(W.r,
                              new XElement(W.t,
                                  text));
                          return newRun;
                      }
                      return g;
                  }));
              return clonedParaWithGroupedRuns;
          }

          if (element.Name == W.r)
          {
              // every child of the run (except w:rPr) is wrapped in a run of its own
              var clonedRuns = element
                  .Elements()
                  .Where(e => e.Name != W.rPr)
                  .Select(rc => new XElement(W.r, CloneBlockLevelContentForHashing(mainDocumentPart, rc, includeRelatedParts, settings)));
              return clonedRuns;
          }

          if (element.Name == W.tbl)
          {
              // only the rows of a table are hashed
              var clonedTable = new XElement(W.tbl,
                  element.Elements(W.tr).Select(n => CloneBlockLevelContentForHashing(mainDocumentPart, n, includeRelatedParts, settings)));
              return clonedTable;
          }

          if (element.Name == W.tr)
          {
              // only the cells of a row are hashed
              var clonedRow = new XElement(W.tr,
                  element.Elements(W.tc).Select(n => CloneBlockLevelContentForHashing(mainDocumentPart, n, includeRelatedParts, settings)));
              return clonedRow;
          }

          if (element.Name == W.tc)
          {
              var clonedCell = new XElement(W.tc,
                  element.Elements().Select(n => CloneBlockLevelContentForHashing(mainDocumentPart, n, includeRelatedParts, settings)));
              return clonedCell;
          }

          if (element.Name == W.tcPr)
          {
              // of the cell properties, only w:gridSpan is hashed
              var clonedCellProps = new XElement(W.tcPr,
                  element.Elements(W.gridSpan).Select(n => CloneBlockLevelContentForHashing(mainDocumentPart, n, includeRelatedParts, settings)));
              return clonedCellProps;
          }

          if (element.Name == W.gridSpan)
          {
              // A w:gridSpan without w:val is cloned without the attribute; passing a null value to the
              // XAttribute constructor would throw ArgumentNullException.
              var spanValue = (string)element.Attribute(W.val);
              var clonedGridSpan = new XElement(W.gridSpan,
                  spanValue != null ? new XAttribute("val", spanValue) : null);
              return clonedGridSpan;
          }

          if (element.Name == W.txbxContent)
          {
              var clonedTextbox = new XElement(W.txbxContent,
                  element.Elements().Select(n => CloneBlockLevelContentForHashing(mainDocumentPart, n, includeRelatedParts, settings)));
              return clonedTextbox;
          }

          if (includeRelatedParts)
          {
              if (ComparisonUnitWord.s_ElementsWithRelationshipIds.Contains(element.Name))
              {
                  var newElement = new XElement(element.Name,
                      element.Attributes()
                          .Where(a => a.Name.Namespace != PtOpenXml.pt)
                          .Where(a => !AttributesToTrimWhenCloning.Contains(a.Name))
                          .Select(a =>
                          {
                              if (!ComparisonUnitWord.s_RelationshipAttributeNames.Contains(a.Name))
                                  return a;

                              // A relationship id is replaced by something that identifies its target:
                              // the SHA1 hash of a non-XML part, or the URI of a hyperlink or external
                              // relationship.
                              var rId = (string)a;

                              // could be an hyperlink relationship
                              try
                              {
                                  OpenXmlPart oxp = mainDocumentPart.GetPartById(rId);
                                  if (oxp == null)
                                      throw new FileFormatException("Invalid WordprocessingML Document");

                                  // the hash of a part is computed once and cached as an annotation on the part
                                  var anno = oxp.Annotation<PartSHA1HashAnnotation>();
                                  if (anno != null)
                                      return new XAttribute(a.Name, anno.Hash);

                                  // content types are machine identifiers, so the suffix is compared ordinally
                                  if (!oxp.ContentType.EndsWith("xml", StringComparison.Ordinal))
                                  {
                                      using (var str = oxp.GetStream())
                                      {
                                          byte[] ba;
                                          using (BinaryReader br = new BinaryReader(str))
                                          {
                                              ba = br.ReadBytes((int)str.Length);
                                          }
                                          var sha1 = WmlComparerUtil.SHA1HashStringForByteArray(ba);
                                          oxp.AddAnnotation(new PartSHA1HashAnnotation(sha1));
                                          return new XAttribute(a.Name, sha1);
                                      }
                                  }
                              }
                              catch (ArgumentOutOfRangeException)
                              {
                                  // the id does not resolve to a part: look for a hyperlink relationship
                                  HyperlinkRelationship hr = mainDocumentPart.HyperlinkRelationships.FirstOrDefault(z => z.Id == rId);
                                  if (hr != null)
                                  {
                                      var str = hr.Uri.ToString();
                                      return new XAttribute(a.Name, str);
                                  }

                                  // could be an external relationship
                                  ExternalRelationship er = mainDocumentPart.ExternalRelationships.FirstOrDefault(z => z.Id == rId);
                                  if (er != null)
                                  {
                                      var str = er.Uri.ToString();
                                      return new XAttribute(a.Name, str);
                                  }
                                  return new XAttribute(a.Name, "NULL Relationship");
                              }

                              // the target is an XML part without a cached hash: the attribute is dropped
                              return null;
                          }),
                      element.Nodes().Select(n => CloneBlockLevelContentForHashing(mainDocumentPart, n, includeRelatedParts, settings)));
                  return newElement;
              }
          }

          if (element.Name == VML.shape)
          {
              return new XElement(element.Name,
                  element.Attributes()
                      .Where(a => a.Name.Namespace != PtOpenXml.pt)
                      .Where(a => a.Name != "style" && a.Name != "id" && a.Name != "type"),
                  element.Nodes().Select(n => CloneBlockLevelContentForHashing(mainDocumentPart, n, includeRelatedParts, settings)));
          }

          if (element.Name == O.OLEObject)
          {
              var o = new XElement(element.Name,
                  element.Attributes()
                      .Where(a => a.Name.Namespace != PtOpenXml.pt)
                      .Where(a => a.Name != "ObjectID" && a.Name != R.id),
                  element.Nodes().Select(n => CloneBlockLevelContentForHashing(mainDocumentPart, n, includeRelatedParts, settings)));
              return o;
          }

          if (element.Name == W._object)
          {
              var o = new XElement(element.Name,
                  element.Attributes()
                      .Where(a => a.Name.Namespace != PtOpenXml.pt),
                  element.Nodes().Select(n => CloneBlockLevelContentForHashing(mainDocumentPart, n, includeRelatedParts, settings)));
              return o;
          }

          if (element.Name == WP.docPr)
          {
              return new XElement(element.Name,
                  element.Attributes()
                      .Where(a => a.Name.Namespace != PtOpenXml.pt && a.Name != "id"),
                  element.Nodes().Select(n => CloneBlockLevelContentForHashing(mainDocumentPart, n, includeRelatedParts, settings)));
          }

          // any other element: copy it without PowerTools attributes and without the trimmed attributes
          return new XElement(element.Name,
              element.Attributes()
                  .Where(a => a.Name.Namespace != PtOpenXml.pt)
                  .Where(a => !AttributesToTrimWhenCloning.Contains(a.Name)),
              element.Nodes().Select(n => CloneBlockLevelContentForHashing(mainDocumentPart, n, includeRelatedParts, settings)));
      }

      // text is upper-cased when the comparison is case-insensitive
      if (settings.CaseInsensitive)
      {
          var xt = node as XText;
          if (xt != null)
          {
              var newText = xt.Value.ToUpper(settings.CultureInfo);
              return new XText(newText);
          }
      }

      // every other node is handed back unchanged
      return node;
  }
  3349. private static List<CorrelatedSequence> FindCommonAtBeginningAndEnd(CorrelatedSequence unknown, WmlComparerSettings settings)
  3350. {
  3351. int lengthToCompare = Math.Min(unknown.ComparisonUnitArray1.Length, unknown.ComparisonUnitArray2.Length);
  3352. var countCommonAtBeginning = unknown
  3353. .ComparisonUnitArray1
  3354. .Take(lengthToCompare)
  3355. .Zip(unknown.ComparisonUnitArray2,
  3356. (pu1, pu2) =>
  3357. {
  3358. return new
  3359. {
  3360. Pu1 = pu1,
  3361. Pu2 = pu2,
  3362. };
  3363. })
  3364. .TakeWhile(pair => pair.Pu1.SHA1Hash == pair.Pu2.SHA1Hash)
  3365. .Count();
  3366. if (countCommonAtBeginning != 0 && ((double)countCommonAtBeginning / (double)lengthToCompare) < settings.DetailThreshold)
  3367. countCommonAtBeginning = 0;
  3368. if (countCommonAtBeginning != 0)
  3369. {
  3370. var newSequence = new List<CorrelatedSequence>();
  3371. CorrelatedSequence csEqual = new CorrelatedSequence();
  3372. csEqual.CorrelationStatus = CorrelationStatus.Equal;
  3373. csEqual.ComparisonUnitArray1 = unknown
  3374. .ComparisonUnitArray1
  3375. .Take(countCommonAtBeginning)
  3376. .ToArray();
  3377. csEqual.ComparisonUnitArray2 = unknown
  3378. .ComparisonUnitArray2
  3379. .Take(countCommonAtBeginning)
  3380. .ToArray();
  3381. newSequence.Add(csEqual);
  3382. var remainingLeft = unknown.ComparisonUnitArray1.Length - countCommonAtBeginning;
  3383. var remainingRight = unknown.ComparisonUnitArray2.Length - countCommonAtBeginning;
  3384. if (remainingLeft != 0 && remainingRight == 0)
  3385. {
  3386. CorrelatedSequence csDeleted = new CorrelatedSequence();
  3387. csDeleted.CorrelationStatus = CorrelationStatus.Deleted;
  3388. csDeleted.ComparisonUnitArray1 = unknown.ComparisonUnitArray1.Skip(countCommonAtBeginning).ToArray();
  3389. csDeleted.ComparisonUnitArray2 = null;
  3390. newSequence.Add(csDeleted);
  3391. }
  3392. else if (remainingLeft == 0 && remainingRight != 0)
  3393. {
  3394. CorrelatedSequence csInserted = new CorrelatedSequence();
  3395. csInserted.CorrelationStatus = CorrelationStatus.Inserted;
  3396. csInserted.ComparisonUnitArray1 = null;
  3397. csInserted.ComparisonUnitArray2 = unknown.ComparisonUnitArray2.Skip(countCommonAtBeginning).ToArray();
  3398. newSequence.Add(csInserted);
  3399. }
  3400. else if (remainingLeft != 0 && remainingRight != 0)
  3401. {
  3402. var first1 = unknown.ComparisonUnitArray1[0] as ComparisonUnitWord;
  3403. var first2 = unknown.ComparisonUnitArray2[0] as ComparisonUnitWord;
  3404. if (first1 != null && first2 != null)
  3405. {
  3406. // if operating at the word level and
  3407. // if the last word on the left != pPr && last word on right != pPr
  3408. // then create an unknown for the rest of the paragraph, and create an unknown for the rest of the unknown
  3409. // if the last word on the left != pPr and last word on right == pPr
  3410. // then create deleted for the left, and create an unknown for the rest of the unknown
  3411. // if the last word on the left == pPr and last word on right != pPr
  3412. // then create inserted for the right, and create an unknown for the rest of the unknown
  3413. // if the last word on the left == pPr and last word on right == pPr
  3414. // then create an unknown for the rest of the unknown
  3415. var remainingInLeft = unknown
  3416. .ComparisonUnitArray1
  3417. .Skip(countCommonAtBeginning)
  3418. .ToArray();
  3419. var remainingInRight = unknown
  3420. .ComparisonUnitArray2
  3421. .Skip(countCommonAtBeginning)
  3422. .ToArray();
  3423. var lastContentAtomLeft = unknown.ComparisonUnitArray1[countCommonAtBeginning - 1].DescendantContentAtoms().FirstOrDefault();
  3424. var lastContentAtomRight = unknown.ComparisonUnitArray2[countCommonAtBeginning - 1].DescendantContentAtoms().FirstOrDefault();
  3425. if (lastContentAtomLeft.ContentElement.Name != W.pPr && lastContentAtomRight.ContentElement.Name != W.pPr)
  3426. {
  3427. var split1 = SplitAtParagraphMark(remainingInLeft);
  3428. var split2 = SplitAtParagraphMark(remainingInRight);
  3429. if (split1.Count() == 1 && split2.Count() == 1)
  3430. {
  3431. CorrelatedSequence csUnknown2 = new CorrelatedSequence();
  3432. csUnknown2.CorrelationStatus = CorrelationStatus.Unknown;
  3433. csUnknown2.ComparisonUnitArray1 = split1.First();
  3434. csUnknown2.ComparisonUnitArray2 = split2.First();
  3435. newSequence.Add(csUnknown2);
  3436. return newSequence;
  3437. }
  3438. else if (split1.Count == 2 && split2.Count == 2)
  3439. {
  3440. CorrelatedSequence csUnknown2 = new CorrelatedSequence();
  3441. csUnknown2.CorrelationStatus = CorrelationStatus.Unknown;
  3442. csUnknown2.ComparisonUnitArray1 = split1.First();
  3443. csUnknown2.ComparisonUnitArray2 = split2.First();
  3444. newSequence.Add(csUnknown2);
  3445. CorrelatedSequence csUnknown3 = new CorrelatedSequence();
  3446. csUnknown3.CorrelationStatus = CorrelationStatus.Unknown;
  3447. csUnknown3.ComparisonUnitArray1 = split1.Skip(1).First();
  3448. csUnknown3.ComparisonUnitArray2 = split2.Skip(1).First();
  3449. newSequence.Add(csUnknown3);
  3450. return newSequence;
  3451. }
  3452. }
  3453. }
  3454. CorrelatedSequence csUnknown = new CorrelatedSequence();
  3455. csUnknown.CorrelationStatus = CorrelationStatus.Unknown;
  3456. csUnknown.ComparisonUnitArray1 = unknown.ComparisonUnitArray1.Skip(countCommonAtBeginning).ToArray();
  3457. csUnknown.ComparisonUnitArray2 = unknown.ComparisonUnitArray2.Skip(countCommonAtBeginning).ToArray();
  3458. newSequence.Add(csUnknown);
  3459. }
  3460. else if (remainingLeft == 0 && remainingRight == 0)
  3461. {
  3462. // nothing to do
  3463. }
  3464. return newSequence;
  3465. }
  3466. // if we get to here, then countCommonAtBeginning == 0
  3467. var countCommonAtEnd = unknown
  3468. .ComparisonUnitArray1
  3469. .Reverse()
  3470. .Take(lengthToCompare)
  3471. .Zip(unknown
  3472. .ComparisonUnitArray2
  3473. .Reverse()
  3474. .Take(lengthToCompare),
  3475. (pu1, pu2) =>
  3476. {
  3477. return new
  3478. {
  3479. Pu1 = pu1,
  3480. Pu2 = pu2,
  3481. };
  3482. })
  3483. .TakeWhile(pair => pair.Pu1.SHA1Hash == pair.Pu2.SHA1Hash)
  3484. .Count();
  3485. // never start a common section with a paragraph mark. However, it is OK to set two paragraph marks as equal.
  3486. while (true)
  3487. {
  3488. if (countCommonAtEnd <= 1)
  3489. break;
  3490. var firstCommon = unknown
  3491. .ComparisonUnitArray1
  3492. .Reverse()
  3493. .Take(countCommonAtEnd)
  3494. .LastOrDefault();
  3495. var firstCommonWord = firstCommon as ComparisonUnitWord;
  3496. if (firstCommonWord == null)
  3497. break;
  3498. // if the word contains more than one atom, then not a paragraph mark
  3499. if (firstCommonWord.Contents.Count() != 1)
  3500. break;
  3501. var firstCommonAtom = firstCommonWord.Contents.First() as ComparisonUnitAtom;
  3502. if (firstCommonAtom == null)
  3503. break;
  3504. if (firstCommonAtom.ContentElement.Name != W.pPr)
  3505. break;
  3506. countCommonAtEnd--;
  3507. }
  3508. bool isOnlyParagraphMark = false;
  3509. if (countCommonAtEnd == 1)
  3510. {
  3511. var firstCommon = unknown
  3512. .ComparisonUnitArray1
  3513. .Reverse()
  3514. .Take(countCommonAtEnd)
  3515. .LastOrDefault();
  3516. var firstCommonWord = firstCommon as ComparisonUnitWord;
  3517. if (firstCommonWord != null)
  3518. {
  3519. // if the word contains more than one atom, then not a paragraph mark
  3520. if (firstCommonWord.Contents.Count() == 1)
  3521. {
  3522. var firstCommonAtom = firstCommonWord.Contents.First() as ComparisonUnitAtom;
  3523. if (firstCommonAtom != null)
  3524. {
  3525. if (firstCommonAtom.ContentElement.Name == W.pPr)
  3526. isOnlyParagraphMark = true;
  3527. }
  3528. }
  3529. }
  3530. }
  3531. if (countCommonAtEnd == 2)
  3532. {
  3533. var firstCommon = unknown
  3534. .ComparisonUnitArray1
  3535. .Reverse()
  3536. .Take(countCommonAtEnd)
  3537. .LastOrDefault();
  3538. var secondCommon = unknown
  3539. .ComparisonUnitArray1
  3540. .Reverse()
  3541. .Take(countCommonAtEnd)
  3542. .FirstOrDefault();
  3543. var firstCommonWord = firstCommon as ComparisonUnitWord;
  3544. var secondCommonWord = secondCommon as ComparisonUnitWord;
  3545. if (firstCommonWord != null && secondCommonWord != null)
  3546. {
  3547. // if the word contains more than one atom, then not a paragraph mark
  3548. if (firstCommonWord.Contents.Count() == 1 && secondCommonWord.Contents.Count() == 1)
  3549. {
  3550. var firstCommonAtom = firstCommonWord.Contents.First() as ComparisonUnitAtom;
  3551. var secondCommonAtom = secondCommonWord.Contents.First() as ComparisonUnitAtom;
  3552. if (firstCommonAtom != null && secondCommonAtom != null)
  3553. {
  3554. if (secondCommonAtom.ContentElement.Name == W.pPr)
  3555. isOnlyParagraphMark = true;
  3556. }
  3557. }
  3558. }
  3559. }
  3560. if (!isOnlyParagraphMark && countCommonAtEnd != 0 && ((double)countCommonAtEnd / (double)lengthToCompare) < settings.DetailThreshold)
  3561. countCommonAtEnd = 0;
  3562. // If the following test is not there, the test below sets the end paragraph mark of the entire document equal to the end paragraph
  3563. // mark of the first paragraph in the other document, causing lines to be out of order.
  3564. // [InlineData("WC010-Para-Before-Table-Unmodified.docx", "WC010-Para-Before-Table-Mod.docx", 3)]
  3565. if (isOnlyParagraphMark)
  3566. countCommonAtEnd = 0;
  3567. if (countCommonAtEnd == 0)
  3568. return null;
  3569. // if countCommonAtEnd != 0, and if it contains a paragraph mark, then if there are comparison units in the same paragraph before the common at end (in either version)
  3570. // then we want to put all of those comparison units into a single unknown, where they must be resolved against each other. We don't want those comparison units to go into the middle unknown comparison unit.
  3571. if (countCommonAtEnd != 0)
  3572. {
  3573. int remainingInLeftParagraph = 0;
  3574. int remainingInRightParagraph = 0;
  3575. var commonEndSeq = unknown
  3576. .ComparisonUnitArray1
  3577. .Reverse()
  3578. .Take(countCommonAtEnd)
  3579. .Reverse()
  3580. .ToList();
  3581. var firstOfCommonEndSeq = commonEndSeq.First();
  3582. if (firstOfCommonEndSeq is ComparisonUnitWord)
  3583. {
  3584. // are there any paragraph marks in the common seq at end?
  3585. //if (commonEndSeq.Any(cu => cu.Contents.OfType<ComparisonUnitAtom>().First().ContentElement.Name == W.pPr))
  3586. if (commonEndSeq.Any(cu =>
  3587. {
  3588. var firstComparisonUnitAtom = cu.Contents.OfType<ComparisonUnitAtom>().FirstOrDefault();
  3589. if (firstComparisonUnitAtom == null)
  3590. return false;
  3591. return firstComparisonUnitAtom.ContentElement.Name == W.pPr;
  3592. }))
  3593. {
  3594. remainingInLeftParagraph = unknown
  3595. .ComparisonUnitArray1
  3596. .Reverse()
  3597. .Skip(countCommonAtEnd)
  3598. .TakeWhile(cu =>
  3599. {
  3600. if (!(cu is ComparisonUnitWord))
  3601. return false;
  3602. var firstComparisonUnitAtom = cu.Contents.OfType<ComparisonUnitAtom>().FirstOrDefault();
  3603. if (firstComparisonUnitAtom == null)
  3604. return true;
  3605. return firstComparisonUnitAtom.ContentElement.Name != W.pPr;
  3606. })
  3607. .Count();
  3608. remainingInRightParagraph = unknown
  3609. .ComparisonUnitArray2
  3610. .Reverse()
  3611. .Skip(countCommonAtEnd)
  3612. .TakeWhile(cu =>
  3613. {
  3614. if (!(cu is ComparisonUnitWord))
  3615. return false;
  3616. var firstComparisonUnitAtom = cu.Contents.OfType<ComparisonUnitAtom>().FirstOrDefault();
  3617. if (firstComparisonUnitAtom == null)
  3618. return true;
  3619. return firstComparisonUnitAtom.ContentElement.Name != W.pPr;
  3620. })
  3621. .Count();
  3622. }
  3623. }
  3624. var newSequence = new List<CorrelatedSequence>();
  3625. int beforeCommonParagraphLeft = unknown.ComparisonUnitArray1.Length - remainingInLeftParagraph - countCommonAtEnd;
  3626. int beforeCommonParagraphRight = unknown.ComparisonUnitArray2.Length - remainingInRightParagraph - countCommonAtEnd;
  3627. if (beforeCommonParagraphLeft != 0 && beforeCommonParagraphRight == 0)
  3628. {
  3629. CorrelatedSequence csDeleted = new CorrelatedSequence();
  3630. csDeleted.CorrelationStatus = CorrelationStatus.Deleted;
  3631. csDeleted.ComparisonUnitArray1 = unknown.ComparisonUnitArray1.Take(beforeCommonParagraphLeft).ToArray();
  3632. csDeleted.ComparisonUnitArray2 = null;
  3633. newSequence.Add(csDeleted);
  3634. }
  3635. else if (beforeCommonParagraphLeft == 0 && beforeCommonParagraphRight != 0)
  3636. {
  3637. CorrelatedSequence csInserted = new CorrelatedSequence();
  3638. csInserted.CorrelationStatus = CorrelationStatus.Inserted;
  3639. csInserted.ComparisonUnitArray1 = null;
  3640. csInserted.ComparisonUnitArray2 = unknown.ComparisonUnitArray2.Take(beforeCommonParagraphRight).ToArray();
  3641. newSequence.Add(csInserted);
  3642. }
  3643. else if (beforeCommonParagraphLeft != 0 && beforeCommonParagraphRight != 0)
  3644. {
  3645. CorrelatedSequence csUnknown = new CorrelatedSequence();
  3646. csUnknown.CorrelationStatus = CorrelationStatus.Unknown;
  3647. csUnknown.ComparisonUnitArray1 = unknown.ComparisonUnitArray1.Take(beforeCommonParagraphLeft).ToArray();
  3648. csUnknown.ComparisonUnitArray2 = unknown.ComparisonUnitArray2.Take(beforeCommonParagraphRight).ToArray();
  3649. newSequence.Add(csUnknown);
  3650. }
  3651. else if (beforeCommonParagraphLeft == 0 && beforeCommonParagraphRight == 0)
  3652. {
  3653. // nothing to do
  3654. }
  3655. if (remainingInLeftParagraph != 0 && remainingInRightParagraph == 0)
  3656. {
  3657. CorrelatedSequence csDeleted = new CorrelatedSequence();
  3658. csDeleted.CorrelationStatus = CorrelationStatus.Deleted;
  3659. csDeleted.ComparisonUnitArray1 = unknown.ComparisonUnitArray1.Skip(beforeCommonParagraphLeft).Take(remainingInLeftParagraph).ToArray();
  3660. csDeleted.ComparisonUnitArray2 = null;
  3661. newSequence.Add(csDeleted);
  3662. }
  3663. else if (remainingInLeftParagraph == 0 && remainingInRightParagraph != 0)
  3664. {
  3665. CorrelatedSequence csInserted = new CorrelatedSequence();
  3666. csInserted.CorrelationStatus = CorrelationStatus.Inserted;
  3667. csInserted.ComparisonUnitArray1 = null;
  3668. csInserted.ComparisonUnitArray2 = unknown.ComparisonUnitArray2.Skip(beforeCommonParagraphRight).Take(remainingInRightParagraph).ToArray();
  3669. newSequence.Add(csInserted);
  3670. }
  3671. else if (remainingInLeftParagraph != 0 && remainingInRightParagraph != 0)
  3672. {
  3673. CorrelatedSequence csUnknown = new CorrelatedSequence();
  3674. csUnknown.CorrelationStatus = CorrelationStatus.Unknown;
  3675. csUnknown.ComparisonUnitArray1 = unknown.ComparisonUnitArray1.Skip(beforeCommonParagraphLeft).Take(remainingInLeftParagraph).ToArray();
  3676. csUnknown.ComparisonUnitArray2 = unknown.ComparisonUnitArray2.Skip(beforeCommonParagraphRight).Take(remainingInRightParagraph).ToArray();
  3677. newSequence.Add(csUnknown);
  3678. }
  3679. else if (remainingInLeftParagraph == 0 && remainingInRightParagraph == 0)
  3680. {
  3681. // nothing to do
  3682. }
  3683. CorrelatedSequence csEqual = new CorrelatedSequence();
  3684. csEqual.CorrelationStatus = CorrelationStatus.Equal;
  3685. csEqual.ComparisonUnitArray1 = unknown.ComparisonUnitArray1.Skip(unknown.ComparisonUnitArray1.Length - countCommonAtEnd).ToArray();
  3686. csEqual.ComparisonUnitArray2 = unknown.ComparisonUnitArray2.Skip(unknown.ComparisonUnitArray2.Length - countCommonAtEnd).ToArray();
  3687. newSequence.Add(csEqual);
  3688. return newSequence;
  3689. }
  3690. return null;
  3691. #if false
  3692. var middleLeft = unknown
  3693. .ComparisonUnitArray1
  3694. .Skip(countCommonAtBeginning)
  3695. .SkipLast(remainingInLeftParagraph)
  3696. .SkipLast(countCommonAtEnd)
  3697. .ToArray();
  3698. var middleRight = unknown
  3699. .ComparisonUnitArray2
  3700. .Skip(countCommonAtBeginning)
  3701. .SkipLast(remainingInRightParagraph)
  3702. .SkipLast(countCommonAtEnd)
  3703. .ToArray();
  3704. if (middleLeft.Length > 0 && middleRight.Length == 0)
  3705. {
  3706. CorrelatedSequence cs = new CorrelatedSequence();
  3707. cs.CorrelationStatus = CorrelationStatus.Deleted;
  3708. cs.ComparisonUnitArray1 = middleLeft;
  3709. cs.ComparisonUnitArray2 = null;
  3710. newSequence.Add(cs);
  3711. }
  3712. else if (middleLeft.Length == 0 && middleRight.Length > 0)
  3713. {
  3714. CorrelatedSequence cs = new CorrelatedSequence();
  3715. cs.CorrelationStatus = CorrelationStatus.Inserted;
  3716. cs.ComparisonUnitArray1 = null;
  3717. cs.ComparisonUnitArray2 = middleRight;
  3718. newSequence.Add(cs);
  3719. }
  3720. else if (middleLeft.Length > 0 && middleRight.Length > 0)
  3721. {
  3722. CorrelatedSequence cs = new CorrelatedSequence();
  3723. cs.CorrelationStatus = CorrelationStatus.Unknown;
  3724. cs.ComparisonUnitArray1 = middleLeft;
  3725. cs.ComparisonUnitArray2 = middleRight;
  3726. newSequence.Add(cs);
  3727. }
  3728. var remainingInParaLeft = unknown
  3729. .ComparisonUnitArray1
  3730. .Skip(countCommonAtBeginning)
  3731. .Skip(middleLeft.Length)
  3732. .Take(remainingInLeftParagraph)
  3733. .ToArray();
  3734. var remainingInParaRight = unknown
  3735. .ComparisonUnitArray2
  3736. .Skip(countCommonAtBeginning)
  3737. .Skip(middleRight.Length)
  3738. .Take(remainingInRightParagraph)
  3739. .ToArray();
  3740. if (remainingInParaLeft.Length > 0 && remainingInParaRight.Length == 0)
  3741. {
  3742. CorrelatedSequence cs = new CorrelatedSequence();
  3743. cs.CorrelationStatus = CorrelationStatus.Deleted;
  3744. cs.ComparisonUnitArray1 = remainingInParaLeft;
  3745. cs.ComparisonUnitArray2 = null;
  3746. newSequence.Add(cs);
  3747. }
  3748. else if (remainingInParaLeft.Length == 0 && remainingInParaRight.Length > 0)
  3749. {
  3750. CorrelatedSequence cs = new CorrelatedSequence();
  3751. cs.CorrelationStatus = CorrelationStatus.Inserted;
  3752. cs.ComparisonUnitArray1 = null;
  3753. cs.ComparisonUnitArray2 = remainingInParaRight;
  3754. newSequence.Add(cs);
  3755. }
  3756. else if (remainingInParaLeft.Length > 0 && remainingInParaRight.Length > 0)
  3757. {
  3758. CorrelatedSequence cs = new CorrelatedSequence();
  3759. cs.CorrelationStatus = CorrelationStatus.Unknown;
  3760. cs.ComparisonUnitArray1 = remainingInParaLeft;
  3761. cs.ComparisonUnitArray2 = remainingInParaRight;
  3762. newSequence.Add(cs);
  3763. }
  3764. if (countCommonAtEnd != 0)
  3765. {
  3766. CorrelatedSequence cs = new CorrelatedSequence();
  3767. cs.CorrelationStatus = CorrelationStatus.Equal;
  3768. cs.ComparisonUnitArray1 = unknown
  3769. .ComparisonUnitArray1
  3770. .Skip(countCommonAtBeginning + middleLeft.Length + remainingInParaLeft.Length)
  3771. .ToArray();
  3772. cs.ComparisonUnitArray2 = unknown
  3773. .ComparisonUnitArray2
  3774. .Skip(countCommonAtBeginning + middleRight.Length + remainingInParaRight.Length)
  3775. .ToArray();
  3776. if (cs.ComparisonUnitArray1.Length != cs.ComparisonUnitArray2.Length)
  3777. throw new OpenXmlPowerToolsException("Internal error");
  3778. newSequence.Add(cs);
  3779. }
  3780. return newSequence;
  3781. #endif
  3782. }
  /// <summary>
  /// Splits an array of comparison units at the first unit whose first descendant content atom
  /// is a paragraph-properties (w:pPr) element.
  /// </summary>
  /// <param name="cua">The comparison units to split.</param>
  /// <returns>
  /// A list holding the input array itself when no such unit exists; otherwise two arrays:
  /// the units before the paragraph mark, and the units from the paragraph mark onward.
  /// </returns>
  private static List<ComparisonUnit[]> SplitAtParagraphMark(ComparisonUnit[] cua)
  {
      // locate the first unit that starts with a paragraph mark (-1 when there is none)
      int markIndex = -1;
      for (int idx = 0; idx < cua.Length && markIndex < 0; idx++)
      {
          var leadingAtom = cua[idx].DescendantContentAtoms().FirstOrDefault();
          if (leadingAtom != null && leadingAtom.ContentElement.Name == W.pPr)
              markIndex = idx;
      }

      var pieces = new List<ComparisonUnit[]>();
      if (markIndex < 0)
      {
          // no paragraph mark: the whole array is the single piece
          pieces.Add(cua);
          return pieces;
      }

      pieces.Add(cua.Take(markIndex).ToArray());
      pieces.Add(cua.Skip(markIndex).ToArray());
      return pieces;
  }
  /// <summary>
  /// Finds the last body-level paragraph that carries a w:sectPr inside its w:pPr, appends a copy
  /// of that section-properties element to the end of the body, and removes it from the paragraph.
  /// Does nothing when no such paragraph exists.
  /// </summary>
  /// <param name="newXDoc">The document whose body is modified in place.</param>
  private static void MoveLastSectPrToChildOfBody(XDocument newXDoc)
  {
      XElement paraHoldingSectPr = newXDoc
          .Root
          .Elements(W.body)
          .Elements(W.p)
          .LastOrDefault(para => para.Elements(W.pPr).Elements(W.sectPr).Any());

      if (paraHoldingSectPr == null)
          return;

      // XContainer.Add clones elements that already have a parent, so the originals
      // still have to be detached from the paragraph afterwards.
      XElement body = newXDoc.Root.Element(W.body);
      body.Add(paraHoldingSectPr.Elements(W.pPr).Elements(W.sectPr));
      paraHoldingSectPr.Elements(W.pPr).Elements(W.sectPr).Remove();
  }
  // Running counter used by CoalesceRecurse to number the w:ins / w:del wrappers it creates.
  private static int s_MaxId = 0;

  /// <summary>
  /// Builds new markup from a flat list of comparison atoms by resetting the revision id
  /// counter and then coalescing the atoms starting at ancestor level 0.
  /// </summary>
  /// <param name="part">The part handed through to CoalesceRecurse.</param>
  /// <param name="comparisonUnitAtomList">The atoms to coalesce into markup.</param>
  /// <param name="settings">Comparer settings handed through to CoalesceRecurse.</param>
  /// <returns>Whatever CoalesceRecurse produces for level 0 (null when there is nothing to build).</returns>
  private static object ProduceNewWmlMarkupFromCorrelatedSequence(OpenXmlPart part,
      IEnumerable<ComparisonUnitAtom> comparisonUnitAtomList,
      WmlComparerSettings settings)
  {
      // every fabricated document starts numbering its revision wrappers from zero
      s_MaxId = 0;
      return CoalesceRecurse(part, comparisonUnitAtomList, 0, settings);
  }
  /// <summary>
  /// Renumbers the "id" attribute of every wp:docPr element across all content parts,
  /// counting up from 1, then writes every content part's XDocument back.
  /// Elements without an "id" attribute are left alone and do not consume a number.
  /// </summary>
  /// <param name="wDoc">The document whose content parts are updated.</param>
  private static void FixUpDocPrIds(WordprocessingDocument wDoc)
  {
      int counter = 1;
      foreach (var contentPart in wDoc.ContentParts())
      {
          var xDoc = contentPart.GetXDocument();
          foreach (var docPr in xDoc.Descendants(WP.docPr))
          {
              XAttribute id = docPr.Attribute("id");
              if (id == null)
                  continue;
              id.Value = counter.ToString();
              counter++;
          }
      }

      foreach (var contentPart in wDoc.ContentParts())
          contentPart.PutXDocument();
  }
  /// <summary>
  /// Renumbers the w:id attribute of every w:ins and w:del element across all content parts,
  /// counting up from 0, then writes every content part's XDocument back.
  /// Elements without a w:id attribute are left alone and do not consume a number.
  /// </summary>
  /// <param name="wDoc">The document whose content parts are updated.</param>
  private static void FixUpRevMarkIds(WordprocessingDocument wDoc)
  {
      int counter = 0;
      foreach (var contentPart in wDoc.ContentParts())
      {
          var xDoc = contentPart.GetXDocument();
          var revisionMarks = xDoc.Descendants().Where(e => e.Name == W.ins || e.Name == W.del);
          foreach (var revisionMark in revisionMarks)
          {
              XAttribute id = revisionMark.Attribute(W.id);
              if (id == null)
                  continue;
              id.Value = counter.ToString();
              counter++;
          }
      }

      foreach (var contentPart in wDoc.ContentParts())
          contentPart.PutXDocument();
  }
  /// <summary>
  /// Assigns sequential numbers (starting at 1) to every v:shape element across all content parts.
  /// The number is written to the shape's "id" attribute when present, and to the "ShapeID"
  /// attribute of an o:OLEObject sibling (a child of the shape's parent) when present.
  /// Every shape consumes a number, whether or not any attribute was updated.
  /// Finally, every content part's XDocument is written back.
  /// </summary>
  /// <param name="wDoc">The document whose content parts are updated.</param>
  private static void FixUpShapeIds(WordprocessingDocument wDoc)
  {
      int counter = 1;
      foreach (var contentPart in wDoc.ContentParts())
      {
          var xDoc = contentPart.GetXDocument();
          foreach (var shape in xDoc.Descendants(VML.shape))
          {
              string assigned = counter.ToString();
              counter++;

              XAttribute id = shape.Attribute("id");
              if (id != null)
                  id.Value = assigned;

              // keep an accompanying OLE object pointing at the renumbered shape
              XElement oleObject = shape.Parent.Element(O.OLEObject);
              if (oleObject == null)
                  continue;
              XAttribute shapeIdRef = oleObject.Attribute("ShapeID");
              if (shapeIdRef != null)
                  shapeIdRef.Value = assigned;
          }
      }

      foreach (var contentPart in wDoc.ContentParts())
          contentPart.PutXDocument();
  }
  /// <summary>
  /// Assigns sequential numbers (starting at 1) to every v:group element across all content parts,
  /// writing the number to the group's "id" attribute when it has one. Every group consumes a
  /// number even when it has no "id" attribute. Finally, every content part's XDocument is written back.
  /// </summary>
  /// <param name="wDoc">The document whose content parts are updated.</param>
  private static void FixUpGroupIds(WordprocessingDocument wDoc)
  {
      int counter = 1;
      foreach (var contentPart in wDoc.ContentParts())
      {
          var xDoc = contentPart.GetXDocument();
          foreach (var vmlGroup in xDoc.Descendants(VML.group))
          {
              string assigned = counter.ToString();
              counter++;

              XAttribute id = vmlGroup.Attribute("id");
              if (id != null)
                  id.Value = assigned;
          }
      }

      foreach (var contentPart in wDoc.ContentParts())
          contentPart.PutXDocument();
  }
  /// <summary>
  /// Assigns sequential numbers (starting at 1) to every v:shapetype element across all content parts.
  /// The number is written to the shapetype's "id" attribute when present, and to the "type"
  /// attribute of the first v:shape child of the shapetype's parent when present.
  /// Every shapetype consumes a number, whether or not any attribute was updated.
  /// Finally, every content part's XDocument is written back.
  /// </summary>
  /// <param name="wDoc">The document whose content parts are updated.</param>
  private static void FixUpShapeTypeIds(WordprocessingDocument wDoc)
  {
      int counter = 1;
      foreach (var contentPart in wDoc.ContentParts())
      {
          var xDoc = contentPart.GetXDocument();
          foreach (var shapeType in xDoc.Descendants(VML.shapetype))
          {
              string assigned = counter.ToString();
              counter++;

              XAttribute id = shapeType.Attribute("id");
              if (id != null)
                  id.Value = assigned;

              // keep the sibling shape's type reference in step with the renumbered shapetype
              XElement siblingShape = shapeType.Parent.Element(VML.shape);
              if (siblingShape == null)
                  continue;
              XAttribute typeRef = siblingShape.Attribute("type");
              if (typeRef != null)
                  typeRef.Value = assigned;
          }
      }

      foreach (var contentPart in wDoc.ContentParts())
          contentPart.PutXDocument();
  }
  /// <summary>
  /// Rebuilds markup for one level of the ancestor hierarchy from a flat list of comparison atoms.
  /// Atoms are grouped by the unid of their ancestor at <paramref name="level"/>; each group is turned
  /// into one reconstructed item, recursing with level + 1 for deeper ancestors.
  /// </summary>
  /// <param name="part">Passed down the recursion; for inserted/deleted drawings its Uri and package
  /// are used to locate the source and destination package parts.</param>
  /// <param name="list">The atoms to coalesce.</param>
  /// <param name="level">Index into each atom's AncestorElements / AncestorUnids arrays.</param>
  /// <param name="settings">Supplies AuthorForRevisions and DateTimeForRevisions for the w:ins / w:del
  /// wrappers created around math content; also passed down the recursion.</param>
  /// <returns>null when no atom has an ancestor at this level; otherwise a list with one entry per
  /// ancestor group (an element, or a collection of elements, depending on the ancestor's name).</returns>
  3935. private static object CoalesceRecurse(OpenXmlPart part, IEnumerable<ComparisonUnitAtom> list, int level, WmlComparerSettings settings)
  3936. {
  // Group by the unid of the ancestor at this level. Atoms with no ancestor this deep get the
  // key "" and are dropped by the Where below.
  3937. var grouped = list.GroupBy(ca =>
  3938. {
  3939. if (level >= ca.AncestorElements.Length)
  3940. return "";
  3941. return ca.AncestorUnids[level];
  3942. })
  3943. .Where(g => g.Key != "");
  3944. // if there are no deeper children, then we're done.
  3945. if (!grouped.Any())
  3946. return null;
  // Debugging aid: dumps every group and its atoms via TestUtil.NotePad when s_False is set.
  3947. if (s_False)
  3948. {
  3949. var sb = new StringBuilder();
  3950. foreach (var group in grouped)
  3951. {
  3952. sb.AppendFormat("Group Key: {0}", group.Key);
  3953. sb.Append(Environment.NewLine);
  3954. foreach (var groupChildItem in group)
  3955. {
  3956. sb.Append(" ");
  3957. sb.Append(groupChildItem.ToString(0));
  3958. sb.Append(Environment.NewLine);
  3959. }
  3960. sb.Append(Environment.NewLine);
  3961. }
  3962. var sbs = sb.ToString();
  3963. TestUtil.NotePad(sbs);
  3964. }
  // One reconstructed item per ancestor group.
  3965. var elementList = grouped
  3966. .Select(g =>
  3967. {
  3968. var ancestorBeingConstructed = g.First().AncestorElements[level]; // these will all be the same, by definition
  3969. // need to group by corr stat
  // Adjacent atoms are grouped under the key "<unid of next-level ancestor>|<correlation status>".
  // The unid part is "" when the atom has no deeper ancestor. Anything at or below a
  // w:txbxContent ancestor is keyed with status Equal regardless of the atom's own status.
  3970. var groupedChildren = g
  3971. .GroupAdjacent(gc =>
  3972. {
  3973. var key = "";
  3974. if (level < (gc.AncestorElements.Length - 1))
  3975. {
  3976. key = gc.AncestorUnids[level + 1];
  3977. }
  3978. if (gc.AncestorElements.Skip(level).Any(ae => ae.Name == W.txbxContent))
  3979. key += "|" + CorrelationStatus.Equal.ToString();
  3980. else
  3981. key += "|" + gc.CorrelationStatus.ToString();
  3982. return key;
  3983. })
  3984. .ToList();
  // Paragraph: child groups with an empty unid part are direct content of the paragraph and are
  // cloned (tagged with a pt:Status attribute when Deleted/Inserted); all others are rebuilt by
  // recursing one level deeper.
  3985. if (ancestorBeingConstructed.Name == W.p)
  3986. {
  3987. var newChildElements = groupedChildren
  3988. .Select(gc =>
  3989. {
  3990. var spl = gc.Key.Split('|');
  3991. if (spl[0] == "")
  3992. return (object)gc.Select(gcc =>
  3993. {
  3994. var dup = new XElement(gcc.ContentElement);
  3995. if (spl[1] == "Deleted")
  3996. dup.Add(new XAttribute(PtOpenXml.Status, "Deleted"));
  3997. else if (spl[1] == "Inserted")
  3998. dup.Add(new XAttribute(PtOpenXml.Status, "Inserted"));
  3999. return dup;
  4000. });
  4001. else
  4002. {
  4003. return CoalesceRecurse(part, gc, level + 1, settings);
  4004. }
  4005. })
  4006. .ToList();
  // The new paragraph keeps the original's attributes except those in the pt namespace, and
  // gets a pt:Unid attribute set to this group's key.
  4007. var newPara = new XElement(W.p,
  4008. ancestorBeingConstructed.Attributes().Where(a => a.Name.Namespace != PtOpenXml.pt),
  4009. new XAttribute(PtOpenXml.Unid, g.Key),
  4010. newChildElements);
  4011. return newPara;
  4012. }
  // Run: same child handling as for a paragraph; the original run's w:rPr is carried over.
  4013. if (ancestorBeingConstructed.Name == W.r)
  4014. {
  4015. var newChildElements = groupedChildren
  4016. .Select(gc =>
  4017. {
  4018. var spl = gc.Key.Split('|');
  4019. if (spl[0] == "")
  4020. return (object)gc.Select(gcc =>
  4021. {
  4022. var dup = new XElement(gcc.ContentElement);
  4023. if (spl[1] == "Deleted")
  4024. dup.Add(new XAttribute(PtOpenXml.Status, "Deleted"));
  4025. else if (spl[1] == "Inserted")
  4026. dup.Add(new XAttribute(PtOpenXml.Status, "Inserted"));
  4027. return dup;
  4028. });
  4029. else
  4030. {
  4031. return CoalesceRecurse(part, gc, level + 1, settings);
  4032. }
  4033. })
  4034. .ToList();
  4035. XElement rPr = ancestorBeingConstructed.Element(W.rPr);
  4036. var newRun = new XElement(W.r,
  4037. ancestorBeingConstructed.Attributes().Where(a => a.Name.Namespace != PtOpenXml.pt),
  4038. rPr,
  4039. newChildElements);
  4040. return newRun;
  4041. }
  // Text: the values of all atoms in a child group are concatenated into a single element:
  // w:delText (Status=Deleted) for deleted text, w:t (Status=Inserted) for inserted text,
  // plain w:t otherwise. The status is taken from the first atom of the group.
  4042. if (ancestorBeingConstructed.Name == W.t)
  4043. {
  4044. var newChildElements = groupedChildren
  4045. .Select(gc =>
  4046. {
  4047. var textOfTextElement = gc.Select(gce => gce.ContentElement.Value).StringConcatenate();
  4048. var del = gc.First().CorrelationStatus == CorrelationStatus.Deleted;
  4049. var ins = gc.First().CorrelationStatus == CorrelationStatus.Inserted;
  4050. if (del)
  4051. return (object)(new XElement(W.delText,
  4052. new XAttribute(PtOpenXml.Status, "Deleted"),
  4053. GetXmlSpaceAttribute(textOfTextElement),
  4054. textOfTextElement));
  4055. else if (ins)
  4056. return (object)(new XElement(W.t,
  4057. new XAttribute(PtOpenXml.Status, "Inserted"),
  4058. GetXmlSpaceAttribute(textOfTextElement),
  4059. textOfTextElement));
  4060. else
  4061. return (object)(new XElement(W.t,
  4062. GetXmlSpaceAttribute(textOfTextElement),
  4063. textOfTextElement));
  4064. })
  4065. .ToList();
  4066. return newChildElements;
  4067. }
  // Drawing: deleted/inserted drawings are cloned, tagged with pt:Status, and passed to
  // MoveRelatedPartsToDestination along with the package parts looked up by part.Uri in the
  // atom's package and in the new document's package. Unchanged drawings are returned as-is.
  // NOTE(review): the inner gc.Select runs once per atom for every outer atom, and both the
  // source and destination lookups use part.Uri - confirm this is intended when a group holds
  // more than one atom.
  4068. if (ancestorBeingConstructed.Name == W.drawing)
  4069. {
  4070. var newChildElements = groupedChildren
  4071. .Select(gc =>
  4072. {
  4073. var del = gc.First().CorrelationStatus == CorrelationStatus.Deleted;
  4074. var ins = gc.First().CorrelationStatus == CorrelationStatus.Inserted;
  4075. if (del)
  4076. {
  4077. return (object)gc.Select(gcc =>
  4078. {
  4079. var newDrawing = new XElement(gcc.ContentElement);
  4080. newDrawing.Add(new XAttribute(PtOpenXml.Status, "Deleted"));
  4081. var openXmlPartOfDeletedContent = gc.First().Part;
  4082. var openXmlPartInNewDocument = part;
  4083. return gc.Select(gce =>
  4084. {
  4085. Package packageOfDeletedContent = openXmlPartOfDeletedContent.OpenXmlPackage.Package;
  4086. Package packageOfNewContent = openXmlPartInNewDocument.OpenXmlPackage.Package;
  4087. PackagePart partInDeletedDocument = packageOfDeletedContent.GetPart(part.Uri);
  4088. PackagePart partInNewDocument = packageOfNewContent.GetPart(part.Uri);
  4089. return MoveRelatedPartsToDestination(partInDeletedDocument, partInNewDocument, newDrawing);
  4090. });
  4091. });
  4092. }
  4093. else if (ins)
  4094. {
  4095. return gc.Select(gcc =>
  4096. {
  4097. var newDrawing = new XElement(gcc.ContentElement);
  4098. newDrawing.Add(new XAttribute(PtOpenXml.Status, "Inserted"));
  4099. var openXmlPartOfInsertedContent = gc.First().Part;
  4100. var openXmlPartInNewDocument = part;
  4101. return gc.Select(gce =>
  4102. {
  4103. Package packageOfSourceContent = openXmlPartOfInsertedContent.OpenXmlPackage.Package;
  4104. Package packageOfNewContent = openXmlPartInNewDocument.OpenXmlPackage.Package;
  4105. PackagePart partInDeletedDocument = packageOfSourceContent.GetPart(part.Uri);
  4106. PackagePart partInNewDocument = packageOfNewContent.GetPart(part.Uri);
  4107. return MoveRelatedPartsToDestination(partInDeletedDocument, partInNewDocument, newDrawing);
  4108. });
  4109. });
  4110. }
  4111. else
  4112. {
  4113. return gc.Select(gcc =>
  4114. {
  4115. return gcc.ContentElement;
  4116. });
  4117. }
  4118. })
  4119. .ToList();
  4120. return newChildElements;
  4121. }
  // Math: deleted/inserted content is wrapped in a real w:del / w:ins element carrying the
  // author and date from settings and an id taken from the s_MaxId counter (post-incremented).
  // Unchanged content is returned as-is.
  4122. if (ancestorBeingConstructed.Name == M.oMath || ancestorBeingConstructed.Name == M.oMathPara)
  4123. {
  4124. var newChildElements = groupedChildren
  4125. .Select(gc =>
  4126. {
  4127. var del = gc.First().CorrelationStatus == CorrelationStatus.Deleted;
  4128. var ins = gc.First().CorrelationStatus == CorrelationStatus.Inserted;
  4129. if (del)
  4130. {
  4131. return gc.Select(gcc =>
  4132. {
  4133. return new XElement(W.del,
  4134. new XAttribute(W.author, settings.AuthorForRevisions),
  4135. new XAttribute(W.id, s_MaxId++),
  4136. new XAttribute(W.date, settings.DateTimeForRevisions),
  4137. gcc.ContentElement);
  4138. });
  4139. }
  4140. else if (ins)
  4141. {
  4142. return gc.Select(gcc =>
  4143. {
  4144. return new XElement(W.ins,
  4145. new XAttribute(W.author, settings.AuthorForRevisions),
  4146. new XAttribute(W.id, s_MaxId++),
  4147. new XAttribute(W.date, settings.DateTimeForRevisions),
  4148. gcc.ContentElement);
  4149. });
  4150. }
  4151. else
  4152. {
  4153. return gc.Select(gcc => gcc.ContentElement);
  4154. }
  4155. })
  4156. .ToList();
  4157. return newChildElements;
  4158. }
  // Ancestors named in AllowableRunChildren: deleted/inserted atoms yield a new, childless element
  // with the ancestor's name and non-pt attributes plus a pt:Status attribute; unchanged atoms
  // yield their content element as-is.
  4159. if (AllowableRunChildren.Contains(ancestorBeingConstructed.Name))
  4160. {
  4161. var newChildElements = groupedChildren
  4162. .Select(gc =>
  4163. {
  4164. var del = gc.First().CorrelationStatus == CorrelationStatus.Deleted;
  4165. var ins = gc.First().CorrelationStatus == CorrelationStatus.Inserted;
  4166. if (del)
  4167. {
  4168. return gc.Select(gcc =>
  4169. {
  4170. var dup = new XElement(ancestorBeingConstructed.Name,
  4171. ancestorBeingConstructed.Attributes().Where(a => a.Name.Namespace != PtOpenXml.pt),
  4172. new XAttribute(PtOpenXml.Status, "Deleted"));
  4173. return dup;
  4174. });
  4175. }
  4176. else if (ins)
  4177. {
  4178. return gc.Select(gcc =>
  4179. {
  4180. var dup = new XElement(ancestorBeingConstructed.Name,
  4181. ancestorBeingConstructed.Attributes().Where(a => a.Name.Namespace != PtOpenXml.pt),
  4182. new XAttribute(PtOpenXml.Status, "Inserted"));
  4183. return dup;
  4184. });
  4185. }
  4186. else
  4187. {
  4188. return gc.Select(gcc => gcc.ContentElement);
  4189. }
  4190. })
  4191. .ToList();
  4192. return newChildElements;
  4193. }
  // Remaining containers are rebuilt by ReconstructElement, which is told which property
  // child elements (up to three names) to copy over from the original ancestor.
  4194. if (ancestorBeingConstructed.Name == W.tbl)
  4195. return ReconstructElement(part, g, ancestorBeingConstructed, W.tblPr, W.tblGrid, null, level, settings);
  4196. if (ancestorBeingConstructed.Name == W.tr)
  4197. return ReconstructElement(part, g, ancestorBeingConstructed, W.trPr, null, null, level, settings);
  4198. if (ancestorBeingConstructed.Name == W.tc)
  4199. return ReconstructElement(part, g, ancestorBeingConstructed, W.tcPr, null, null, level, settings);
  4200. if (ancestorBeingConstructed.Name == W.sdt)
  4201. return ReconstructElement(part, g, ancestorBeingConstructed, W.sdtPr, W.sdtEndPr, null, level, settings);
  4202. if (ancestorBeingConstructed.Name == W.pict)
  4203. return ReconstructElement(part, g, ancestorBeingConstructed, VML.shapetype, null, null, level, settings);
  4204. if (ancestorBeingConstructed.Name == VML.shape)
  4205. return ReconstructElement(part, g, ancestorBeingConstructed, W10.wrap, null, null, level, settings);
  4206. if (ancestorBeingConstructed.Name == W._object)
  4207. return ReconstructElement(part, g, ancestorBeingConstructed, VML.shapetype, VML.shape, O.OLEObject, level, settings);
  4208. if (ancestorBeingConstructed.Name == W.ruby)
  4209. return ReconstructElement(part, g, ancestorBeingConstructed, W.rubyPr, null, null, level, settings);
  // Any other ancestor: rebuilt with no property elements carried over.
  4210. return (object)ReconstructElement(part, g, ancestorBeingConstructed, null, null, null, level, settings);
  4211. })
  4212. .ToList();
  4213. return elementList;
  4214. }
  /// <summary>
  /// Copies every package part referenced (through a relationship-id attribute) by
  /// <paramref name="contentElement"/> or its descendants from the source package into the
  /// destination package, and rewrites each such attribute to the id of the newly created relationship.
  /// Copied parts whose content type ends in "xml" are processed recursively so that the parts
  /// they reference are copied as well.
  /// </summary>
  /// <param name="partOfDeletedContent">The package part that owns the existing relationships.</param>
  /// <param name="partInNewDocument">The package part that receives the new relationships.</param>
  /// <param name="contentElement">The element whose descendants are scanned; its attributes are modified in place.</param>
  /// <returns>The same <paramref name="contentElement"/> instance, after its relationship ids were rewritten.</returns>
  /// <exception cref="FileFormatException">Thrown when a relationship lookup yields null.</exception>
  private static XElement MoveRelatedPartsToDestination(PackagePart partOfDeletedContent, PackagePart partInNewDocument,
      XElement contentElement)
  {
      // snapshot the elements first: attribute values are rewritten while we iterate
      var elementsToUpdate = contentElement
          .Descendants()
          .Where(d => d.Attributes().Any(a => ComparisonUnitWord.s_RelationshipAttributeNames.Contains(a.Name)))
          .ToList();
      foreach (var element in elementsToUpdate)
      {
          var attributesToUpdate = element
              .Attributes()
              .Where(a => ComparisonUnitWord.s_RelationshipAttributeNames.Contains(a.Name))
              .ToList();
          foreach (var att in attributesToUpdate)
          {
              var rId = (string)att;
              // NOTE(review): PackagePart.GetRelationship is documented to throw for an unknown id
              // rather than return null, so this null check is probably defensive only - confirm.
              var relationshipForDeletedPart = partOfDeletedContent.GetRelationship(rId);
              if (relationshipForDeletedPart == null)
                  throw new FileFormatException("Invalid document");

              // resolve the relationship target against the owning part to find the related part
              Uri targetUri = PackUriHelper
                  .ResolvePartUri(
                      new Uri(partOfDeletedContent.Uri.ToString(), UriKind.Relative),
                      relationshipForDeletedPart.TargetUri);
              var relatedPackagePart = partOfDeletedContent.Package.GetPart(targetUri);

              // build a fresh name in the same folder ("P" + guid), keeping the extension
              // only when the file name has exactly one dot
              var uriSplit = relatedPackagePart.Uri.ToString().Split('/');
              var last = uriSplit[uriSplit.Length - 1].Split('.');
              string uriString = uriSplit.SkipLast(1).Select(p => p + "/").StringConcatenate() +
                  "P" + Guid.NewGuid().ToString().Replace("-", "");
              if (last.Length == 2)
                  uriString += "." + last[1];

              Uri uri = null;
              if (relatedPackagePart.Uri.IsAbsoluteUri)
                  uri = new Uri(uriString, UriKind.Absolute);
              else
                  uri = new Uri(uriString, UriKind.Relative);

              // copy the part's bytes into the destination package
              var newPart = partInNewDocument.Package.CreatePart(uri, relatedPackagePart.ContentType);
              using (var oldPartStream = relatedPackagePart.GetStream())
              using (var newPartStream = newPart.GetStream())
                  FileUtils.CopyStream(oldPartStream, newPartStream);

              // point the attribute at a new relationship from the destination part
              var newRid = "R" + Guid.NewGuid().ToString().Replace("-", "");
              partInNewDocument.CreateRelationship(newPart.Uri, TargetMode.Internal, relationshipForDeletedPart.RelationshipType, newRid);
              att.Value = newRid;

              if (newPart.ContentType.EndsWith("xml"))
              {
                  // the copied part is XML and may itself reference further parts
                  XDocument newPartXDoc = null;
                  using (var stream = newPart.GetStream())
                  {
                      newPartXDoc = XDocument.Load(stream);
                      MoveRelatedPartsToDestination(relatedPackagePart, newPart, newPartXDoc.Root);
                  }
                  // FileMode.Create truncates the part before writing. Without it, a re-serialized
                  // document shorter than the copied bytes would leave stale trailing data behind
                  // and corrupt the part.
                  using (var stream = newPart.GetStream(FileMode.Create, FileAccess.Write))
                      newPartXDoc.Save(stream);
              }
          }
      }
      return contentElement;
  }
  /// <summary>
  /// Produces an xml:space="preserve" attribute when the given text starts or ends with
  /// whitespace, so that the whitespace survives in the serialized text element.
  /// </summary>
  /// <param name="textOfTextElement">The text that will become the content of a text element.</param>
  /// <returns>
  /// The xml:space attribute when the first or last character is whitespace; null otherwise,
  /// including for null or empty text (XElement content ignores null items).
  /// </returns>
  private static XAttribute GetXmlSpaceAttribute(string textOfTextElement)
  {
      // nothing to preserve, and indexing an empty string would throw
      if (string.IsNullOrEmpty(textOfTextElement))
          return null;

      if (char.IsWhiteSpace(textOfTextElement[0]) ||
          char.IsWhiteSpace(textOfTextElement[textOfTextElement.Length - 1]))
          return new XAttribute(XNamespace.Xml + "space", "preserve");

      return null;
  }
  /// <summary>
  /// Rebuilds a container element: its children are produced by coalescing the group's atoms one
  /// level deeper, and up to three kinds of property child elements are carried over from the original.
  /// </summary>
  /// <param name="part">Passed through to CoalesceRecurse.</param>
  /// <param name="g">The atoms that live under the element being rebuilt.</param>
  /// <param name="ancestorBeingConstructed">The original element; supplies name, attributes and property children.</param>
  /// <param name="props1XName">Name of the first property child to copy, or null for none.</param>
  /// <param name="props2XName">Name of the second property child to copy, or null for none.</param>
  /// <param name="props3XName">Name of the third property child to copy, or null for none.</param>
  /// <param name="level">The level of <paramref name="ancestorBeingConstructed"/>; children are built at level + 1.</param>
  /// <param name="settings">Passed through to CoalesceRecurse.</param>
  /// <returns>A new element with the original's name and all of its attributes, followed by the
  /// selected property children and the rebuilt content.</returns>
  private static XElement ReconstructElement(OpenXmlPart part, IGrouping<string, ComparisonUnitAtom> g, XElement ancestorBeingConstructed, XName props1XName,
      XName props2XName, XName props3XName, int level, WmlComparerSettings settings)
  {
      // build the content first; property elements are only looked up afterwards
      object rebuiltContent = CoalesceRecurse(part, g, level + 1, settings);

      object firstProps = props1XName != null ? ancestorBeingConstructed.Elements(props1XName) : null;
      object secondProps = props2XName != null ? ancestorBeingConstructed.Elements(props2XName) : null;
      object thirdProps = props3XName != null ? ancestorBeingConstructed.Elements(props3XName) : null;

      return new XElement(ancestorBeingConstructed.Name,
          ancestorBeingConstructed.Attributes(),
          firstProps, secondProps, thirdProps, rebuiltContent);
  }
  /// <summary>
  /// Correlates two arrays of comparison units. Starts with a single Unknown sequence covering both
  /// arrays and repeatedly replaces the first Unknown sequence in the list with the sequences a
  /// splitting strategy produces for it, until no Unknown sequence is left.
  /// </summary>
  /// <param name="cu1">Comparison units of the first document.</param>
  /// <param name="cu2">Comparison units of the second document.</param>
  /// <param name="settings">Comparer settings passed to the splitting strategies.</param>
  /// <returns>The list of correlated sequences, none of which has status Unknown.</returns>
  private static List<CorrelatedSequence> Lcs(ComparisonUnit[] cu1, ComparisonUnit[] cu2, WmlComparerSettings settings)
  {
      // initial state: one Unknown sequence spanning both inputs in full
      var wholeDocument = new CorrelatedSequence();
      wholeDocument.CorrelationStatus = CorrelationStatus.Unknown;
      wholeDocument.ComparisonUnitArray1 = cu1;
      wholeDocument.ComparisonUnitArray2 = cu2;

      var csList = new List<CorrelatedSequence>();
      csList.Add(wholeDocument);

      for (;;)
      {
          if (s_False)
          {
              // debugging aid: dump the current list
              var listDump = new StringBuilder();
              foreach (var entry in csList)
                  listDump.Append(entry.ToString()).Append(Environment.NewLine);
              TestUtil.NotePad(listDump.ToString());
          }

          CorrelatedSequence unknown = csList
              .FirstOrDefault(z => z.CorrelationStatus == CorrelationStatus.Unknown);
          if (unknown == null)
              return csList;

          // When the unknown is a single group of the same type on each side (paragraph, table,
          // row, cell, textbox), the Unids of the 'after' document can be aligned with the 'before' one.
          SetAfterUnids(unknown);

          if (s_False)
          {
              // debugging aid: dump the sequence about to be split
              var unknownDump = new StringBuilder();
              unknownDump.Append(unknown.ToString());
              TestUtil.NotePad(unknownDump.ToString());
          }

          // strategies in order of preference; each yields null when it does not apply
          List<CorrelatedSequence> replacement = ProcessCorrelatedHashes(unknown, settings);
          if (replacement == null)
              replacement = FindCommonAtBeginningAndEnd(unknown, settings);
          if (replacement == null)
              replacement = DoLcsAlgorithm(unknown, settings);

          // swap the unknown for its replacement, keeping the replacement's order:
          // inserting the reversed items at one fixed index restores the original order
          int slot = csList.IndexOf(unknown);
          csList.RemoveAt(slot);
          replacement.Reverse();
          foreach (var piece in replacement)
              csList.Insert(slot, piece);
      }
  }
  /// <summary>
  /// When an Unknown sequence consists of exactly one comparison group on each side and both groups
  /// have the same group type, copies the Unids of the left group's ancestors (from the outermost
  /// ancestor down to and including the element for that group type) onto the corresponding
  /// ancestors of every content atom in the right group. Otherwise does nothing.
  /// </summary>
  /// <param name="unknown">The sequence to inspect; ancestor elements of its second array are modified in place.</param>
  /// <exception cref="OpenXmlPowerToolsException">
  /// Thrown for an unrecognized group type, or when an ancestor that must carry a Unid attribute lacks one.
  /// </exception>
  private static void SetAfterUnids(CorrelatedSequence unknown)
  {
      // only a one-to-one pairing of groups can be aligned
      if (unknown.ComparisonUnitArray1.Length != 1 || unknown.ComparisonUnitArray2.Length != 1)
          return;

      var leftGroup = unknown.ComparisonUnitArray1[0] as ComparisonUnitGroup;
      var rightGroup = unknown.ComparisonUnitArray2[0] as ComparisonUnitGroup;
      if (leftGroup == null || rightGroup == null)
          return;
      if (leftGroup.ComparisonUnitGroupType != rightGroup.ComparisonUnitGroupType)
          return;

      var kind = leftGroup.ComparisonUnitGroupType;
      var leftAtoms = leftGroup.DescendantContentAtoms();
      var rightAtoms = rightGroup.DescendantContentAtoms();

      // the element name at which the ancestor walk stops (inclusive)
      XName stopAt = null;
      if (kind == ComparisonUnitGroupType.Paragraph)
          stopAt = W.p;
      else if (kind == ComparisonUnitGroupType.Table)
          stopAt = W.tbl;
      else if (kind == ComparisonUnitGroupType.Row)
          stopAt = W.tr;
      else if (kind == ComparisonUnitGroupType.Cell)
          stopAt = W.tc;
      else if (kind == ComparisonUnitGroupType.Textbox)
          stopAt = W.txbxContent;
      if (stopAt == null)
          throw new OpenXmlPowerToolsException("Internal error");

      // collect the Unids of the left side's ancestors, outermost first, through stopAt
      var sourceUnids = new List<string>();
      foreach (var ancestor in leftAtoms.First().AncestorElements)
      {
          var unidValue = (string)ancestor.Attribute(PtOpenXml.Unid);
          if (unidValue == null)
              throw new OpenXmlPowerToolsException("Internal error");
          sourceUnids.Add(unidValue);
          if (ancestor.Name == stopAt)
              break;
      }

      // stamp them, position by position, onto the ancestors of every right-side atom
      foreach (var atom in rightAtoms)
      {
          var ancestors = atom.AncestorElements;
          int depth = Math.Min(ancestors.Length, sourceUnids.Count);
          for (int position = 0; position < depth; position++)
          {
              var ancestor = ancestors[position];
              var unidAtt = ancestor.Attribute(PtOpenXml.Unid);
              // footnotes / endnotes containers are skipped rather than stamped
              if (ancestor.Name == W.footnotes || ancestor.Name == W.endnotes)
                  continue;
              if (unidAtt == null)
                  throw new OpenXmlPowerToolsException("Internal error");
              unidAtt.Value = sourceUnids[position];
          }
      }
  }
  /// <summary>
  /// Tries to split an unknown correlated sequence around the run of consecutive comparison unit groups
  /// whose CorrelatedSHA1Hash values match pairwise on both sides.
  /// </summary>
  /// <remarks>
  /// The run that wins is the one with the greatest total DescendantContentAtomsCount (summed over the
  /// units of ComparisonUnitArray1); on a tie, the first run found wins (outer scan over array 1, inner
  /// scan over array 2). The run is only used when it is long enough, or, for runs of one to three groups,
  /// when both sides of the run contain enough content atoms.
  /// </remarks>
  /// <param name="unknown">The correlated sequence to split. Both of its comparison unit arrays are read; it is not modified.</param>
  /// <param name="settings">Comparer settings. Not consulted by this method.</param>
  /// <returns>
  /// A new list containing, in order: an optional leading sequence for the units before the run (Deleted,
  /// Inserted, or Unknown depending on which sides have leading units), one Unknown sequence per matched
  /// pair of groups, and an optional trailing sequence for the units after the run. Returns null when
  /// either side has fewer than three units, when the first unit on either side is not a Paragraph, Table,
  /// or Row group, or when no run passes the threshold.
  /// </returns>
  private static List<CorrelatedSequence> ProcessCorrelatedHashes(CorrelatedSequence unknown, WmlComparerSettings settings)
  {
      // Minimum number of content atoms required on each side for short runs to be trusted.
      const int singleGroupAtomThreshold = 16;
      const int shortRunAtomThreshold = 32;

      var cul1 = unknown.ComparisonUnitArray1;
      var cul2 = unknown.ComparisonUnitArray2;

      // never attempt this optimization if there are less than 3 groups
      if (Math.Min(cul1.Length, cul2.Length) < 3)
          return null;

      var firstInCu1 = cul1.FirstOrDefault() as ComparisonUnitGroup;
      var firstInCu2 = cul2.FirstOrDefault() as ComparisonUnitGroup;
      if (firstInCu1 == null || firstInCu2 == null)
          return null;

      var firstType1 = firstInCu1.ComparisonUnitGroupType;
      var firstType2 = firstInCu2.ComparisonUnitGroupType;
      bool firstIsBlockLevel1 =
          firstType1 == ComparisonUnitGroupType.Paragraph ||
          firstType1 == ComparisonUnitGroupType.Table ||
          firstType1 == ComparisonUnitGroupType.Row;
      bool firstIsBlockLevel2 =
          firstType2 == ComparisonUnitGroupType.Paragraph ||
          firstType2 == ComparisonUnitGroupType.Table ||
          firstType2 == ComparisonUnitGroupType.Row;
      if (!firstIsBlockLevel1 || !firstIsBlockLevel2)
          return null;

      // Next want to do the lcs algorithm on this.
      // potentially, we will find all paragraphs are correlated, but they may not be for two reasons-
      // - if there were changes that were not tracked
      // - if the anomalies in the change tracking cause there to be a mismatch in the number of paragraphs
      // therefore we are going to do the whole LCS algorithm thing
      // and at the end of the process, we set up the correlated sequence list where correlated paragraphs are together in their
      // own unknown correlated sequence.
      int bestLength = 0;
      int bestAtomCount = 0;
      int bestI1 = -1;
      int bestI2 = -1;
      for (int i1 = 0; i1 < cul1.Length; i1++)
      {
          for (int i2 = 0; i2 < cul2.Length; i2++)
          {
              // Extend the run starting at (i1, i2) for as long as both sides hold groups of the same
              // type with identical, non-null correlated hashes.
              int runLength = 0;
              int runAtomCount = 0;
              while (i1 + runLength < cul1.Length && i2 + runLength < cul2.Length)
              {
                  var group1 = cul1[i1 + runLength] as ComparisonUnitGroup;
                  var group2 = cul2[i2 + runLength] as ComparisonUnitGroup;
                  bool match = group1 != null &&
                      group2 != null &&
                      group1.ComparisonUnitGroupType == group2.ComparisonUnitGroupType &&
                      group1.CorrelatedSHA1Hash != null &&
                      group2.CorrelatedSHA1Hash != null &&
                      group1.CorrelatedSHA1Hash == group2.CorrelatedSHA1Hash;
                  if (!match)
                      break;
                  runAtomCount += cul1[i1 + runLength].DescendantContentAtomsCount;
                  runLength++;
              }

              // Strictly greater: on a tie the earliest run found is kept.
              if (runAtomCount > bestAtomCount)
              {
                  bestLength = runLength;
                  bestAtomCount = runAtomCount;
                  bestI1 = i1;
                  bestI2 = i2;
              }
          }
      }

      // here we want to have some sort of threshold, and if the best run is not longer than the threshold, then don't do anything
      if (bestLength == 0)
          return null;
      if (bestLength <= 3)
      {
          // Runs of one to three groups are only accepted when both sides carry enough content.
          int numberOfAtoms1 = 0;
          int numberOfAtoms2 = 0;
          for (int i = 0; i < bestLength; i++)
          {
              numberOfAtoms1 += cul1[bestI1 + i].DescendantContentAtoms().Count();
              numberOfAtoms2 += cul2[bestI2 + i].DescendantContentAtoms().Count();
          }
          int requiredAtoms = bestLength == 1 ? singleGroupAtomThreshold : shortRunAtomThreshold;
          if (numberOfAtoms1 <= requiredAtoms || numberOfAtoms2 <= requiredAtoms)
              return null;
      }

      var newListOfCorrelatedSequence = new List<CorrelatedSequence>();

      // Units before the run: deleted if only the left side has any, inserted if only the right side
      // has any, unknown if both do, and nothing at all if the run starts at index 0 on both sides.
      if (bestI1 > 0 && bestI2 == 0)
      {
          newListOfCorrelatedSequence.Add(new CorrelatedSequence
          {
              CorrelationStatus = CorrelationStatus.Deleted,
              ComparisonUnitArray1 = cul1.Take(bestI1).ToArray(),
              ComparisonUnitArray2 = null,
          });
      }
      else if (bestI1 == 0 && bestI2 > 0)
      {
          newListOfCorrelatedSequence.Add(new CorrelatedSequence
          {
              CorrelationStatus = CorrelationStatus.Inserted,
              ComparisonUnitArray1 = null,
              ComparisonUnitArray2 = cul2.Take(bestI2).ToArray(),
          });
      }
      else if (bestI1 > 0 && bestI2 > 0)
      {
          newListOfCorrelatedSequence.Add(new CorrelatedSequence
          {
              CorrelationStatus = CorrelationStatus.Unknown,
              ComparisonUnitArray1 = cul1.Take(bestI1).ToArray(),
              ComparisonUnitArray2 = cul2.Take(bestI2).ToArray(),
          });
      }

      // Each matched pair of groups becomes its own unknown sequence so it is compared in isolation.
      for (int i = 0; i < bestLength; i++)
      {
          newListOfCorrelatedSequence.Add(new CorrelatedSequence
          {
              CorrelationStatus = CorrelationStatus.Unknown,
              ComparisonUnitArray1 = new[] { cul1[bestI1 + i] },
              ComparisonUnitArray2 = new[] { cul2[bestI2 + i] },
          });
      }

      // Units after the run, classified the same way as the leading units.
      int endI1 = bestI1 + bestLength;
      int endI2 = bestI2 + bestLength;
      if (endI1 < cul1.Length && endI2 == cul2.Length)
      {
          newListOfCorrelatedSequence.Add(new CorrelatedSequence
          {
              CorrelationStatus = CorrelationStatus.Deleted,
              ComparisonUnitArray1 = cul1.Skip(endI1).ToArray(),
              ComparisonUnitArray2 = null,
          });
      }
      else if (endI1 == cul1.Length && endI2 < cul2.Length)
      {
          newListOfCorrelatedSequence.Add(new CorrelatedSequence
          {
              CorrelationStatus = CorrelationStatus.Inserted,
              ComparisonUnitArray1 = null,
              ComparisonUnitArray2 = cul2.Skip(endI2).ToArray(),
          });
      }
      else if (endI1 < cul1.Length && endI2 < cul2.Length)
      {
          newListOfCorrelatedSequence.Add(new CorrelatedSequence
          {
              CorrelationStatus = CorrelationStatus.Unknown,
              ComparisonUnitArray1 = cul1.Skip(endI1).ToArray(),
              ComparisonUnitArray2 = cul2.Skip(endI2).ToArray(),
          });
      }

      return newListOfCorrelatedSequence;
  }
  4639. private static List<CorrelatedSequence> DoLcsAlgorithm(CorrelatedSequence unknown, WmlComparerSettings settings)
  4640. {
  4641. var newListOfCorrelatedSequence = new List<CorrelatedSequence>();
  4642. var cul1 = unknown.ComparisonUnitArray1;
  4643. var cul2 = unknown.ComparisonUnitArray2;
  4644. // first thing to do - if we have an unknown with zero length on left or right side, create appropriate
  4645. // this is a code optimization that enables easier processing of cases elsewhere.
  4646. if (cul1.Length > 0 && cul2.Length == 0)
  4647. {
  4648. var deletedCorrelatedSequence = new CorrelatedSequence();
  4649. deletedCorrelatedSequence.CorrelationStatus = CorrelationStatus.Deleted;
  4650. deletedCorrelatedSequence.ComparisonUnitArray1 = cul1;
  4651. deletedCorrelatedSequence.ComparisonUnitArray2 = null;
  4652. newListOfCorrelatedSequence.Add(deletedCorrelatedSequence);
  4653. return newListOfCorrelatedSequence;
  4654. }
  4655. else if (cul1.Length == 0 && cul2.Length > 0)
  4656. {
  4657. var insertedCorrelatedSequence = new CorrelatedSequence();
  4658. insertedCorrelatedSequence.CorrelationStatus = CorrelationStatus.Inserted;
  4659. insertedCorrelatedSequence.ComparisonUnitArray1 = null;
  4660. insertedCorrelatedSequence.ComparisonUnitArray2 = cul2;
  4661. newListOfCorrelatedSequence.Add(insertedCorrelatedSequence);
  4662. return newListOfCorrelatedSequence;
  4663. }
  4664. else if (cul1.Length == 0 && cul2.Length == 0)
  4665. {
  4666. return newListOfCorrelatedSequence; // this will effectively remove the unknown with no data on either side from the current data model.
  4667. }
  4668. int currentLongestCommonSequenceLength = 0;
  4669. int currentI1 = -1;
  4670. int currentI2 = -1;
  4671. for (int i1 = 0; i1 < cul1.Length - currentLongestCommonSequenceLength; i1++)
  4672. {
  4673. for (int i2 = 0; i2 < cul2.Length - currentLongestCommonSequenceLength; i2++)
  4674. {
  4675. var thisSequenceLength = 0;
  4676. var thisI1 = i1;
  4677. var thisI2 = i2;
  4678. while (true)
  4679. {
  4680. if (cul1[thisI1].SHA1Hash == cul2[thisI2].SHA1Hash)
  4681. {
  4682. thisI1++;
  4683. thisI2++;
  4684. thisSequenceLength++;
  4685. if (thisI1 == cul1.Length || thisI2 == cul2.Length)
  4686. {
  4687. if (thisSequenceLength > currentLongestCommonSequenceLength)
  4688. {
  4689. currentLongestCommonSequenceLength = thisSequenceLength;
  4690. currentI1 = i1;
  4691. currentI2 = i2;
  4692. }
  4693. break;
  4694. }
  4695. continue;
  4696. }
  4697. else
  4698. {
  4699. if (thisSequenceLength > currentLongestCommonSequenceLength)
  4700. {
  4701. currentLongestCommonSequenceLength = thisSequenceLength;
  4702. currentI1 = i1;
  4703. currentI2 = i2;
  4704. }
  4705. break;
  4706. }
  4707. }
  4708. }
  4709. }
  4710. // never start a common section with a paragraph mark.
  4711. while (true)
  4712. {
  4713. if (currentLongestCommonSequenceLength <= 1)
  4714. break;
  4715. var firstCommon = cul1[currentI1];
  4716. var firstCommonWord = firstCommon as ComparisonUnitWord;
  4717. if (firstCommonWord == null)
  4718. break;
  4719. // if the word contains more than one atom, then not a paragraph mark
  4720. if (firstCommonWord.Contents.Count() != 1)
  4721. break;
  4722. var firstCommonAtom = firstCommonWord.Contents.First() as ComparisonUnitAtom;
  4723. if (firstCommonAtom == null)
  4724. break;
  4725. if (firstCommonAtom.ContentElement.Name != W.pPr)
  4726. break;
  4727. --currentLongestCommonSequenceLength;
  4728. if (currentLongestCommonSequenceLength == 0)
  4729. {
  4730. currentI1 = -1;
  4731. currentI2 = -1;
  4732. }
  4733. else
  4734. {
  4735. ++currentI1;
  4736. ++currentI2;
  4737. }
  4738. }
  4739. bool isOnlyParagraphMark = false;
  4740. if (currentLongestCommonSequenceLength == 1)
  4741. {
  4742. var firstCommon = cul1[currentI1];
  4743. var firstCommonWord = firstCommon as ComparisonUnitWord;
  4744. if (firstCommonWord != null)
  4745. {
  4746. // if the word contains more than one atom, then not a paragraph mark
  4747. if (firstCommonWord.Contents.Count() == 1)
  4748. {
  4749. var firstCommonAtom = firstCommonWord.Contents.First() as ComparisonUnitAtom;
  4750. if (firstCommonAtom != null)
  4751. {
  4752. if (firstCommonAtom.ContentElement.Name == W.pPr)
  4753. isOnlyParagraphMark = true;
  4754. }
  4755. }
  4756. }
  4757. }
  4758. // don't match just a single character
  4759. if (currentLongestCommonSequenceLength == 1)
  4760. {
  4761. var cuw2 = cul2[currentI2] as ComparisonUnitAtom;
  4762. if (cuw2 != null)
  4763. {
  4764. if (cuw2.ContentElement.Name == W.t && cuw2.ContentElement.Value == " ")
  4765. {
  4766. currentI1 = -1;
  4767. currentI2 = -1;
  4768. currentLongestCommonSequenceLength = 0;
  4769. }
  4770. }
  4771. }
  4772. // don't match only word break characters
  4773. if (currentLongestCommonSequenceLength > 0 && currentLongestCommonSequenceLength <= 3)
  4774. {
  4775. var commonSequence = cul1.Skip(currentI1).Take(currentLongestCommonSequenceLength).ToArray();
  4776. // if they are all ComparisonUnitWord objects
  4777. var oneIsNotWord = commonSequence.Any(cs => (cs as ComparisonUnitWord) == null);
  4778. var allAreWords = !oneIsNotWord;
  4779. if (allAreWords)
  4780. {
  4781. var contentOtherThanWordSplitChars = commonSequence
  4782. .Cast<ComparisonUnitWord>()
  4783. .Any(cs =>
  4784. {
  4785. var otherThanText = cs.DescendantContentAtoms().Any(dca => dca.ContentElement.Name != W.t);
  4786. if (otherThanText)
  4787. return true;
  4788. var otherThanWordSplit = cs
  4789. .DescendantContentAtoms()
  4790. .Any(dca =>
  4791. {
  4792. var charValue = dca.ContentElement.Value;
  4793. var isWordSplit = settings.WordSeparators.Contains(charValue[0]);
  4794. if (isWordSplit)
  4795. return false;
  4796. return true;
  4797. });
  4798. return otherThanWordSplit;
  4799. });
  4800. if (!contentOtherThanWordSplitChars)
  4801. {
  4802. currentI1 = -1;
  4803. currentI2 = -1;
  4804. currentLongestCommonSequenceLength = 0;
  4805. }
  4806. }
  4807. }
  4808. // if we are only looking at text, and if the longest common subsequence is less than 15% of the whole, then forget it,
  4809. // don't find that LCS.
  4810. if (!isOnlyParagraphMark && currentLongestCommonSequenceLength > 0)
  4811. {
  4812. var anyButWord1 = cul1.Any(cu => (cu as ComparisonUnitWord) == null);
  4813. var anyButWord2 = cul2.Any(cu => (cu as ComparisonUnitWord) == null);
  4814. if (!anyButWord1 && !anyButWord2)
  4815. {
  4816. var maxLen = Math.Max(cul1.Length, cul2.Length);
  4817. if (((double)currentLongestCommonSequenceLength / (double)maxLen) < settings.DetailThreshold)
  4818. {
  4819. currentI1 = -1;
  4820. currentI2 = -1;
  4821. currentLongestCommonSequenceLength = 0;
  4822. }
  4823. }
  4824. }
  4825. if (currentI1 == -1 && currentI2 == -1)
  4826. {
  4827. var leftLength = unknown.ComparisonUnitArray1.Length;
  4828. var leftTables = unknown.ComparisonUnitArray1.OfType<ComparisonUnitGroup>().Where(l => l.ComparisonUnitGroupType == ComparisonUnitGroupType.Table).Count();
  4829. var leftRows = unknown.ComparisonUnitArray1.OfType<ComparisonUnitGroup>().Where(l => l.ComparisonUnitGroupType == ComparisonUnitGroupType.Row).Count();
  4830. var leftCells = unknown.ComparisonUnitArray1.OfType<ComparisonUnitGroup>().Where(l => l.ComparisonUnitGroupType == ComparisonUnitGroupType.Cell).Count();
  4831. var leftParagraphs = unknown.ComparisonUnitArray1.OfType<ComparisonUnitGroup>().Where(l => l.ComparisonUnitGroupType == ComparisonUnitGroupType.Paragraph).Count();
  4832. var leftTextboxes = unknown.ComparisonUnitArray1.OfType<ComparisonUnitGroup>().Where(l => l.ComparisonUnitGroupType == ComparisonUnitGroupType.Textbox).Count();
  4833. var leftWords = unknown.ComparisonUnitArray1.OfType<ComparisonUnitWord>().Count();
  4834. var rightLength = unknown.ComparisonUnitArray2.Length;
  4835. var rightTables = unknown.ComparisonUnitArray2.OfType<ComparisonUnitGroup>().Where(l => l.ComparisonUnitGroupType == ComparisonUnitGroupType.Table).Count();
  4836. var rightRows = unknown.ComparisonUnitArray2.OfType<ComparisonUnitGroup>().Where(l => l.ComparisonUnitGroupType == ComparisonUnitGroupType.Row).Count();
  4837. var rightCells = unknown.ComparisonUnitArray2.OfType<ComparisonUnitGroup>().Where(l => l.ComparisonUnitGroupType == ComparisonUnitGroupType.Cell).Count();
  4838. var rightParagraphs = unknown.ComparisonUnitArray2.OfType<ComparisonUnitGroup>().Where(l => l.ComparisonUnitGroupType == ComparisonUnitGroupType.Paragraph).Count();
  4839. var rightTextboxes = unknown.ComparisonUnitArray2.OfType<ComparisonUnitGroup>().Where(l => l.ComparisonUnitGroupType == ComparisonUnitGroupType.Textbox).Count();
  4840. var rightWords = unknown.ComparisonUnitArray2.OfType<ComparisonUnitWord>().Count();
  4841. // if either side has both words, rows and text boxes, then we need to separate out into separate unknown correlated sequences
  4842. // group adjacent based on whether word, row, or textbox
  4843. // in most cases, the count of groups will be the same, but they may differ
  4844. // if the first group on either side is word, then create a deleted or inserted corr sequ for it.
  4845. // then have counter on both sides pointing to the first matched pairs of rows
  4846. // create an unknown corr sequ for it.
  4847. // increment both counters
  4848. // if one is at end but the other is not, then tag the remaining content as inserted or deleted, and done.
  4849. // if both are at the end, then done
  4850. // return the new list of corr sequ
  4851. var leftOnlyWordsRowsTextboxes = leftLength == leftWords + leftRows + leftTextboxes;
  4852. var rightOnlyWordsRowsTextboxes = rightLength == rightWords + rightRows + rightTextboxes;
  4853. if ((leftWords > 0 || rightWords > 0) &&
  4854. (leftRows > 0 || rightRows > 0 || leftTextboxes > 0 || rightTextboxes > 0) &&
  4855. (leftOnlyWordsRowsTextboxes && rightOnlyWordsRowsTextboxes))
  4856. {
  4857. var leftGrouped = unknown
  4858. .ComparisonUnitArray1
  4859. .GroupAdjacent(cu =>
  4860. {
  4861. if (cu is ComparisonUnitWord)
  4862. {
  4863. return "Word";
  4864. }
  4865. else
  4866. {
  4867. var cug = cu as ComparisonUnitGroup;
  4868. if (cug.ComparisonUnitGroupType == ComparisonUnitGroupType.Row)
  4869. return "Row";
  4870. if (cug.ComparisonUnitGroupType == ComparisonUnitGroupType.Textbox)
  4871. return "Textbox";
  4872. throw new OpenXmlPowerToolsException("Internal error");
  4873. }
  4874. })
  4875. .ToArray();
  4876. var rightGrouped = unknown
  4877. .ComparisonUnitArray2
  4878. .GroupAdjacent(cu =>
  4879. {
  4880. if (cu is ComparisonUnitWord)
  4881. {
  4882. return "Word";
  4883. }
  4884. else
  4885. {
  4886. var cug = cu as ComparisonUnitGroup;
  4887. if (cug.ComparisonUnitGroupType == ComparisonUnitGroupType.Row)
  4888. return "Row";
  4889. if (cug.ComparisonUnitGroupType == ComparisonUnitGroupType.Textbox)
  4890. return "Textbox";
  4891. throw new OpenXmlPowerToolsException("Internal error");
  4892. }
  4893. })
  4894. .ToArray();
  4895. int iLeft = 0;
  4896. int iRight = 0;
  4897. // create an unknown corr sequ for it.
  4898. // increment both counters
  4899. // if one is at end but the other is not, then tag the remaining content as inserted or deleted, and done.
  4900. // if both are at the end, then done
  4901. // return the new list of corr sequ
  4902. while (true)
  4903. {
  4904. if (leftGrouped[iLeft].Key == rightGrouped[iRight].Key)
  4905. {
  4906. var unknownCorrelatedSequence = new CorrelatedSequence();
  4907. unknownCorrelatedSequence.ComparisonUnitArray1 = leftGrouped[iLeft].ToArray();
  4908. unknownCorrelatedSequence.ComparisonUnitArray2 = rightGrouped[iRight].ToArray();
  4909. unknownCorrelatedSequence.CorrelationStatus = CorrelationStatus.Unknown;
  4910. newListOfCorrelatedSequence.Add(unknownCorrelatedSequence);
  4911. ++iLeft;
  4912. ++iRight;
  4913. }
  4914. // have to decide which of the following two branches to do first based on whether the left contains a paragraph mark
  4915. // i.e. cant insert a string of deleted text right before a table.
  4916. else if (leftGrouped[iLeft].Key == "Word" &&
  4917. leftGrouped[iLeft].Select(lg => lg.DescendantContentAtoms()).SelectMany(m => m).Last().ContentElement.Name != W.pPr &&
  4918. rightGrouped[iRight].Key == "Row")
  4919. {
  4920. var insertedCorrelatedSequence = new CorrelatedSequence();
  4921. insertedCorrelatedSequence.ComparisonUnitArray1 = null;
  4922. insertedCorrelatedSequence.ComparisonUnitArray2 = rightGrouped[iRight].ToArray();
  4923. insertedCorrelatedSequence.CorrelationStatus = CorrelationStatus.Inserted;
  4924. newListOfCorrelatedSequence.Add(insertedCorrelatedSequence);
  4925. ++iRight;
  4926. }
  4927. else if (rightGrouped[iRight].Key == "Word" &&
  4928. rightGrouped[iRight].Select(lg => lg.DescendantContentAtoms()).SelectMany(m => m).Last().ContentElement.Name != W.pPr &&
  4929. leftGrouped[iLeft].Key == "Row")
  4930. {
  4931. var insertedCorrelatedSequence = new CorrelatedSequence();
  4932. insertedCorrelatedSequence.ComparisonUnitArray1 = null;
  4933. insertedCorrelatedSequence.ComparisonUnitArray2 = leftGrouped[iLeft].ToArray();
  4934. insertedCorrelatedSequence.CorrelationStatus = CorrelationStatus.Inserted;
  4935. newListOfCorrelatedSequence.Add(insertedCorrelatedSequence);
  4936. ++iLeft;
  4937. }
  4938. else if (leftGrouped[iLeft].Key == "Word" && rightGrouped[iRight].Key != "Word")
  4939. {
  4940. var deletedCorrelatedSequence = new CorrelatedSequence();
  4941. deletedCorrelatedSequence.ComparisonUnitArray1 = leftGrouped[iLeft].ToArray();
  4942. deletedCorrelatedSequence.ComparisonUnitArray2 = null;
  4943. deletedCorrelatedSequence.CorrelationStatus = CorrelationStatus.Deleted;
  4944. newListOfCorrelatedSequence.Add(deletedCorrelatedSequence);
  4945. ++iLeft;
  4946. }
  4947. else if (leftGrouped[iLeft].Key != "Word" && rightGrouped[iRight].Key == "Word")
  4948. {
  4949. var insertedCorrelatedSequence = new CorrelatedSequence();
  4950. insertedCorrelatedSequence.ComparisonUnitArray1 = null;
  4951. insertedCorrelatedSequence.ComparisonUnitArray2 = rightGrouped[iRight].ToArray();
  4952. insertedCorrelatedSequence.CorrelationStatus = CorrelationStatus.Inserted;
  4953. newListOfCorrelatedSequence.Add(insertedCorrelatedSequence);
  4954. ++iRight;
  4955. }
  4956. if (iLeft == leftGrouped.Length && iRight == rightGrouped.Length)
  4957. return newListOfCorrelatedSequence;
  4958. // if there is content on the left, but not content on the right
  4959. if (iRight == rightGrouped.Length)
  4960. {
  4961. for (int j = iLeft; j < leftGrouped.Length; j++)
  4962. {
  4963. var deletedCorrelatedSequence = new CorrelatedSequence();
  4964. deletedCorrelatedSequence.ComparisonUnitArray1 = leftGrouped[j].ToArray();
  4965. deletedCorrelatedSequence.ComparisonUnitArray2 = null;
  4966. deletedCorrelatedSequence.CorrelationStatus = CorrelationStatus.Deleted;
  4967. newListOfCorrelatedSequence.Add(deletedCorrelatedSequence);
  4968. }
  4969. return newListOfCorrelatedSequence;
  4970. }
  4971. // there is content on the right but not on the left
  4972. else if (iLeft == leftGrouped.Length)
  4973. {
  4974. for (int j = iRight; j < rightGrouped.Length; j++)
  4975. {
  4976. var insertedCorrelatedSequence = new CorrelatedSequence();
  4977. insertedCorrelatedSequence.ComparisonUnitArray1 = null;
  4978. insertedCorrelatedSequence.ComparisonUnitArray2 = rightGrouped[j].ToArray();
  4979. insertedCorrelatedSequence.CorrelationStatus = CorrelationStatus.Inserted;
  4980. newListOfCorrelatedSequence.Add(insertedCorrelatedSequence);
  4981. }
  4982. return newListOfCorrelatedSequence;
  4983. }
  4984. // else continue on next round.
  4985. }
  4986. }
  4987. // if both sides contain tables and paragraphs, then split into multiple unknown corr sequ
  4988. if (leftTables > 0 && rightTables > 0 &&
  4989. leftParagraphs > 0 && rightParagraphs > 0 &&
  4990. (leftLength > 1 || rightLength > 1))
  4991. {
  4992. var leftGrouped = unknown
  4993. .ComparisonUnitArray1
  4994. .GroupAdjacent(cu =>
  4995. {
  4996. var cug = cu as ComparisonUnitGroup;
  4997. if (cug.ComparisonUnitGroupType == ComparisonUnitGroupType.Table)
  4998. return "Table";
  4999. else
  5000. return "Para";
  5001. })
  5002. .ToArray();
  5003. var rightGrouped = unknown
  5004. .ComparisonUnitArray2
  5005. .GroupAdjacent(cu =>
  5006. {
  5007. var cug = cu as ComparisonUnitGroup;
  5008. if (cug.ComparisonUnitGroupType == ComparisonUnitGroupType.Table)
  5009. return "Table";
  5010. else
  5011. return "Para";
  5012. })
  5013. .ToArray();
  5014. int iLeft = 0;
  5015. int iRight = 0;
  5016. // create an unknown corr sequ for it.
  5017. // increment both counters
  5018. // if one is at end but the other is not, then tag the remaining content as inserted or deleted, and done.
  5019. // if both are at the end, then done
  5020. // return the new list of corr sequ
  5021. while (true)
  5022. {
  5023. if ((leftGrouped[iLeft].Key == "Table" && rightGrouped[iRight].Key == "Table") ||
  5024. (leftGrouped[iLeft].Key == "Para" && rightGrouped[iRight].Key == "Para"))
  5025. {
  5026. var unknownCorrelatedSequence = new CorrelatedSequence();
  5027. unknownCorrelatedSequence.ComparisonUnitArray1 = leftGrouped[iLeft].ToArray();
  5028. unknownCorrelatedSequence.ComparisonUnitArray2 = rightGrouped[iRight].ToArray();
  5029. unknownCorrelatedSequence.CorrelationStatus = CorrelationStatus.Unknown;
  5030. newListOfCorrelatedSequence.Add(unknownCorrelatedSequence);
  5031. ++iLeft;
  5032. ++iRight;
  5033. }
  5034. else if (leftGrouped[iLeft].Key == "Para" && rightGrouped[iRight].Key == "Table")
  5035. {
  5036. var deletedCorrelatedSequence = new CorrelatedSequence();
  5037. deletedCorrelatedSequence.ComparisonUnitArray1 = leftGrouped[iLeft].ToArray();
  5038. deletedCorrelatedSequence.ComparisonUnitArray2 = null;
  5039. deletedCorrelatedSequence.CorrelationStatus = CorrelationStatus.Deleted;
  5040. newListOfCorrelatedSequence.Add(deletedCorrelatedSequence);
  5041. ++iLeft;
  5042. }
  5043. else if (leftGrouped[iLeft].Key == "Table" && rightGrouped[iRight].Key == "Para")
  5044. {
  5045. var insertedCorrelatedSequence = new CorrelatedSequence();
  5046. insertedCorrelatedSequence.ComparisonUnitArray1 = null;
  5047. insertedCorrelatedSequence.ComparisonUnitArray2 = rightGrouped[iRight].ToArray();
  5048. insertedCorrelatedSequence.CorrelationStatus = CorrelationStatus.Inserted;
  5049. newListOfCorrelatedSequence.Add(insertedCorrelatedSequence);
  5050. ++iRight;
  5051. }
  5052. if (iLeft == leftGrouped.Length && iRight == rightGrouped.Length)
  5053. return newListOfCorrelatedSequence;
  5054. // if there is content on the left, but not content on the right
  5055. if (iRight == rightGrouped.Length)
  5056. {
  5057. for (int j = iLeft; j < leftGrouped.Length; j++)
  5058. {
  5059. var deletedCorrelatedSequence = new CorrelatedSequence();
  5060. deletedCorrelatedSequence.ComparisonUnitArray1 = leftGrouped[j].ToArray();
  5061. deletedCorrelatedSequence.ComparisonUnitArray2 = null;
  5062. deletedCorrelatedSequence.CorrelationStatus = CorrelationStatus.Deleted;
  5063. newListOfCorrelatedSequence.Add(deletedCorrelatedSequence);
  5064. }
  5065. return newListOfCorrelatedSequence;
  5066. }
  5067. // there is content on the right but not on the left
  5068. else if (iLeft == leftGrouped.Length)
  5069. {
  5070. for (int j = iRight; j < rightGrouped.Length; j++)
  5071. {
  5072. var insertedCorrelatedSequence = new CorrelatedSequence();
  5073. insertedCorrelatedSequence.ComparisonUnitArray1 = null;
  5074. insertedCorrelatedSequence.ComparisonUnitArray2 = rightGrouped[j].ToArray();
  5075. insertedCorrelatedSequence.CorrelationStatus = CorrelationStatus.Inserted;
  5076. newListOfCorrelatedSequence.Add(insertedCorrelatedSequence);
  5077. }
  5078. return newListOfCorrelatedSequence;
  5079. }
  5080. // else continue on next round.
  5081. }
  5082. }
  5083. // If both sides consists of a single table, and if the table contains merged cells, then mark as deleted/inserted
  5084. if (leftTables == 1 && leftLength == 1 &&
  5085. rightTables == 1 && rightLength == 1)
  5086. {
  5087. var result = DoLcsAlgorithmForTable(unknown, settings);
  5088. if (result != null)
  5089. return result;
  5090. }
  5091. // If either side contains only paras or tables, then flatten and iterate.
  5092. var leftOnlyParasTablesTextboxes = leftLength == leftTables + leftParagraphs + leftTextboxes;
  5093. var rightOnlyParasTablesTextboxes = rightLength == rightTables + rightParagraphs + rightTextboxes;
  5094. if (leftOnlyParasTablesTextboxes && rightOnlyParasTablesTextboxes)
  5095. {
  5096. // flatten paras and tables, and iterate
  5097. var left = unknown
  5098. .ComparisonUnitArray1
  5099. .Select(cu => cu.Contents)
  5100. .SelectMany(m => m)
  5101. .ToArray();
  5102. var right = unknown
  5103. .ComparisonUnitArray2
  5104. .Select(cu => cu.Contents)
  5105. .SelectMany(m => m)
  5106. .ToArray();
  5107. var unknownCorrelatedSequence = new CorrelatedSequence();
  5108. unknownCorrelatedSequence.CorrelationStatus = CorrelationStatus.Unknown;
  5109. unknownCorrelatedSequence.ComparisonUnitArray1 = left;
  5110. unknownCorrelatedSequence.ComparisonUnitArray2 = right;
  5111. newListOfCorrelatedSequence.Add(unknownCorrelatedSequence);
  5112. return newListOfCorrelatedSequence;
  5113. }
  5114. // if first of left is a row and first of right is a row
  5115. // then flatten the row to cells and iterate.
  5116. var firstLeft = unknown
  5117. .ComparisonUnitArray1
  5118. .FirstOrDefault() as ComparisonUnitGroup;
  5119. var firstRight = unknown
  5120. .ComparisonUnitArray2
  5121. .FirstOrDefault() as ComparisonUnitGroup;
  5122. if (firstLeft != null && firstRight != null)
  5123. {
  5124. if (firstLeft.ComparisonUnitGroupType == ComparisonUnitGroupType.Row &&
  5125. firstRight.ComparisonUnitGroupType == ComparisonUnitGroupType.Row)
  5126. {
  5127. ComparisonUnit[] leftContent = firstLeft.Contents.ToArray();
  5128. ComparisonUnit[] rightContent = firstRight.Contents.ToArray();
  5129. var lenLeft = leftContent.Length;
  5130. var lenRight = rightContent.Length;
  5131. if (lenLeft < lenRight)
  5132. leftContent = leftContent.Concat(Enumerable.Repeat<ComparisonUnit>(null, lenRight - lenLeft)).ToArray();
  5133. else if (lenRight < lenLeft)
  5134. rightContent = rightContent.Concat(Enumerable.Repeat<ComparisonUnit>(null, lenLeft - lenRight)).ToArray();
  5135. List<CorrelatedSequence> newCs = leftContent.Zip(rightContent, (l, r) =>
  5136. {
  5137. if (l != null && r != null)
  5138. {
  5139. var unknownCorrelatedSequence = new CorrelatedSequence();
  5140. unknownCorrelatedSequence.ComparisonUnitArray1 = new[] { l };
  5141. unknownCorrelatedSequence.ComparisonUnitArray2 = new[] { r };
  5142. unknownCorrelatedSequence.CorrelationStatus = CorrelationStatus.Unknown;
  5143. return new[] { unknownCorrelatedSequence };
  5144. }
  5145. if (l == null)
  5146. {
  5147. var insertedCorrelatedSequence = new CorrelatedSequence();
  5148. insertedCorrelatedSequence.ComparisonUnitArray1 = null;
  5149. insertedCorrelatedSequence.ComparisonUnitArray2 = r.Contents.ToArray();
  5150. insertedCorrelatedSequence.CorrelationStatus = CorrelationStatus.Inserted;
  5151. return new[] { insertedCorrelatedSequence };
  5152. }
  5153. else if (r == null)
  5154. {
  5155. var deletedCorrelatedSequence = new CorrelatedSequence();
  5156. deletedCorrelatedSequence.ComparisonUnitArray1 = l.Contents.ToArray();
  5157. deletedCorrelatedSequence.ComparisonUnitArray2 = null;
  5158. deletedCorrelatedSequence.CorrelationStatus = CorrelationStatus.Deleted;
  5159. return new[] { deletedCorrelatedSequence };
  5160. }
  5161. else
  5162. throw new OpenXmlPowerToolsException("Internal error");
  5163. })
  5164. .SelectMany(m => m)
  5165. .ToList();
  5166. foreach (var cs in newCs)
  5167. newListOfCorrelatedSequence.Add(cs);
  5168. var remainderLeft = unknown
  5169. .ComparisonUnitArray1
  5170. .Skip(1)
  5171. .ToArray();
  5172. var remainderRight = unknown
  5173. .ComparisonUnitArray2
  5174. .Skip(1)
  5175. .ToArray();
  5176. if (remainderLeft.Length > 0 && remainderRight.Length == 0)
  5177. {
  5178. var deletedCorrelatedSequence = new CorrelatedSequence();
  5179. deletedCorrelatedSequence.CorrelationStatus = CorrelationStatus.Deleted;
  5180. deletedCorrelatedSequence.ComparisonUnitArray1 = remainderLeft;
  5181. deletedCorrelatedSequence.ComparisonUnitArray2 = null;
  5182. newListOfCorrelatedSequence.Add(deletedCorrelatedSequence);
  5183. }
  5184. else if (remainderRight.Length > 0 && remainderLeft.Length == 0)
  5185. {
  5186. var insertedCorrelatedSequence = new CorrelatedSequence();
  5187. insertedCorrelatedSequence.CorrelationStatus = CorrelationStatus.Inserted;
  5188. insertedCorrelatedSequence.ComparisonUnitArray1 = null;
  5189. insertedCorrelatedSequence.ComparisonUnitArray2 = remainderRight;
  5190. newListOfCorrelatedSequence.Add(insertedCorrelatedSequence);
  5191. }
  5192. else if (remainderLeft.Length > 0 && remainderRight.Length > 0)
  5193. {
  5194. var unknownCorrelatedSequence2 = new CorrelatedSequence();
  5195. unknownCorrelatedSequence2.CorrelationStatus = CorrelationStatus.Unknown;
  5196. unknownCorrelatedSequence2.ComparisonUnitArray1 = remainderLeft;
  5197. unknownCorrelatedSequence2.ComparisonUnitArray2 = remainderRight;
  5198. newListOfCorrelatedSequence.Add(unknownCorrelatedSequence2);
  5199. }
  5200. if (s_False)
  5201. {
  5202. var sb = new StringBuilder();
  5203. foreach (var item in newListOfCorrelatedSequence)
  5204. sb.Append(item.ToString()).Append(Environment.NewLine);
  5205. var sbs = sb.ToString();
  5206. TestUtil.NotePad(sbs);
  5207. }
  5208. return newListOfCorrelatedSequence;
  5209. }
  5210. if (firstLeft.ComparisonUnitGroupType == ComparisonUnitGroupType.Cell &&
  5211. firstRight.ComparisonUnitGroupType == ComparisonUnitGroupType.Cell)
  5212. {
  5213. var left = firstLeft
  5214. .Contents
  5215. .ToArray();
  5216. var right = firstRight
  5217. .Contents
  5218. .ToArray();
  5219. var unknownCorrelatedSequence = new CorrelatedSequence();
  5220. unknownCorrelatedSequence.CorrelationStatus = CorrelationStatus.Unknown;
  5221. unknownCorrelatedSequence.ComparisonUnitArray1 = left;
  5222. unknownCorrelatedSequence.ComparisonUnitArray2 = right;
  5223. newListOfCorrelatedSequence.Add(unknownCorrelatedSequence);
  5224. var remainderLeft = unknown
  5225. .ComparisonUnitArray1
  5226. .Skip(1)
  5227. .ToArray();
  5228. var remainderRight = unknown
  5229. .ComparisonUnitArray2
  5230. .Skip(1)
  5231. .ToArray();
  5232. if (remainderLeft.Length > 0 && remainderRight.Length == 0)
  5233. {
  5234. var deletedCorrelatedSequence = new CorrelatedSequence();
  5235. deletedCorrelatedSequence.CorrelationStatus = CorrelationStatus.Deleted;
  5236. deletedCorrelatedSequence.ComparisonUnitArray1 = remainderLeft;
  5237. deletedCorrelatedSequence.ComparisonUnitArray2 = null;
  5238. newListOfCorrelatedSequence.Add(deletedCorrelatedSequence);
  5239. }
  5240. else if (remainderRight.Length > 0 && remainderLeft.Length == 0)
  5241. {
  5242. var insertedCorrelatedSequence = new CorrelatedSequence();
  5243. insertedCorrelatedSequence.CorrelationStatus = CorrelationStatus.Inserted;
  5244. insertedCorrelatedSequence.ComparisonUnitArray1 = null;
  5245. insertedCorrelatedSequence.ComparisonUnitArray2 = remainderRight;
  5246. newListOfCorrelatedSequence.Add(insertedCorrelatedSequence);
  5247. }
  5248. else if (remainderLeft.Length > 0 && remainderRight.Length > 0)
  5249. {
  5250. var unknownCorrelatedSequence2 = new CorrelatedSequence();
  5251. unknownCorrelatedSequence2.CorrelationStatus = CorrelationStatus.Unknown;
  5252. unknownCorrelatedSequence2.ComparisonUnitArray1 = remainderLeft;
  5253. unknownCorrelatedSequence2.ComparisonUnitArray2 = remainderRight;
  5254. newListOfCorrelatedSequence.Add(unknownCorrelatedSequence2);
  5255. }
  5256. return newListOfCorrelatedSequence;
  5257. }
  5258. }
  5259. if (unknown.ComparisonUnitArray1.Any() && unknown.ComparisonUnitArray2.Any())
  5260. {
  5261. var left = unknown.ComparisonUnitArray1.First() as ComparisonUnitWord;
  5262. var right = unknown.ComparisonUnitArray2.First() as ComparisonUnitGroup;
  5263. if (left != null &&
  5264. right != null &&
  5265. right.ComparisonUnitGroupType == ComparisonUnitGroupType.Row)
  5266. {
  5267. var insertedCorrelatedSequence3 = new CorrelatedSequence();
  5268. insertedCorrelatedSequence3.CorrelationStatus = CorrelationStatus.Inserted;
  5269. insertedCorrelatedSequence3.ComparisonUnitArray1 = null;
  5270. insertedCorrelatedSequence3.ComparisonUnitArray2 = unknown.ComparisonUnitArray2;
  5271. newListOfCorrelatedSequence.Add(insertedCorrelatedSequence3);
  5272. var deletedCorrelatedSequence3 = new CorrelatedSequence();
  5273. deletedCorrelatedSequence3.CorrelationStatus = CorrelationStatus.Deleted;
  5274. deletedCorrelatedSequence3.ComparisonUnitArray1 = unknown.ComparisonUnitArray1;
  5275. deletedCorrelatedSequence3.ComparisonUnitArray2 = null;
  5276. newListOfCorrelatedSequence.Add(deletedCorrelatedSequence3);
  5277. return newListOfCorrelatedSequence;
  5278. }
  5279. var left2 = unknown.ComparisonUnitArray1.First() as ComparisonUnitGroup;
  5280. var right2 = unknown.ComparisonUnitArray2.First() as ComparisonUnitWord;
  5281. if (right2 != null &&
  5282. left2 != null &&
  5283. left2.ComparisonUnitGroupType == ComparisonUnitGroupType.Row)
  5284. {
  5285. var deletedCorrelatedSequence3 = new CorrelatedSequence();
  5286. deletedCorrelatedSequence3.CorrelationStatus = CorrelationStatus.Deleted;
  5287. deletedCorrelatedSequence3.ComparisonUnitArray1 = unknown.ComparisonUnitArray1;
  5288. deletedCorrelatedSequence3.ComparisonUnitArray2 = null;
  5289. newListOfCorrelatedSequence.Add(deletedCorrelatedSequence3);
  5290. var insertedCorrelatedSequence3 = new CorrelatedSequence();
  5291. insertedCorrelatedSequence3.CorrelationStatus = CorrelationStatus.Inserted;
  5292. insertedCorrelatedSequence3.ComparisonUnitArray1 = null;
  5293. insertedCorrelatedSequence3.ComparisonUnitArray2 = unknown.ComparisonUnitArray2;
  5294. newListOfCorrelatedSequence.Add(insertedCorrelatedSequence3);
  5295. return newListOfCorrelatedSequence;
  5296. }
  5297. var lastContentAtomLeft = unknown.ComparisonUnitArray1.Select(cu => cu.DescendantContentAtoms().Last()).LastOrDefault();
  5298. var lastContentAtomRight = unknown.ComparisonUnitArray2.Select(cu => cu.DescendantContentAtoms().Last()).LastOrDefault();
  5299. if (lastContentAtomLeft != null && lastContentAtomRight != null)
  5300. {
  5301. if (lastContentAtomLeft.ContentElement.Name == W.pPr &&
  5302. lastContentAtomRight.ContentElement.Name != W.pPr)
  5303. {
  5304. var insertedCorrelatedSequence5 = new CorrelatedSequence();
  5305. insertedCorrelatedSequence5.CorrelationStatus = CorrelationStatus.Inserted;
  5306. insertedCorrelatedSequence5.ComparisonUnitArray1 = null;
  5307. insertedCorrelatedSequence5.ComparisonUnitArray2 = unknown.ComparisonUnitArray2;
  5308. newListOfCorrelatedSequence.Add(insertedCorrelatedSequence5);
  5309. var deletedCorrelatedSequence5 = new CorrelatedSequence();
  5310. deletedCorrelatedSequence5.CorrelationStatus = CorrelationStatus.Deleted;
  5311. deletedCorrelatedSequence5.ComparisonUnitArray1 = unknown.ComparisonUnitArray1;
  5312. deletedCorrelatedSequence5.ComparisonUnitArray2 = null;
  5313. newListOfCorrelatedSequence.Add(deletedCorrelatedSequence5);
  5314. return newListOfCorrelatedSequence;
  5315. }
  5316. else if (lastContentAtomLeft.ContentElement.Name != W.pPr &&
  5317. lastContentAtomRight.ContentElement.Name == W.pPr)
  5318. {
  5319. var deletedCorrelatedSequence5 = new CorrelatedSequence();
  5320. deletedCorrelatedSequence5.CorrelationStatus = CorrelationStatus.Deleted;
  5321. deletedCorrelatedSequence5.ComparisonUnitArray1 = unknown.ComparisonUnitArray1;
  5322. deletedCorrelatedSequence5.ComparisonUnitArray2 = null;
  5323. newListOfCorrelatedSequence.Add(deletedCorrelatedSequence5);
  5324. var insertedCorrelatedSequence5 = new CorrelatedSequence();
  5325. insertedCorrelatedSequence5.CorrelationStatus = CorrelationStatus.Inserted;
  5326. insertedCorrelatedSequence5.ComparisonUnitArray1 = null;
  5327. insertedCorrelatedSequence5.ComparisonUnitArray2 = unknown.ComparisonUnitArray2;
  5328. newListOfCorrelatedSequence.Add(insertedCorrelatedSequence5);
  5329. return newListOfCorrelatedSequence;
  5330. }
  5331. }
  5332. }
  5333. var deletedCorrelatedSequence4 = new CorrelatedSequence();
  5334. deletedCorrelatedSequence4.CorrelationStatus = CorrelationStatus.Deleted;
  5335. deletedCorrelatedSequence4.ComparisonUnitArray1 = unknown.ComparisonUnitArray1;
  5336. deletedCorrelatedSequence4.ComparisonUnitArray2 = null;
  5337. newListOfCorrelatedSequence.Add(deletedCorrelatedSequence4);
  5338. var insertedCorrelatedSequence4 = new CorrelatedSequence();
  5339. insertedCorrelatedSequence4.CorrelationStatus = CorrelationStatus.Inserted;
  5340. insertedCorrelatedSequence4.ComparisonUnitArray1 = null;
  5341. insertedCorrelatedSequence4.ComparisonUnitArray2 = unknown.ComparisonUnitArray2;
  5342. newListOfCorrelatedSequence.Add(insertedCorrelatedSequence4);
  5343. return newListOfCorrelatedSequence;
  5344. }
  5345. ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  5346. // here we have the longest common subsequence.
  5347. // but it may start in the middle of a paragraph.
  5348. // therefore need to dispose of the content from the beginning of the longest common subsequence to the beginning of the paragraph.
  5349. // this should be in a separate unknown region
  5350. // if countCommonAtEnd != 0, and if it contains a paragraph mark, then if there are comparison units in the same paragraph before the common at end (in either version)
  5351. // then we want to put all of those comparison units into a single unknown, where they must be resolved against each other. We don't want those
  5352. // comparison units to go into the middle unknown comparison unit.
  5353. int remainingInLeftParagraph = 0;
  5354. int remainingInRightParagraph = 0;
  5355. if (currentLongestCommonSequenceLength != 0)
  5356. {
  5357. var commonSeq = unknown
  5358. .ComparisonUnitArray1
  5359. .Skip(currentI1)
  5360. .Take(currentLongestCommonSequenceLength)
  5361. .ToList();
  5362. var firstOfCommonSeq = commonSeq.First();
  5363. if (firstOfCommonSeq is ComparisonUnitWord)
  5364. {
  5365. // are there any paragraph marks in the common seq at end?
  5366. if (commonSeq.Any(cu =>
  5367. {
  5368. var firstComparisonUnitAtom = cu.Contents.OfType<ComparisonUnitAtom>().FirstOrDefault();
  5369. if (firstComparisonUnitAtom == null)
  5370. return false;
  5371. return firstComparisonUnitAtom.ContentElement.Name == W.pPr;
  5372. }))
  5373. {
  5374. remainingInLeftParagraph = unknown
  5375. .ComparisonUnitArray1
  5376. .Take(currentI1)
  5377. .Reverse()
  5378. .TakeWhile(cu =>
  5379. {
  5380. if (!(cu is ComparisonUnitWord))
  5381. return false;
  5382. var firstComparisonUnitAtom = cu.Contents.OfType<ComparisonUnitAtom>().FirstOrDefault();
  5383. if (firstComparisonUnitAtom == null)
  5384. return true;
  5385. return firstComparisonUnitAtom.ContentElement.Name != W.pPr;
  5386. })
  5387. .Count();
  5388. remainingInRightParagraph = unknown
  5389. .ComparisonUnitArray2
  5390. .Take(currentI2)
  5391. .Reverse()
  5392. .TakeWhile(cu =>
  5393. {
  5394. if (!(cu is ComparisonUnitWord))
  5395. return false;
  5396. var firstComparisonUnitAtom = cu.Contents.OfType<ComparisonUnitAtom>().FirstOrDefault();
  5397. if (firstComparisonUnitAtom == null)
  5398. return true;
  5399. return firstComparisonUnitAtom.ContentElement.Name != W.pPr;
  5400. })
  5401. .Count();
  5402. }
  5403. }
  5404. }
  5405. var countBeforeCurrentParagraphLeft = currentI1 - remainingInLeftParagraph;
  5406. var countBeforeCurrentParagraphRight = currentI2 - remainingInRightParagraph;
  5407. if (countBeforeCurrentParagraphLeft > 0 && countBeforeCurrentParagraphRight == 0)
  5408. {
  5409. var deletedCorrelatedSequence = new CorrelatedSequence();
  5410. deletedCorrelatedSequence.CorrelationStatus = CorrelationStatus.Deleted;
  5411. deletedCorrelatedSequence.ComparisonUnitArray1 = cul1
  5412. .Take(countBeforeCurrentParagraphLeft)
  5413. .ToArray();
  5414. deletedCorrelatedSequence.ComparisonUnitArray2 = null;
  5415. newListOfCorrelatedSequence.Add(deletedCorrelatedSequence);
  5416. }
  5417. else if (countBeforeCurrentParagraphLeft == 0 && countBeforeCurrentParagraphRight > 0)
  5418. {
  5419. var insertedCorrelatedSequence = new CorrelatedSequence();
  5420. insertedCorrelatedSequence.CorrelationStatus = CorrelationStatus.Inserted;
  5421. insertedCorrelatedSequence.ComparisonUnitArray1 = null;
  5422. insertedCorrelatedSequence.ComparisonUnitArray2 = cul2
  5423. .Take(countBeforeCurrentParagraphRight)
  5424. .ToArray();
  5425. newListOfCorrelatedSequence.Add(insertedCorrelatedSequence);
  5426. }
  5427. else if (countBeforeCurrentParagraphLeft > 0 && countBeforeCurrentParagraphRight > 0)
  5428. {
  5429. var unknownCorrelatedSequence = new CorrelatedSequence();
  5430. unknownCorrelatedSequence.CorrelationStatus = CorrelationStatus.Unknown;
  5431. unknownCorrelatedSequence.ComparisonUnitArray1 = cul1
  5432. .Take(countBeforeCurrentParagraphLeft)
  5433. .ToArray();
  5434. unknownCorrelatedSequence.ComparisonUnitArray2 = cul2
  5435. .Take(countBeforeCurrentParagraphRight)
  5436. .ToArray();
  5437. newListOfCorrelatedSequence.Add(unknownCorrelatedSequence);
  5438. }
  5439. else if (countBeforeCurrentParagraphLeft == 0 && countBeforeCurrentParagraphRight == 0)
  5440. {
  5441. // nothing to do
  5442. }
  5443. if (remainingInLeftParagraph > 0 && remainingInRightParagraph == 0)
  5444. {
  5445. var deletedCorrelatedSequence = new CorrelatedSequence();
  5446. deletedCorrelatedSequence.CorrelationStatus = CorrelationStatus.Deleted;
  5447. deletedCorrelatedSequence.ComparisonUnitArray1 = cul1
  5448. .Skip(countBeforeCurrentParagraphLeft)
  5449. .Take(remainingInLeftParagraph)
  5450. .ToArray();
  5451. deletedCorrelatedSequence.ComparisonUnitArray2 = null;
  5452. newListOfCorrelatedSequence.Add(deletedCorrelatedSequence);
  5453. }
  5454. else if (remainingInLeftParagraph == 0 && remainingInRightParagraph > 0)
  5455. {
  5456. var insertedCorrelatedSequence = new CorrelatedSequence();
  5457. insertedCorrelatedSequence.CorrelationStatus = CorrelationStatus.Inserted;
  5458. insertedCorrelatedSequence.ComparisonUnitArray1 = null;
  5459. insertedCorrelatedSequence.ComparisonUnitArray2 = cul2
  5460. .Skip(countBeforeCurrentParagraphRight)
  5461. .Take(remainingInRightParagraph)
  5462. .ToArray();
  5463. newListOfCorrelatedSequence.Add(insertedCorrelatedSequence);
  5464. }
  5465. else if (remainingInLeftParagraph > 0 && remainingInRightParagraph > 0)
  5466. {
  5467. var unknownCorrelatedSequence = new CorrelatedSequence();
  5468. unknownCorrelatedSequence.CorrelationStatus = CorrelationStatus.Unknown;
  5469. unknownCorrelatedSequence.ComparisonUnitArray1 = cul1
  5470. .Skip(countBeforeCurrentParagraphLeft)
  5471. .Take(remainingInLeftParagraph)
  5472. .ToArray();
  5473. unknownCorrelatedSequence.ComparisonUnitArray2 = cul2
  5474. .Skip(countBeforeCurrentParagraphRight)
  5475. .Take(remainingInRightParagraph)
  5476. .ToArray();
  5477. newListOfCorrelatedSequence.Add(unknownCorrelatedSequence);
  5478. }
  5479. else if (remainingInLeftParagraph == 0 && remainingInRightParagraph == 0)
  5480. {
  5481. // nothing to do
  5482. }
  5483. var middleEqual = new CorrelatedSequence();
  5484. middleEqual.CorrelationStatus = CorrelationStatus.Equal;
  5485. middleEqual.ComparisonUnitArray1 = cul1
  5486. .Skip(currentI1)
  5487. .Take(currentLongestCommonSequenceLength)
  5488. .ToArray();
  5489. middleEqual.ComparisonUnitArray2 = cul2
  5490. .Skip(currentI2)
  5491. .Take(currentLongestCommonSequenceLength)
  5492. .ToArray();
  5493. newListOfCorrelatedSequence.Add(middleEqual);
  5494. int endI1 = currentI1 + currentLongestCommonSequenceLength;
  5495. int endI2 = currentI2 + currentLongestCommonSequenceLength;
  5496. var remaining1 = cul1
  5497. .Skip(endI1)
  5498. .ToArray();
  5499. var remaining2 = cul2
  5500. .Skip(endI2)
  5501. .ToArray();
  5502. // here is the point that we want to make a new unknown from this point to the end of the paragraph that contains the equal parts.
  5503. // this will never hurt anything, and will in many cases result in a better difference.
  5504. var leftCuw = middleEqual.ComparisonUnitArray1[middleEqual.ComparisonUnitArray1.Length - 1] as ComparisonUnitWord;
  5505. if (leftCuw != null)
  5506. {
  5507. var lastContentAtom = leftCuw.DescendantContentAtoms().LastOrDefault();
  5508. // if the middleEqual did not end with a paragraph mark
  5509. if (lastContentAtom != null && lastContentAtom.ContentElement.Name != W.pPr)
  5510. {
  5511. int idx1 = FindIndexOfNextParaMark(remaining1);
  5512. int idx2 = FindIndexOfNextParaMark(remaining2);
  5513. var unknownCorrelatedSequenceRemaining = new CorrelatedSequence();
  5514. unknownCorrelatedSequenceRemaining.CorrelationStatus = CorrelationStatus.Unknown;
  5515. unknownCorrelatedSequenceRemaining.ComparisonUnitArray1 = remaining1.Take(idx1).ToArray();
  5516. unknownCorrelatedSequenceRemaining.ComparisonUnitArray2 = remaining2.Take(idx2).ToArray();
  5517. newListOfCorrelatedSequence.Add(unknownCorrelatedSequenceRemaining);
  5518. var unknownCorrelatedSequenceAfter = new CorrelatedSequence();
  5519. unknownCorrelatedSequenceAfter.CorrelationStatus = CorrelationStatus.Unknown;
  5520. unknownCorrelatedSequenceAfter.ComparisonUnitArray1 = remaining1.Skip(idx1).ToArray();
  5521. unknownCorrelatedSequenceAfter.ComparisonUnitArray2 = remaining2.Skip(idx2).ToArray();
  5522. newListOfCorrelatedSequence.Add(unknownCorrelatedSequenceAfter);
  5523. return newListOfCorrelatedSequence;
  5524. }
  5525. }
  5526. var unknownCorrelatedSequence20 = new CorrelatedSequence();
  5527. unknownCorrelatedSequence20.CorrelationStatus = CorrelationStatus.Unknown;
  5528. unknownCorrelatedSequence20.ComparisonUnitArray1 = remaining1;
  5529. unknownCorrelatedSequence20.ComparisonUnitArray2 = remaining2;
  5530. newListOfCorrelatedSequence.Add(unknownCorrelatedSequence20);
  5531. return newListOfCorrelatedSequence;
  5532. }
  /// <summary>
  /// Finds the first comparison unit in <paramref name="cul"/> that ends with a paragraph mark.
  /// </summary>
  /// <param name="cul">Comparison units to scan, in document order.</param>
  /// <returns>
  /// The index of the first word whose last descendant content atom is a w:pPr element,
  /// or <c>cul.Length</c> when no such word exists.
  /// </returns>
  /// <remarks>
  /// Units that are not <see cref="ComparisonUnitWord"/> instances, and words that expose no content
  /// atoms, are skipped instead of being dereferenced; previously either case raised a
  /// NullReferenceException.
  /// </remarks>
  private static int FindIndexOfNextParaMark(ComparisonUnit[] cul)
  {
      for (int i = 0; i < cul.Length; i++)
      {
          var cuw = cul[i] as ComparisonUnitWord;
          if (cuw == null)
              continue; // not a word, so it cannot be the paragraph mark we are looking for

          var lastAtom = cuw.DescendantContentAtoms().LastOrDefault();
          if (lastAtom != null && lastAtom.ContentElement.Name == W.pPr)
              return i;
      }
      return cul.Length;
  }
  /// <summary>
  /// Tries to split an unknown correlated sequence, whose first unit on each side is treated as a
  /// table group, into finer-grained correlated sequences.
  /// </summary>
  /// <param name="unknown">The sequence to split; the first element of each comparison unit array is used as the table.</param>
  /// <param name="settings">Comparer settings; not read by this method.</param>
  /// <returns>
  /// One Unknown sequence per positional row pair when the rows can be matched up; a Deleted sequence
  /// followed by an Inserted sequence (each holding the flattened contents of its side) when either
  /// table contains merged cells and the rows cannot be matched; <c>null</c> when none of these
  /// cases applies.
  /// </returns>
  private static List<CorrelatedSequence> DoLcsAlgorithmForTable(CorrelatedSequence unknown, WmlComparerSettings settings)
  {
      var leftTable = unknown.ComparisonUnitArray1.First() as ComparisonUnitGroup;
      var rightTable = unknown.ComparisonUnitArray2.First() as ComparisonUnitGroup;

      // Pairs the rows of both tables by position; every pair becomes its own Unknown sequence.
      Func<List<CorrelatedSequence>> pairRowsByPosition = () => leftTable
          .Contents
          .Zip(rightTable.Contents, (l, r) =>
          {
              var leftRow = l as ComparisonUnitGroup;
              var rightRow = r as ComparisonUnitGroup;
              return new CorrelatedSequence
              {
                  ComparisonUnitArray1 = new[] { leftRow },
                  ComparisonUnitArray2 = new[] { rightRow },
                  CorrelationStatus = CorrelationStatus.Unknown,
              };
          })
          .ToList();

      // Case 1: same number of rows and every corresponding pair of rows has the same
      // CorrelatedSHA1Hash. The rows can then be compared one against one.
      if (leftTable.Contents.Count() == rightTable.Contents.Count())
      {
          var everyRowCorrelates = leftTable
              .Contents
              .Zip(rightTable.Contents, (l, r) =>
                  (l as ComparisonUnitGroup).CorrelatedSHA1Hash == (r as ComparisonUnitGroup).CorrelatedSHA1Hash)
              .All(sameHash => sameHash);
          if (everyRowCorrelates)
              return pairRowsByPosition();
      }

      // Locate the w:tbl element of each side: the last w:tbl among the ancestors of the
      // table's first content atom.
      var leftFirstAtom = leftTable.DescendantContentAtoms().FirstOrDefault();
      if (leftFirstAtom == null)
          throw new OpenXmlPowerToolsException("Internal error");
      var leftTblElement = leftFirstAtom
          .AncestorElements
          .LastOrDefault(e => e.Name == W.tbl);

      var rightFirstAtom = rightTable.DescendantContentAtoms().FirstOrDefault();
      if (rightFirstAtom == null)
          throw new OpenXmlPowerToolsException("Internal error");
      var rightTblElement = rightFirstAtom
          .AncestorElements
          .LastOrDefault(e => e.Name == W.tbl);

      Func<XElement, bool> hasMergedCells = tbl => tbl
          .Descendants()
          .Any(d => d.Name == W.vMerge || d.Name == W.gridSpan);
      var leftHasMergedCells = hasMergedCells(leftTblElement);
      var rightHasMergedCells = hasMergedCells(rightTblElement);

      // Tables without any merged cells are not handled here.
      if (!leftHasMergedCells && !rightHasMergedCells)
          return null;

      // Case 2: both structure hashes are present and equal, so rows are paired by position.
      var sameStructure =
          leftTable.StructureSHA1Hash != null &&
          rightTable.StructureSHA1Hash != null &&
          leftTable.StructureSHA1Hash == rightTable.StructureSHA1Hash;
      if (sameStructure)
          return pairRowsByPosition();

      // Case 3: merged cells and differing structure. Flatten each side by one level and report
      // the left side as deleted and the right side as inserted.
      var deletedRows = new CorrelatedSequence
      {
          ComparisonUnitArray1 = unknown
              .ComparisonUnitArray1
              .SelectMany(cu => cu.Contents)
              .ToArray(),
          ComparisonUnitArray2 = null,
          CorrelationStatus = CorrelationStatus.Deleted,
      };
      var insertedRows = new CorrelatedSequence
      {
          ComparisonUnitArray1 = null,
          ComparisonUnitArray2 = unknown
              .ComparisonUnitArray2
              .SelectMany(cu => cu.Contents)
              .ToArray(),
          CorrelationStatus = CorrelationStatus.Inserted,
      };
      return new List<CorrelatedSequence> { deletedRows, insertedRows };
  }
  // Non-text elements that always form a word of their own: GetComparisonUnitList gives an atom
  // whose element name is listed here a grouping key that no neighbouring atom shares.
  private static XName[] WordBreakElements =
  {
      W.pPr, W.tab, W.br, W.continuationSeparator, W.cr,
      W.dayLong, W.dayShort,
      W.drawing, W.pict,
      W.endnoteRef, W.footnoteRef,
      W.monthLong, W.monthShort,
      W.noBreakHyphen, W._object, W.ptab, W.separator, W.sym,
      W.yearLong, W.yearShort,
      M.oMathPara, M.oMath,
      W.footnoteReference, W.endnoteReference,
  };
  5672. private class Atgbw // Per-atom state threaded through the Rollup in GetComparisonUnitList.
  5673. {
  5674. public int? Key; // Word-grouping key assigned to the atom; adjacent entries with equal keys are grouped into one ComparisonUnitWord. Null in the seed.
  5675. public ComparisonUnitAtom ComparisonUnitAtomMember; // The atom this entry describes; null in the seed.
  5676. public int NextIndex; // Running key counter handed on to the next atom of the Rollup; starts at 0 in the seed.
  5677. }
  /// <summary>
  /// Groups a flat list of comparison unit atoms into words and then arranges those words into
  /// the hierarchy produced by <c>GetHierarchicalComparisonUnits</c>.
  /// </summary>
  /// <param name="comparisonUnitAtomList">The atoms to group, in document order.</param>
  /// <param name="settings">Supplies <c>WordSeparators</c>, the characters that form a word of their own.</param>
  /// <returns>The top-level comparison units built from the atoms.</returns>
  private static ComparisonUnit[] GetComparisonUnitList(ComparisonUnitAtom[] comparisonUnitAtomList, WmlComparerSettings settings)
  {
      // True when the atom at the given position is a text atom whose first character is a digit.
      Func<int, bool> isDigitTextAt = position =>
      {
          var candidate = comparisonUnitAtomList[position];
          return candidate.ContentElement.Name == W.t && char.IsDigit(candidate.ContentElement.Value[0]);
      };

      // Decides whether an atom has to become a word of its own.
      Func<ComparisonUnitAtom, int, bool> standsAlone = (current, position) =>
      {
          if (current.ContentElement.Name != W.t)
              return WordBreakElements.Contains(current.ContentElement.Name);

          var character = current.ContentElement.Value[0];
          if (character != '.' && character != ',')
              return settings.WordSeparators.Contains(character);

          // A '.' or ',' stays inside the surrounding word when a digit text atom sits directly
          // before or after it; otherwise it is a word of its own.
          var digitBefore = position > 0 && isDigitTextAt(position - 1);
          var digitAfter = position < comparisonUnitAtomList.Length - 1 && isDigitTextAt(position + 1);
          return !digitBefore && !digitAfter;
      };

      var initialState = new Atgbw()
      {
          Key = null,
          ComparisonUnitAtomMember = null,
          NextIndex = 0,
      };

      // Assign a grouping key to every atom. A stand-alone atom takes the next unused key and
      // leaves a further unused key behind it, so neither neighbour can share its key; any other
      // atom simply reuses the running key.
      var keyedAtoms = comparisonUnitAtomList
          .Rollup(initialState, (current, previous, position) =>
          {
              var isolated = standsAlone(current, position);
              var assignedKey = isolated ? previous.NextIndex + 1 : previous.NextIndex;
              return new Atgbw()
              {
                  Key = assignedKey,
                  ComparisonUnitAtomMember = current,
                  NextIndex = isolated ? assignedKey + 1 : assignedKey,
              };
          });

      if (s_False)
      {
          var dump = new StringBuilder();
          foreach (var keyed in keyedAtoms)
          {
              dump.Append(keyed.Key + Environment.NewLine);
              dump.Append(" " + keyed.ComparisonUnitAtomMember.ToString(0) + Environment.NewLine);
          }
          TestUtil.NotePad(dump.ToString());
      }

      // Adjacent atoms that share a key make up one word.
      var atomsByWord = keyedAtoms.GroupAdjacent(keyed => keyed.Key);

      if (s_False)
      {
          var dump = new StringBuilder();
          foreach (var wordGroup in atomsByWord)
          {
              dump.Append("Group ===== " + wordGroup.Key + Environment.NewLine);
              foreach (var member in wordGroup)
                  dump.Append(" " + member.ComparisonUnitAtomMember.ToString(0) + Environment.NewLine);
          }
          TestUtil.NotePad(dump.ToString());
      }

      // Tag every word with the "localName:unid" path of the grouping elements found among the
      // ancestors of its first atom.
      var wordsWithHierarchy = atomsByWord
          .Select(wordGroup =>
          {
              var firstAtomOfWord = wordGroup.First().ComparisonUnitAtomMember;
              var ancestorPath = firstAtomOfWord
                  .AncestorElements
                  .Where(a => ComparisonGroupingElements.Contains(a.Name))
                  .Select(a => a.Name.LocalName + ":" + (string)a.Attribute(PtOpenXml.Unid))
                  .ToArray();
              var word = new ComparisonUnitWord(wordGroup.Select(member => member.ComparisonUnitAtomMember));
              return new WithHierarchicalGroupingKey()
              {
                  ComparisonUnitWord = word,
                  HierarchicalGroupingArray = ancestorPath,
              };
          })
          .ToArray();

      if (s_False)
      {
          var dump = new StringBuilder();
          foreach (var tagged in wordsWithHierarchy)
          {
              dump.Append("Grouping Array: " + tagged.HierarchicalGroupingArray.Select(gam => gam + " - ").StringConcatenate() + Environment.NewLine);
              foreach (var content in tagged.ComparisonUnitWord.Contents)
                  dump.Append(" " + content.ToString(0) + Environment.NewLine);
          }
          TestUtil.NotePad(dump.ToString());
      }

      var hierarchy = GetHierarchicalComparisonUnits(wordsWithHierarchy, 0).ToArray();

      if (s_False)
      {
          var rendered = ComparisonUnit.ComparisonUnitListToString(hierarchy);
          TestUtil.NotePad(rendered);
      }

      return hierarchy;
  }
  /// <summary>
  /// Recursively turns a flat sequence of words, each tagged with its grouping path, into a tree
  /// of comparison units.
  /// </summary>
  /// <param name="input">Words with their "localName:unid" grouping paths, in document order.</param>
  /// <param name="level">Index into each word's grouping path that is handled by this call.</param>
  /// <returns>
  /// The comparison units for this level: a word is returned as is when its path has no entry at
  /// <paramref name="level"/>; adjacent words sharing the same entry are wrapped in one group.
  /// </returns>
  private static IEnumerable<ComparisonUnit> GetHierarchicalComparisonUnits(IEnumerable<WithHierarchicalGroupingKey> input, int level)
  {
      // An empty key marks words whose grouping path is exhausted at this level.
      var runs = input.GroupAdjacent(entry =>
          level < entry.HierarchicalGroupingArray.Length
              ? entry.HierarchicalGroupingArray[level]
              : "");

      var units = new List<ComparisonUnit>();
      foreach (var run in runs)
      {
          if (run.Key == "")
          {
              foreach (var entry in run)
                  units.Add(entry.ComparisonUnitWord);
              continue;
          }

          // The part of the key before the first ':' is the local name of the grouping element.
          ComparisonUnitGroupType? groupType = null;
          switch (run.Key.Split(':')[0])
          {
              case "p":
                  groupType = ComparisonUnitGroupType.Paragraph;
                  break;
              case "tbl":
                  groupType = ComparisonUnitGroupType.Table;
                  break;
              case "tr":
                  groupType = ComparisonUnitGroupType.Row;
                  break;
              case "tc":
                  groupType = ComparisonUnitGroupType.Cell;
                  break;
              case "txbxContent":
                  groupType = ComparisonUnitGroupType.Textbox;
                  break;
          }

          var children = GetHierarchicalComparisonUnits(run, level + 1);
          units.Add(new ComparisonUnitGroup(children, (ComparisonUnitGroupType)groupType, level));
      }
      return units;
  }
  // Element names collected under "allowable run children".
  // NOTE(review): the code that consults this list is outside this part of the file - confirm the
  // exact use before editing the list.
  private static XName[] AllowableRunChildren =
  {
      W.br, W.drawing, W.cr,
      W.dayLong, W.dayShort,
      W.footnoteReference, W.endnoteReference,
      W.monthLong, W.monthShort,
      W.noBreakHyphen,
      //W._object,
      W.pgNum, W.ptab, W.softHyphen, W.sym, W.tab,
      W.yearLong, W.yearShort,
      M.oMathPara, M.oMath,
      W.fldChar, W.instrText,
  };
  // Element names collected under "elements to throw away".
  // NOTE(review): presumably these are discarded before comparison; the consuming code is outside
  // this part of the file - confirm.
  private static XName[] ElementsToThrowAway =
  {
      W.bookmarkStart, W.bookmarkEnd,
      W.commentRangeStart, W.commentRangeEnd,
      W.lastRenderedPageBreak, W.proofErr,
      W.tblPr, W.sectPr,
      W.permEnd, W.permStart,
      W.footnoteRef, W.endnoteRef,
      W.separator, W.continuationSeparator,
  };
  // Names of the elements that are given a SHA1 hash, going by the field name.
  // NOTE(review): the code that consumes this list is outside this section - confirm there.
  private static XName[] ElementsToHaveSha1Hash = new XName[]
  {
      W.p, W.tbl, W.tr, W.tc,
      W.drawing, W.pict, W.txbxContent,
  };
  // Elements the comparer refuses to process. VerifyNoInvalidContent throws a
  // NotSupportedException when any descendant of the content has one of these names.
  private static XName[] InvalidElements = new XName[]
  {
      W.altChunk,
      W.customXml,
      W.customXmlDelRangeEnd, W.customXmlDelRangeStart,
      W.customXmlInsRangeEnd, W.customXmlInsRangeStart,
      W.customXmlMoveFromRangeEnd, W.customXmlMoveFromRangeStart,
      W.customXmlMoveToRangeEnd, W.customXmlMoveToRangeStart,
      W.moveFrom, W.moveFromRangeStart, W.moveFromRangeEnd,
      W.moveTo, W.moveToRangeStart, W.moveToRangeEnd,
      W.subDoc,
  };
  // Describes one container element that the atom-list builder descends into and that
  // CoalesceRecurse later rebuilds through ReconstructElement.
  private class RecursionInfo
  {
      // Name of the container element.
      public XName ElementName;

      // Child elements treated as properties of the container: they are skipped while creating
      // atoms and copied from the original element when it is reconstructed. Null means the
      // container has no such property children.
      public XName[] ChildElementPropertyNames;
  }

  // Table of container elements, looked up by element name with FirstOrDefault.
  private static RecursionInfo[] RecursionElements = new RecursionInfo[]
  {
      new RecursionInfo { ElementName = W.del, ChildElementPropertyNames = null },
      new RecursionInfo { ElementName = W.ins, ChildElementPropertyNames = null },
      new RecursionInfo { ElementName = W.tbl, ChildElementPropertyNames = new XName[] { W.tblPr, W.tblGrid, W.tblPrEx } },
      new RecursionInfo { ElementName = W.tr, ChildElementPropertyNames = new XName[] { W.trPr, W.tblPrEx } },
      new RecursionInfo { ElementName = W.tc, ChildElementPropertyNames = new XName[] { W.tcPr, W.tblPrEx } },
      new RecursionInfo { ElementName = W.pict, ChildElementPropertyNames = new XName[] { VML.shapetype } },
      new RecursionInfo { ElementName = VML.group, ChildElementPropertyNames = null },
      new RecursionInfo { ElementName = VML.shape, ChildElementPropertyNames = null },
      new RecursionInfo { ElementName = VML.rect, ChildElementPropertyNames = null },
      new RecursionInfo { ElementName = VML.textbox, ChildElementPropertyNames = null },
      new RecursionInfo { ElementName = O._lock, ChildElementPropertyNames = null },
      new RecursionInfo { ElementName = W.txbxContent, ChildElementPropertyNames = null },
      new RecursionInfo { ElementName = W10.wrap, ChildElementPropertyNames = null },
      new RecursionInfo { ElementName = W.sdt, ChildElementPropertyNames = new XName[] { W.sdtPr, W.sdtEndPr } },
      new RecursionInfo { ElementName = W.sdtContent, ChildElementPropertyNames = null },
      new RecursionInfo { ElementName = W.hyperlink, ChildElementPropertyNames = null },
      new RecursionInfo { ElementName = W.fldSimple, ChildElementPropertyNames = null },
      new RecursionInfo { ElementName = VML.shapetype, ChildElementPropertyNames = null },
      new RecursionInfo { ElementName = W.smartTag, ChildElementPropertyNames = new XName[] { W.smartTagPr } },
      new RecursionInfo { ElementName = W.ruby, ChildElementPropertyNames = new XName[] { W.rubyPr } },
  };
  // Flattens the content under contentParent into an array of ComparisonUnitAtom objects.
  // Before flattening, the content is validated (NotSupportedException for unsupported
  // elements), every descendant receives a unid attribute, and a sectPr that is a direct child
  // of contentParent is moved into the last paragraph. contentParent is modified in place.
  internal static ComparisonUnitAtom[] CreateComparisonUnitAtomList(OpenXmlPart part, XElement contentParent, WmlComparerSettings settings)
  {
      VerifyNoInvalidContent(contentParent);
      AssignUnidToAllElements(contentParent); // every element gets a Guid-based id
      MoveLastSectPrIntoLastParagraph(contentParent);

      ComparisonUnitAtom[] atoms = CreateComparisonUnitAtomListInternal(part, contentParent, settings).ToArray();

      // Debugging aid only: dumps one line per atom.
      if (s_False)
      {
          var dump = new StringBuilder();
          foreach (var atom in atoms)
              dump.Append(atom.ToString()).Append(Environment.NewLine);
          var dumpText = dump.ToString();
          TestUtil.NotePad(dumpText);
      }

      return atoms;
  }
  // Throws NotSupportedException, naming the offending element, for the first descendant (in
  // document order) whose name is listed in InvalidElements. Returns normally otherwise.
  private static void VerifyNoInvalidContent(XElement contentParent)
  {
      foreach (var descendant in contentParent.Descendants())
      {
          if (InvalidElements.Contains(descendant.Name))
              throw new NotSupportedException("Document contains " + descendant.Name.LocalName);
      }
  }
  // Rebuilds a w:document from a flat array of atoms: the body content is reconstructed by
  // CoalesceRecurse starting at ancestor level 0, then the result is post-processed by
  // MoveLastSectPrToChildOfBody and re-ordered with WmlOrderElementsPerStandard.
  internal static XDocument Coalesce(ComparisonUnitAtom[] comparisonUnitAtomList)
  {
      var body = new XElement(W.body, CoalesceRecurse(comparisonUnitAtomList, 0));
      var documentRoot = new XElement(W.document,
          new XAttribute(XNamespace.Xmlns + "w", W.w.NamespaceName),
          new XAttribute(XNamespace.Xmlns + "pt14", PtOpenXml.pt.NamespaceName),
          body);
      var coalescedDoc = new XDocument(documentRoot);

      // little bit of cleanup
      MoveLastSectPrToChildOfBody(coalescedDoc);
      var orderedRoot = (XElement)WordprocessingMLUtil.WmlOrderElementsPerStandard(coalescedDoc.Root);
      coalescedDoc.Root.ReplaceWith(orderedRoot);
      return coalescedDoc;
  }
  // Rebuilds the XML for one level of the hierarchy. The atoms are grouped by the unid of
  // their ancestor at the given level, and one element is produced per group:
  //   - w:p  : attributes are copied, pPr atoms become the paragraph properties, everything
  //            else is rebuilt recursively;
  //   - w:r  : rPr is copied from the original run, adjacent w:t / w:delText atoms are merged
  //            into one text element, other content elements are added as they are;
  //   - names listed in RecursionElements are rebuilt by ReconstructElement;
  //   - anything else gets its attributes copied and its content rebuilt recursively.
  // Returns a List<XElement> typed as object so it can be passed directly as XElement content.
  private static object CoalesceRecurse(IEnumerable<ComparisonUnitAtom> list, int level)
  {
      var atomsByAncestor = list.GroupBy(atom =>
      {
          // Per the algorithm, the following condition never evaluates to true. If it does, the
          // basic mechanism for breaking a hierarchical structure into a flat list and back is
          // broken. For a table, we initially get all atoms for the entire table, then process.
          // When processing a row, no atom has ancestors outside the row. Ditto for cells, and
          // on down the tree.
          if (atom.AncestorElements.Length <= level)
              throw new OpenXmlPowerToolsException("Internal error 4 - why do we have ComparisonUnitAtom objects with fewer ancestors than its siblings?");
          return (string)atom.AncestorElements[level].Attribute(PtOpenXml.Unid);
      });

      // Debugging aid only: builds a textual dump of the groups.
      if (s_False)
      {
          var dump = new StringBuilder();
          foreach (var ancestorGroup in atomsByAncestor)
          {
              dump.AppendFormat("Group Key: {0}", ancestorGroup.Key);
              dump.Append(Environment.NewLine);
              foreach (var atom in ancestorGroup)
              {
                  dump.Append(" ");
                  dump.Append(atom.ToString(0));
                  dump.Append(Environment.NewLine);
              }
              dump.Append(Environment.NewLine);
          }
          var dumpText = dump.ToString();
      }

      var rebuiltElements = atomsByAncestor
          .Select(ancestorGroup =>
          {
              var firstAtom = ancestorGroup.First();

              // same invariant as in the grouping key selector above
              if (firstAtom.AncestorElements.Length <= level)
                  throw new OpenXmlPowerToolsException("Internal error 3 - why do we have ComparisonUnitAtom objects with fewer ancestors than its siblings?");
              var ancestor = firstAtom.AncestorElements[level];

              if (ancestor.Name == W.p)
              {
                  var adjacentByName = ancestorGroup
                      .GroupAdjacent(atom => atom.ContentElement.Name.ToString());
                  var rebuiltChildren = adjacentByName
                      .Where(same => same.First().ContentElement.Name != W.pPr)
                      .Select(same => CoalesceRecurse(same, level + 1));
                  var paragraphProps = adjacentByName
                      .Where(same => same.First().ContentElement.Name == W.pPr)
                      .Select(same => same.Select(atom => atom.ContentElement));
                  return new XElement(W.p, ancestor.Attributes(), paragraphProps, rebuiltChildren);
              }

              if (ancestor.Name == W.r)
              {
                  var rebuiltRunContent = ancestorGroup
                      .GroupAdjacent(atom => atom.ContentElement.Name.ToString())
                      .Select(same =>
                      {
                          var contentName = same.First().ContentElement.Name;
                          if (contentName != W.t && contentName != W.delText)
                              return (object)same.Select(atom => atom.ContentElement);

                          // merge the single-character text atoms back into one text element
                          var mergedText = same.Select(atom => atom.ContentElement.Value).StringConcatenate();
                          return (object)new XElement(contentName,
                              GetXmlSpaceAttribute(mergedText),
                              mergedText);
                      });
                  return new XElement(W.r, ancestor.Elements(W.rPr), rebuiltRunContent);
              }

              var recursionInfo = RecursionElements.FirstOrDefault(ri => ri.ElementName == ancestor.Name);
              if (recursionInfo != null)
                  return ReconstructElement(ancestorGroup, ancestor, recursionInfo.ChildElementPropertyNames, level);

              return new XElement(ancestor.Name,
                  ancestor.Attributes(),
                  CoalesceRecurse(ancestorGroup, level + 1));
          })
          .ToList();
      return rebuiltElements;
  }
  // Rebuilds a container element: a new element with the same name as ancestorBeingConstructed
  // that holds, first, the property children copied from the original element (those whose name
  // is in childPropElementNames, if any) and, second, the content rebuilt from the atoms of g
  // one level deeper. Attributes of the original element are not copied.
  private static XElement ReconstructElement(IGrouping<string, ComparisonUnitAtom> g, XElement ancestorBeingConstructed, XName[] childPropElementNames, int level)
  {
      var rebuiltContent = CoalesceRecurse(g, level + 1);
      IEnumerable<XElement> propertyChildren = childPropElementNames == null
          ? null
          : ancestorBeingConstructed.Elements().Where(child => childPropElementNames.Contains(child.Name));
      return new XElement(ancestorBeingConstructed.Name, propertyChildren, rebuiltContent);
  }
  // Moves the w:sectPr that is a direct child of contentParent into the w:pPr of the last
  // direct-child paragraph, creating that w:pPr when the paragraph has none. Does nothing when
  // contentParent has no direct w:sectPr child.
  // Throws OpenXmlPowerToolsException ("Invalid document") when there is more than one direct
  // w:sectPr child, or when there is a w:sectPr but no paragraph to receive it.
  private static void MoveLastSectPrIntoLastParagraph(XElement contentParent)
  {
      var lastSectPrList = contentParent.Elements(W.sectPr).ToList();
      if (lastSectPrList.Count > 1)
          throw new OpenXmlPowerToolsException("Invalid document");

      var lastSectPr = lastSectPrList.FirstOrDefault();
      if (lastSectPr == null)
          return;

      var lastParagraph = contentParent.Elements(W.p).LastOrDefault();
      if (lastParagraph == null)
          throw new OpenXmlPowerToolsException("Invalid document");

      var pPr = lastParagraph.Element(W.pPr);
      if (pPr == null)
      {
          pPr = new XElement(W.pPr);
          // Attach the element itself. Passing the XName (W.pPr) here would add the name's
          // string form as text and leave pPr detached, so the section properties added below
          // would be lost once the original w:sectPr is removed.
          lastParagraph.AddFirst(pPr);
      }

      // lastSectPr still has a parent at this point, so LINQ to XML adds a copy of it.
      pPr.Add(lastSectPr);
      contentParent.Elements(W.sectPr).Remove();
  }
  // Allocates the result list and lets CreateComparisonUnitAtomListRecurse fill it with the
  // atoms found under contentParent, in the order the recursion emits them.
  private static List<ComparisonUnitAtom> CreateComparisonUnitAtomListInternal(OpenXmlPart part, XElement contentParent, WmlComparerSettings settings)
  {
      List<ComparisonUnitAtom> atoms = new List<ComparisonUnitAtom>();
      CreateComparisonUnitAtomListRecurse(part, contentParent, atoms, settings);
      return atoms;
  }
  // Element names that act as grouping levels (paragraph, table, row, cell, textbox content).
  // NOTE(review): the code that consumes this list is outside this section - confirm there.
  private static XName[] ComparisonGroupingElements = new XName[]
  {
      W.p, W.tbl, W.tr, W.tc, W.txbxContent,
  };
  // Walks the element tree and appends atoms to comparisonUnitAtomList:
  //   - w:body / w:footnote / w:endnote : children are visited, no atom for the container;
  //   - w:p  : non-pPr children are visited first, then one atom is added for the paragraph
  //            properties (an empty w:pPr stands in when the paragraph has none);
  //   - w:r  : non-rPr children are visited;
  //   - w:t / w:delText : one atom per character of the element's text;
  //   - AllowableRunChildren and W._object : one atom for the whole element;
  //   - RecursionElements : non-property children are visited;
  //   - ElementsToThrowAway : ignored;
  //   - anything else : all children are visited.
  private static void CreateComparisonUnitAtomListRecurse(OpenXmlPart part, XElement element, List<ComparisonUnitAtom> comparisonUnitAtomList, WmlComparerSettings settings)
  {
      // Ancestors (including the element itself) ordered from the outermost one down to the
      // element, excluding w:body / w:footnotes / w:endnotes and everything above them.
      // Evaluated anew for every atom, so each atom receives its own array instance.
      Func<XElement, XElement[]> ancestorChain = start => start
          .AncestorsAndSelf()
          .TakeWhile(a => a.Name != W.body && a.Name != W.footnotes && a.Name != W.endnotes)
          .Reverse()
          .ToArray();

      XName name = element.Name;

      if (name == W.body || name == W.footnote || name == W.endnote)
      {
          foreach (var child in element.Elements())
              CreateComparisonUnitAtomListRecurse(part, child, comparisonUnitAtomList, settings);
          return;
      }

      if (name == W.p)
      {
          foreach (var child in element.Elements().Where(c => c.Name != W.pPr))
              CreateComparisonUnitAtomListRecurse(part, child, comparisonUnitAtomList, settings);

          // the paragraph mark is represented by the pPr atom, emitted after the content
          XElement paragraphProps = element.Element(W.pPr) ?? new XElement(W.pPr);
          comparisonUnitAtomList.Add(new ComparisonUnitAtom(
              paragraphProps,
              ancestorChain(element),
              part,
              settings));
          return;
      }

      if (name == W.r)
      {
          foreach (var child in element.Elements().Where(c => c.Name != W.rPr))
              CreateComparisonUnitAtomListRecurse(part, child, comparisonUnitAtomList, settings);
          return;
      }

      if (name == W.t || name == W.delText)
      {
          foreach (char ch in element.Value)
          {
              comparisonUnitAtomList.Add(new ComparisonUnitAtom(
                  new XElement(name, ch),
                  ancestorChain(element),
                  part,
                  settings));
          }
          return;
      }

      if (AllowableRunChildren.Contains(name) || name == W._object)
      {
          comparisonUnitAtomList.Add(new ComparisonUnitAtom(
              element,
              ancestorChain(element),
              part,
              settings));
          return;
      }

      var recursionInfo = RecursionElements.FirstOrDefault(ri => ri.ElementName == name);
      if (recursionInfo != null)
      {
          AnnotateElementWithProps(part, element, comparisonUnitAtomList, recursionInfo.ChildElementPropertyNames, settings);
          return;
      }

      if (!ElementsToThrowAway.Contains(name))
          AnnotateElementWithProps(part, element, comparisonUnitAtomList, null, settings);
  }
  // Visits the children of element with CreateComparisonUnitAtomListRecurse, skipping those
  // whose name is listed in childElementPropertyNames. A null list means every child is visited.
  private static void AnnotateElementWithProps(OpenXmlPart part, XElement element, List<ComparisonUnitAtom> comparisonUnitAtomList, XName[] childElementPropertyNames, WmlComparerSettings settings)
  {
      IEnumerable<XElement> contentChildren = childElementPropertyNames == null
          ? element.Elements()
          : element.Elements().Where(child => !childElementPropertyNames.Contains(child.Name));

      foreach (var child in contentChildren)
          CreateComparisonUnitAtomListRecurse(part, child, comparisonUnitAtomList, settings);
  }
  // Gives every descendant of contentParent that lacks one a PtOpenXml.Unid attribute holding a
  // new Guid rendered as 32 hexadecimal digits without hyphens. Existing unids are left alone.
  private static void AssignUnidToAllElements(XElement contentParent)
  {
      var withoutUnid = contentParent
          .Descendants()
          .Where(descendant => descendant.Attribute(PtOpenXml.Unid) == null);

      foreach (var descendant in withoutUnid)
          descendant.Add(new XAttribute(PtOpenXml.Unid, Guid.NewGuid().ToString("N")));
  }
  6308. }
  // Pairs a ComparisonUnitWord with the grouping keys that GetHierarchicalComparisonUnits uses
  // to nest it inside ComparisonUnitGroup objects.
  internal class WithHierarchicalGroupingKey
  {
      // One key per nesting level, indexed by level. A word whose array is shorter than the
      // current level is emitted as a plain word at that level. The text before the first ':'
      // of a key selects the group type (p, tbl, tr, tc or txbxContent).
      public string[] HierarchicalGroupingArray;
      // The word that the keys belong to.
      public ComparisonUnitWord ComparisonUnitWord;
  }
  // Base class of the comparison-unit tree (atoms, words and groups in this file).
  public abstract class ComparisonUnit
  {
      // Child units. Null for units that never assign it (ComparisonUnitAtom's constructor
      // does not set it).
      public List<ComparisonUnit> Contents;
      public string SHA1Hash;
      public CorrelationStatus CorrelationStatus;

      // Returns all units below this one in depth-first, pre-order sequence (each unit is
      // followed by its own descendants). The unit itself is not included. A unit without
      // Contents yields an empty sequence.
      public IEnumerable<ComparisonUnit> Descendants()
      {
          var comparisonUnitList = new List<ComparisonUnit>();
          DescendantsInternal(this, comparisonUnitList);
          return comparisonUnitList;
      }

      // Returns the atoms among Descendants().
      public IEnumerable<ComparisonUnitAtom> DescendantContentAtoms()
      {
          return Descendants().OfType<ComparisonUnitAtom>();
      }

      private int? m_DescendantContentAtomsCount = null;

      // Number of descendant atoms. Computed on first access and cached from then on, so later
      // changes to Contents are not reflected.
      public int DescendantContentAtomsCount
      {
          get
          {
              if (!m_DescendantContentAtomsCount.HasValue)
                  m_DescendantContentAtomsCount = this.DescendantContentAtoms().Count();
              return m_DescendantContentAtomsCount.Value;
          }
      }

      private void DescendantsInternal(ComparisonUnit comparisonUnit, List<ComparisonUnit> comparisonUnitList)
      {
          // Leaf units have no Contents. Guarding here (rather than only before recursing) also
          // protects the top-level call, which previously threw NullReferenceException when
          // Descendants() was invoked on a unit without Contents.
          if (comparisonUnit.Contents == null)
              return;

          foreach (var cu in comparisonUnit.Contents)
          {
              comparisonUnitList.Add(cu);
              DescendantsInternal(cu, comparisonUnitList);
          }
      }

      // Debug rendering of the unit, indented by the given number of spaces.
      public abstract string ToString(int indent);

      // Debug rendering of a list of units: a header line followed by each unit at indent 2.
      internal static string ComparisonUnitListToString(ComparisonUnit[] cul)
      {
          var sb = new StringBuilder();
          sb.Append("Dump Comparision Unit List To String" + Environment.NewLine);
          foreach (var item in cul)
              sb.Append(item.ToString(2) + Environment.NewLine);
          return sb.ToString();
      }
  }
  // A comparison unit made of a run of atoms. Its SHA1Hash is the hash of the concatenation of
  // the SHA1Hash values of its atoms, in order.
  internal class ComparisonUnitWord : ComparisonUnit
  {
      public ComparisonUnitWord(IEnumerable<ComparisonUnitAtom> comparisonUnitAtomList)
      {
          Contents = comparisonUnitAtomList.OfType<ComparisonUnit>().ToList();
          string concatenatedAtomHashes = Contents
              .Select(atom => atom.SHA1Hash)
              .StringConcatenate();
          SHA1Hash = WmlComparerUtil.SHA1HashStringForUTF8String(concatenatedAtomHashes);
      }

      // Elements that carry relationship ids, going by the field name.
      // NOTE(review): the consumers of this list are outside this section.
      public static XName[] s_ElementsWithRelationshipIds = new XName[]
      {
          A.blip, A.hlinkClick, A.relIds,
          C.chart, C.externalData, C.userShapes,
          DGM.relIds,
          O.OLEObject,
          VML.fill, VML.imagedata, VML.stroke,
          W.altChunk, W.attachedTemplate, W.control, W.dataSource,
          W.embedBold, W.embedBoldItalic, W.embedItalic, W.embedRegular,
          W.footerReference, W.headerReference, W.headerSource,
          W.hyperlink, W.printerSettings, W.recipientData, W.saveThroughXslt,
          W.sourceFileName, W.src, W.subDoc,
          WNE.toolbarData,
      };

      // Attribute names that hold relationship ids, going by the field name.
      // NOTE(review): the consumers of this list are outside this section.
      public static XName[] s_RelationshipAttributeNames = new XName[]
      {
          R.embed, R.link, R.id,
          R.cs, R.dm, R.lo, R.qs,
          R.href, R.pict,
      };

      // Debug rendering: a header line with the first 8 characters of the word's hash, followed
      // by one line per atom, indented two further columns.
      public override string ToString(int indent)
      {
          var text = new StringBuilder();
          string header = "".PadRight(indent) + "Word SHA1:" + this.SHA1Hash.Substring(0, 8);
          text.Append(header).Append(Environment.NewLine);
          foreach (var atom in Contents)
              text.Append(atom.ToString(indent + 2)).Append(Environment.NewLine);
          return text.ToString();
      }
  }
  // SHA1 helpers used to fingerprint comparison units. Hashes are returned as lower-case
  // hexadecimal strings (40 characters for SHA1).
  class WmlComparerUtil
  {
      // Hashes the UTF-8 encoding of s.
      public static string SHA1HashStringForUTF8String(string s)
      {
          return SHA1HashStringForByteArray(Encoding.UTF8.GetBytes(s));
      }

      // Hashes the given bytes.
      public static string SHA1HashStringForByteArray(byte[] bytes)
      {
          // SHA1 is IDisposable; dispose it deterministically instead of leaving one instance
          // per call to the finalizer.
          using (var sha1 = SHA1.Create())
          {
              return HexStringFromBytes(sha1.ComputeHash(bytes));
          }
      }

      // Renders each byte as two lower-case hexadecimal digits.
      public static string HexStringFromBytes(byte[] bytes)
      {
          var sb = new StringBuilder(bytes.Length * 2);
          foreach (byte b in bytes)
              sb.Append(b.ToString("x2"));
          return sb.ToString();
      }
  }
  // Leaf of the comparison-unit tree: one content element (a single character of text, a pPr,
  // or a whole run child such as a tab or drawing) together with the chain of ancestors needed
  // to put it back into a document.
  public class ComparisonUnitAtom : ComparisonUnit
  {
      // AncestorElements are kept in order from the body to the leaf, because this is the order
      // in which we need to access them in order to reassemble the document. However, in many
      // places in the code it is necessary to find the nearest ancestor, i.e. cell, so it is
      // necessary to reverse the order when looking for it, i.e. look from the leaf back to the
      // body element.
      public XElement[] AncestorElements;
      // Not assigned by the constructor; null until set elsewhere.
      public string[] AncestorUnids;
      public XElement ContentElement;
      public XElement ContentElementBefore;
      public ComparisonUnitAtom ComparisonUnitAtomBefore;
      public OpenXmlPart Part;
      // The w:ins / w:del element that governs this atom, or null when there is none.
      public XElement RevTrackElement;

      // Creates the atom, derives its CorrelationStatus from revision tracking (Equal when
      // there is none, Deleted for w:del, Inserted for w:ins) and sets SHA1Hash: the value of
      // an existing PtOpenXml.SHA1Hash attribute on the content element wins, otherwise the
      // hash is computed from the element's local name and text.
      public ComparisonUnitAtom(XElement contentElement, XElement[] ancestorElements, OpenXmlPart part, WmlComparerSettings settings)
      {
          ContentElement = contentElement;
          AncestorElements = ancestorElements;
          Part = part;
          RevTrackElement = GetRevisionTrackingElementFromAncestors(contentElement, AncestorElements);
          if (RevTrackElement == null)
          {
              CorrelationStatus = CorrelationStatus.Equal;
          }
          else
          {
              if (RevTrackElement.Name == W.del)
                  CorrelationStatus = CorrelationStatus.Deleted;
              else if (RevTrackElement.Name == W.ins)
                  CorrelationStatus = CorrelationStatus.Inserted;
          }
          string sha1Hash = (string)contentElement.Attribute(PtOpenXml.SHA1Hash);
          if (sha1Hash != null)
          {
              SHA1Hash = sha1Hash;
          }
          else
          {
              var shaHashString = GetSha1HashStringForElement(ContentElement, settings);
              SHA1Hash = WmlComparerUtil.SHA1HashStringForUTF8String(shaHashString);
          }
      }

      // Text that is hashed for an element: its local name followed by its text value,
      // upper-cased with settings.CultureInfo when the comparison is case-insensitive.
      // NOTE(review): string.ToUpper(CultureInfo) rejects a null culture - confirm that
      // settings.CultureInfo is always set when CaseInsensitive is true.
      private string GetSha1HashStringForElement(XElement contentElement, WmlComparerSettings settings)
      {
          var text = contentElement.Value;
          if (settings.CaseInsensitive)
              text = text.ToUpper(settings.CultureInfo);
          return contentElement.Name.LocalName + text;
      }

      // For a pPr the tracking element is the w:del / w:ins inside its w:rPr (the revision mark
      // of the paragraph mark); for everything else it is the outermost w:del / w:ins ancestor.
      private static XElement GetRevisionTrackingElementFromAncestors(XElement contentElement, XElement[] ancestors)
      {
          if (contentElement.Name == W.pPr)
          {
              return contentElement
                  .Elements(W.rPr)
                  .Elements()
                  .FirstOrDefault(e => e.Name == W.del || e.Name == W.ins);
          }
          return ancestors.FirstOrDefault(a => a.Name == W.del || a.Name == W.ins);
      }

      // Debug rendering with the ancestors shown by name and the unid attribute they carry.
      public override string ToString(int indent)
      {
          return FormatAtom(indent, false);
      }

      // Debug rendering with the ancestors paired with the values in AncestorUnids.
      public string ToStringAncestorUnids(int indent)
      {
          return FormatAtom(indent, true);
      }

      public override string ToString()
      {
          return ToString(0);
      }

      public string ToStringAncestorUnids()
      {
          return ToStringAncestorUnids(0);
      }

      // Shared implementation of the two debug renderings; they differ only in how the ancestor
      // chain is printed.
      private string FormatAtom(int indent, bool useAncestorUnids)
      {
          int xNamePad = 16;
          var sb = new StringBuilder();
          sb.Append("".PadRight(indent));

          string correlationStatus = "";
          if (CorrelationStatus != CorrelationStatus.Nil)
              correlationStatus = string.Format("[{0}] ", CorrelationStatus.ToString().PadRight(8));

          string shortHash = FirstChars(this.SHA1Hash, 8);
          if (ContentElement.Name == W.t || ContentElement.Name == W.delText)
              sb.AppendFormat("Atom {0}: {1} {2} SHA1:{3} ", PadLocalName(xNamePad, this), ContentElement.Value, correlationStatus, shortHash);
          else
              sb.AppendFormat("Atom {0}: {1} SHA1:{2} ", PadLocalName(xNamePad, this), correlationStatus, shortHash);

          if (useAncestorUnids)
              AppendAncestorsUnidsDump(sb, this);
          else
              AppendAncestorsDump(sb, this);
          return sb.ToString();
      }

      // Returns at most the first count characters of value; shorter values are returned whole
      // and null becomes the empty string. Hashes and unids can come from attributes already
      // present in the document, so their length is not guaranteed.
      private static string FirstChars(string value, int count)
      {
          if (value == null)
              return "";
          return value.Length <= count ? value : value.Substring(0, count);
      }

      private static string PadLocalName(int xNamePad, ComparisonUnitAtom item)
      {
          return (item.ContentElement.Name.LocalName + " ").PadRight(xNamePad, '-') + " ";
      }

      private void AppendAncestorsDump(StringBuilder sb, ComparisonUnitAtom sr)
      {
          var s = sr.AncestorElements.Select(p => p.Name.LocalName + GetUnid(p) + "/").StringConcatenate().TrimEnd('/');
          sb.Append("Ancestors:" + s);
      }

      private void AppendAncestorsUnidsDump(StringBuilder sb, ComparisonUnitAtom sr)
      {
          // AncestorUnids is not set by the constructor; treat "not set yet" as "no unids"
          // instead of letting Zip throw ArgumentNullException from a debug dump.
          var unids = sr.AncestorUnids ?? new string[0];
          var s = sr.AncestorElements
              .Zip(unids, (a, u) => a.Name.LocalName + "[" + FirstChars(u, 8) + "]/")
              .StringConcatenate()
              .TrimEnd('/');
          sb.Append("Ancestors:" + s);
      }

      // "[xxxxxxxx]" with the first 8 characters of the element's unid, or "" when it has none.
      private string GetUnid(XElement p)
      {
          var unid = (string)p.Attribute(PtOpenXml.Unid);
          if (unid == null)
              return "";
          return "[" + FirstChars(unid, 8) + "]";
      }

      // Debug rendering of a list of atoms, one per line, each prefixed with its 1-based,
      // six-digit position in the list.
      public static string ComparisonUnitAtomListToString(List<ComparisonUnitAtom> comparisonUnitAtomList, int indent)
      {
          StringBuilder sb = new StringBuilder();
          for (int i = 0; i < comparisonUnitAtomList.Count; i++)
          {
              sb.Append("".PadRight(indent))
                  .AppendFormat("[{0:000000}] ", i + 1)
                  .Append(comparisonUnitAtomList[i].ToString(0) + Environment.NewLine);
          }
          return sb.ToString();
      }
  }
  // Kind of container that a ComparisonUnitGroup represents. Each value corresponds to one
  // grouping element name, as mapped in GetHierarchicalComparisonUnits and ComparisonUnitGroup.
  internal enum ComparisonUnitGroupType
  {
      Paragraph,  // w:p
      Table,      // w:tbl
      Row,        // w:tr
      Cell,       // w:tc
      Textbox,    // w:txbxContent
  };
  // A comparison unit that stands for a grouping element (paragraph, table, row, cell or
  // textbox content) and contains the units found inside it. Its hashes are read from the
  // attributes of the element it represents.
  internal class ComparisonUnitGroup : ComparisonUnit
  {
      public ComparisonUnitGroupType ComparisonUnitGroupType;
      public string CorrelatedSHA1Hash;
      public string StructureSHA1Hash;

      // comparisonUnitList: the units contained in the group; must not be empty.
      // groupType: the kind of container.
      // level: index of the represented element among the grouping ancestors (w:tbl, w:tr,
      //        w:tc, w:p, w:txbxContent) of the group's first atom, counted from the outermost.
      // Throws OpenXmlPowerToolsException when the first atom has no grouping ancestor at
      // that level.
      public ComparisonUnitGroup(IEnumerable<ComparisonUnit> comparisonUnitList, ComparisonUnitGroupType groupType, int level)
      {
          Contents = comparisonUnitList.ToList();
          ComparisonUnitGroupType = groupType;

          // Work from the materialized list so the caller's sequence is enumerated only once.
          var first = Contents.First();
          ComparisonUnitAtom comparisonUnitAtom = GetFirstComparisonUnitAtomOfGroup(first);

          var ancestorsToLookAt = comparisonUnitAtom.AncestorElements
              .Where(ae => ae.Name == W.tbl || ae.Name == W.tr || ae.Name == W.tc || ae.Name == W.p || ae.Name == W.txbxContent)
              .ToArray();

          // Indexing past the end used to surface as IndexOutOfRangeException; the null check
          // that followed could never fire. Report the intended internal error instead.
          if (level < 0 || level >= ancestorsToLookAt.Length)
              throw new OpenXmlPowerToolsException("Internal error: ComparisonUnitGroup");
          var ancestor = ancestorsToLookAt[level];

          SHA1Hash = (string)ancestor.Attribute(PtOpenXml.SHA1Hash);
          CorrelatedSHA1Hash = (string)ancestor.Attribute(PtOpenXml.CorrelatedSHA1Hash);
          StructureSHA1Hash = (string)ancestor.Attribute(PtOpenXml.StructureSHA1Hash);
      }

      // Follows the first child of nested groups down to the first word and returns that
      // word's first atom. Throws OpenXmlPowerToolsException when the chain ends in something
      // that is neither a group nor a word.
      public static ComparisonUnitAtom GetFirstComparisonUnitAtomOfGroup(ComparisonUnit group)
      {
          var thisGroup = group;
          while (true)
          {
              var tg = thisGroup as ComparisonUnitGroup;
              if (tg != null)
              {
                  thisGroup = tg.Contents.First();
                  continue;
              }
              var tw = thisGroup as ComparisonUnitWord;
              if (tw == null)
                  throw new OpenXmlPowerToolsException("Internal error: GetFirstComparisonUnitAtomOfGroup");
              return (ComparisonUnitAtom)tw.Contents.First();
          }
      }

      // Debug rendering: a header line with the group type and hash, followed by the rendering
      // of each contained unit, indented two further columns.
      public override string ToString(int indent)
      {
          var sb = new StringBuilder();
          sb.Append("".PadRight(indent) + "Group Type: " + ComparisonUnitGroupType.ToString() + " SHA1:" + SHA1Hash + Environment.NewLine);
          foreach (var comparisonUnitAtom in Contents)
              sb.Append(comparisonUnitAtom.ToString(indent + 2));
          return sb.ToString();
      }
  }
  // Correlation state of a comparison unit or of a CorrelatedSequence.
  public enum CorrelationStatus
  {
      // Default value of an unassigned status; ComparisonUnitAtom's debug output omits the
      // status when it is Nil.
      Nil,
      Normal,
      Unknown,
      // Assigned to atoms governed by a w:ins revision element.
      Inserted,
      // Assigned to atoms governed by a w:del revision element.
      Deleted,
      // Assigned to atoms that have no revision tracking element.
      Equal,
      Group,
  }
  // Simple holder for a hash string.
  // NOTE(review): the name suggests it is attached to a part as an annotation; the code that
  // uses it is outside this section.
  class PartSHA1HashAnnotation
  {
      // The hash value exactly as passed to the constructor.
      public string Hash;

      public PartSHA1HashAnnotation(string hash)
      {
          this.Hash = hash;
      }
  }
  // One stretch of the comparison result: a status plus the units from each side.
  class CorrelatedSequence
  {
      public CorrelationStatus CorrelationStatus;

      // if ComparisonUnitArray1 == null and ComparisonUnitArray2 contains sequence, then inserted content.
      // if ComparisonUnitArray2 == null and ComparisonUnitArray1 contains sequence, then deleted content.
      // if ComparisonUnitArray2 contains sequence and ComparisonUnitArray1 contains sequence, then either is Unknown or Equal.
      public ComparisonUnit[] ComparisonUnitArray1;
      public ComparisonUnit[] ComparisonUnitArray2;
  #if DEBUG
      // Where the sequence was created (caller of the constructor); debug builds only.
      public string SourceFile;
      public int SourceLine;
  #endif

      public CorrelatedSequence()
      {
  #if DEBUG
          // Capture the stack once and tolerate a missing caller frame.
          var callerFrame = new System.Diagnostics.StackTrace(true).GetFrame(1);
          if (callerFrame != null)
          {
              SourceFile = callerFrame.GetFileName();
              SourceLine = callerFrame.GetFileLineNumber();
          }
  #endif
      }

      // Debug rendering. For an Equal sequence only ComparisonUnitArray2 is listed; otherwise
      // each array that is present is listed under its own heading.
      public override string ToString()
      {
          var sb = new StringBuilder();
          var indentString = " ";
          var indentString4 = " ";
          sb.Append("CorrelatedSequence =====" + Environment.NewLine);
  #if DEBUG
          sb.Append(indentString + "Created at Line: " + SourceLine.ToString() + Environment.NewLine);
  #endif
          sb.Append(indentString + "CorrelatedItem =====" + Environment.NewLine);
          sb.Append(indentString4 + "CorrelationStatus: " + CorrelationStatus.ToString() + Environment.NewLine);
          if (CorrelationStatus == CorrelationStatus.Equal)
          {
              sb.Append(indentString4 + "ComparisonUnitList =====" + Environment.NewLine);
              // The array may not have been assigned yet; a debug dump should not throw for that.
              AppendUnits(sb, ComparisonUnitArray2);
          }
          else
          {
              if (ComparisonUnitArray1 != null)
              {
                  sb.Append(indentString4 + "ComparisonUnitList1 =====" + Environment.NewLine);
                  AppendUnits(sb, ComparisonUnitArray1);
              }
              if (ComparisonUnitArray2 != null)
              {
                  sb.Append(indentString4 + "ComparisonUnitList2 =====" + Environment.NewLine);
                  AppendUnits(sb, ComparisonUnitArray2);
              }
          }
          return sb.ToString();
      }

      // Appends the rendering of each unit at indent 6, one per line; a null array adds nothing.
      private static void AppendUnits(StringBuilder sb, ComparisonUnit[] units)
      {
          if (units == null)
              return;
          foreach (var item in units)
              sb.Append(item.ToString(6) + Environment.NewLine);
      }
  }
  6739. }