WmlComparer.cs 371 KB


  1. // Copyright (c) Microsoft. All rights reserved.
  2. // Licensed under the MIT license. See LICENSE file in the project root for full license information.
  3. // TODO Line 1202 there are inefficient calls to PutXDocument() for footnotes and endnotes
  4. // TODO wDocConsolidated.MainDocumentPart.FootnotesPart.PutXDocument();
  5. // TODO Take care of this after the conference
  6. using System;
  7. using System.Collections.Generic;
  8. using System.Linq;
  9. using System.Globalization;
  10. using System.IO;
  11. using System.IO.Packaging;
  12. using System.Text;
  13. using System.Xml.Linq;
  14. using DocumentFormat.OpenXml.Packaging;
  15. using System.Drawing;
  16. using System.Security.Cryptography;
  17. using OpenXmlPowerTools;
  18. // It is possible to optimize DescendantContentAtoms
  19. ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  20. /// Currently, the unid is set at the beginning of the algorithm. It is used by the code that establishes correlation based on first rejecting
  21. /// tracked revisions, then correlating paragraphs/tables. It is required for this algorithm - after finding a correlated sequence in the document with rejected
  22. /// revisions, it uses the unid to find the same paragraph in the document without rejected revisions, then sets the correlated sha1 hash in that document.
  23. ///
  24. /// But then when accepting tracked revisions, for certain paragraphs (where there are deleted paragraph marks) it is going to lose the unids. But this isn't a
  25. /// problem because when paragraph marks are deleted, the correlation is definitely no longer possible. Any paragraphs that are in a range of paragraphs that
  26. /// are coalesced can't be correlated to paragraphs in the other document via their hash. At that point we no longer care what their unids are.
  27. ///
  28. /// But after that it is only used to reconstruct the tree. It is also used in the debugging code that
  29. /// prints the various correlated sequences and comparison units - this is display for debugging purposes only.
  30. ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  31. ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  32. /// The key idea here is that a given paragraph will always have the same ancestors, and it doesn't matter whether the content was deleted from the old document,
  33. /// inserted into the new document, or set as equal. At this point, we identify a paragraph as a sequential list of content atoms, terminated by a paragraph mark.
  34. /// This entire list will be for a single paragraph, regardless of whether the paragraph is a child of the body, or if the paragraph is in a cell in a table, or if
  35. /// the paragraph is in a text box. The list of ancestors, from the paragraph to the root of the XML tree will be the same for all content atoms in the paragraph.
  36. ///
  37. /// Therefore:
  38. ///
  39. /// Iterate through the list of content atoms backwards. When the loop sees a paragraph mark, it gets the ancestor unids from the paragraph mark to the top of the
  40. /// tree, and sets this as the same for all content atoms in the paragraph. For descendants of the paragraph mark, it doesn't really matter if content is put into
  41. /// separate runs or what not. We don't need to be concerned about what the unids are for descendants of the paragraph.
  42. ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  43. namespace OpenXmlPowerTools.Previous
  44. {
  45. public class WmlComparerSettings
  46. {
  47. public char[] WordSeparators;
  48. public string AuthorForRevisions = "Open-Xml-PowerTools";
  49. public string DateTimeForRevisions = DateTime.Now.ToString("o");
  50. public double DetailThreshold = 0.15;
  51. public bool CaseInsensitive = false;
  52. public bool ConflateBreakingAndNonbreakingSpaces = true;
  53. public CultureInfo CultureInfo = null;
  54. public Action<string> LogCallback = null;
  55. public int StartingIdForFootnotesEndnotes = 1;
  56. public DirectoryInfo DebugTempFileDi;
  57. public WmlComparerSettings()
  58. {
  59. // note that , and . are processed explicitly to handle cases where they are in a number or word
  60. WordSeparators = new[] { ' ', '-', ')', '(', ';', ',', '(', ')', ',', '、', '、', ',', ';', '。', ':', '的', }; // todo need to fix this for complete list
  61. }
  62. }
/// <summary>
/// Extra options for WmlComparer.Consolidate. The three-argument Consolidate overload creates a
/// default instance of this class and forwards it to the four-argument overload.
/// </summary>
public class WmlComparerConsolidateSettings
{
/// <summary>
/// Defaults to true. NOTE(review): presumably selects table-based presentation of consolidated
/// revisions; the code that reads this flag is outside the portion of the file reviewed here - confirm.
/// </summary>
public bool ConsolidateWithTable = true;
}
/// <summary>
/// Describes one revised document handed to WmlComparer.Consolidate.
/// </summary>
public class WmlRevisedDocumentInfo
{
/// <summary>The revised document; Consolidate compares it against the pre-processed original.</summary>
public WmlDocument RevisedDocument;
/// <summary>Name of the revisor; Consolidate copies it onto each revision it records for this document.</summary>
public string Revisor;
/// <summary>Color associated with this revisor; Consolidate converts it to a hex string and also copies it onto each recorded revision.</summary>
public Color Color;
}
  73. public static class WmlComparer
  74. {
// Constant-like switch used in conditions such as "if (s_False && ...)" to keep optional
// debugging blocks compiled but disabled. It is a mutable public field, so it can be flipped at runtime.
public static bool s_False = false;
// Counterpart of s_False; no use of it is visible in the portion of the file reviewed here.
public static bool s_True = true;
// When true (and settings.DebugTempFileDi is not null), the comparer saves intermediate
// .docx files for each processing step into that directory.
public static bool s_SaveIntermediateFilesForDebugging = false;
  78. public static WmlDocument Compare(WmlDocument source1, WmlDocument source2, WmlComparerSettings settings)
  79. {
  80. return CompareInternal(source1, source2, settings, true);
  81. }
  82. private static WmlDocument CompareInternal(WmlDocument source1, WmlDocument source2, WmlComparerSettings settings,
  83. bool preProcessMarkupInOriginal)
  84. {
  85. if (preProcessMarkupInOriginal)
  86. source1 = PreProcessMarkup(source1, settings.StartingIdForFootnotesEndnotes + 1000);
  87. source2 = PreProcessMarkup(source2, settings.StartingIdForFootnotesEndnotes + 2000);
  88. if (s_SaveIntermediateFilesForDebugging && settings.DebugTempFileDi != null)
  89. {
  90. var name1 = "Source1-Step1-PreProcess.docx";
  91. var name2 = "Source2-Step1-PreProcess.docx";
  92. var preProcFi1 = new FileInfo(Path.Combine(settings.DebugTempFileDi.FullName, name1));
  93. source1.SaveAs(preProcFi1.FullName);
  94. var preProcFi2 = new FileInfo(Path.Combine(settings.DebugTempFileDi.FullName, name2));
  95. source2.SaveAs(preProcFi2.FullName);
  96. }
  97. // at this point, both source1 and source2 have unid on every element. These are the values that will enable reassembly of the XML tree.
  98. // but we need other values.
  99. // In source1:
  100. // - accept tracked revisions
  101. // - determine hash code for every block-level element
  102. // - save as attribute on every element
  103. // - accept tracked revisions and reject tracked revisions leave the unids alone, where possible.
  104. // - after accepting and calculating the hash, then can use the unids to find the right block-level element in the unmodified source1, and install the hash
  105. // In source2:
  106. // - reject tracked revisions
  107. // - determine hash code for every block-level element
  108. // - save as an attribute on every element
  109. // - after rejecting and calculating the hash, then can use the unids to find the right block-level element in the unmodified source2, and install the hash
  110. // - sometimes after accepting or rejecting tracked revisions, several paragraphs will get coalesced into a single paragraph due to paragraph marks being inserted / deleted.
  111. // - in this case, some paragraphs will not get a hash injected onto them.
  112. // - if a paragraph doesn't have a hash, then it will never correspond to another paragraph, and such issues will need to be resolved in the normal execution of the LCS algorithm.
  113. // - note that when we do propagate the unid through for the first paragraph.
  114. // Establish correlation between the two.
  115. // Find the longest common sequence of block-level elements where hash codes are the same.
  116. // this sometimes will be every block level element in the document. Or sometimes will be just a fair number of them.
  117. // at the start of doing the LCS algorithm, we will match up content, and put them in corresponding unknown correlated comparison units. Those paragraphs will only ever be matched to their corresponding paragraph.
  118. // then the algorithm can proceed as usual.
  119. // need to call ChangeFootnoteEndnoteReferencesToUniqueRange before creating the wmlResult document, so that
  120. // the same GUID ids are used for footnote and endnote references in both the 'after' document, and in the
  121. // result document.
  122. var source1afterAccepting = RevisionProcessor.AcceptRevisions(source1);
  123. var source2afterRejecting = RevisionProcessor.RejectRevisions(source2);
  124. if (s_SaveIntermediateFilesForDebugging && settings.DebugTempFileDi != null)
  125. {
  126. var name1 = "Source1-Step2-AfterAccepting.docx";
  127. var name2 = "Source2-Step2-AfterRejecting.docx";
  128. var afterAcceptingFi1 = new FileInfo(Path.Combine(settings.DebugTempFileDi.FullName, name1));
  129. source1afterAccepting.SaveAs(afterAcceptingFi1.FullName);
  130. var afterRejectingFi2 = new FileInfo(Path.Combine(settings.DebugTempFileDi.FullName, name2));
  131. source2afterRejecting.SaveAs(afterRejectingFi2.FullName);
  132. }
  133. // this creates the correlated hash codes that enable us to match up ranges of paragraphs based on
  134. // accepting in source1, rejecting in source2
  135. source1 = HashBlockLevelContent(source1, source1afterAccepting, settings);
  136. source2 = HashBlockLevelContent(source2, source2afterRejecting, settings);
  137. if (s_SaveIntermediateFilesForDebugging && settings.DebugTempFileDi != null)
  138. {
  139. var name1 = "Source1-Step3-AfterHashing.docx";
  140. var name2 = "Source2-Step3-AfterHashing.docx";
  141. var afterHashingFi1 = new FileInfo(Path.Combine(settings.DebugTempFileDi.FullName, name1));
  142. source1.SaveAs(afterHashingFi1.FullName);
  143. var afterHashingFi2 = new FileInfo(Path.Combine(settings.DebugTempFileDi.FullName, name2));
  144. source2.SaveAs(afterHashingFi2.FullName);
  145. }
  146. // Accept revisions in before, and after
  147. source1 = RevisionProcessor.AcceptRevisions(source1);
  148. source2 = RevisionProcessor.AcceptRevisions(source2);
  149. if (s_SaveIntermediateFilesForDebugging && settings.DebugTempFileDi != null)
  150. {
  151. var name1 = "Source1-Step4-AfterAccepting.docx";
  152. var name2 = "Source2-Step4-AfterAccepting.docx";
  153. var afterAcceptingFi1 = new FileInfo(Path.Combine(settings.DebugTempFileDi.FullName, name1));
  154. source1.SaveAs(afterAcceptingFi1.FullName);
  155. var afterAcceptingFi2 = new FileInfo(Path.Combine(settings.DebugTempFileDi.FullName, name2));
  156. source2.SaveAs(afterAcceptingFi2.FullName);
  157. }
  158. // after accepting revisions, some unids may have been removed by revision accepter, along with the correlatedSHA1Hash codes,
  159. // this is as it should be.
  160. // but need to go back in and add guids to paragraphs that have had them removed.
  161. using (MemoryStream ms = new MemoryStream())
  162. {
  163. ms.Write(source2.DocumentByteArray, 0, source2.DocumentByteArray.Length);
  164. using (WordprocessingDocument wDoc = WordprocessingDocument.Open(ms, true))
  165. {
  166. AddUnidsToMarkupInContentParts(wDoc);
  167. }
  168. }
  169. WmlDocument wmlResult = new WmlDocument(source1);
  170. using (MemoryStream ms1 = new MemoryStream())
  171. using (MemoryStream ms2 = new MemoryStream())
  172. {
  173. ms1.Write(source1.DocumentByteArray, 0, source1.DocumentByteArray.Length);
  174. ms2.Write(source2.DocumentByteArray, 0, source2.DocumentByteArray.Length);
  175. WmlDocument producedDocument;
  176. using (WordprocessingDocument wDoc1 = WordprocessingDocument.Open(ms1, true))
  177. using (WordprocessingDocument wDoc2 = WordprocessingDocument.Open(ms2, true))
  178. {
  179. producedDocument = ProduceDocumentWithTrackedRevisions(settings, wmlResult, wDoc1, wDoc2);
  180. }
  181. if (s_False && settings.DebugTempFileDi != null)
  182. {
  183. var name1 = "Source1-Step5-AfterProducingDocWithRevTrk.docx";
  184. var name2 = "Source2-Step5-AfterProducingDocWithRevTrk.docx";
  185. var afterProducingFi1 = new FileInfo(Path.Combine(settings.DebugTempFileDi.FullName, name1));
  186. var afterProducingWml1 = new WmlDocument("after1.docx", ms1.ToArray());
  187. afterProducingWml1.SaveAs(afterProducingFi1.FullName);
  188. var afterProducingFi2 = new FileInfo(Path.Combine(settings.DebugTempFileDi.FullName, name2));
  189. var afterProducingWml2 = new WmlDocument("after2.docx", ms2.ToArray());
  190. afterProducingWml2.SaveAs(afterProducingFi2.FullName);
  191. }
  192. if (s_False && settings.DebugTempFileDi != null)
  193. {
  194. var cleanedSource = CleanPowerToolsAndRsid(source1);
  195. var name1 = "Cleaned-Source.docx";
  196. var cleanedSourceFi1 = new FileInfo(Path.Combine(settings.DebugTempFileDi.FullName, name1));
  197. cleanedSource.SaveAs(cleanedSourceFi1.FullName);
  198. var cleanedProduced = CleanPowerToolsAndRsid(producedDocument);
  199. var name2 = "Cleaned-Produced.docx";
  200. var cleanedProducedFi1 = new FileInfo(Path.Combine(settings.DebugTempFileDi.FullName, name2));
  201. cleanedProduced.SaveAs(cleanedProducedFi1.FullName);
  202. }
  203. return producedDocument;
  204. }
  205. }
  206. private static WmlDocument CleanPowerToolsAndRsid(WmlDocument producedDocument)
  207. {
  208. using (MemoryStream ms = new MemoryStream())
  209. {
  210. ms.Write(producedDocument.DocumentByteArray, 0, producedDocument.DocumentByteArray.Length);
  211. using (WordprocessingDocument wDoc = WordprocessingDocument.Open(ms, true))
  212. {
  213. foreach (var cp in wDoc.ContentParts())
  214. {
  215. var xd = cp.GetXDocument();
  216. var newRoot = CleanPartTransform(xd.Root);
  217. xd.Root.ReplaceWith(newRoot);
  218. cp.PutXDocument();
  219. }
  220. }
  221. var cleaned = new WmlDocument("cleaned.docx", ms.ToArray());
  222. return cleaned;
  223. }
  224. }
  225. private static WmlDocument HashBlockLevelContent(WmlDocument source, WmlDocument source1afterProcessingRevTracking, WmlComparerSettings settings)
  226. {
  227. using (MemoryStream msSource = new MemoryStream())
  228. using (MemoryStream msAfterProc = new MemoryStream())
  229. {
  230. msSource.Write(source.DocumentByteArray, 0, source.DocumentByteArray.Length);
  231. msAfterProc.Write(source1afterProcessingRevTracking.DocumentByteArray, 0, source1afterProcessingRevTracking.DocumentByteArray.Length);
  232. using (WordprocessingDocument wDocSource = WordprocessingDocument.Open(msSource, true))
  233. using (WordprocessingDocument wDocAfterProc = WordprocessingDocument.Open(msAfterProc, true))
  234. {
  235. // create Unid dictionary for source
  236. var sourceMainXDoc = wDocSource
  237. .MainDocumentPart
  238. .GetXDocument();
  239. var sourceUnidDict = sourceMainXDoc
  240. .Root
  241. .Descendants()
  242. .Where(d => d.Name == W.p || d.Name == W.tbl || d.Name == W.tr)
  243. .ToDictionary(d => (string)d.Attribute(PtOpenXml.Unid));
  244. var afterProcMainXDoc = wDocAfterProc
  245. .MainDocumentPart
  246. .GetXDocument();
  247. foreach (var blockLevelContent in afterProcMainXDoc.Root.Descendants().Where(d => d.Name == W.p || d.Name == W.tbl || d.Name == W.tr))
  248. {
  249. var cloneBlockLevelContentForHashing = (XElement)CloneBlockLevelContentForHashing(wDocAfterProc.MainDocumentPart, blockLevelContent, true, settings);
  250. var shaString = cloneBlockLevelContentForHashing.ToString(SaveOptions.DisableFormatting)
  251. .Replace(" xmlns=\"http://schemas.openxmlformats.org/wordprocessingml/2006/main\"", "");
  252. var sha1Hash = PtUtils.SHA1HashStringForUTF8String(shaString);
  253. var thisUnid = (string)blockLevelContent.Attribute(PtOpenXml.Unid);
  254. if (thisUnid != null)
  255. {
  256. if (sourceUnidDict.ContainsKey(thisUnid))
  257. {
  258. var correlatedBlockLevelContent = sourceUnidDict[thisUnid];
  259. correlatedBlockLevelContent.Add(new XAttribute(PtOpenXml.CorrelatedSHA1Hash, sha1Hash));
  260. }
  261. }
  262. }
  263. wDocSource.MainDocumentPart.PutXDocument();
  264. }
  265. WmlDocument sourceWithCorrelatedSHA1Hash = new WmlDocument(source.FileName, msSource.ToArray());
  266. return sourceWithCorrelatedSHA1Hash;
  267. }
  268. }
  269. private static WmlDocument PreProcessMarkup(WmlDocument source, int startingIdForFootnotesEndnotes)
  270. {
  271. // open and close to get rid of MC content
  272. using (MemoryStream ms = new MemoryStream())
  273. {
  274. ms.Write(source.DocumentByteArray, 0, source.DocumentByteArray.Length);
  275. OpenSettings os = new OpenSettings();
  276. os.MarkupCompatibilityProcessSettings = new MarkupCompatibilityProcessSettings(MarkupCompatibilityProcessMode.ProcessAllParts,
  277. DocumentFormat.OpenXml.FileFormatVersions.Office2007);
  278. using (WordprocessingDocument wDoc = WordprocessingDocument.Open(ms, true, os))
  279. {
  280. var doc = wDoc.MainDocumentPart.RootElement;
  281. if (wDoc.MainDocumentPart.FootnotesPart != null)
  282. {
  283. // contrary to what you might think, looking at the API, it is necessary to access the root element of each part to cause
  284. // the SDK to process MC markup.
  285. var fn = wDoc.MainDocumentPart.FootnotesPart.RootElement;
  286. }
  287. if (wDoc.MainDocumentPart.EndnotesPart != null)
  288. {
  289. var en = wDoc.MainDocumentPart.EndnotesPart.RootElement;
  290. }
  291. }
  292. source = new WmlDocument(source.FileName, ms.ToArray());
  293. }
  294. // open and close to get rid of MC content
  295. using (MemoryStream ms = new MemoryStream())
  296. {
  297. ms.Write(source.DocumentByteArray, 0, source.DocumentByteArray.Length);
  298. OpenSettings os = new OpenSettings();
  299. os.MarkupCompatibilityProcessSettings = new MarkupCompatibilityProcessSettings(MarkupCompatibilityProcessMode.ProcessAllParts,
  300. DocumentFormat.OpenXml.FileFormatVersions.Office2007);
  301. using (WordprocessingDocument wDoc = WordprocessingDocument.Open(ms, true, os))
  302. {
  303. TestForInvalidContent(wDoc);
  304. RemoveExistingPowerToolsMarkup(wDoc);
  305. SimplifyMarkupSettings msSettings = new SimplifyMarkupSettings()
  306. {
  307. RemoveBookmarks = true,
  308. AcceptRevisions = false,
  309. RemoveComments = true,
  310. RemoveContentControls = true,
  311. RemoveFieldCodes = true,
  312. RemoveGoBackBookmark = true,
  313. RemoveLastRenderedPageBreak = true,
  314. RemovePermissions = true,
  315. RemoveProof = true,
  316. RemoveSmartTags = true,
  317. RemoveSoftHyphens = true,
  318. RemoveHyperlinks = true,
  319. };
  320. MarkupSimplifier.SimplifyMarkup(wDoc, msSettings);
  321. ChangeFootnoteEndnoteReferencesToUniqueRange(wDoc, startingIdForFootnotesEndnotes);
  322. AddUnidsToMarkupInContentParts(wDoc);
  323. AddFootnotesEndnotesParts(wDoc);
  324. FillInEmptyFootnotesEndnotes(wDoc);
  325. DetachExternalData(wDoc);
  326. }
  327. return new WmlDocument(source.FileName, ms.ToArray());
  328. }
  329. }
  330. private static void DetachExternalData(WordprocessingDocument wDoc)
  331. {
  332. // External data for chart parts contains relationships to external links, which are not properly propagated to the destination document (There is little point to doing so.)
  333. // Therefore remove them.
  334. foreach (var chart in wDoc.MainDocumentPart.ChartParts)
  335. {
  336. var cxd = chart.GetXDocument();
  337. cxd.Descendants(C.externalData).Remove();
  338. chart.PutXDocument();
  339. }
  340. }
  341. // somehow, sometimes a footnote or endnote contains absolutely nothing - no paragraph - nothing.
  342. // This messes up the algorithm, so in this case, insert an empty paragraph.
  343. // This is pretty wacky markup to find, and I don't know how this markup comes into existence, but this is an innocuous fix.
  344. private static void FillInEmptyFootnotesEndnotes(WordprocessingDocument wDoc)
  345. {
  346. XElement emptyFootnote = XElement.Parse(
  347. @"<w:p xmlns:w='http://schemas.openxmlformats.org/wordprocessingml/2006/main'>
  348. <w:pPr>
  349. <w:pStyle w:val='FootnoteText'/>
  350. </w:pPr>
  351. <w:r>
  352. <w:rPr>
  353. <w:rStyle w:val='FootnoteReference'/>
  354. </w:rPr>
  355. <w:footnoteRef/>
  356. </w:r>
  357. </w:p>");
  358. XElement emptyEndnote = XElement.Parse(
  359. @"<w:p xmlns:w='http://schemas.openxmlformats.org/wordprocessingml/2006/main'>
  360. <w:pPr>
  361. <w:pStyle w:val='EndnoteText'/>
  362. </w:pPr>
  363. <w:r>
  364. <w:rPr>
  365. <w:rStyle w:val='EndnoteReference'/>
  366. </w:rPr>
  367. <w:endnoteRef/>
  368. </w:r>
  369. </w:p>");
  370. var footnotePart = wDoc.MainDocumentPart.FootnotesPart;
  371. if (footnotePart != null)
  372. {
  373. var fnXDoc = footnotePart.GetXDocument();
  374. foreach (var fn in fnXDoc.Root.Elements(W.footnote))
  375. {
  376. if (!fn.HasElements)
  377. fn.Add(emptyFootnote);
  378. }
  379. footnotePart.PutXDocument();
  380. }
  381. var endnotePart = wDoc.MainDocumentPart.EndnotesPart;
  382. if (endnotePart != null)
  383. {
  384. var fnXDoc = endnotePart.GetXDocument();
  385. foreach (var fn in fnXDoc.Root.Elements(W.endnote))
  386. {
  387. if (!fn.HasElements)
  388. fn.Add(emptyEndnote);
  389. }
  390. endnotePart.PutXDocument();
  391. }
  392. }
  393. private static bool ContentContainsFootnoteEndnoteReferencesThatHaveRevisions(XElement element, WordprocessingDocument wDocDelta)
  394. {
  395. var footnoteEndnoteReferences = element.Descendants().Where(d => d.Name == W.footnoteReference || d.Name == W.endnoteReference);
  396. if (!footnoteEndnoteReferences.Any())
  397. return false;
  398. var footnoteXDoc = wDocDelta.MainDocumentPart.FootnotesPart.GetXDocument();
  399. var endnoteXDoc = wDocDelta.MainDocumentPart.EndnotesPart.GetXDocument();
  400. foreach (var note in footnoteEndnoteReferences)
  401. {
  402. XElement fnen = null;
  403. if (note.Name == W.footnoteReference)
  404. {
  405. var id = (int)note.Attribute(W.id);
  406. fnen = footnoteXDoc
  407. .Root
  408. .Elements(W.footnote)
  409. .FirstOrDefault(n => (int)n.Attribute(W.id) == id);
  410. if (fnen.Descendants().Where(d => d.Name == W.ins || d.Name == W.del).Any())
  411. return true;
  412. }
  413. if (note.Name == W.endnoteReference)
  414. {
  415. var id = (int)note.Attribute(W.id);
  416. fnen = endnoteXDoc
  417. .Root
  418. .Elements(W.endnote)
  419. .FirstOrDefault(n => (int)n.Attribute(W.id) == id);
  420. if (fnen.Descendants().Where(d => d.Name == W.ins || d.Name == W.del).Any())
  421. return true;
  422. }
  423. }
  424. return false;
  425. }
  426. private static void AddUnidsToMarkupInContentParts(WordprocessingDocument wDoc)
  427. {
  428. var mdp = wDoc.MainDocumentPart.GetXDocument();
  429. AssignUnidToAllElements(mdp.Root);
  430. IgnorePt14Namespace(mdp.Root);
  431. wDoc.MainDocumentPart.PutXDocument();
  432. if (wDoc.MainDocumentPart.FootnotesPart != null)
  433. {
  434. var p = wDoc.MainDocumentPart.FootnotesPart.GetXDocument();
  435. AssignUnidToAllElements(p.Root);
  436. IgnorePt14Namespace(p.Root);
  437. wDoc.MainDocumentPart.FootnotesPart.PutXDocument();
  438. }
  439. if (wDoc.MainDocumentPart.EndnotesPart != null)
  440. {
  441. var p = wDoc.MainDocumentPart.EndnotesPart.GetXDocument();
  442. AssignUnidToAllElements(p.Root);
  443. IgnorePt14Namespace(p.Root);
  444. wDoc.MainDocumentPart.EndnotesPart.PutXDocument();
  445. }
  446. }
// Describes one revised block that Consolidate attaches to an element of the consolidated
// document (via AddToAnnotation).
private class ConsolidationInfo
{
// Revisor name, copied from WmlRevisedDocumentInfo.Revisor.
public string Revisor;
// Revisor color, copied from WmlRevisedDocumentInfo.Color.
public Color Color;
// The revised paragraph or table taken from the delta document.
public XElement RevisionElement;
// Set to true by Consolidate when no preceding element with a unid is found, i.e. the revision
// belongs at the beginning of the document; otherwise the revision goes after the anchor element.
public bool InsertBefore = false;
// NOTE(review): presumably a hash identifying the revision content; it is assigned outside the
// portion of the file reviewed here - confirm.
public string RevisionHash;
// Footnote elements from the delta document that are referenced by RevisionElement.
public XElement[] Footnotes;
// Endnote elements from the delta document that are referenced by RevisionElement.
public XElement[] Endnotes;
public string RevisionString; // for debugging purposes only
}
// Shorthand for the platform line terminator.
private static string nl = Environment.NewLine;
  459. /*****************************************************************************************************************/
  460. // Consolidate processes footnotes and endnotes in a particular fashion - if the unmodified document has a footnote
  461. // reference, and a delta has a footnote reference, we end up with two footnotes - one is unmodified, and is referred to
  462. // from the unmodified content. The footnote reference in the delta refers to the modified footnote. This is as it
  463. // should be.
  464. /*****************************************************************************************************************/
  465. public static WmlDocument Consolidate(WmlDocument original,
  466. List<WmlRevisedDocumentInfo> revisedDocumentInfoList,
  467. WmlComparerSettings settings)
  468. {
  469. var consolidateSettings = new WmlComparerConsolidateSettings();
  470. return Consolidate(original, revisedDocumentInfoList, settings, consolidateSettings);
  471. }
  472. public static WmlDocument Consolidate(WmlDocument original,
  473. List<WmlRevisedDocumentInfo> revisedDocumentInfoList,
  474. WmlComparerSettings settings, WmlComparerConsolidateSettings consolidateSettings)
  475. {
  476. #if false
  477. var now = DateTime.Now;
  478. var tempName = String.Format("{0:00}-{1:00}-{2:00}-{3:00}{4:00}{5:00}", now.Year - 2000, now.Month, now.Day, now.Hour, now.Minute, now.Second);
  479. FileInfo fi = new FileInfo("./WmlComparer.Consolidate-" + tempName + "-Original.docx");
  480. File.WriteAllBytes(fi.FullName, original.DocumentByteArray);
  481. for (int i = 0; i < revisedDocumentInfoList.Count(); i++)
  482. {
  483. fi = new FileInfo("./WmlComparer.Consolidate-" + tempName + string.Format("-Revised-{0}", i) + ".docx");
  484. File.WriteAllBytes(fi.FullName, revisedDocumentInfoList.ElementAt(i).RevisedDocument.DocumentByteArray);
  485. }
  486. StringBuilder sbt = new StringBuilder();
  487. int count = 0;
  488. foreach (var rev in revisedDocumentInfoList)
  489. {
  490. sbt.Append("Revised #" + (count++).ToString() + Environment.NewLine);
  491. sbt.Append("Color:" + rev.Color.ToString() + Environment.NewLine);
  492. sbt.Append("Revisor:" + rev.Revisor + Environment.NewLine);
  493. sbt.Append("" + Environment.NewLine);
  494. }
  495. sbt.Append("settings.AuthorForRevisions:" + settings.AuthorForRevisions + Environment.NewLine);
  496. sbt.Append("settings.CaseInsensitive:" + settings.CaseInsensitive.ToString() + Environment.NewLine);
  497. sbt.Append("settings.CultureInfo:" + settings.CultureInfo.ToString() + Environment.NewLine);
  498. sbt.Append("settings.DateTimeForRevisions:" + settings.DateTimeForRevisions.ToString() + Environment.NewLine);
  499. sbt.Append("settings.DetailThreshold:" + settings.DetailThreshold.ToString() + Environment.NewLine);
  500. sbt.Append("settings.StartingIdForFootnotesEndnotes:" + settings.StartingIdForFootnotesEndnotes.ToString() + Environment.NewLine);
  501. sbt.Append("settings.WordSeparators:" + settings.WordSeparators.Select(ws => ws.ToString()).StringConcatenate() + Environment.NewLine);
  502. //sb.Append(":" + settings);
  503. fi = new FileInfo("./WmlComparer.Consolidate-" + tempName + "-Settings.txt");
  504. File.WriteAllText(fi.FullName, sbt.ToString());
  505. #endif
  506. // pre-process the original, so that it already has unids for all elements
  507. // then when comparing all documents to the original, each one will have the unid as appropriate
  508. // for all revision block-level content
  509. // set unid to look for
  510. // while true
  511. // determine where to insert
  512. // get the unid for the revision
  513. // look it up in the original. if find it, then insert after that element
  514. // if not in the original
  515. // look backwards in revised document, set unid to look for, do the loop again
  516. // if get to the beginning of the document
  517. // insert at beginning of document
  518. settings.StartingIdForFootnotesEndnotes = 3000;
  519. var originalWithUnids = PreProcessMarkup(original, settings.StartingIdForFootnotesEndnotes);
  520. WmlDocument consolidated = new WmlDocument(originalWithUnids);
  521. if (s_SaveIntermediateFilesForDebugging && settings.DebugTempFileDi != null)
  522. {
  523. var name1 = "Original-with-Unids.docx";
  524. var preProcFi1 = new FileInfo(Path.Combine(settings.DebugTempFileDi.FullName, name1));
  525. originalWithUnids.SaveAs(preProcFi1.FullName);
  526. }
  527. var revisedDocumentInfoListCount = revisedDocumentInfoList.Count();
  528. using (MemoryStream consolidatedMs = new MemoryStream())
  529. {
  530. consolidatedMs.Write(consolidated.DocumentByteArray, 0, consolidated.DocumentByteArray.Length);
  531. using (WordprocessingDocument consolidatedWDoc = WordprocessingDocument.Open(consolidatedMs, true))
  532. {
  533. var consolidatedMainDocPart = consolidatedWDoc.MainDocumentPart;
  534. var consolidatedMainDocPartXDoc = consolidatedMainDocPart.GetXDocument();
  535. // save away last sectPr
  536. XElement savedSectPr = consolidatedMainDocPartXDoc
  537. .Root
  538. .Element(W.body)
  539. .Elements(W.sectPr)
  540. .LastOrDefault();
  541. consolidatedMainDocPartXDoc
  542. .Root
  543. .Element(W.body)
  544. .Elements(W.sectPr)
  545. .Remove();
  546. var consolidatedByUnid = consolidatedMainDocPartXDoc
  547. .Descendants()
  548. .Where(d => (d.Name == W.p || d.Name == W.tbl) && d.Attribute(PtOpenXml.Unid) != null)
  549. .ToDictionary(d => (string)d.Attribute(PtOpenXml.Unid));
  550. int deltaNbr = 1;
  551. foreach (var revisedDocumentInfo in revisedDocumentInfoList)
  552. {
  553. settings.StartingIdForFootnotesEndnotes = (deltaNbr * 2000) + 3000;
  554. var delta = WmlComparer.CompareInternal(originalWithUnids, revisedDocumentInfo.RevisedDocument, settings, false);
  555. if (s_SaveIntermediateFilesForDebugging && settings.DebugTempFileDi != null)
  556. {
  557. var name1 = string.Format("Delta-{0}.docx", deltaNbr++);
  558. var deltaFi = new FileInfo(Path.Combine(settings.DebugTempFileDi.FullName, name1));
  559. delta.SaveAs(deltaFi.FullName);
  560. }
  561. var colorRgb = revisedDocumentInfo.Color.ToArgb();
  562. var colorString = colorRgb.ToString("X");
  563. if (colorString.Length == 8)
  564. colorString = colorString.Substring(2);
  565. using (MemoryStream msOriginalWithUnids = new MemoryStream())
  566. using (MemoryStream msDelta = new MemoryStream())
  567. {
  568. msOriginalWithUnids.Write(originalWithUnids.DocumentByteArray, 0, originalWithUnids.DocumentByteArray.Length);
  569. msDelta.Write(delta.DocumentByteArray, 0, delta.DocumentByteArray.Length);
  570. using (WordprocessingDocument wDocOriginalWithUnids = WordprocessingDocument.Open(msOriginalWithUnids, true))
  571. using (WordprocessingDocument wDocDelta = WordprocessingDocument.Open(msDelta, true))
  572. {
  573. var modMainDocPart = wDocDelta.MainDocumentPart;
  574. var modMainDocPartXDoc = modMainDocPart.GetXDocument();
  575. var blockLevelContentToMove = modMainDocPartXDoc
  576. .Root
  577. .DescendantsTrimmed(d => d.Name == W.txbxContent || d.Name == W.tr)
  578. .Where(d => d.Name == W.p || d.Name == W.tbl)
  579. .Where(d => d.Descendants().Any(z => z.Name == W.ins || z.Name == W.del) ||
  580. ContentContainsFootnoteEndnoteReferencesThatHaveRevisions(d, wDocDelta))
  581. .ToList();
  582. foreach (var revision in blockLevelContentToMove)
  583. {
  584. var elementLookingAt = revision;
  585. while (true)
  586. {
  587. var unid = (string)elementLookingAt.Attribute(PtOpenXml.Unid);
  588. if (unid == null)
  589. throw new OpenXmlPowerToolsException("Internal error");
  590. XElement elementToInsertAfter = null;
  591. if (consolidatedByUnid.ContainsKey(unid))
  592. elementToInsertAfter = consolidatedByUnid[unid];
  593. if (elementToInsertAfter != null)
  594. {
  595. ConsolidationInfo ci = new ConsolidationInfo();
  596. ci.Revisor = revisedDocumentInfo.Revisor;
  597. ci.Color = revisedDocumentInfo.Color;
  598. ci.RevisionElement = revision;
  599. ci.Footnotes = revision
  600. .Descendants(W.footnoteReference)
  601. .Select(fr =>
  602. {
  603. var id = (int)fr.Attribute(W.id);
  604. var fnXDoc = wDocDelta.MainDocumentPart.FootnotesPart.GetXDocument();
  605. var footnote = fnXDoc.Root.Elements(W.footnote).FirstOrDefault(fn => (int)fn.Attribute(W.id) == id);
  606. if (footnote == null)
  607. throw new OpenXmlPowerToolsException("Internal Error");
  608. return footnote;
  609. })
  610. .ToArray();
  611. ci.Endnotes = revision
  612. .Descendants(W.endnoteReference)
  613. .Select(er =>
  614. {
  615. var id = (int)er.Attribute(W.id);
  616. var enXDoc = wDocDelta.MainDocumentPart.EndnotesPart.GetXDocument();
  617. var endnote = enXDoc.Root.Elements(W.endnote).FirstOrDefault(en => (int)en.Attribute(W.id) == id);
  618. if (endnote == null)
  619. throw new OpenXmlPowerToolsException("Internal Error");
  620. return endnote;
  621. })
  622. .ToArray();
  623. AddToAnnotation(
  624. wDocDelta,
  625. consolidatedWDoc,
  626. elementToInsertAfter,
  627. ci,
  628. settings);
  629. break;
  630. }
  631. else
  632. {
  633. // find an element to insert after
  634. var elementBeforeRevision = elementLookingAt
  635. .SiblingsBeforeSelfReverseDocumentOrder()
  636. .FirstOrDefault(e => e.Attribute(PtOpenXml.Unid) != null);
  637. if (elementBeforeRevision == null)
  638. {
  639. var firstElement = consolidatedMainDocPartXDoc
  640. .Root
  641. .Element(W.body)
  642. .Elements()
  643. .FirstOrDefault(e => e.Name == W.p || e.Name == W.tbl);
  644. ConsolidationInfo ci = new ConsolidationInfo();
  645. ci.Revisor = revisedDocumentInfo.Revisor;
  646. ci.Color = revisedDocumentInfo.Color;
  647. ci.RevisionElement = revision;
  648. ci.InsertBefore = true;
  649. ci.Footnotes = revision
  650. .Descendants(W.footnoteReference)
  651. .Select(fr =>
  652. {
  653. var id = (int)fr.Attribute(W.id);
  654. var fnXDoc = wDocDelta.MainDocumentPart.FootnotesPart.GetXDocument();
  655. var footnote = fnXDoc.Root.Elements(W.footnote).FirstOrDefault(fn => (int)fn.Attribute(W.id) == id);
  656. if (footnote == null)
  657. throw new OpenXmlPowerToolsException("Internal Error");
  658. return footnote;
  659. })
  660. .ToArray();
  661. ci.Endnotes = revision
  662. .Descendants(W.endnoteReference)
  663. .Select(er =>
  664. {
  665. var id = (int)er.Attribute(W.id);
  666. var enXDoc = wDocDelta.MainDocumentPart.EndnotesPart.GetXDocument();
  667. var endnote = enXDoc.Root.Elements(W.endnote).FirstOrDefault(en => (int)en.Attribute(W.id) == id);
  668. if (endnote == null)
  669. throw new OpenXmlPowerToolsException("Internal Error");
  670. return endnote;
  671. })
  672. .ToArray();
  673. AddToAnnotation(
  674. wDocDelta,
  675. consolidatedWDoc,
  676. firstElement,
  677. ci,
  678. settings);
  679. break;
  680. }
  681. else
  682. {
  683. elementLookingAt = elementBeforeRevision;
  684. continue;
  685. }
  686. }
  687. }
  688. }
  689. CopyMissingStylesFromOneDocToAnother(wDocDelta, consolidatedWDoc);
  690. }
  691. }
  692. }
  693. // at this point, everything is added as an annotation, from all documents to be merged.
  694. // so now the process is to go through and add the annotations to the document
  695. var elementsToProcess = consolidatedMainDocPartXDoc
  696. .Root
  697. .Descendants()
  698. .Where(d => d.Annotation<List<ConsolidationInfo>>() != null)
  699. .ToList();
  700. var emptyParagraph = new XElement(W.p,
  701. new XElement(W.pPr,
  702. new XElement(W.spacing,
  703. new XAttribute(W.after, "0"),
  704. new XAttribute(W.line, "240"),
  705. new XAttribute(W.lineRule, "auto"))));
  706. foreach (var ele in elementsToProcess)
  707. {
  708. var lci = ele.Annotation<List<ConsolidationInfo>>();
  709. // process before
  710. var contentToAddBefore = lci
  711. .Where(ci => ci.InsertBefore == true)
  712. .GroupAdjacent(ci => ci.Revisor + ci.Color.ToString())
  713. .Select((groupedCi, idx) => AssembledConjoinedRevisionContent(emptyParagraph, groupedCi, idx, consolidatedWDoc, consolidateSettings));
  714. ele.AddBeforeSelf(contentToAddBefore);
  715. // process after
  716. // if all revisions from all revisors are exactly the same, then instead of adding multiple tables after
  717. // that contains the revisions, then simply replace the paragraph with the one with the revisions.
  718. // RC004 documents contain the test data to exercise this.
  719. var lciCount = lci.Where(ci => ci.InsertBefore == false).Count();
  720. if (lciCount > 1 && lciCount == revisedDocumentInfoListCount)
  721. {
  722. /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  723. // This is the code that determines if revisions should be consolidated into one.
  724. var uniqueRevisions = lci
  725. .Where(ci => ci.InsertBefore == false)
  726. .GroupBy(ci =>
  727. {
  728. // Get a hash after first accepting revisions and compressing the text.
  729. var acceptedRevisionElement = RevisionProcessor.AcceptRevisionsForElement(ci.RevisionElement);
  730. var sha1Hash = PtUtils.SHA1HashStringForUTF8String(acceptedRevisionElement.Value.Replace(" ", "").Replace(" ", "").Replace(" ", "").Replace("\n", "").Replace(".", "").Replace(",", "").ToUpper());
  731. return sha1Hash;
  732. })
  733. .OrderByDescending(g => g.Count())
  734. .ToList();
  735. var uniqueRevisionCount = uniqueRevisions.Count();
  736. /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  737. if (uniqueRevisionCount == 1)
  738. {
  739. MoveFootnotesEndnotesForConsolidatedRevisions(lci.First(), consolidatedWDoc);
  740. var dummyElement = new XElement("dummy", lci.First().RevisionElement);
  741. foreach (var rev in dummyElement.Descendants().Where(d => d.Attribute(W.author) != null))
  742. {
  743. var aut = rev.Attribute(W.author);
  744. aut.Value = "ITU";
  745. }
  746. ele.ReplaceWith(dummyElement.Elements());
  747. continue;
  748. }
  749. // this is the location where we have determined that there are the same number of revisions for this paragraph as there are revision documents.
  750. // however, the hash for all of them were not the same.
  751. // therefore, they would be added to the consolidated document as separate revisions.
  752. // create a log that shows what is different, in detail.
  753. if (settings.LogCallback != null)
  754. {
  755. StringBuilder sb = new StringBuilder();
  756. sb.Append("====================================================================================================" + nl);
  757. sb.Append("Non-Consolidated Revision" + nl);
  758. sb.Append("====================================================================================================" + nl);
  759. foreach (var urList in uniqueRevisions)
  760. {
  761. var revisorList = urList.Select(ur => ur.Revisor + " : ").StringConcatenate().TrimEnd(' ', ':');
  762. sb.Append("Revisors: " + revisorList + nl);
  763. var str = RevisionToLogFormTransform(urList.First().RevisionElement, 0, false);
  764. sb.Append(str);
  765. sb.Append("=========================" + nl);
  766. }
  767. sb.Append(nl);
  768. settings.LogCallback(sb.ToString());
  769. }
  770. }
  771. // todo this is where it assembles the content to put into a single cell table
  772. // the magic function is AssembledConjoinedRevisionContent
  773. var contentToAddAfter = lci
  774. .Where(ci => ci.InsertBefore == false)
  775. .GroupAdjacent(ci => ci.Revisor + ci.Color.ToString())
  776. .Select((groupedCi, idx) => AssembledConjoinedRevisionContent(emptyParagraph, groupedCi, idx, consolidatedWDoc, consolidateSettings));
  777. ele.AddAfterSelf(contentToAddAfter);
  778. }
  779. #if false
  780. // old code
  781. foreach (var ele in elementsToProcess)
  782. {
  783. var lci = ele.Annotation<List<ConsolidationInfo>>();
  784. // if all revisions from all revisors are exactly the same, then instead of adding multiple tables after
  785. // that contains the revisions, then simply replace the paragraph with the one with the revisions.
  786. // RC004 documents contain the test data to exercise this.
  787. var lciCount = lci.Count();
  788. if (lci.Count() > 1 && lciCount == revisedDocumentInfoListCount)
  789. {
  790. /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  791. // This is the code that determines if revisions should be consolidated into one.
  792. var uniqueRevisions = lci
  793. .GroupBy(ci =>
  794. {
  795. // Get a hash after first accepting revisions and compressing the text.
  796. var ciz = ci;
  797. var acceptedRevisionElement = RevisionProcessor.AcceptRevisionsForElement(ci.RevisionElement);
  798. var text = acceptedRevisionElement.Value
  799. .Replace(" ", "")
  800. .Replace(" ", "")
  801. .Replace(" ", "")
  802. .Replace("\n", "");
  803. var sha1Hash = PtUtils.SHA1HashStringForUTF8String(text);
  804. return ci.InsertBefore.ToString() + sha1Hash;
  805. })
  806. .OrderByDescending(g => g.Count())
  807. .ToList();
  808. var uniqueRevisionCount = uniqueRevisions.Count();
  809. /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  810. if (uniqueRevisionCount == 1)
  811. {
  812. MoveFootnotesEndnotesForConsolidatedRevisions(lci.First(), consolidatedWDoc);
  813. var dummyElement = new XElement("dummy", lci.First().RevisionElement);
  814. foreach(var rev in dummyElement.Descendants().Where(d => d.Attribute(W.author) != null))
  815. {
  816. var aut = rev.Attribute(W.author);
  817. aut.Value = "ITU";
  818. }
  819. ele.ReplaceWith(dummyElement.Elements());
  820. continue;
  821. }
  822. // this is the location where we have determined that there are the same number of revisions for this paragraph as there are revision documents.
  823. // however, the hash for all of them were not the same.
  824. // therefore, they would be added to the consolidated document as separate revisions.
  825. // create a log that shows what is different, in detail.
  826. if (settings.LogCallback != null)
  827. {
  828. StringBuilder sb = new StringBuilder();
  829. sb.Append("====================================================================================================" + nl);
  830. sb.Append("Non-Consolidated Revision" + nl);
  831. sb.Append("====================================================================================================" + nl);
  832. foreach (var urList in uniqueRevisions)
  833. {
  834. var revisorList = urList.Select(ur => ur.Revisor + " : ").StringConcatenate().TrimEnd(' ', ':');
  835. sb.Append("Revisors: " + revisorList + nl);
  836. var str = RevisionToLogFormTransform(urList.First().RevisionElement, 0, false);
  837. sb.Append(str);
  838. sb.Append("=========================" + nl);
  839. }
  840. sb.Append(nl);
  841. settings.LogCallback(sb.ToString());
  842. }
  843. }
  844. var contentToAddBefore = lci
  845. .Where(ci => ci.InsertBefore == true)
  846. .GroupAdjacent(ci => ci.Revisor + ci.Color.ToString())
  847. .Select((groupedCi, idx) => AssembledConjoinedRevisionContent(emptyParagraph, groupedCi, idx, consolidatedWDoc, consolidateSettings));
  848. var contentToAddAfter = lci
  849. .Where(ci => ci.InsertBefore == false)
  850. .GroupAdjacent(ci => ci.Revisor + ci.Color.ToString())
  851. .Select((groupedCi, idx) => AssembledConjoinedRevisionContent(emptyParagraph, groupedCi, idx, consolidatedWDoc, consolidateSettings));
  852. ele.AddBeforeSelf(contentToAddBefore);
  853. ele.AddAfterSelf(contentToAddAfter);
  854. }
  855. #endif
  856. consolidatedMainDocPartXDoc
  857. .Root
  858. .Element(W.body)
  859. .Add(savedSectPr);
  860. AddTableGridStyleToStylesPart(consolidatedWDoc.MainDocumentPart.StyleDefinitionsPart);
  861. FixUpRevisionIds(consolidatedWDoc, consolidatedMainDocPartXDoc);
  862. IgnorePt14NamespaceForFootnotesEndnotes(consolidatedWDoc);
  863. FixUpDocPrIds(consolidatedWDoc);
  864. FixUpShapeIds(consolidatedWDoc);
  865. FixUpGroupIds(consolidatedWDoc);
  866. FixUpShapeTypeIds(consolidatedWDoc);
  867. RemoveCustomMarkFollows(consolidatedWDoc);
  868. WmlComparer.IgnorePt14Namespace(consolidatedMainDocPartXDoc.Root);
  869. consolidatedWDoc.MainDocumentPart.PutXDocument();
  870. AddFootnotesEndnotesStyles(consolidatedWDoc);
  871. }
  872. var newConsolidatedDocument = new WmlDocument("consolidated.docx", consolidatedMs.ToArray());
  873. return newConsolidatedDocument;
  874. }
  875. }
  /// <summary>
  /// Removes every W.customMarkFollows attribute found on the descendants of the main document
  /// part's root element, then writes the modified XML back to the part.
  /// </summary>
  /// <param name="consolidatedWDoc">The document whose main document part is cleaned up.</param>
  private static void RemoveCustomMarkFollows(WordprocessingDocument consolidatedWDoc)
  {
      var mainPart = consolidatedWDoc.MainDocumentPart;
      var mainXDoc = mainPart.GetXDocument();

      // Descendants() does not yield the root itself, so attributes on the root element are left alone.
      var markAttributes = mainXDoc.Root
          .Descendants()
          .Attributes(W.customMarkFollows);
      markAttributes.Remove();

      mainPart.PutXDocument();
  }
  /// <summary>
  /// Copies the footnotes and endnotes referenced from <c>ci.RevisionElement</c> into the footnotes and
  /// endnotes parts of the consolidated document. Every copied note receives a fresh id, and the
  /// matching reference inside the revision element is rewritten (in place) to that id.
  /// </summary>
  /// <param name="ci">
  /// Supplies the revision element together with the footnote (<c>ci.Footnotes</c>) and endnote
  /// (<c>ci.Endnotes</c>) elements that its references point at.
  /// </param>
  /// <param name="wDocConsolidated">The document that receives the cloned notes.</param>
  /// <exception cref="OpenXmlPowerToolsException">
  /// A reference in the revision has no element with the same id in <c>ci.Footnotes</c> / <c>ci.Endnotes</c>.
  /// </exception>
  private static void MoveFootnotesEndnotesForConsolidatedRevisions(ConsolidationInfo ci, WordprocessingDocument wDocConsolidated)
  {
      // The revision content might contain footnote or endnote references. Each referenced note has
      // to exist in the consolidated document, so it is cloned there under a new id.
      // A note part is only loaded (and saved) when the revision actually references that kind of note.
      var footnoteReferences = ci.RevisionElement.Descendants(W.footnoteReference).ToList();
      if (footnoteReferences.Count > 0)
      {
          var footnotesPart = wDocConsolidated.MainDocumentPart.FootnotesPart;
          var footnoteXDoc = footnotesPart.GetXDocument();

          // New ids continue after the highest id already in the part; with no footnotes present the
          // counter starts at 1, so the first id handed out is 2.
          int maxFootnoteId = 1;
          if (footnoteXDoc.Root.Elements(W.footnote).Any())
              maxFootnoteId = footnoteXDoc.Root.Elements(W.footnote).Select(e => (int)e.Attribute(W.id)).Max();

          foreach (var footnoteReference in footnoteReferences)
          {
              var id = (int)footnoteReference.Attribute(W.id);
              var footnote = ci.Footnotes.FirstOrDefault(fn => (int)fn.Attribute(W.id) == id);
              // Fail with the library's own exception instead of an opaque failure inside the XElement copy constructor.
              if (footnote == null)
                  throw new OpenXmlPowerToolsException("Internal Error");

              maxFootnoteId++;
              var newId = maxFootnoteId.ToString();
              footnoteReference.Attribute(W.id).Value = newId;
              var clonedFootnote = new XElement(footnote);
              clonedFootnote.Attribute(W.id).Value = newId;
              footnoteXDoc.Root.Add(clonedFootnote);
          }
          footnotesPart.PutXDocument();
      }

      var endnoteReferences = ci.RevisionElement.Descendants(W.endnoteReference).ToList();
      if (endnoteReferences.Count > 0)
      {
          var endnotesPart = wDocConsolidated.MainDocumentPart.EndnotesPart;
          var endnoteXDoc = endnotesPart.GetXDocument();

          // Same numbering scheme as for footnotes, applied to the endnotes part.
          int maxEndnoteId = 1;
          if (endnoteXDoc.Root.Elements(W.endnote).Any())
              maxEndnoteId = endnoteXDoc.Root.Elements(W.endnote).Select(e => (int)e.Attribute(W.id)).Max();

          foreach (var endnoteReference in endnoteReferences)
          {
              var id = (int)endnoteReference.Attribute(W.id);
              var endnote = ci.Endnotes.FirstOrDefault(en => (int)en.Attribute(W.id) == id);
              if (endnote == null)
                  throw new OpenXmlPowerToolsException("Internal Error");

              maxEndnoteId++;
              var newId = maxEndnoteId.ToString();
              endnoteReference.Attribute(W.id).Value = newId;
              var clonedEndnote = new XElement(endnote);
              clonedEndnote.Attribute(W.id).Value = newId;
              endnoteXDoc.Root.Add(clonedEndnote);
          }
          endnotesPart.PutXDocument();
      }
  }
  /// <summary>
  /// Recursively rebuilds a node, dropping attributes in the <c>PtOpenXml.pt</c> namespace and
  /// attributes whose local name contains "rsid" (matched without regard to case).
  /// </summary>
  /// <param name="node">The node to transform.</param>
  /// <returns>
  /// A newly constructed <see cref="XElement"/> when <paramref name="node"/> is an element;
  /// otherwise the same node instance that was passed in.
  /// </returns>
  private static object CleanPartTransform(XNode node)
  {
      var element = node as XElement;
      if (element == null)
          return node;

      // Ordinal, case-insensitive match. ToLower() follows the current culture, and under Turkish
      // casing rules "I" does not lower-case to "i", so a name such as "RSID" would not be matched.
      return new XElement(element.Name,
          element.Attributes().Where(a => a.Name.Namespace != PtOpenXml.pt &&
              a.Name.LocalName.IndexOf("rsid", StringComparison.OrdinalIgnoreCase) < 0),
          element.Nodes().Select(n => CleanPartTransform(n)));
  }
  /// <summary>
  /// Renders a revision element as plain text for the log callback.
  /// </summary>
  /// <param name="element">The element to render.</param>
  /// <param name="depth">Number of spaces placed in front of each text line.</param>
  /// <param name="inserting">True when the element sits inside an insertion, so W.t text is labelled as inserted.</param>
  /// <returns>
  /// "Paragraph" plus the rendered children for W.p; one labelled, quoted line for W.t and W.delText;
  /// the concatenated children for W.r, W.ins and W.del; an empty string for everything else
  /// (including W.pPr and W.rPr).
  /// </returns>
  private static string RevisionToLogFormTransform(XElement element, int depth, bool inserting)
  {
      var name = element.Name;

      if (name == W.p)
      {
          // Children of a paragraph are indented two further columns and start outside any insertion.
          var paragraphBody = element
              .Elements()
              .Select(child => RevisionToLogFormTransform(child, depth + 2, false))
              .StringConcatenate();
          return "Paragraph" + nl + paragraphBody;
      }

      if (name == W.t || name == W.delText)
      {
          string label;
          if (name == W.delText)
              label = "Deleted Text:";
          else if (inserting)
              label = "Inserted Text:";
          else
              label = "Text:";
          return "".PadRight(depth) + label + QuoteIt((string)element) + nl;
      }

      // Runs, insertions and deletions emit no line of their own; they only decide whether the
      // text beneath them counts as inserted.
      bool childrenInserted;
      if (name == W.r)
          childrenInserted = inserting;
      else if (name == W.ins)
          childrenInserted = true;
      else if (name == W.del)
          childrenInserted = false;
      else
          return "";

      return element
          .Elements()
          .Select(child => RevisionToLogFormTransform(child, depth, childrenInserted))
          .StringConcatenate();
  }
  /// <summary>
  /// Wraps a string in quotation marks for display in the log.
  /// </summary>
  /// <param name="str">The text to quote.</param>
  /// <returns>
  /// The text enclosed in double quotes, or in single quotes when the text itself contains a double quote.
  /// </returns>
  private static string QuoteIt(string str)
  {
      var delimiter = str.Contains('"') ? "'" : "\"";
      return string.Concat(delimiter, str, delimiter);
  }
  /// <summary>
  /// Runs <c>WmlComparer.IgnorePt14Namespace</c> over the root element of the footnotes part and of the
  /// endnotes part (whichever of them exist) and then saves those parts.
  /// </summary>
  /// <param name="wDoc">The document whose footnotes and endnotes parts are processed.</param>
  private static void IgnorePt14NamespaceForFootnotesEndnotes(WordprocessingDocument wDoc)
  {
      var mainPart = wDoc.MainDocumentPart;
      var noteParts = new OpenXmlPart[] { mainPart.FootnotesPart, mainPart.EndnotesPart }
          .Where(part => part != null)
          .ToList();

      // First pass: update the XML of every existing note part, footnotes before endnotes.
      foreach (var part in noteParts)
          WmlComparer.IgnorePt14Namespace(part.GetXDocument().Root);

      // Second pass: write the parts back only after all of them were updated.
      foreach (var part in noteParts)
          part.PutXDocument();
  }
  /// <summary>
  /// Builds the block-level content that presents one group of revisions (all taken from
  /// <paramref name="groupedCi"/>, attributed to the revisor of its first entry) in the consolidated document.
  /// </summary>
  /// <remarks>
  /// While the content is built, every footnote and endnote referenced by a revision is cloned into the
  /// consolidated document under a new id and the reference inside the revision element is rewritten
  /// to that id. Finally every W.author attribute in the produced content is set to the revisor's name.
  /// </remarks>
  /// <param name="emptyParagraph">Spacer paragraph placed around the table and after any revision that is itself a table.</param>
  /// <param name="groupedCi">The revisions to present together.</param>
  /// <param name="idx">Position of this group among its siblings; only group 0 gets a leading spacer paragraph (table layout).</param>
  /// <param name="wDocConsolidated">The consolidated document that receives the cloned footnotes and endnotes.</param>
  /// <param name="consolidateSettings">When ConsolidateWithTable is set, the revisions are wrapped in a shaded single-cell table with a caption.</param>
  /// <returns>
  /// Table layout: an optional spacer paragraph, the table, and a trailing spacer paragraph.
  /// Otherwise: the revision elements themselves, each table revision followed by a spacer paragraph.
  /// </returns>
  /// <exception cref="OpenXmlPowerToolsException">A note reference has no matching element in the revision's Footnotes / Endnotes.</exception>
  private static XElement[] AssembledConjoinedRevisionContent(XElement emptyParagraph, IGrouping<string, ConsolidationInfo> groupedCi, int idx, WordprocessingDocument wDocConsolidated,
      WmlComparerConsolidateSettings consolidateSettings)
  {
      var footnotesPart = wDocConsolidated.MainDocumentPart.FootnotesPart;
      var endnotesPart = wDocConsolidated.MainDocumentPart.EndnotesPart;
      var consolidatedFootnoteXDoc = footnotesPart.GetXDocument();
      var consolidatedEndnoteXDoc = endnotesPart.GetXDocument();

      // New note ids continue after the highest id already in each part (the counters start at 1
      // when a part holds no notes yet).
      int maxFootnoteId = 1;
      if (consolidatedFootnoteXDoc.Root.Elements(W.footnote).Any())
          maxFootnoteId = consolidatedFootnoteXDoc.Root.Elements(W.footnote).Select(e => (int)e.Attribute(W.id)).Max();
      int maxEndnoteId = 1;
      if (consolidatedEndnoteXDoc.Root.Elements(W.endnote).Any())
          maxEndnoteId = consolidatedEndnoteXDoc.Root.Elements(W.endnote).Select(e => (int)e.Attribute(W.id)).Max();

      var revisor = groupedCi.First().Revisor;

      // Shared by both layouts. The notes must be renumbered BEFORE the revision element is placed
      // into the output, because renumbering edits ci.RevisionElement in place.
      Func<ConsolidationInfo, XElement[]> prepareRevision = ci =>
      {
          maxFootnoteId = CopyReferencedNotesIntoConsolidatedPart(
              ci.RevisionElement, W.footnoteReference, ci.Footnotes, footnotesPart, maxFootnoteId);
          maxEndnoteId = CopyReferencedNotesIntoConsolidatedPart(
              ci.RevisionElement, W.endnoteReference, ci.Endnotes, endnotesPart, maxEndnoteId);

          // A table revision is followed by a spacer paragraph; null entries are skipped when added to XML.
          XElement paraAfter = null;
          if (ci.RevisionElement.Name == W.tbl)
              paraAfter = emptyParagraph;
          return new[] {
              ci.RevisionElement,
              paraAfter,
          };
      };

      XElement dummyElement;
      if (consolidateSettings.ConsolidateWithTable)
      {
          var captionParagraph = new XElement(W.p,
              new XElement(W.pPr,
                  new XElement(W.jc, new XAttribute(W.val, "both")),
                  new XElement(W.rPr,
                      new XElement(W.b),
                      new XElement(W.bCs))),
              new XElement(W.r,
                  new XElement(W.rPr,
                      new XElement(W.b),
                      new XElement(W.bCs)),
                  new XElement(W.t, revisor)));

          // Fill colour as exactly six hex digits (RRGGBB). Masking off the alpha byte and padding with
          // "X6" also yields a valid value when alpha is below 0x10, e.g. for Color.Empty.
          var colorRgb = groupedCi.First().Color.ToArgb();
          var colorString = (colorRgb & 0x00FFFFFF).ToString("X6");

          var table = new XElement(W.tbl,
              new XElement(W.tblPr,
                  new XElement(W.tblStyle, new XAttribute(W.val, "TableGridForRevisions")),
                  new XElement(W.tblW,
                      new XAttribute(W._w, "0"),
                      new XAttribute(W.type, "auto")),
                  new XElement(W.shd,
                      new XAttribute(W.val, "clear"),
                      new XAttribute(W.color, "auto"),
                      new XAttribute(W.fill, colorString)),
                  new XElement(W.tblLook,
                      new XAttribute(W.firstRow, "0"),
                      new XAttribute(W.lastRow, "0"),
                      new XAttribute(W.firstColumn, "0"),
                      new XAttribute(W.lastColumn, "0"),
                      new XAttribute(W.noHBand, "0"),
                      new XAttribute(W.noVBand, "0"))),
              new XElement(W.tblGrid,
                  new XElement(W.gridCol, new XAttribute(W._w, "9576"))),
              new XElement(W.tr,
                  new XElement(W.tc,
                      new XElement(W.tcPr,
                          new XElement(W.shd,
                              new XAttribute(W.val, "clear"),
                              new XAttribute(W.color, "auto"),
                              new XAttribute(W.fill, colorString))),
                      captionParagraph,
                      groupedCi.Select(ci => prepareRevision(ci)))));

          // if the last paragraph has a deleted paragraph mark, then remove the deletion from the paragraph mark.  This is to prevent Word from misbehaving.
          // the last paragraph in a cell must not have a deleted paragraph mark.
          var theCell = table
              .Descendants(W.tc)
              .FirstOrDefault();
          var lastPara = theCell
              .Elements(W.p)
              .LastOrDefault();
          if (lastPara != null)
          {
              lastPara
                  .Elements(W.pPr)
                  .Elements(W.rPr)
                  .Elements(W.del)
                  .Remove();
          }

          var content = new[] {
              idx == 0 ? emptyParagraph : null,
              table,
              emptyParagraph,
          };
          dummyElement = new XElement("dummy", content);
      }
      else
      {
          dummyElement = new XElement("dummy",
              groupedCi.SelectMany(ci => prepareRevision(ci)));
      }

      // Attribute every tracked change in the produced content to this group's revisor.
      foreach (var rev in dummyElement.Descendants().Where(d => d.Attribute(W.author) != null))
      {
          var aut = rev.Attribute(W.author);
          aut.Value = revisor;
      }
      return dummyElement.Elements().ToArray();
  }

  /// <summary>
  /// Clones the notes referenced from <paramref name="revisionElement"/> into <paramref name="destinationPart"/>,
  /// giving each clone the next free id and rewriting the reference to that id. The part is saved when at
  /// least one reference was found.
  /// </summary>
  /// <param name="revisionElement">Element searched for references; its reference ids are edited in place.</param>
  /// <param name="referenceName">Name of the reference elements to look for (footnote or endnote reference).</param>
  /// <param name="sourceNotes">The note elements the references currently point at.</param>
  /// <param name="destinationPart">The footnotes or endnotes part that receives the clones.</param>
  /// <param name="maxId">Highest id currently in use; the first clone receives <c>maxId + 1</c>.</param>
  /// <returns>The highest id in use after copying (unchanged when nothing was referenced).</returns>
  /// <exception cref="OpenXmlPowerToolsException">A reference has no note with the same id in <paramref name="sourceNotes"/>.</exception>
  private static int CopyReferencedNotesIntoConsolidatedPart(
      XElement revisionElement,
      XName referenceName,
      IEnumerable<XElement> sourceNotes,
      OpenXmlPart destinationPart,
      int maxId)
  {
      var references = revisionElement.Descendants(referenceName).ToList();
      if (references.Count == 0)
          return maxId;

      var destinationXDoc = destinationPart.GetXDocument();
      foreach (var reference in references)
      {
          var id = (int)reference.Attribute(W.id);
          var note = sourceNotes.FirstOrDefault(n => (int)n.Attribute(W.id) == id);
          // Fail with the library's own exception instead of an opaque failure inside the XElement copy constructor.
          if (note == null)
              throw new OpenXmlPowerToolsException("Internal Error");

          maxId++;
          var newId = maxId.ToString();
          reference.Attribute(W.id).Value = newId;
          var clonedNote = new XElement(note);
          clonedNote.Attribute(W.id).Value = newId;
          destinationXDoc.Root.Add(clonedNote);
      }
      destinationPart.PutXDocument();
      return maxId;
  }
  /// <summary>
  /// Prepares a revision for consolidation and queues it on the element it belongs next to.
  /// </summary>
  /// <remarks>
  /// The revision element is first passed through MoveRelatedPartsToDestination (from the delta
  /// document's main part to the consolidated document's main part) and the result is stored back
  /// on <paramref name="consolidationInfo"/>. A hashing clone of it, with the W.id attributes of
  /// W.ins / W.del elements removed, is serialized without formatting; that string and its SHA1
  /// hash are recorded on <paramref name="consolidationInfo"/>. The info object is then appended to
  /// the List&lt;ConsolidationInfo&gt; annotation of <paramref name="elementToInsertAfter"/>,
  /// creating the annotation on first use.
  /// </remarks>
  /// <param name="wDocDelta">Document the revision comes from.</param>
  /// <param name="consolidatedWDoc">Document the revision is being consolidated into.</param>
  /// <param name="elementToInsertAfter">Element of the consolidated document that collects the revision in its annotation.</param>
  /// <param name="consolidationInfo">Describes the revision; updated in place.</param>
  /// <param name="settings">Comparer settings handed to CloneBlockLevelContentForHashing.</param>
  private static void AddToAnnotation(
      WordprocessingDocument wDocDelta,
      WordprocessingDocument consolidatedWDoc,
      XElement elementToInsertAfter,
      ConsolidationInfo consolidationInfo,
      WmlComparerSettings settings)
  {
      Package sourcePackage = wDocDelta.MainDocumentPart.OpenXmlPackage.Package;
      Package targetPackage = consolidatedWDoc.MainDocumentPart.OpenXmlPackage.Package;
      PackagePart sourcePart = sourcePackage.GetPart(wDocDelta.MainDocumentPart.Uri);
      PackagePart targetPart = targetPackage.GetPart(consolidatedWDoc.MainDocumentPart.Uri);

      consolidationInfo.RevisionElement = MoveRelatedPartsToDestination(
          sourcePart,
          targetPart,
          consolidationInfo.RevisionElement);

      var hashingClone = (XElement)CloneBlockLevelContentForHashing(
          consolidatedWDoc.MainDocumentPart,
          consolidationInfo.RevisionElement,
          false,
          settings);

      // Revision ids are dropped from the clone so they do not take part in the hash.
      hashingClone
          .Descendants()
          .Where(d => d.Name == W.ins || d.Name == W.del)
          .Attributes(W.id)
          .Remove();

      var revisionString = hashingClone
          .ToString(SaveOptions.DisableFormatting)
          .Replace(" xmlns=\"http://schemas.openxmlformats.org/wordprocessingml/2006/main\"", "");
      var revisionHash = PtUtils.SHA1HashStringForUTF8String(revisionString);
      consolidationInfo.RevisionString = revisionString;
      consolidationInfo.RevisionHash = revisionHash;

      var pendingRevisions = elementToInsertAfter.Annotation<List<ConsolidationInfo>>();
      if (pendingRevisions == null)
      {
          pendingRevisions = new List<ConsolidationInfo>();
          elementToInsertAfter.AddAnnotation(pendingRevisions);
      }
      pendingRevisions.Add(consolidationInfo);
  }
  /// <summary>
  /// Makes sure the styles part defines the "TableGridForRevisions" table style and the
  /// "TableNormal" style it is based on, appending whichever is missing, and then saves the part.
  /// </summary>
  /// <param name="styleDefinitionsPart">The style definitions part to update.</param>
  private static void AddTableGridStyleToStylesPart(StyleDefinitionsPart styleDefinitionsPart)
  {
      var stylesXDoc = styleDefinitionsPart.GetXDocument();

      // Style id -> markup appended when no W.style element with that id exists yet.
      // The entries are processed in this order.
      var requiredStyles = new[]
      {
          new KeyValuePair<string, string>(
              "TableGridForRevisions",
              @"<w:style w:type=""table""
           w:styleId=""TableGridForRevisions""
           xmlns:w=""http://schemas.openxmlformats.org/wordprocessingml/2006/main"">
    <w:name w:val=""Table Grid For Revisions""/>
    <w:basedOn w:val=""TableNormal""/>
    <w:rsid w:val=""0092121A""/>
    <w:rPr>
      <w:rFonts w:asciiTheme=""minorHAnsi""
                w:eastAsiaTheme=""minorEastAsia""
                w:hAnsiTheme=""minorHAnsi""
                w:cstheme=""minorBidi""/>
      <w:sz w:val=""22""/>
      <w:szCs w:val=""22""/>
    </w:rPr>
    <w:tblPr>
      <w:tblBorders>
        <w:top w:val=""single""
               w:sz=""4""
               w:space=""0""
               w:color=""auto""/>
        <w:left w:val=""single""
                w:sz=""4""
                w:space=""0""
                w:color=""auto""/>
        <w:bottom w:val=""single""
                  w:sz=""4""
                  w:space=""0""
                  w:color=""auto""/>
        <w:right w:val=""single""
                 w:sz=""4""
                 w:space=""0""
                 w:color=""auto""/>
        <w:insideH w:val=""single""
                   w:sz=""4""
                   w:space=""0""
                   w:color=""auto""/>
        <w:insideV w:val=""single""
                   w:sz=""4""
                   w:space=""0""
                   w:color=""auto""/>
      </w:tblBorders>
    </w:tblPr>
  </w:style>"),
          new KeyValuePair<string, string>(
              "TableNormal",
              @"<w:style w:type=""table""
           w:default=""1""
           w:styleId=""TableNormal""
           xmlns:w=""http://schemas.openxmlformats.org/wordprocessingml/2006/main"">
    <w:name w:val=""Normal Table""/>
    <w:uiPriority w:val=""99""/>
    <w:semiHidden/>
    <w:unhideWhenUsed/>
    <w:tblPr>
      <w:tblInd w:w=""0""
                w:type=""dxa""/>
      <w:tblCellMar>
        <w:top w:w=""0""
               w:type=""dxa""/>
        <w:left w:w=""108""
                w:type=""dxa""/>
        <w:bottom w:w=""0""
                  w:type=""dxa""/>
        <w:right w:w=""108""
                 w:type=""dxa""/>
      </w:tblCellMar>
    </w:tblPr>
  </w:style>"),
      };

      foreach (var required in requiredStyles)
      {
          var styleId = required.Key;
          var alreadyDefined = stylesXDoc
              .Root
              .Elements(W.style)
              .Any(s => (string)s.Attribute(W.styleId) == styleId);
          if (!alreadyDefined)
              stylesXDoc.Root.Add(XElement.Parse(required.Value));
      }

      styleDefinitionsPart.PutXDocument();
  }
  // Namespace prefix declarations, followed by the mc:Ignorable attribute, that
  // AddFootnotesEndnotesParts places on the root element of a newly created footnotes or
  // endnotes part. The order of the entries is the order in which they are added to that element.
  private static XAttribute[] NamespaceAttributes = new XAttribute[]
  {
      // xmlns:* prefix declarations
      new XAttribute(XNamespace.Xmlns + "wpc", WPC.wpc),
      new XAttribute(XNamespace.Xmlns + "mc", MC.mc),
      new XAttribute(XNamespace.Xmlns + "o", O.o),
      new XAttribute(XNamespace.Xmlns + "r", R.r),
      new XAttribute(XNamespace.Xmlns + "m", M.m),
      new XAttribute(XNamespace.Xmlns + "v", VML.vml),
      new XAttribute(XNamespace.Xmlns + "wp14", WP14.wp14),
      new XAttribute(XNamespace.Xmlns + "wp", WP.wp),
      new XAttribute(XNamespace.Xmlns + "w10", W10.w10),
      new XAttribute(XNamespace.Xmlns + "w", W.w),
      new XAttribute(XNamespace.Xmlns + "w14", W14.w14),
      new XAttribute(XNamespace.Xmlns + "wpg", WPG.wpg),
      new XAttribute(XNamespace.Xmlns + "wpi", WPI.wpi),
      new XAttribute(XNamespace.Xmlns + "wne", WNE.wne),
      new XAttribute(XNamespace.Xmlns + "wps", WPS.wps),

      // prefixes listed as ignorable
      new XAttribute(MC.Ignorable, "w14 wp14"),
  };
  // Makes sure the main document part has both a footnotes part and an endnotes part.
  // A missing part is created, given a standalone UTF-8 XML declaration and an empty
  // w:footnotes / w:endnotes root carrying NamespaceAttributes, and then written back.
  // Parts that already exist are left untouched.
  private static void AddFootnotesEndnotesParts(WordprocessingDocument wDoc)
  {
      var mainPart = wDoc.MainDocumentPart;

      if (mainPart.FootnotesPart == null)
      {
          mainPart.AddNewPart<FootnotesPart>();
          var footnotesPart = mainPart.FootnotesPart;
          var footnotesXDoc = footnotesPart.GetXDocument();
          footnotesXDoc.Declaration.Standalone = "yes";
          footnotesXDoc.Declaration.Encoding = "UTF-8";
          footnotesXDoc.Add(new XElement(W.footnotes, NamespaceAttributes));
          footnotesPart.PutXDocument();
      }

      if (mainPart.EndnotesPart == null)
      {
          mainPart.AddNewPart<EndnotesPart>();
          var endnotesPart = mainPart.EndnotesPart;
          var endnotesXDoc = endnotesPart.GetXDocument();
          endnotesXDoc.Declaration.Standalone = "yes";
          endnotesXDoc.Declaration.Encoding = "UTF-8";
          endnotesXDoc.Add(new XElement(W.endnotes, NamespaceAttributes));
          endnotesPart.PutXDocument();
      }
  }
  // Renumbers every w:footnoteReference / w:endnoteReference in the main document part, in
  // document order, with consecutive ids starting at startingIdForFootnotesEndnotes, and gives
  // the referenced footnote / endnote the same new id. The main part and any existing
  // footnotes / endnotes parts are written back afterwards.
  //
  // Notes are indexed by their ORIGINAL w:id before anything is renumbered. Looking notes up
  // in the live tree while ids are being rewritten can match a note that has already been
  // renumbered whenever a freshly assigned id equals a not-yet-processed original id.
  //
  // Throws OpenXmlPowerToolsException("Invalid document") when a reference has no w:id, or when
  // no not-yet-renumbered note with that id exists (including the case where the corresponding
  // part is absent).
  private static void ChangeFootnoteEndnoteReferencesToUniqueRange(WordprocessingDocument wDoc, int startingIdForFootnotesEndnotes)
  {
      var mainDocPart = wDoc.MainDocumentPart;
      var footnotesPart = mainDocPart.FootnotesPart;
      var endnotesPart = mainDocPart.EndnotesPart;
      var mainDocumentXDoc = mainDocPart.GetXDocument();

      // Snapshot of "original id -> note element"; the first note wins when ids are duplicated,
      // which matches a first-match lookup in document order.
      Func<XDocument, Dictionary<string, XElement>> snapshotByOriginalId = notesXDoc =>
      {
          var byId = new Dictionary<string, XElement>();
          if (notesXDoc == null)
              return byId;
          foreach (var note in notesXDoc.Root.Elements())
          {
              var id = (string)note.Attribute(W.id);
              if (id != null && !byId.ContainsKey(id))
                  byId.Add(id, note);
          }
          return byId;
      };

      var footnotesByOriginalId = snapshotByOriginalId(footnotesPart != null ? footnotesPart.GetXDocument() : null);
      var endnotesByOriginalId = snapshotByOriginalId(endnotesPart != null ? endnotesPart.GetXDocument() : null);

      // Materialize the references so the loop does not run a lazy query over a tree it edits.
      var references = mainDocumentXDoc
          .Root
          .Descendants()
          .Where(d => d.Name == W.footnoteReference || d.Name == W.endnoteReference)
          .ToList();

      foreach (var r in references)
      {
          var idAttribute = r.Attribute(W.id);
          if (idAttribute == null)
              throw new OpenXmlPowerToolsException("Invalid document");

          var oldId = idAttribute.Value;
          var newId = startingIdForFootnotesEndnotes.ToString();
          startingIdForFootnotesEndnotes++;
          idAttribute.Value = newId;

          var notesByOriginalId = r.Name == W.footnoteReference
              ? footnotesByOriginalId
              : endnotesByOriginalId;

          XElement referencedNote;
          if (!notesByOriginalId.TryGetValue(oldId, out referencedNote))
              throw new OpenXmlPowerToolsException("Invalid document");

          // Each note is renumbered once; a second reference to the same original id is
          // reported as an invalid document rather than silently renumbering the note again.
          notesByOriginalId.Remove(oldId);
          referencedNote.Attribute(W.id).Value = newId;
      }

      mainDocPart.PutXDocument();
      if (footnotesPart != null)
          footnotesPart.PutXDocument();
      if (endnotesPart != null)
          endnotesPart.PutXDocument();
  }
  // Compares the main document bodies of wDoc1 and wDoc2 and returns a new WmlDocument
  // (named "Dummy.docx") built on top of the bytes of wmlResult, in which the differences are
  // expressed as tracked revisions.
  //
  // Steps, in order: hash block-level content of both bodies, build comparison-unit lists,
  // correlate them (DetectUnrelatedSources, falling back to Lcs), flatten the correlation to
  // atoms, regenerate the body markup, mark insertions/deletions, process footnotes/endnotes,
  // fix up ids, and finally copy styles and clean up settings. The blocks guarded by s_False
  // are debugging dumps.
  private static WmlDocument ProduceDocumentWithTrackedRevisions(WmlComparerSettings settings, WmlDocument wmlResult, WordprocessingDocument wDoc1, WordprocessingDocument wDoc2)
  {
      var mainPart1 = wDoc1.MainDocumentPart;
      var mainPart2 = wDoc2.MainDocumentPart;

      // save away sectPr so that can set in the newly produced document.
      var savedSectPr = mainPart1.GetXDocument().Root.Element(W.body).Element(W.sectPr);

      AddSha1HashToBlockLevelContent(mainPart1, mainPart1.GetXDocument().Root.Element(W.body), settings);
      AddSha1HashToBlockLevelContent(mainPart2, mainPart2.GetXDocument().Root.Element(W.body), settings);

      var cal1 = WmlComparer.CreateComparisonUnitAtomList(mainPart1, mainPart1.GetXDocument().Root.Element(W.body), settings);
      if (s_False)
      {
          var dump = new StringBuilder();
          foreach (var atom in cal1)
              dump.AppendLine(atom.ToString());
          DocxComparerUtil.NotePad(dump.ToString());
      }

      var cus1 = GetComparisonUnitList(cal1, settings);
      if (s_False)
      {
          DocxComparerUtil.NotePad(ComparisonUnit.ComparisonUnitListToString(cus1));
      }

      var cal2 = WmlComparer.CreateComparisonUnitAtomList(mainPart2, mainPart2.GetXDocument().Root.Element(W.body), settings);
      if (s_False)
      {
          var dump = new StringBuilder();
          foreach (var atom in cal2)
              dump.AppendLine(atom.ToString());
          DocxComparerUtil.NotePad(dump.ToString());
      }

      var cus2 = GetComparisonUnitList(cal2, settings);
      if (s_False)
      {
          DocxComparerUtil.NotePad(ComparisonUnit.ComparisonUnitListToString(cus2));
      }

      if (s_False)
      {
          var dump = new StringBuilder();
          dump.Append("ComparisonUnitList 1 =====").Append(Environment.NewLine).Append(Environment.NewLine);
          dump.Append(ComparisonUnit.ComparisonUnitListToString(cus1));
          dump.Append(Environment.NewLine);
          dump.Append("ComparisonUnitList 2 =====").Append(Environment.NewLine).Append(Environment.NewLine);
          dump.Append(ComparisonUnit.ComparisonUnitListToString(cus2));
          DocxComparerUtil.NotePad(dump.ToString());
      }

      // if cus1 and cus2 have completely different content, then just return the first document
      // deleted, and the second document inserted.
      List<CorrelatedSequence> correlatedSequence = DetectUnrelatedSources(cus1, cus2, settings);
      if (correlatedSequence == null)
          correlatedSequence = Lcs(cus1, cus2, settings);

      if (s_False)
      {
          var dump = new StringBuilder();
          foreach (var sequence in correlatedSequence)
              dump.AppendLine(sequence.ToString());
          DocxComparerUtil.NotePad(dump.ToString());
      }

      // for any deleted or inserted rows, we go into the w:trPr properties, and add the
      // appropriate w:ins or w:del element, and therefore when generating the document, the
      // appropriate row will be marked as deleted or inserted.
      MarkRowsAsDeletedOrInserted(settings, correlatedSequence);

      // the following gets a flattened list of ComparisonUnitAtoms, with status indicated in
      // each ComparisonUnitAtom: Deleted, Inserted, or Equal
      var listOfComparisonUnitAtoms = FlattenToComparisonUnitAtomList(correlatedSequence, settings);
      if (s_False)
      {
          var dump = new StringBuilder();
          foreach (var atom in listOfComparisonUnitAtoms)
              dump.AppendLine(atom.ToString());
          DocxComparerUtil.NotePad(dump.ToString());
      }

      // note - we don't want to do the hack until after flattening all of the groups. At the
      // end of the flattening, we should simply have a list of ComparisonUnitAtoms,
      // appropriately marked as equal, inserted, or deleted.
      // the table id will be hacked in the normal course of events.
      // in the case where a row is deleted, not necessary to hack - the deleted row ID will do.
      // in the case where a row is inserted, not necessary to hack - the inserted row ID will do as well.
      AssembleAncestorUnidsInOrderToRebuildXmlTreeProperly(listOfComparisonUnitAtoms);
      if (s_False)
      {
          var dump = new StringBuilder();
          foreach (var atom in listOfComparisonUnitAtoms)
              dump.AppendLine(atom.ToStringAncestorUnids());
          DocxComparerUtil.NotePad(dump.ToString());
      }

      // and then finally can generate the document with revisions
      using (var resultStream = new MemoryStream())
      {
          resultStream.Write(wmlResult.DocumentByteArray, 0, wmlResult.DocumentByteArray.Length);
          using (var wDocWithRevisions = WordprocessingDocument.Open(resultStream, true))
          {
              var revisionsMainPart = wDocWithRevisions.MainDocumentPart;
              var xDoc = revisionsMainPart.GetXDocument();

              // namespace declarations and markup-compatibility attributes of the existing root
              // are carried over to the regenerated root
              var rootNamespaceAttributes = xDoc
                  .Root
                  .Attributes()
                  .Where(a => a.IsNamespaceDeclaration || a.Name.Namespace == MC.mc)
                  .ToList();

              // ======================================
              // The following produces a new valid WordprocessingML document from the listOfComparisonUnitAtoms
              var newBodyChildren = ProduceNewWmlMarkupFromCorrelatedSequence(
                  revisionsMainPart,
                  listOfComparisonUnitAtoms,
                  settings);

              var newXDoc = new XDocument(
                  new XElement(W.document,
                      rootNamespaceAttributes,
                      new XElement(W.body, newBodyChildren)));

              MarkContentAsDeletedOrInserted(newXDoc, settings);
              CoalesceAdjacentRunsWithIdenticalFormatting(newXDoc);
              IgnorePt14Namespace(newXDoc.Root);

              ProcessFootnoteEndnote(settings, listOfComparisonUnitAtoms, mainPart1, mainPart2, newXDoc);
              RectifyFootnoteEndnoteIds(mainPart1, mainPart2, revisionsMainPart, newXDoc, settings);
              ConjoinDeletedInsertedParagraphMarks(revisionsMainPart, newXDoc);
              FixUpRevisionIds(wDocWithRevisions, newXDoc);

              // little bit of cleanup
              MoveLastSectPrToChildOfBody(newXDoc);
              var orderedRoot = (XElement)WordprocessingMLUtil.WmlOrderElementsPerStandard(newXDoc.Root);
              xDoc.Root.ReplaceWith(orderedRoot);

              /**********************************************************************************************/
              // temporary code to remove sections. When remove this code, get validation errors for some ITU documents.
              xDoc.Root.Descendants(W.sectPr).Remove();

              // move w:sectPr from source document into newly generated document.
              if (savedSectPr != null)
              {
                  // add everything but headers/footers
                  var clonedSectPr = new XElement(W.sectPr,
                      savedSectPr.Attributes(),
                      savedSectPr.Element(W.type),
                      savedSectPr.Element(W.pgSz),
                      savedSectPr.Element(W.pgMar),
                      savedSectPr.Element(W.cols),
                      savedSectPr.Element(W.titlePg));
                  revisionsMainPart.GetXDocument().Root.Element(W.body).Add(clonedSectPr);
              }
              /**********************************************************************************************/

              revisionsMainPart.PutXDocument();

              FixUpFootnotesEndnotesWithCustomMarkers(wDocWithRevisions);
              FixUpRevMarkIds(wDocWithRevisions);
              FixUpDocPrIds(wDocWithRevisions);
              FixUpShapeIds(wDocWithRevisions);
              FixUpShapeTypeIds(wDocWithRevisions);
              AddFootnotesEndnotesStyles(wDocWithRevisions);
              CopyMissingStylesFromOneDocToAnother(wDoc2, wDocWithRevisions);
              DeleteFootnotePropertiesInSettings(wDocWithRevisions);
          }

          // write the cached XDocuments of both source documents back to their parts
          foreach (var part in wDoc1.ContentParts())
              part.PutXDocument();
          foreach (var part in wDoc2.ContentParts())
              part.PutXDocument();

          return new WmlDocument("Dummy.docx", resultStream.ToArray());
      }
  }
  // Removes the w:footnotePr and w:endnotePr children of the settings root element and
  // writes the settings part back. Does nothing when the document has no settings part.
  private static void DeleteFootnotePropertiesInSettings(WordprocessingDocument wDocWithRevisions)
  {
      var settingsPart = wDocWithRevisions.MainDocumentPart.DocumentSettingsPart;
      if (settingsPart == null)
          return;

      var notePropertyElements = settingsPart
          .GetXDocument()
          .Root
          .Elements()
          .Where(e => e.Name == W.footnotePr || e.Name == W.endnotePr);
      notePropertyElements.Remove();

      settingsPart.PutXDocument();
  }
  // Rewrites the main document so that the text of a custom footnote/endnote mark lives in the
  // same run as the reference it belongs to, then writes the main part back.
  //
  // this is pretty random - a bug in Word prevents display of a document if the delText element
  // does not immediately follow the footnoteReference element, in the same run.
  //
  // Shape of the change (run properties and pt14 attributes omitted):
  //
  //   this needs to change
  //     <w:del w:id="7" ...>
  //       <w:r>
  //         <w:footnoteReference w:customMarkFollows="1" w:id="1" />
  //       </w:r>
  //     </w:del>
  //     <w:del w:id="8" ...>
  //       <w:r>
  //         <w:delText>1</w:delText>
  //       </w:r>
  //     </w:del>
  //
  //   to this
  //     <w:del w:id="7" ...>
  //       <w:r>
  //         <w:footnoteReference w:customMarkFollows="1" w:id="1" />
  //         <w:delText>1</w:delText>
  //       </w:r>
  //     </w:del>
  private static void FixUpFootnotesEndnotesWithCustomMarkers(WordprocessingDocument wDocWithRevisions)
  {
      var mainPart = wDocWithRevisions.MainDocumentPart;
      var mainXDoc = mainPart.GetXDocument();

      var cleanedRoot = (XElement)FootnoteEndnoteReferenceCleanupTransform(mainXDoc.Root);
      mainXDoc.Root.ReplaceWith(cleanedRoot);

      mainPart.PutXDocument();
  }
  // Recursive transform used by FixUpFootnotesEndnotesWithCustomMarkers.
  //
  // Returns non-element nodes unchanged. For an element it returns a copy whose child nodes
  // have been transformed recursively. In addition, when the element directly contains
  // w:del / w:ins children whose runs hold a footnote or endnote reference carrying
  // w:customMarkFollows, the mark text is pulled into the run of the reference:
  //   - reference run inside w:del: the w:delText elements found in the runs of the element
  //     that follows the w:del are copied into the reference run and removed from their
  //     original run;
  //   - reference run inside w:ins: same, with w:t.
  // A reference run that already has its own w:delText / w:t is left alone.
  //
  // Fix: every element is now rebuilt recursively. Previously an element that had a direct
  // w:del / w:ins child but failed the custom-mark check was returned as-is, so nothing nested
  // inside it was ever visited by the transform.
  private static object FootnoteEndnoteReferenceCleanupTransform(XNode node)
  {
      var element = node as XElement;
      if (element == null)
          return node;

      // small optimization to eliminate the work for most elements: only run the query when
      // there is at least one direct w:del or w:ins child.
      var needsCleanup =
          (element.Element(W.del) != null || element.Element(W.ins) != null) &&
          element
              .Elements()
              .Where(e => e.Name == W.del || e.Name == W.ins)
              .Elements(W.r)
              .Elements()
              .Where(e => e.Name == W.footnoteReference || e.Name == W.endnoteReference)
              .Attributes(W.customMarkFollows)
              .Any();

      var clone = new XElement(element.Name,
          element.Attributes(),
          element.Nodes().Select(n => FootnoteEndnoteReferenceCleanupTransform(n)));

      if (!needsCleanup)
          return clone;

      // Materialized up front: the loop below adds and removes elements in the same tree.
      var footnoteEndnoteReferencesToAdjust = clone
          .Descendants()
          .Where(d => d.Name == W.footnoteReference || d.Name == W.endnoteReference)
          .Where(d => d.Attribute(W.customMarkFollows) != null)
          .ToList();

      foreach (var fnenr in footnoteEndnoteReferencesToAdjust)
      {
          var run = fnenr.Parent;
          var revision = run.Parent;
          if (run.Name != W.r || revision == null)
              continue;

          // deleted references take their mark from w:delText, inserted ones from w:t
          XName markTextName;
          if (revision.Name == W.del)
              markTextName = W.delText;
          else if (revision.Name == W.ins)
              markTextName = W.t;
          else
              continue;

          if (run.Element(markTextName) != null)
              continue;

          var afterRevision = revision.ElementsAfterSelf().FirstOrDefault();
          if (afterRevision == null)
              continue;

          var markText = afterRevision.Elements(W.r).Elements(markTextName);
          if (markText.Any())
          {
              run.Add(markText); // this will clone and add to run that contains the reference
              markText.Remove(); // this leaves an empty run, does not matter.
          }
      }

      return clone;
  }
  // Copies into wDocTo's styles part every w:style of wDocFrom whose (w:type, w:styleId) pair
  // is not already present in wDocTo, then writes wDocTo's styles part back. The w:default
  // attribute is stripped from each copied style. When wDocFrom contains several styles with
  // the same (w:type, w:styleId) pair, only the first is copied.
  //
  // If wDocFrom has no styles part there is nothing to copy and the method returns without
  // touching wDocTo.
  private static void CopyMissingStylesFromOneDocToAnother(WordprocessingDocument wDocFrom, WordprocessingDocument wDocTo)
  {
      var sourceStylesPart = wDocFrom.MainDocumentPart.StyleDefinitionsPart;
      if (sourceStylesPart == null)
          return;

      var targetStylesPart = wDocTo.MainDocumentPart.StyleDefinitionsPart;
      var revisionsStylesXDoc = targetStylesPart.GetXDocument();
      var afterStylesXDoc = sourceStylesPart.GetXDocument();

      // (type, styleId) pairs already defined in the target; kept up to date as styles are
      // added so each source style costs one hash lookup instead of a scan of the target.
      var knownStyles = new HashSet<Tuple<string, string>>(
          revisionsStylesXDoc
              .Root
              .Elements(W.style)
              .Select(st => Tuple.Create((string)st.Attribute(W.type), (string)st.Attribute(W.styleId))));

      foreach (var style in afterStylesXDoc.Root.Elements(W.style))
      {
          var key = Tuple.Create((string)style.Attribute(W.type), (string)style.Attribute(W.styleId));
          if (!knownStyles.Add(key))
              continue; // already defined in the target (or copied earlier in this loop)

          var cloned = new XElement(style);
          var defaultAttribute = cloned.Attribute(W._default);
          if (defaultAttribute != null)
              defaultAttribute.Remove();
          revisionsStylesXDoc.Root.Add(cloned);
      }

      targetStylesPart.PutXDocument();
  }
  // Adds the footnote / endnote styles to the styles part when the main document uses
  // footnote / endnote references and the styles part does not define them yet.
  //
  // When the main document contains a w:footnoteReference, the styles FootnoteText,
  // FootnoteTextChar and FootnoteReference are ensured; when it contains a w:endnoteReference,
  // EndnoteText, EndnoteTextChar and EndnoteReference are ensured. A style counts as present
  // when any w:style has the matching w:styleId. The styles part is written back whenever the
  // document has footnote or endnote references, whether or not a style was added.
  private static void AddFootnotesEndnotesStyles(WordprocessingDocument wDocWithRevisions)
  {
      var mainXDoc = wDocWithRevisions.MainDocumentPart.GetXDocument();
      var hasFootnotes = mainXDoc.Descendants(W.footnoteReference).Any();
      var hasEndnotes = mainXDoc.Descendants(W.endnoteReference).Any();
      var styleDefinitionsPart = wDocWithRevisions.MainDocumentPart.StyleDefinitionsPart;
      var sXDoc = styleDefinitionsPart.GetXDocument();

      // styleId -> markup of the style to add when that styleId is missing, in the order in
      // which the styles are checked and appended.
      var requiredStyles = new List<KeyValuePair<string, string>>();

      if (hasFootnotes)
      {
          requiredStyles.Add(new KeyValuePair<string, string>("FootnoteText",
@"<w:style w:type=""paragraph""
w:styleId=""FootnoteText""
xmlns:w=""http://schemas.openxmlformats.org/wordprocessingml/2006/main"">
<w:name w:val=""footnote text""/>
<w:basedOn w:val=""Normal""/>
<w:link w:val=""FootnoteTextChar""/>
<w:uiPriority w:val=""99""/>
<w:semiHidden/>
<w:unhideWhenUsed/>
<w:pPr>
<w:spacing w:after=""0""
w:line=""240""
w:lineRule=""auto""/>
</w:pPr>
<w:rPr>
<w:sz w:val=""20""/>
<w:szCs w:val=""20""/>
</w:rPr>
</w:style>"));

          requiredStyles.Add(new KeyValuePair<string, string>("FootnoteTextChar",
@"<w:style w:type=""character""
w:customStyle=""1""
w:styleId=""FootnoteTextChar""
xmlns:w=""http://schemas.openxmlformats.org/wordprocessingml/2006/main"">
<w:name w:val=""Footnote Text Char""/>
<w:basedOn w:val=""DefaultParagraphFont""/>
<w:link w:val=""FootnoteText""/>
<w:uiPriority w:val=""99""/>
<w:semiHidden/>
<w:rPr>
<w:sz w:val=""20""/>
<w:szCs w:val=""20""/>
</w:rPr>
</w:style>"));

          requiredStyles.Add(new KeyValuePair<string, string>("FootnoteReference",
@"<w:style w:type=""character""
w:styleId=""FootnoteReference""
xmlns:w=""http://schemas.openxmlformats.org/wordprocessingml/2006/main"">
<w:name w:val=""footnote reference""/>
<w:basedOn w:val=""DefaultParagraphFont""/>
<w:uiPriority w:val=""99""/>
<w:semiHidden/>
<w:unhideWhenUsed/>
<w:rPr>
<w:vertAlign w:val=""superscript""/>
</w:rPr>
</w:style>"));
      }

      if (hasEndnotes)
      {
          requiredStyles.Add(new KeyValuePair<string, string>("EndnoteText",
@"<w:style w:type=""paragraph""
w:styleId=""EndnoteText""
xmlns:w=""http://schemas.openxmlformats.org/wordprocessingml/2006/main"">
<w:name w:val=""endnote text""/>
<w:basedOn w:val=""Normal""/>
<w:link w:val=""EndnoteTextChar""/>
<w:uiPriority w:val=""99""/>
<w:semiHidden/>
<w:unhideWhenUsed/>
<w:pPr>
<w:spacing w:after=""0""
w:line=""240""
w:lineRule=""auto""/>
</w:pPr>
<w:rPr>
<w:sz w:val=""20""/>
<w:szCs w:val=""20""/>
</w:rPr>
</w:style>"));

          requiredStyles.Add(new KeyValuePair<string, string>("EndnoteTextChar",
@"<w:style w:type=""character""
w:customStyle=""1""
w:styleId=""EndnoteTextChar""
xmlns:w=""http://schemas.openxmlformats.org/wordprocessingml/2006/main"">
<w:name w:val=""Endnote Text Char""/>
<w:basedOn w:val=""DefaultParagraphFont""/>
<w:link w:val=""EndnoteText""/>
<w:uiPriority w:val=""99""/>
<w:semiHidden/>
<w:rPr>
<w:sz w:val=""20""/>
<w:szCs w:val=""20""/>
</w:rPr>
</w:style>"));

          requiredStyles.Add(new KeyValuePair<string, string>("EndnoteReference",
@"<w:style w:type=""character""
w:styleId=""EndnoteReference""
xmlns:w=""http://schemas.openxmlformats.org/wordprocessingml/2006/main"">
<w:name w:val=""endnote reference""/>
<w:basedOn w:val=""DefaultParagraphFont""/>
<w:uiPriority w:val=""99""/>
<w:semiHidden/>
<w:unhideWhenUsed/>
<w:rPr>
<w:vertAlign w:val=""superscript""/>
</w:rPr>
</w:style>"));
      }

      foreach (var required in requiredStyles)
      {
          var alreadyDefined = sXDoc
              .Root
              .Elements(W.style)
              .Any(s => (string)s.Attribute(W.styleId) == required.Key);
          if (!alreadyDefined)
              sXDoc.Root.Add(XElement.Parse(required.Value));
      }

      if (hasFootnotes || hasEndnotes)
      {
          styleDefinitionsPart.PutXDocument();
      }
  }
  // it is possible, per the algorithm, for the algorithm to find that the paragraph mark for a
  // single paragraph has been both inserted and deleted. If the algorithm set such marks to
  // equal instead, it would sometimes equate paragraph marks that should not be equated.
  //
  // This method runs ConjoinMultipleParagraphMarks over newXDoc and over the footnotes and
  // endnotes parts of mainDocumentPart (when present); the two note parts are written back,
  // newXDoc is only modified in memory.
  private static void ConjoinDeletedInsertedParagraphMarks(MainDocumentPart mainDocumentPart, XDocument newXDoc)
  {
      ConjoinMultipleParagraphMarks(newXDoc);

      var footnotesPart = mainDocumentPart.FootnotesPart;
      if (footnotesPart != null)
      {
          ConjoinMultipleParagraphMarks(footnotesPart.GetXDocument());
          footnotesPart.PutXDocument();
      }

      var endnotesPart = mainDocumentPart.EndnotesPart;
      if (endnotesPart != null)
      {
          ConjoinMultipleParagraphMarks(endnotesPart.GetXDocument());
          endnotesPart.PutXDocument();
      }
  }
  // Replaces the root element of xDoc with the result of running ConjoinTransform over it.
  private static void ConjoinMultipleParagraphMarks(XDocument xDoc)
  {
      var originalRoot = xDoc.Root;
      var conjoinedRoot = ConjoinTransform(originalRoot);
      originalRoot.ReplaceWith(conjoinedRoot);
  }
  // Recursive transform that collapses paragraphs holding two or more w:pPr children.
  //
  // Such a paragraph is rebuilt as: its attributes, a copy of its FIRST w:pPr (with any
  // w:ins / w:del children of w:rPr and the pt14 Status attribute removed), followed by its
  // child elements other than w:pPr. The children of a collapsed paragraph are copied as they
  // are, without being transformed. Every other element is copied with its nodes transformed
  // recursively; non-element nodes are returned unchanged.
  private static object ConjoinTransform(XNode node)
  {
      var element = node as XElement;
      if (element == null)
          return node;

      var hasSeveralParagraphProperties =
          element.Name == W.p && element.Elements(W.pPr).Skip(1).Any();

      if (!hasSeveralParagraphProperties)
      {
          return new XElement(element.Name,
              element.Attributes(),
              element.Nodes().Select(n => ConjoinTransform(n)));
      }

      var firstPPr = new XElement(element.Element(W.pPr));
      firstPPr
          .Elements(W.rPr)
          .Elements()
          .Where(r => r.Name == W.ins || r.Name == W.del)
          .Remove();
      firstPPr.Attributes(PtOpenXml.Status).Remove();

      return new XElement(W.p,
          element.Attributes(),
          firstPPr,
          element.Elements().Where(c => c.Name != W.pPr));
  }
  // Replaces the root element of newXDoc with the result of running
  // MarkContentAsDeletedOrInsertedTransform over it.
  private static void MarkContentAsDeletedOrInserted(XDocument newXDoc, WmlComparerSettings settings)
  {
      var originalRoot = newXDoc.Root;
      var markedRoot = MarkContentAsDeletedOrInsertedTransform(originalRoot, settings);
      originalRoot.ReplaceWith(markedRoot);
  }
  // Recursive transform that turns pt14 Status attributes into revision markup.
  //
  // w:r   - the distinct Status values of its w:t, w:delText and allowable run children
  //         (not descending into w:txbxContent) are collected. More than one distinct value
  //         is an internal error. "Deleted" wraps the run in w:del, "Inserted" wraps it in
  //         w:ins; the wrapper carries author, id (taken from s_MaxId) and date from settings.
  //         Otherwise the run is copied like any other element.
  // w:pPr - without a Status attribute it is copied recursively. With "Deleted" / "Inserted"
  //         a copy of the w:pPr gets an empty w:del / w:ins appended to its w:rPr (a w:rPr is
  //         created as first child when missing). Any other Status value is an internal error.
  // other - elements are copied with their nodes transformed; other nodes are returned as-is.
  private static object MarkContentAsDeletedOrInsertedTransform(XNode node, WmlComparerSettings settings)
  {
      var element = node as XElement;
      if (element == null)
          return node;

      if (element.Name == W.r)
      {
          var statuses = element
              .DescendantsTrimmed(W.txbxContent)
              .Where(d => d.Name == W.t || d.Name == W.delText || AllowableRunChildren.Contains(d.Name))
              .Attributes(PtOpenXml.Status)
              .Select(a => (string)a)
              .Distinct()
              .ToList();

          if (statuses.Count > 1)
              throw new OpenXmlPowerToolsException("Internal error - have both deleted and inserted text elements in the same run.");

          if (statuses.Count == 1)
          {
              XName wrapperName = null;
              if (statuses[0] == "Deleted")
                  wrapperName = W.del;
              else if (statuses[0] == "Inserted")
                  wrapperName = W.ins;

              if (wrapperName != null)
              {
                  // the wrapper's id is taken before the nested content is transformed
                  return new XElement(wrapperName,
                      new XAttribute(W.author, settings.AuthorForRevisions),
                      new XAttribute(W.id, s_MaxId++),
                      new XAttribute(W.date, settings.DateTimeForRevisions),
                      new XElement(W.r,
                          element.Attributes(),
                          element.Nodes().Select(n => MarkContentAsDeletedOrInsertedTransform(n, settings))));
              }
          }
          // no status, or a status other than Deleted / Inserted: plain copy below
      }
      else if (element.Name == W.pPr)
      {
          var status = (string)element.Attribute(PtOpenXml.Status);
          if (status != null)
          {
              XName markName;
              if (status == "Deleted")
                  markName = W.del;
              else if (status == "Inserted")
                  markName = W.ins;
              else
                  throw new OpenXmlPowerToolsException("Internal error");

              var markedPPr = new XElement(element);
              var runProperties = markedPPr.Element(W.rPr);
              if (runProperties == null)
              {
                  runProperties = new XElement(W.rPr);
                  markedPPr.AddFirst(runProperties);
              }
              runProperties.Add(new XElement(markName,
                  new XAttribute(W.author, settings.AuthorForRevisions),
                  new XAttribute(W.id, s_MaxId++),
                  new XAttribute(W.date, settings.DateTimeForRevisions)));
              return markedPPr;
          }
      }

      return new XElement(element.Name,
          element.Attributes(),
          element.Nodes().Select(n => MarkContentAsDeletedOrInsertedTransform(n, settings)));
  }
  // Renumbers the w:id of every w:ins / w:del element with consecutive values starting at 1:
  // first those in newXDoc, then those in the footnotes part, then those in the endnotes part.
  // The footnotes and endnotes parts (when present) are written back; newXDoc is only
  // modified in memory.
  private static void FixUpRevisionIds(WordprocessingDocument wDocWithRevisions, XDocument newXDoc)
  {
      var mainPart = wDocWithRevisions.MainDocumentPart;

      var footnoteRevisions = Enumerable.Empty<XElement>();
      if (mainPart.FootnotesPart != null)
      {
          footnoteRevisions = mainPart.FootnotesPart
              .GetXDocument()
              .Descendants()
              .Where(d => d.Name == W.ins || d.Name == W.del);
      }

      var endnoteRevisions = Enumerable.Empty<XElement>();
      if (mainPart.EndnotesPart != null)
      {
          endnoteRevisions = mainPart.EndnotesPart
              .GetXDocument()
              .Descendants()
              .Where(d => d.Name == W.ins || d.Name == W.del);
      }

      var mainRevisions = newXDoc
          .Descendants()
          .Where(d => d.Name == W.ins || d.Name == W.del);

      var nextRevisionId = 1;
      foreach (var revision in mainRevisions.Concat(footnoteRevisions).Concat(endnoteRevisions))
      {
          revision.Attribute(W.id).Value = nextRevisionId.ToString();
          nextRevisionId++;
      }

      if (mainPart.FootnotesPart != null)
          mainPart.FootnotesPart.PutXDocument();
      if (mainPart.EndnotesPart != null)
          mainPart.EndnotesPart.PutXDocument();
  }
  /// <summary>
  /// Ensures the root element declares the "pt14" namespace prefix and lists "pt14" in its
  /// mc:Ignorable attribute, adding the declaration and/or the attribute entry when missing.
  /// </summary>
  /// <param name="root">Root element to update in place.</param>
  private static void IgnorePt14Namespace(XElement root)
  {
      var pt14Declaration = XNamespace.Xmlns + "pt14";
      if (root.Attribute(pt14Declaration) == null)
          root.Add(new XAttribute(pt14Declaration, PtOpenXml.pt.NamespaceName));

      var ignorableAtt = root.Attribute(MC.Ignorable);
      if (ignorableAtt == null)
      {
          // No mc:Ignorable yet: create it with pt14 as its only entry.
          root.Add(new XAttribute(MC.Ignorable, "pt14"));
          return;
      }

      // mc:Ignorable is treated as a space-separated prefix list; append pt14 only when absent.
      var current = ignorableAtt.Value;
      var alreadyListed = current.Split(' ').Any(prefix => prefix == "pt14");
      if (!alreadyListed)
          ignorableAtt.Value = current + " pt14";
  }
  /// <summary>
  /// For every w:p returned by DescendantsTrimmed(W.txbxContent) on the document root, replaces the
  /// paragraph's child nodes with those produced by
  /// WordprocessingMLUtil.CoalesceAdjacentRunsWithIdenticalFormatting for that paragraph.
  /// </summary>
  /// <param name="xDoc">Document whose paragraphs are rewritten in place.</param>
  private static void CoalesceAdjacentRunsWithIdenticalFormatting(XDocument xDoc)
  {
      // Enumeration is intentionally left lazy (no ToList), exactly as before, so each paragraph is
      // rewritten as the traversal reaches it.
      foreach (var paragraph in xDoc.Root.DescendantsTrimmed(W.txbxContent).Where(e => e.Name == W.p))
      {
          var coalesced = WordprocessingMLUtil.CoalesceAdjacentRunsWithIdenticalFormatting(paragraph);
          paragraph.ReplaceNodes(coalesced.Nodes());
      }
  }
  /// <summary>
  /// For every w:footnoteReference / w:endnoteReference atom in the flattened comparison list,
  /// rebuilds the content of the referenced footnote or endnote so that it carries the comparison
  /// result:
  /// - Equal references: the 'before' and 'after' notes are compared with Lcs and the merged markup
  ///   replaces the content of the note in the 'after' notes document.
  /// - Inserted references: the note in the 'after' notes document is rebuilt as entirely inserted.
  /// - Deleted references: the note in the 'before' notes document is rebuilt as entirely deleted.
  /// The notes XDocuments are modified in place; nothing is written back to the parts here.
  /// </summary>
  /// <param name="settings">Comparer settings passed through to the comparison helpers.</param>
  /// <param name="listOfComparisonUnitAtoms">Flattened atoms of the main document.</param>
  /// <param name="mainDocumentPartBefore">Main part of the 'before' document.</param>
  /// <param name="mainDocumentPartAfter">Main part of the 'after' document.</param>
  /// <param name="mainDocumentXDoc">Not used by this method; kept for signature compatibility.</param>
  /// <exception cref="OpenXmlPowerToolsException">
  /// A reference atom has a status other than Equal / Inserted / Deleted, or (Equal and Deleted
  /// cases) the rebuilt markup contains no w:footnote / w:endnote element.
  /// </exception>
  private static void ProcessFootnoteEndnote(
      WmlComparerSettings settings,
      List<ComparisonUnitAtom> listOfComparisonUnitAtoms,
      MainDocumentPart mainDocumentPartBefore,
      MainDocumentPart mainDocumentPartAfter,
      XDocument mainDocumentXDoc)
  {
      var footnotesPartBefore = mainDocumentPartBefore.FootnotesPart;
      var endnotesPartBefore = mainDocumentPartBefore.EndnotesPart;
      var footnotesPartAfter = mainDocumentPartAfter.FootnotesPart;
      var endnotesPartAfter = mainDocumentPartAfter.EndnotesPart;

      XDocument footnotesPartBeforeXDoc = null;
      if (footnotesPartBefore != null)
          footnotesPartBeforeXDoc = footnotesPartBefore.GetXDocument();
      XDocument footnotesPartAfterXDoc = null;
      if (footnotesPartAfter != null)
          footnotesPartAfterXDoc = footnotesPartAfter.GetXDocument();
      XDocument endnotesPartBeforeXDoc = null;
      if (endnotesPartBefore != null)
          endnotesPartBeforeXDoc = endnotesPartBefore.GetXDocument();
      XDocument endnotesPartAfterXDoc = null;
      if (endnotesPartAfter != null)
          endnotesPartAfterXDoc = endnotesPartAfter.GetXDocument();

      var possiblyModifiedFootnotesEndNotes = listOfComparisonUnitAtoms
          .Where(cua =>
              cua.ContentElement.Name == W.footnoteReference ||
              cua.ContentElement.Name == W.endnoteReference)
          .ToList();

      foreach (var fn in possiblyModifiedFootnotesEndNotes)
      {
          string beforeId = null;
          if (fn.ContentElementBefore != null)
              beforeId = (string)fn.ContentElementBefore.Attribute(W.id);
          var afterId = (string)fn.ContentElement.Attribute(W.id);

          // Pick the footnotes or the endnotes parts / documents, depending on the reference kind.
          var isFootnote = fn.ContentElement.Name == W.footnoteReference;
          OpenXmlPart partToUseBefore;
          OpenXmlPart partToUseAfter;
          XDocument notesBeforeXDoc;
          XDocument notesAfterXDoc;
          if (isFootnote)
          {
              partToUseBefore = footnotesPartBefore;
              partToUseAfter = footnotesPartAfter;
              notesBeforeXDoc = footnotesPartBeforeXDoc;
              notesAfterXDoc = footnotesPartAfterXDoc;
          }
          else
          {
              partToUseBefore = endnotesPartBefore;
              partToUseAfter = endnotesPartAfter;
              notesBeforeXDoc = endnotesPartBeforeXDoc;
              notesAfterXDoc = endnotesPartAfterXDoc;
          }

          if (fn.CorrelationStatus == CorrelationStatus.Equal)
          {
              var footnoteEndnoteBefore = LookUpFootnoteOrEndnote(notesBeforeXDoc, beforeId);
              var footnoteEndnoteAfter = LookUpFootnoteOrEndnote(notesAfterXDoc, afterId);

              AddSha1HashToBlockLevelContent(partToUseBefore, footnoteEndnoteBefore, settings);
              AddSha1HashToBlockLevelContent(partToUseAfter, footnoteEndnoteAfter, settings);

              var fncal1 = WmlComparer.CreateComparisonUnitAtomList(partToUseBefore, footnoteEndnoteBefore, settings);
              var fncus1 = GetComparisonUnitList(fncal1, settings);
              var fncal2 = WmlComparer.CreateComparisonUnitAtomList(partToUseAfter, footnoteEndnoteAfter, settings);
              var fncus2 = GetComparisonUnitList(fncal2, settings);

              // Nothing to compare when both notes are empty.
              if (fncus1.Length == 0 && fncus2.Length == 0)
                  continue;

              var fnCorrelatedSequence = Lcs(fncus1, fncus2, settings);
              if (s_False)
                  DumpFootnoteEndnoteDebugListing(fnCorrelatedSequence);

              // for any deleted or inserted rows, we go into the w:trPr properties, and add the appropriate
              // w:ins or w:del element, and therefore when generating the document, the appropriate row will
              // be marked as deleted or inserted.
              MarkRowsAsDeletedOrInserted(settings, fnCorrelatedSequence);

              // flattened list of ComparisonUnitAtoms, with status indicated in each atom: Deleted, Inserted, or Equal
              var fnListOfComparisonUnitAtoms = FlattenToComparisonUnitAtomList(fnCorrelatedSequence, settings);
              if (s_False)
                  DumpFootnoteEndnoteDebugListing(fnListOfComparisonUnitAtoms);

              // NOTE(review): this branch has always looked for the first run among the paragraph's direct
              // children only, whereas the Inserted / Deleted branches search all descendants. The
              // difference is preserved here - confirm whether it is intentional.
              var newContentElement = BuildRevisedFootnoteEndnote(
                  partToUseAfter, fnListOfComparisonUnitAtoms, settings, isFootnote, true);
              if (newContentElement == null)
                  throw new OpenXmlPowerToolsException("Internal error");
              footnoteEndnoteAfter.ReplaceNodes(newContentElement.Nodes());
          }
          else if (fn.CorrelationStatus == CorrelationStatus.Inserted)
          {
              var footnoteEndnoteAfter = LookUpFootnoteOrEndnote(notesAfterXDoc, afterId);

              AddSha1HashToBlockLevelContent(partToUseAfter, footnoteEndnoteAfter, settings);

              var fncal2 = WmlComparer.CreateComparisonUnitAtomList(partToUseAfter, footnoteEndnoteAfter, settings);
              var fncus2 = GetComparisonUnitList(fncal2, settings);

              // The whole note is new: one sequence with nothing on the 'before' side.
              var insertedCorrSequ = new List<CorrelatedSequence>()
              {
                  new CorrelatedSequence()
                  {
                      ComparisonUnitArray1 = null,
                      ComparisonUnitArray2 = fncus2,
                      CorrelationStatus = CorrelationStatus.Inserted,
                  },
              };
              if (s_False)
                  DumpFootnoteEndnoteDebugListing(insertedCorrSequ);

              MarkRowsAsDeletedOrInserted(settings, insertedCorrSequ);
              var fnListOfComparisonUnitAtoms = FlattenToComparisonUnitAtomList(insertedCorrSequ, settings);

              var newContentElement = BuildRevisedFootnoteEndnote(
                  partToUseAfter, fnListOfComparisonUnitAtoms, settings, isFootnote, false);

              // Unlike the other branches, a missing rebuilt note is tolerated here (the original
              // "Internal error" throw was deliberately disabled); the note is then left untouched.
              if (newContentElement != null)
                  footnoteEndnoteAfter.ReplaceNodes(newContentElement.Nodes());
          }
          else if (fn.CorrelationStatus == CorrelationStatus.Deleted)
          {
              // The id is read from fn.ContentElement (afterId) and looked up in the 'before' notes;
              // presumably a Deleted atom's ContentElement comes from the 'before' document - confirm.
              var footnoteEndnoteBefore = LookUpFootnoteOrEndnote(notesBeforeXDoc, afterId);

              // Fix: for deleted footnotes the 'before' part used to be left null (the 'after' part
              // variable was assigned by mistake), so the calls below received a null part. The
              // 'before' part is now used for footnotes exactly as it already was for endnotes.
              AddSha1HashToBlockLevelContent(partToUseBefore, footnoteEndnoteBefore, settings);

              var fncal2 = WmlComparer.CreateComparisonUnitAtomList(partToUseBefore, footnoteEndnoteBefore, settings);
              var fncus2 = GetComparisonUnitList(fncal2, settings);

              // The whole note is gone: one sequence with nothing on the 'after' side.
              var deletedCorrSequ = new List<CorrelatedSequence>()
              {
                  new CorrelatedSequence()
                  {
                      ComparisonUnitArray1 = fncus2,
                      ComparisonUnitArray2 = null,
                      CorrelationStatus = CorrelationStatus.Deleted,
                  },
              };
              if (s_False)
                  DumpFootnoteEndnoteDebugListing(deletedCorrSequ);

              MarkRowsAsDeletedOrInserted(settings, deletedCorrSequ);
              var fnListOfComparisonUnitAtoms = FlattenToComparisonUnitAtomList(deletedCorrSequ, settings);
              if (fnListOfComparisonUnitAtoms.Any())
              {
                  var newContentElement = BuildRevisedFootnoteEndnote(
                      partToUseBefore, fnListOfComparisonUnitAtoms, settings, isFootnote, false);
                  if (newContentElement == null)
                      throw new OpenXmlPowerToolsException("Internal error");
                  footnoteEndnoteBefore.ReplaceNodes(newContentElement.Nodes());
              }
          }
          else
              throw new OpenXmlPowerToolsException("Internal error");
      }
  }

  // Returns the first child of the notes document root whose w:id equals the given id, or null when
  // there is none. The document itself is expected to be non-null.
  private static XElement LookUpFootnoteOrEndnote(XDocument notesXDoc, string id)
  {
      return notesXDoc
          .Root
          .Elements()
          .FirstOrDefault(note => (string)note.Attribute(W.id) == id);
  }

  // Rebuilds a footnote / endnote from its flattened atoms: assembles the ancestor unids, produces
  // the new markup, makes sure a note-reference run exists, orders the elements per the standard,
  // and returns the first w:footnote / w:endnote found in the result (null when there is none).
  private static XElement BuildRevisedFootnoteEndnote(
      OpenXmlPart part,
      List<ComparisonUnitAtom> fnListOfComparisonUnitAtoms,
      WmlComparerSettings settings,
      bool isFootnote,
      bool directChildRunsOnly)
  {
      // hack = set the guid ID of the table, row, or cell from the 'before' document to be equal to the
      // 'after' document. This is done only after flattening, when the list simply holds atoms marked as
      // equal, inserted, or deleted. Deleted / inserted rows keep their own IDs.
      AssembleAncestorUnidsInOrderToRebuildXmlTreeProperly(fnListOfComparisonUnitAtoms);

      var newFootnoteEndnoteChildren = ProduceNewWmlMarkupFromCorrelatedSequence(part, fnListOfComparisonUnitAtoms, settings);
      var tempElement = new XElement(W.body, newFootnoteEndnoteChildren);

      // NOTE(review): the check only recognizes the "FootnoteReference" style / w:footnoteRef, also
      // when the note is an endnote. That long-standing behavior is preserved - confirm intent.
      var hasFootnoteReference = tempElement.Descendants(W.r).Any(r =>
          (string)r.Elements(W.rPr).Elements(W.rStyle).Attributes(W.val).FirstOrDefault() == "FootnoteReference" ||
          r.Descendants(W.footnoteRef).Any());

      if (!hasFootnoteReference)
      {
          var firstPara = tempElement.Descendants(W.p).FirstOrDefault();
          if (firstPara != null)
          {
              var firstRun = directChildRunsOnly
                  ? firstPara.Element(W.r)
                  : firstPara.Descendants(W.r).FirstOrDefault();
              if (firstRun != null)
              {
                  // Put the note-reference mark in front of the first run of the first paragraph.
                  firstRun.AddBeforeSelf(
                      new XElement(W.r,
                          new XElement(W.rPr,
                              new XElement(W.rStyle,
                                  new XAttribute(W.val, isFootnote ? "FootnoteReference" : "EndnoteReference"))),
                          new XElement(isFootnote ? W.footnoteRef : W.endnoteRef)));
              }
          }
      }

      var newTempElement = (XElement)WordprocessingMLUtil.WmlOrderElementsPerStandard(tempElement);
      return newTempElement
          .Descendants()
          .FirstOrDefault(d => d.Name == W.footnote || d.Name == W.endnote);
  }

  // Debugging aid: shows one line per item (item.ToString()) via DocxComparerUtil.NotePad.
  // Only called behind the s_False switch.
  private static void DumpFootnoteEndnoteDebugListing<T>(IEnumerable<T> items)
  {
      var sb = new StringBuilder();
      foreach (var item in items)
          sb.Append(item.ToString()).Append(Environment.NewLine);
      var sbs = sb.ToString();
      DocxComparerUtil.NotePad(sbs);
  }
  /// <summary>
  /// Rebuilds the footnotes and endnotes of the document with revisions so that their ids match the
  /// order in which they are referenced from the main document:
  /// 1. every w:footnote / w:endnote whose id is not "-1" or "0" is removed from the
  ///    with-revisions notes documents;
  /// 2. each w:footnoteReference / w:endnoteReference in <paramref name="mainDocumentXDoc"/> is
  ///    renumbered 1, 2, 3, ... in document order, and a copy of the note it referred to (looked up
  ///    in the 'after' notes first, then in the 'before' notes) is appended under the new id;
  /// 3. each with-revisions notes document is marked up for insertions / deletions, has adjacent
  ///    runs coalesced, is re-ordered per the standard, gets the pt14 ignorable namespace, and is
  ///    written back to its part.
  /// </summary>
  /// <param name="mainDocumentPartBefore">Main part of the 'before' document.</param>
  /// <param name="mainDocumentPartAfter">Main part of the 'after' document.</param>
  /// <param name="mainDocumentPartWithRevisions">Main part of the result document.</param>
  /// <param name="mainDocumentXDoc">Main-document markup whose note references are renumbered in place.</param>
  /// <param name="settings">Comparer settings passed to the revision mark-up step.</param>
  /// <exception cref="OpenXmlPowerToolsException">
  /// A referenced note cannot be found in either source document, or the result document has no
  /// notes part to receive it.
  /// </exception>
  private static void RectifyFootnoteEndnoteIds(
      MainDocumentPart mainDocumentPartBefore,
      MainDocumentPart mainDocumentPartAfter,
      MainDocumentPart mainDocumentPartWithRevisions,
      XDocument mainDocumentXDoc,
      WmlComparerSettings settings)
  {
      var footnotesPartBefore = mainDocumentPartBefore.FootnotesPart;
      var endnotesPartBefore = mainDocumentPartBefore.EndnotesPart;
      var footnotesPartAfter = mainDocumentPartAfter.FootnotesPart;
      var endnotesPartAfter = mainDocumentPartAfter.EndnotesPart;
      var footnotesPartWithRevisions = mainDocumentPartWithRevisions.FootnotesPart;
      var endnotesPartWithRevisions = mainDocumentPartWithRevisions.EndnotesPart;

      XDocument footnotesPartBeforeXDoc = null;
      if (footnotesPartBefore != null)
          footnotesPartBeforeXDoc = footnotesPartBefore.GetXDocument();
      XDocument footnotesPartAfterXDoc = null;
      if (footnotesPartAfter != null)
          footnotesPartAfterXDoc = footnotesPartAfter.GetXDocument();
      XDocument footnotesPartWithRevisionsXDoc = null;
      if (footnotesPartWithRevisions != null)
      {
          footnotesPartWithRevisionsXDoc = footnotesPartWithRevisions.GetXDocument();
          // Keep only the notes with id -1 and 0; everything else is re-added below.
          footnotesPartWithRevisionsXDoc
              .Root
              .Elements(W.footnote)
              .Where(e => (string)e.Attribute(W.id) != "-1" && (string)e.Attribute(W.id) != "0")
              .Remove();
      }

      XDocument endnotesPartBeforeXDoc = null;
      if (endnotesPartBefore != null)
          endnotesPartBeforeXDoc = endnotesPartBefore.GetXDocument();
      XDocument endnotesPartAfterXDoc = null;
      if (endnotesPartAfter != null)
          endnotesPartAfterXDoc = endnotesPartAfter.GetXDocument();
      XDocument endnotesPartWithRevisionsXDoc = null;
      if (endnotesPartWithRevisions != null)
      {
          endnotesPartWithRevisionsXDoc = endnotesPartWithRevisions.GetXDocument();
          endnotesPartWithRevisionsXDoc
              .Root
              .Elements(W.endnote)
              .Where(e => (string)e.Attribute(W.id) != "-1" && (string)e.Attribute(W.id) != "0")
              .Remove();
      }

      CopyRenumberedNotesToRevisionsPart(
          mainDocumentXDoc, W.footnoteReference,
          footnotesPartAfterXDoc, footnotesPartBeforeXDoc, footnotesPartWithRevisionsXDoc);
      CopyRenumberedNotesToRevisionsPart(
          mainDocumentXDoc, W.endnoteReference,
          endnotesPartAfterXDoc, endnotesPartBeforeXDoc, endnotesPartWithRevisionsXDoc);

      if (footnotesPartWithRevisionsXDoc != null)
          FinishRevisedNotesPart(footnotesPartWithRevisions, footnotesPartWithRevisionsXDoc, settings);
      if (endnotesPartWithRevisionsXDoc != null)
          FinishRevisedNotesPart(endnotesPartWithRevisions, endnotesPartWithRevisionsXDoc, settings);
  }

  // Renumbers every reference of the given kind in the main document (1-based, document order) and
  // appends a copy of the referenced note, carrying the new id, to the with-revisions notes document.
  private static void CopyRenumberedNotesToRevisionsPart(
      XDocument mainDocumentXDoc,
      XName referenceName,
      XDocument notesAfterXDoc,
      XDocument notesBeforeXDoc,
      XDocument notesWithRevisionsXDoc)
  {
      var nextId = 1;
      foreach (var reference in mainDocumentXDoc.Descendants(referenceName))
      {
          var oldId = (string)reference.Attribute(W.id);
          var newId = nextId.ToString();
          nextId++;
          reference.Attribute(W.id).Value = newId;

          // Prefer the note from the 'after' document; fall back to the 'before' document.
          // Either notes document may be missing (null), in which case it is simply not searched.
          var note = FindNoteForRenumbering(notesAfterXDoc, oldId);
          if (note == null)
              note = FindNoteForRenumbering(notesBeforeXDoc, oldId);

          // A reference with no note to copy, or no notes part to copy it into, is an
          // inconsistency in the inputs; report it the way the rest of this file does.
          if (note == null || notesWithRevisionsXDoc == null)
              throw new OpenXmlPowerToolsException("Internal error");

          var cloned = new XElement(note);
          cloned.Attribute(W.id).Value = newId;
          notesWithRevisionsXDoc
              .Root
              .Add(cloned);
      }
  }

  // Returns the first child of the notes document root whose w:id equals the given id; null when the
  // document is missing or holds no such note.
  private static XElement FindNoteForRenumbering(XDocument notesXDoc, string id)
  {
      if (notesXDoc == null)
          return null;
      return notesXDoc
          .Root
          .Elements()
          .FirstOrDefault(e => (string)e.Attribute(W.id) == id);
  }

  // Final clean-up of a rebuilt notes document, followed by writing it back to its part.
  private static void FinishRevisedNotesPart(OpenXmlPart notesPart, XDocument notesXDoc, WmlComparerSettings settings)
  {
      MarkContentAsDeletedOrInserted(notesXDoc, settings);
      CoalesceAdjacentRunsWithIdenticalFormatting(notesXDoc);
      var newXDocRoot = (XElement)WordprocessingMLUtil.WmlOrderElementsPerStandard(notesXDoc.Root);
      notesXDoc.Root.ReplaceWith(newXDocRoot);
      IgnorePt14Namespace(notesXDoc.Root);
      notesPart.PutXDocument();
  }
  /// <summary>
  /// Fills in <c>AncestorUnids</c> for every atom in the list, so that the later tree-rebuilding
  /// step can put each atom back under the right paragraph / table / row / cell / text box.
  /// </summary>
  /// <remarks>
  /// Each ComparisonUnitAtom keeps its own list of ancestor unids (rather than relying on the Unid
  /// attributes of its ancestor elements) because two runs that started out in the same paragraph
  /// may end up correlated with text in different paragraphs; they then need different ancestor
  /// unids in the reconstructed document even though their ancestor elements are the same.
  ///
  /// The method works in three steps:
  ///
  /// 1. For every w:pPr atom that is either Equal or located inside w:txbxContent, and that has a
  ///    'before' atom with the same number of ancestors, the Unid attribute values of the 'before'
  ///    ancestors are copied onto the corresponding ancestors of the atom.
  ///
  /// 2. The list is walked backwards. Every w:pPr atom that is not inside a text box captures the
  ///    Unids of its ancestors. Every atom visited after it (that is, earlier in the document),
  ///    until the next such w:pPr, gets those captured unids, followed by the Unids of any of its
  ///    own ancestors beyond that depth. When the first ancestor of the last atom in the list is a
  ///    w:footnote or w:endnote, slot 0 of every atom's list is overwritten with that element's Unid.
  ///
  /// 3. The list is walked backwards a second time to handle text boxes: a w:pPr inside
  ///    w:txbxContent captures the Unids of its own ancestors, and the atoms that follow it get
  ///    those unids plus their own deeper ones, until a non-text-box w:pPr or an atom with fewer
  ///    ancestors is reached.
  ///
  /// Per the original author, this assumes text boxes are not nested inside text boxes and that
  /// footnotes / endnotes do not contain text boxes.
  /// </remarks>
  /// <param name="comparisonUnitAtomList">Flattened atoms, in document order; updated in place.</param>
  /// <exception cref="OpenXmlPowerToolsException">
  /// An ancestor of a paragraph-properties atom has no Unid, or the list does not end with a
  /// paragraph mark outside a text box.
  /// </exception>
  private static void AssembleAncestorUnidsInOrderToRebuildXmlTreeProperly(List<ComparisonUnitAtom> comparisonUnitAtomList)
  {
      if (s_False)
      {
          var sb = new StringBuilder();
          foreach (var item in comparisonUnitAtomList)
              sb.Append(item.ToString()).Append(Environment.NewLine);
          var sbs = sb.ToString();
          DocxComparerUtil.NotePad(sbs);
      }

      // Step 1: set the ancestor unids in the 'after' document to those of the 'before' document for
      // every pPr whose status is Equal. The same is done for a pPr inside a text box regardless of
      // status, because insertion / deletion of text boxes themselves is not supported: a text box
      // present in either document always appears in the result (possibly with revised text).
      foreach (var cua in comparisonUnitAtomList)
      {
          if (cua.ContentElement.Name != W.pPr)
              continue;

          var inTextBox = cua.AncestorElements.Any(ae => ae.Name == W.txbxContent);
          if (!inTextBox && cua.CorrelationStatus != CorrelationStatus.Equal)
              continue;

          var cuaBefore = cua.ComparisonUnitAtomBefore;
          if (cuaBefore == null)
              continue;

          var ancestorsAfter = cua.AncestorElements;
          var ancestorsBefore = cuaBefore.AncestorElements;
          if (ancestorsAfter.Length != ancestorsBefore.Length)
              continue;

          for (var i = 0; i < ancestorsAfter.Length; i++)
          {
              var afterUnidAtt = ancestorsAfter[i].Attribute(PtOpenXml.Unid);
              var beforeUnidAtt = ancestorsBefore[i].Attribute(PtOpenXml.Unid);
              if (afterUnidAtt != null && beforeUnidAtt != null)
                  afterUnidAtt.Value = beforeUnidAtt.Value;
          }
      }

      if (s_False)
      {
          var sb = new StringBuilder();
          foreach (var item in comparisonUnitAtomList)
              sb.Append(item.ToString()).Append(Environment.NewLine);
          var sbs = sb.ToString();
          DocxComparerUtil.NotePad(sbs);
      }

      // Enumerable.Reverse produces a reversed copy; the caller's list is left in its original order.
      var rComparisonUnitAtomList = Enumerable.Reverse(comparisonUnitAtomList).ToList();

      // When the atoms belong to a footnote / endnote, remember that element's unid; it is forced
      // into slot 0 of every atom's ancestor unids in step 2.
      string deepestAncestorUnid = null;
      if (rComparisonUnitAtomList.Any())
      {
          var deepestAncestor = rComparisonUnitAtomList.First().AncestorElements.First();
          var deepestAncestorName = deepestAncestor.Name;
          if (deepestAncestorName == W.footnote || deepestAncestorName == W.endnote)
              deepestAncestorUnid = (string)deepestAncestor.Attribute(PtOpenXml.Unid);
      }

      // Unids generated for ancestors that carry none. Keyed by element (reference identity), so
      // every atom below the same ancestor receives the same generated value, in both passes.
      var generatedUnids = new Dictionary<XElement, string>();

      // Step 2. A pPr inside a text box is treated here like any other content of the paragraph that
      // contains the text box, which leaves the ancestor unids of text-box content incomplete;
      // step 3 goes through the list again to complete them.
      string[] currentAncestorUnids = null;
      foreach (var cua in rComparisonUnitAtomList)
      {
          if (cua.ContentElement.Name == W.pPr &&
              !cua.AncestorElements.Any(ae => ae.Name == W.txbxContent))
          {
              // Collect the ancestor unids of the paragraph; they are shared (as a prefix) by all
              // atoms of that paragraph.
              currentAncestorUnids = GetRequiredAncestorUnids(cua);
              cua.AncestorUnids = currentAncestorUnids;
              if (deepestAncestorUnid != null)
                  cua.AncestorUnids[0] = deepestAncestorUnid;
              continue;
          }

          // The last atom of the list is expected to be a paragraph mark outside a text box; if it
          // is not, there is nothing to inherit from. Fail with the file's usual internal error
          // instead of a bare NullReferenceException.
          if (currentAncestorUnids == null)
              throw new OpenXmlPowerToolsException("Internal error");

          cua.AncestorUnids = AppendDeeperAncestorUnids(currentAncestorUnids, cua, generatedUnids);
          if (deepestAncestorUnid != null)
              cua.AncestorUnids[0] = deepestAncestorUnid;
      }

      if (s_False)
      {
          var sb = new StringBuilder();
          foreach (var item in comparisonUnitAtomList)
              sb.Append(item.ToString()).Append(Environment.NewLine);
          var sbs = sb.ToString();
          DocxComparerUtil.NotePad(sbs);
      }

      // Step 3: the second loop, which processes all text boxes.
      currentAncestorUnids = null;
      bool skipUntilNextPpr = false;
      foreach (var cua in rComparisonUnitAtomList)
      {
          // An atom shallower than the current text-box paragraph means we have left the text box.
          if (currentAncestorUnids != null && cua.AncestorElements.Length < currentAncestorUnids.Length)
          {
              skipUntilNextPpr = true;
              currentAncestorUnids = null;
              continue;
          }

          if (cua.ContentElement.Name == W.pPr)
          {
              var pPr_inTextBox = cua
                  .AncestorElements
                  .Any(ae => ae.Name == W.txbxContent);
              if (!pPr_inTextBox)
              {
                  // Ordinary paragraphs were fully handled in step 2.
                  skipUntilNextPpr = true;
                  currentAncestorUnids = null;
                  continue;
              }

              skipUntilNextPpr = false;
              currentAncestorUnids = GetRequiredAncestorUnids(cua);
              cua.AncestorUnids = currentAncestorUnids;
              continue;
          }

          if (skipUntilNextPpr)
              continue;

          cua.AncestorUnids = AppendDeeperAncestorUnids(currentAncestorUnids, cua, generatedUnids);
      }

      if (s_False)
      {
          var sb = new StringBuilder();
          foreach (var item in comparisonUnitAtomList)
              sb.Append(item.ToStringAncestorUnids()).Append(Environment.NewLine);
          var sbs = sb.ToString();
          DocxComparerUtil.NotePad(sbs);
      }
  }

  // Returns the Unid of every ancestor of the atom, in order; throws "Internal error" when an
  // ancestor has no Unid attribute.
  private static string[] GetRequiredAncestorUnids(ComparisonUnitAtom cua)
  {
      return cua
          .AncestorElements
          .Select(ae =>
          {
              var thisUnid = (string)ae.Attribute(PtOpenXml.Unid);
              if (thisUnid == null)
                  throw new OpenXmlPowerToolsException("Internal error");
              return thisUnid;
          })
          .ToArray();
  }

  // Returns the inherited unids followed by the unids of the atom's ancestors beyond that depth.
  // Fix: an ancestor without a Unid used to contribute null, because the freshly created GUID was
  // computed and then discarded. Such an ancestor now gets a generated unid, created once per
  // element and reused for every atom under it.
  private static string[] AppendDeeperAncestorUnids(
      string[] inheritedUnids,
      ComparisonUnitAtom cua,
      Dictionary<XElement, string> generatedUnids)
  {
      var additionalAncestorUnids = cua
          .AncestorElements
          .Skip(inheritedUnids.Length)
          .Select(ae =>
          {
              var thisUnid = (string)ae.Attribute(PtOpenXml.Unid);
              if (thisUnid == null && !generatedUnids.TryGetValue(ae, out thisUnid))
              {
                  thisUnid = Guid.NewGuid().ToString().Replace("-", "");
                  generatedUnids.Add(ae, thisUnid);
              }
              return thisUnid;
          });
      return inheritedUnids
          .Concat(additionalAncestorUnids)
          .ToArray();
  }
  // Flattens the correlated sequences into one ordered list of ComparisonUnitAtoms. Every atom in the result
  // carries its status: Deleted, Inserted, or Equal.
  //
  // - Equal sequences pair the atoms of ComparisonUnitArray1 with those of ComparisonUnitArray2 position by
  //   position; the resulting atom is built from the "after" atom and remembers the "before" atom.
  // - Deleted sequences contribute the atoms of ComparisonUnitArray1.
  // - Inserted sequences contribute the atoms of ComparisonUnitArray2.
  // Any other status throws OpenXmlPowerToolsException("Internal error").
  private static List<ComparisonUnitAtom> FlattenToComparisonUnitAtomList(List<CorrelatedSequence> correlatedSequence, WmlComparerSettings settings)
  {
      var listOfComparisonUnitAtoms = correlatedSequence
          .SelectMany(cs =>
          {
              // TODO (carried over from the original author): write some code here to find out whether we are
              // assembling a paragraph (or anything) that contains a given unid, to investigate why content
              // is being dropped.
              //string searchFor = "0ecb9184";
              IEnumerable<ComparisonUnitAtom> atomsForSequence;

              if (cs.CorrelationStatus == CorrelationStatus.Equal)
              {
                  var atomsBefore = cs
                      .ComparisonUnitArray1
                      .SelectMany(unit => unit.DescendantContentAtoms());
                  var atomsAfter = cs
                      .ComparisonUnitArray2
                      .SelectMany(unit => unit.DescendantContentAtoms());

                  // materialized here, exactly as the Equal branch always was
                  atomsForSequence = atomsBefore
                      .Zip(atomsAfter, (before, after) =>
                          new ComparisonUnitAtom(after.ContentElement, after.AncestorElements, after.Part, settings)
                          {
                              CorrelationStatus = CorrelationStatus.Equal,
                              ContentElementBefore = before.ContentElement,
                              ComparisonUnitAtomBefore = before,
                          })
                      .ToList();
              }
              else if (cs.CorrelationStatus == CorrelationStatus.Deleted)
              {
                  atomsForSequence = cs
                      .ComparisonUnitArray1
                      .SelectMany(unit => unit.DescendantContentAtoms())
                      .Select(atom =>
                          new ComparisonUnitAtom(atom.ContentElement, atom.AncestorElements, atom.Part, settings)
                          {
                              CorrelationStatus = CorrelationStatus.Deleted,
                          });
              }
              else if (cs.CorrelationStatus == CorrelationStatus.Inserted)
              {
                  atomsForSequence = cs
                      .ComparisonUnitArray2
                      .SelectMany(unit => unit.DescendantContentAtoms())
                      .Select(atom =>
                          new ComparisonUnitAtom(atom.ContentElement, atom.AncestorElements, atom.Part, settings)
                          {
                              CorrelationStatus = CorrelationStatus.Inserted,
                          });
              }
              else
              {
                  throw new OpenXmlPowerToolsException("Internal error");
              }

              return atomsForSequence;
          })
          .ToList();

      // debugging aid, only runs when s_False is set
      if (s_False)
      {
          var dump = new StringBuilder();
          foreach (var atom in listOfComparisonUnitAtoms)
              dump.Append(atom.ToString()).Append(Environment.NewLine);
          var dumpText = dump.ToString();
          DocxComparerUtil.NotePad(dumpText);
      }

      return listOfComparisonUnitAtoms;
  }
  // For every deleted or inserted table row, adds a w:del or w:ins element to the row's w:trPr (creating the
  // w:trPr when the row has none), so that the generated document shows the row as deleted or inserted.
  //
  // Only row groups are handled. This works because tables never appear in these lists: tables are always
  // flattened in the LCS process, so there is no need to recurse into children. For a row it is enough to
  // take its first content atom, find the w:tr ancestor of that atom, and tweak that w:tr's w:trPr.
  private static void MarkRowsAsDeletedOrInserted(WmlComparerSettings settings, List<CorrelatedSequence> correlatedSequence)
  {
      var changedSequences = correlatedSequence.Where(cs =>
          cs.CorrelationStatus == CorrelationStatus.Deleted || cs.CorrelationStatus == CorrelationStatus.Inserted);

      foreach (var changed in changedSequences)
      {
          // deleted content lives in ComparisonUnitArray1, inserted content in ComparisonUnitArray2
          var isInsertion = changed.CorrelationStatus == CorrelationStatus.Inserted;
          var unitsToScan = isInsertion ? changed.ComparisonUnitArray2 : changed.ComparisonUnitArray1;
          var revTrackElementName = isInsertion ? W.ins : W.del;

          foreach (var unit in unitsToScan)
          {
              var rowGroup = unit as ComparisonUnitGroup;
              if (rowGroup == null || rowGroup.ComparisonUnitGroupType != ComparisonUnitGroupType.Row)
                  continue;

              var firstAtomInRow = rowGroup.DescendantContentAtoms().FirstOrDefault();
              if (firstAtomInRow == null)
                  throw new OpenXmlPowerToolsException("Internal error");

              // walk the ancestors from the end of the array to pick the last w:tr in it
              var row = firstAtomInRow
                  .AncestorElements
                  .Reverse()
                  .FirstOrDefault(ancestor => ancestor.Name == W.tr);
              if (row == null)
                  throw new OpenXmlPowerToolsException("Internal error");

              var rowProperties = row.Element(W.trPr);
              if (rowProperties == null)
              {
                  rowProperties = new XElement(W.trPr);
                  row.AddFirst(rowProperties);
              }

              var revisionMark = new XElement(revTrackElementName,
                  new XAttribute(W.author, settings.AuthorForRevisions),
                  new XAttribute(W.id, s_MaxId++),
                  new XAttribute(W.date, settings.DateTimeForRevisions));
              rowProperties.Add(revisionMark);
          }
      }
  }
  // Kind of tracked revision reported in WmlComparerRevision.RevisionType.
  public enum WmlComparerRevisionType
  {
      // The revision adds content.
      Inserted = 0,

      // The revision removes content.
      Deleted = 1,
  }
  // One tracked revision as returned by GetRevisions: a run of adjacent content atoms that share the same
  // correlation status and the same revision-tracking element (ignoring its w:id and pt:Unid attributes).
  public class WmlComparerRevision
  {
      // Inserted or Deleted.
      public WmlComparerRevisionType RevisionType;

      // Concatenated text of the revision; every paragraph mark (w:pPr atom) contributes Environment.NewLine.
      // Left null when the first content element is one of RevElementsWithNoText.
      public string Text;

      // Value of the w:author attribute of the revision-tracking element.
      public string Author;

      // Value of the w:date attribute of the revision-tracking element, kept as the raw string.
      public string Date;

      // Content element of the first atom in the revision.
      public XElement ContentXElement;

      // Revision-tracking element of the first atom in the revision.
      public XElement RevisionXElement;

      // Uri of the part (main document, footnotes or endnotes) that contains the revision.
      public Uri PartUri;

      // Content type of the part that contains the revision.
      public string PartContentType;
  }
  // Content elements for which GetRevisions does not fill in WmlComparerRevision.Text.
  private static XName[] RevElementsWithNoText =
  {
      M.oMath,
      M.oMathPara,
      W.drawing,
  };
  // Returns the tracked revisions (insertions and deletions) found in source: first those of the main
  // document part, then those of the footnotes part, then those of the endnotes part.
  //
  // The document bytes are copied into a private MemoryStream before the package is opened for editing, so
  // the clean-up done by RemoveExistingPowerToolsMarkup is applied to that copy.
  //
  // Adjacent content atoms are merged into one revision when they share the same correlation status and the
  // same revision-tracking element, compared without its w:id and pt:Unid attributes.
  //
  // Throws OpenXmlPowerToolsException (from TestForInvalidContent) when the document contains w:altChunk,
  // w:subDoc or w:contentPart.
  public static List<WmlComparerRevision> GetRevisions(WmlDocument source, WmlComparerSettings settings)
  {
      using (MemoryStream ms = new MemoryStream())
      {
          ms.Write(source.DocumentByteArray, 0, source.DocumentByteArray.Length);
          using (WordprocessingDocument wDoc = WordprocessingDocument.Open(ms, true))
          {
              TestForInvalidContent(wDoc);
              RemoveExistingPowerToolsMarkup(wDoc);

              var contentParent = wDoc.MainDocumentPart.GetXDocument().Root.Element(W.body);
              var atomList = WmlComparer.CreateComparisonUnitAtomList(wDoc.MainDocumentPart, contentParent, settings).ToArray();

              // debugging aid, only runs when s_False is set
              if (s_False)
              {
                  var sb = new StringBuilder();
                  foreach (var item in atomList)
                      sb.Append(item.ToString()).Append(Environment.NewLine);
                  var sbs = sb.ToString();
                  DocxComparerUtil.NotePad(sbs);
              }

              // The grouping key is the status name, followed (for non-equal atoms) by the serialized
              // revision-tracking element with the per-instance attributes (w:id, pt:Unid) left out.
              var grouped = atomList
                  .GroupAdjacent(a =>
                  {
                      var key = a.CorrelationStatus.ToString();
                      if (a.CorrelationStatus != CorrelationStatus.Equal)
                      {
                          var rt = new XElement(a.RevTrackElement.Name,
                              new XAttribute(XNamespace.Xmlns + "w", "http://schemas.openxmlformats.org/wordprocessingml/2006/main"),
                              a.RevTrackElement.Attributes().Where(a2 => a2.Name != W.id && a2.Name != PtOpenXml.Unid));
                          key += rt.ToString(SaveOptions.DisableFormatting);
                      }
                      return key;
                  })
                  .ToList();

              var revisions = grouped
                  .Where(k => k.Key != "Equal")
                  .ToList();

              // debugging aid, only runs when s_False is set
              if (s_False)
              {
                  var sb = new StringBuilder();
                  foreach (var item in revisions)
                      sb.Append(item.Key).Append(Environment.NewLine);
                  var sbs = sb.ToString();
                  DocxComparerUtil.NotePad(sbs);
              }

              var mainDocPartRevisionList = revisions
                  .Select(rg =>
                  {
                      var rev = new WmlComparerRevision();

                      // The key is machine-generated, so compare ordinally instead of with the current culture.
                      if (rg.Key.StartsWith("Inserted", StringComparison.Ordinal))
                          rev.RevisionType = WmlComparerRevisionType.Inserted;
                      else if (rg.Key.StartsWith("Deleted", StringComparison.Ordinal))
                          rev.RevisionType = WmlComparerRevisionType.Deleted;

                      var revTrackElement = rg.First().RevTrackElement;
                      rev.RevisionXElement = revTrackElement;
                      rev.Author = (string)revTrackElement.Attribute(W.author);
                      rev.ContentXElement = rg.First().ContentElement;
                      rev.Date = (string)revTrackElement.Attribute(W.date);
                      rev.PartUri = wDoc.MainDocumentPart.Uri;
                      rev.PartContentType = wDoc.MainDocumentPart.ContentType;

                      // math and drawing content has no meaningful text, so Text stays null for it
                      if (!RevElementsWithNoText.Contains(rev.ContentXElement.Name))
                      {
                          rev.Text = rg
                              .Select(rgc =>
                              {
                                  // a paragraph mark is reported as a line break
                                  if (rgc.ContentElement.Name == W.pPr)
                                      return Environment.NewLine;
                                  return rgc.ContentElement.Value;
                              })
                              .StringConcatenate();
                      }
                      return rev;
                  })
                  .ToList();

              var footnotesRevisionList = GetFootnoteEndnoteRevisionList(wDoc.MainDocumentPart.FootnotesPart, W.footnote, settings);
              var endnotesRevisionList = GetFootnoteEndnoteRevisionList(wDoc.MainDocumentPart.EndnotesPart, W.endnote, settings);
              var finalRevisionList = mainDocPartRevisionList.Concat(footnotesRevisionList).Concat(endnotesRevisionList).ToList();
              return finalRevisionList;
          }
      }
  }
  // Returns the tracked revisions found in a footnotes or endnotes part, note by note in document order.
  //
  // footnotesEndnotesPart      - the part to scan; when null an empty sequence is returned.
  // footnoteEndnoteElementName - name of the note elements directly under the part's root element
  //                              (callers in this file pass W.footnote or W.endnote).
  // settings                   - passed through to CreateComparisonUnitAtomList.
  //
  // Within each note, adjacent content atoms are merged into one revision when they share the same
  // correlation status and the same revision-tracking element, compared without its w:id and pt:Unid
  // attributes.
  private static IEnumerable<WmlComparerRevision> GetFootnoteEndnoteRevisionList(OpenXmlPart footnotesEndnotesPart,
      XName footnoteEndnoteElementName,
      WmlComparerSettings settings)
  {
      if (footnotesEndnotesPart == null)
          return Enumerable.Empty<WmlComparerRevision>();

      var xDoc = footnotesEndnotesPart.GetXDocument();
      var footnotesEndnotes = xDoc.Root.Elements(footnoteEndnoteElementName);
      List<WmlComparerRevision> revisionsForPart = new List<WmlComparerRevision>();

      foreach (var fn in footnotesEndnotes)
      {
          var atomList = WmlComparer.CreateComparisonUnitAtomList(footnotesEndnotesPart, fn, settings).ToArray();

          // debugging aid, only runs when s_False is set
          if (s_False)
          {
              var sb = new StringBuilder();
              foreach (var item in atomList)
                  sb.Append(item.ToString()).Append(Environment.NewLine);
              var sbs = sb.ToString();
              DocxComparerUtil.NotePad(sbs);
          }

          // The grouping key is the status name, followed (for non-equal atoms) by the serialized
          // revision-tracking element with the per-instance attributes (w:id, pt:Unid) left out.
          var grouped = atomList
              .GroupAdjacent(a =>
              {
                  var key = a.CorrelationStatus.ToString();
                  if (a.CorrelationStatus != CorrelationStatus.Equal)
                  {
                      var rt = new XElement(a.RevTrackElement.Name,
                          new XAttribute(XNamespace.Xmlns + "w", "http://schemas.openxmlformats.org/wordprocessingml/2006/main"),
                          a.RevTrackElement.Attributes().Where(a2 => a2.Name != W.id && a2.Name != PtOpenXml.Unid));
                      key += rt.ToString(SaveOptions.DisableFormatting);
                  }
                  return key;
              })
              .ToList();

          var revisions = grouped
              .Where(k => k.Key != "Equal")
              .ToList();

          var thisNoteRevisionList = revisions
              .Select(rg =>
              {
                  var rev = new WmlComparerRevision();

                  // The key is machine-generated, so compare ordinally instead of with the current culture.
                  if (rg.Key.StartsWith("Inserted", StringComparison.Ordinal))
                      rev.RevisionType = WmlComparerRevisionType.Inserted;
                  else if (rg.Key.StartsWith("Deleted", StringComparison.Ordinal))
                      rev.RevisionType = WmlComparerRevisionType.Deleted;

                  var revTrackElement = rg.First().RevTrackElement;
                  rev.RevisionXElement = revTrackElement;
                  rev.Author = (string)revTrackElement.Attribute(W.author);
                  rev.ContentXElement = rg.First().ContentElement;
                  rev.Date = (string)revTrackElement.Attribute(W.date);
                  rev.PartUri = footnotesEndnotesPart.Uri;
                  rev.PartContentType = footnotesEndnotesPart.ContentType;

                  // math and drawing content has no meaningful text, so Text stays null for it
                  if (!RevElementsWithNoText.Contains(rev.ContentXElement.Name))
                  {
                      rev.Text = rg
                          .Select(rgc =>
                          {
                              // a paragraph mark is reported as a line break
                              if (rgc.ContentElement.Name == W.pPr)
                                  return Environment.NewLine;
                              return rgc.ContentElement.Value;
                          })
                          .StringConcatenate();
                  }
                  return rev;
              });

          // enumerates the projection once and appends this note's revisions in order
          revisionsForPart.AddRange(thisNoteRevisionList);
      }

      return revisionsForPart;
  }
  // Rejects documents that contain content this comparer does not support. Prohibited elements:
  // - altChunk
  // - subDoc
  // - contentPart
  // Every content part of the document is checked, in the order listed above, and the first hit throws
  // OpenXmlPowerToolsException.
  private static void TestForInvalidContent(WordprocessingDocument wDoc)
  {
      var prohibitedElements = new[]
      {
          new KeyValuePair<XName, string>(W.altChunk, "Unsupported document, contains w:altChunk"),
          new KeyValuePair<XName, string>(W.subDoc, "Unsupported document, contains w:subDoc"),
          new KeyValuePair<XName, string>(W.contentPart, "Unsupported document, contains w:contentPart"),
      };

      foreach (var part in wDoc.ContentParts())
      {
          var partXml = part.GetXDocument();
          foreach (var prohibited in prohibitedElements)
          {
              if (partXml.Descendants(prohibited.Key).Any())
                  throw new OpenXmlPowerToolsException(prohibited.Value);
          }
      }
  }
  // Removes every attribute in the PowerTools (pt) namespace, except pt:Unid, from all descendants of the
  // root element of the main document part and, when present, of the footnotes and endnotes parts. Each
  // processed part is written back with PutXDocument.
  private static void RemoveExistingPowerToolsMarkup(WordprocessingDocument wDoc)
  {
      var mainRoot = wDoc.MainDocumentPart.GetXDocument().Root;
      var mainMarkup =
          from attribute in mainRoot.Descendants().Attributes()
          where attribute.Name.Namespace == PtOpenXml.pt && attribute.Name != PtOpenXml.Unid
          select attribute;
      mainMarkup.Remove();
      wDoc.MainDocumentPart.PutXDocument();

      // footnotes first, then endnotes; either part may be absent
      var noteParts = new OpenXmlPart[]
      {
          wDoc.MainDocumentPart.FootnotesPart,
          wDoc.MainDocumentPart.EndnotesPart,
      };
      foreach (var notePart in noteParts)
      {
          if (notePart == null)
              continue;

          var noteRoot = notePart.GetXDocument().Root;
          var noteMarkup =
              from attribute in noteRoot.Descendants().Attributes()
              where attribute.Name.Namespace == PtOpenXml.pt && attribute.Name != PtOpenXml.Unid
              select attribute;
          noteMarkup.Remove();
          notePart.PutXDocument();
      }
  }
  // Adds a pt:SHA1Hash attribute to every descendant of contentParent whose name is listed in
  // ElementsToHaveSha1Hash. The hash is computed over a normalized clone of the element (see
  // CloneBlockLevelContentForHashing), serialized without formatting and with the default WordprocessingML
  // namespace declaration stripped.
  //
  // Tables (w:tbl) and rows (w:tr) additionally get a pt:StructureSHA1Hash attribute, computed the same way
  // over the clone with all text nodes removed (see CloneForStructureHash).
  private static void AddSha1HashToBlockLevelContent(OpenXmlPart part, XElement contentParent, WmlComparerSettings settings)
  {
      const string defaultNamespaceDeclaration = " xmlns=\"http://schemas.openxmlformats.org/wordprocessingml/2006/main\"";

      var elementsToAnnotate = contentParent
          .Descendants()
          .Where(d => ElementsToHaveSha1Hash.Contains(d.Name));

      foreach (var target in elementsToAnnotate)
      {
          var contentClone = CloneBlockLevelContentForHashing(part, target, true, settings);
          var contentXml = contentClone
              .ToString(SaveOptions.DisableFormatting)
              .Replace(defaultNamespaceDeclaration, "");
          var contentHash = PtUtils.SHA1HashStringForUTF8String(contentXml);
          target.Add(new XAttribute(PtOpenXml.SHA1Hash, contentHash));

          var isTableOrRow = target.Name == W.tbl || target.Name == W.tr;
          if (!isTableOrRow)
              continue;

          var structureClone = (XElement)CloneForStructureHash(contentClone);
          // this is a convenient place to look at why tables are being compared as different.
          //if (target.Name == W.tbl)
          //    Console.WriteLine();
          var structureXml = structureClone
              .ToString(SaveOptions.DisableFormatting)
              .Replace(defaultNamespaceDeclaration, "");
          var structureHash = PtUtils.SHA1HashStringForUTF8String(structureXml);
          target.Add(new XAttribute(PtOpenXml.StructureSHA1Hash, structureHash));
      }
  }
  // Produces a copy of the tree that keeps only its structure: element names, attributes and child
  // elements. Text nodes (and any other non-element nodes) are left out.
  // Returns the new XElement, or null when node is not an XElement.
  private static object CloneForStructureHash(XNode node)
  {
      var source = node as XElement;
      if (source == null)
          return null;

      var childClones = source
          .Elements()
          .Select(child => CloneForStructureHash(child));
      return new XElement(source.Name, source.Attributes(), childClones);
  }
  // Attributes that CloneBlockLevelContentForHashingInternal leaves out when it clones an element through
  // its relationship-aware branch or its default branch.
  private static XName[] AttributesToTrimWhenCloning =
  {
      WP14.anchorId,
      WP14.editId,
      "ObjectID",
      "ShapeID",
      "id",
      "type",
  };
  // Clones node into the normalized form used for hashing (see CloneBlockLevelContentForHashingInternal) and
  // then removes every namespace declaration attribute from the clone and its descendants.
  // The result of the internal clone is cast to XElement, so node must be something that clones to a single
  // element.
  private static XElement CloneBlockLevelContentForHashing(OpenXmlPart mainDocumentPart, XNode node, bool includeRelatedParts, WmlComparerSettings settings)
  {
      var clone = (XElement)CloneBlockLevelContentForHashingInternal(mainDocumentPart, node, includeRelatedParts, settings);

      var namespaceDeclarations = clone
          .DescendantsAndSelf()
          .Attributes()
          .Where(attribute => attribute.IsNamespaceDeclaration);
      namespaceDeclarations.Remove();

      return clone;
  }
  // Recursively builds the normalized copy of node that is serialized and hashed for block-level comparison.
  //
  // Return value, by kind of node:
  // - w:bookmarkStart, w:bookmarkEnd, w:pPr, w:rPr and anything in the a14 namespace: null (dropped).
  // - w:p: a new w:p in which adjacent runs that consist of a single w:t are merged into one run.
  // - w:r: a lazy sequence with one w:r per child element of the run (w:rPr excluded).
  // - w:tbl / w:tr / w:tc / w:tcPr / w:gridSpan / w:txbxContent: a reduced clone (see the branches below).
  // - elements that carry relationship ids (only when includeRelatedParts is true): a clone in which the
  //   relationship attributes are replaced by a value derived from the relationship target.
  // - every other element: a clone without pt-namespace attributes and without a few volatile attributes.
  // - text nodes: a new XText with case / non-breaking-space normalization applied when settings ask for it;
  //   otherwise the node itself.
  private static object CloneBlockLevelContentForHashingInternal(OpenXmlPart mainDocumentPart, XNode node, bool includeRelatedParts, WmlComparerSettings settings)
  {
      var element = node as XElement;
      if (element == null)
      {
          if (settings.CaseInsensitive || settings.ConflateBreakingAndNonbreakingSpaces)
          {
              var textNode = node as XText;
              if (textNode != null)
                  return new XText(NormalizeTextForBlockHash(textNode.Value, settings));
          }
          return node;
      }

      var name = element.Name;

      // clones a sequence of child nodes with the same arguments as this call (lazily evaluated)
      Func<IEnumerable<XNode>, IEnumerable<object>> cloneAll = nodes =>
          nodes.Select(n => CloneBlockLevelContentForHashingInternal(mainDocumentPart, n, includeRelatedParts, settings));

      if (name == W.bookmarkStart ||
          name == W.bookmarkEnd ||
          name == W.pPr ||
          name == W.rPr ||
          name.Namespace == A14.a14)
          return null;

      if (name == W.p)
      {
          var rsidAttributeNames = new[]
          {
              W.rsid,
              W.rsidDel,
              W.rsidP,
              W.rsidR,
              W.rsidRDefault,
              W.rsidRPr,
              W.rsidSect,
              W.rsidTr,
          };

          // NOTE(review): the attributes kept here are not copied to the element that is returned below;
          // the returned w:p is built from the grouped runs only.
          var clonedPara = new XElement(name,
              element.Attributes().Where(a => !rsidAttributeNames.Contains(a.Name) && a.Name.Namespace != PtOpenXml.pt),
              cloneAll(element.Nodes()));

          // key is true for runs whose only child element is a w:t
          var groupedRuns = clonedPara
              .Elements()
              .GroupAdjacent(e => e.Name == W.r &&
                  e.Elements().Count() == 1 &&
                  e.Element(W.t) != null);

          return new XElement(name,
              groupedRuns.Select(group =>
              {
                  if (!group.Key)
                      return (object)group;

                  // merge the adjacent text runs into a single run
                  var mergedText = group.Select(run => run.Value).StringConcatenate();
                  return new XElement(W.r,
                      new XElement(W.t,
                          NormalizeTextForBlockHash(mergedText, settings)));
              }));
      }

      if (name == W.r)
      {
          // one w:r per run child, so that runs compare equal however the content was split into runs
          return
              from runChild in element.Elements()
              where runChild.Name != W.rPr
              select new XElement(W.r, CloneBlockLevelContentForHashingInternal(mainDocumentPart, runChild, includeRelatedParts, settings));
      }

      if (name == W.tbl)
          return new XElement(W.tbl, cloneAll(element.Elements(W.tr)));

      if (name == W.tr)
          return new XElement(W.tr, cloneAll(element.Elements(W.tc)));

      if (name == W.tc)
          return new XElement(W.tc, cloneAll(element.Elements()));

      if (name == W.tcPr)
          return new XElement(W.tcPr, cloneAll(element.Elements(W.gridSpan)));

      if (name == W.gridSpan)
      {
          return new XElement(W.gridSpan,
              new XAttribute("val", (string)element.Attribute(W.val)));
      }

      if (name == W.txbxContent)
          return new XElement(W.txbxContent, cloneAll(element.Elements()));

      if (includeRelatedParts && ComparisonUnitWord.s_ElementsWithRelationshipIds.Contains(name))
      {
          var resolvedAttributes = element
              .Attributes()
              .Where(a => a.Name.Namespace != PtOpenXml.pt)
              .Where(a => !AttributesToTrimWhenCloning.Contains(a.Name))
              .Select(a => CloneRelationshipAttributeForBlockHash(mainDocumentPart, a));
          return new XElement(name, resolvedAttributes, cloneAll(element.Nodes()));
      }

      // All remaining elements are cloned with their child nodes; they differ only in which attributes
      // (besides the pt-namespace ones, which are always dropped) are left out.
      Func<XAttribute, bool> keepAttribute;
      if (name == VML.shape)
          keepAttribute = a => a.Name != "style" && a.Name != "id" && a.Name != "type";
      else if (name == O.OLEObject)
          keepAttribute = a => a.Name != "ObjectID" && a.Name != R.id;
      else if (name == W._object)
          keepAttribute = a => true;
      else if (name == WP.docPr)
          keepAttribute = a => a.Name != "id";
      else if (name == W.footnoteReference || name == W.endnoteReference)
          keepAttribute = a => a.Name != W.id;
      else
          keepAttribute = a => !AttributesToTrimWhenCloning.Contains(a.Name);

      return new XElement(name,
          element.Attributes().Where(a => a.Name.Namespace != PtOpenXml.pt && keepAttribute(a)),
          cloneAll(element.Nodes()));
  }

  // Applies the text normalization requested by settings: upper-casing with settings.CultureInfo when
  // CaseInsensitive is set, then replacing ordinary spaces with non-breaking spaces when
  // ConflateBreakingAndNonbreakingSpaces is set.
  private static string NormalizeTextForBlockHash(string text, WmlComparerSettings settings)
  {
      var normalized = text;
      if (settings.CaseInsensitive)
          normalized = normalized.ToUpper(settings.CultureInfo);
      if (settings.ConflateBreakingAndNonbreakingSpaces)
          normalized = normalized.Replace(' ', '\x00a0');
      return normalized;
  }

  // Maps one attribute of an element that can carry relationship ids to the attribute used for hashing.
  // Attributes that are not relationship attributes are returned unchanged. For a relationship attribute:
  // - target part already annotated with a PartSHA1HashAnnotation: the annotated hash;
  // - target part whose content type does not end with "xml": the SHA1 hash of the part's bytes (which is
  //   also stored on the part as an annotation);
  // - target part whose content type ends with "xml" and that has no annotation: null (attribute omitted);
  // - no part with that id (GetPartById throws ArgumentOutOfRangeException): the Uri of the hyperlink or
  //   external relationship with that id, or "NULL Relationship" when neither exists.
  private static XAttribute CloneRelationshipAttributeForBlockHash(OpenXmlPart mainDocumentPart, XAttribute attribute)
  {
      if (!ComparisonUnitWord.s_RelationshipAttributeNames.Contains(attribute.Name))
          return attribute;

      var rId = (string)attribute;
      // could be an hyperlink relationship
      try
      {
          OpenXmlPart relatedPart = mainDocumentPart.GetPartById(rId);
          if (relatedPart == null)
              throw new FileFormatException("Invalid WordprocessingML Document");

          var cachedHash = relatedPart.Annotation<PartSHA1HashAnnotation>();
          if (cachedHash != null)
              return new XAttribute(attribute.Name, cachedHash.Hash);

          if (!relatedPart.ContentType.EndsWith("xml"))
          {
              using (var partStream = relatedPart.GetStream())
              {
                  byte[] partBytes;
                  using (BinaryReader reader = new BinaryReader(partStream))
                  {
                      partBytes = reader.ReadBytes((int)partStream.Length);
                  }
                  var sha1 = PtUtils.SHA1HashStringForByteArray(partBytes);
                  relatedPart.AddAnnotation(new PartSHA1HashAnnotation(sha1));
                  return new XAttribute(attribute.Name, sha1);
              }
          }
      }
      catch (ArgumentOutOfRangeException)
      {
          HyperlinkRelationship hyperlink = mainDocumentPart.HyperlinkRelationships.FirstOrDefault(z => z.Id == rId);
          if (hyperlink != null)
          {
              var hyperlinkTarget = hyperlink.Uri.ToString();
              return new XAttribute(attribute.Name, hyperlinkTarget);
          }

          // could be an external relationship
          ExternalRelationship external = mainDocumentPart.ExternalRelationships.FirstOrDefault(z => z.Id == rId);
          if (external != null)
          {
              var externalTarget = external.Uri.ToString();
              return new XAttribute(attribute.Name, externalTarget);
          }

          return new XAttribute(attribute.Name, "NULL Relationship");
      }
      return null;
  }
  3440. private static List<CorrelatedSequence> FindCommonAtBeginningAndEnd(CorrelatedSequence unknown, WmlComparerSettings settings)
  3441. {
  3442. int lengthToCompare = Math.Min(unknown.ComparisonUnitArray1.Length, unknown.ComparisonUnitArray2.Length);
  3443. var countCommonAtBeginning = unknown
  3444. .ComparisonUnitArray1
  3445. .Take(lengthToCompare)
  3446. .Zip(unknown.ComparisonUnitArray2,
  3447. (pu1, pu2) =>
  3448. {
  3449. return new
  3450. {
  3451. Pu1 = pu1,
  3452. Pu2 = pu2,
  3453. };
  3454. })
  3455. .TakeWhile(pair => pair.Pu1.SHA1Hash == pair.Pu2.SHA1Hash)
  3456. .Count();
  3457. if (countCommonAtBeginning != 0 && ((double)countCommonAtBeginning / (double)lengthToCompare) < settings.DetailThreshold)
  3458. countCommonAtBeginning = 0;
  3459. if (countCommonAtBeginning != 0)
  3460. {
  3461. var newSequence = new List<CorrelatedSequence>();
  3462. CorrelatedSequence csEqual = new CorrelatedSequence();
  3463. csEqual.CorrelationStatus = CorrelationStatus.Equal;
  3464. csEqual.ComparisonUnitArray1 = unknown
  3465. .ComparisonUnitArray1
  3466. .Take(countCommonAtBeginning)
  3467. .ToArray();
  3468. csEqual.ComparisonUnitArray2 = unknown
  3469. .ComparisonUnitArray2
  3470. .Take(countCommonAtBeginning)
  3471. .ToArray();
  3472. newSequence.Add(csEqual);
  3473. var remainingLeft = unknown.ComparisonUnitArray1.Length - countCommonAtBeginning;
  3474. var remainingRight = unknown.ComparisonUnitArray2.Length - countCommonAtBeginning;
  3475. if (remainingLeft != 0 && remainingRight == 0)
  3476. {
  3477. CorrelatedSequence csDeleted = new CorrelatedSequence();
  3478. csDeleted.CorrelationStatus = CorrelationStatus.Deleted;
  3479. csDeleted.ComparisonUnitArray1 = unknown.ComparisonUnitArray1.Skip(countCommonAtBeginning).ToArray();
  3480. csDeleted.ComparisonUnitArray2 = null;
  3481. newSequence.Add(csDeleted);
  3482. }
  3483. else if (remainingLeft == 0 && remainingRight != 0)
  3484. {
  3485. CorrelatedSequence csInserted = new CorrelatedSequence();
  3486. csInserted.CorrelationStatus = CorrelationStatus.Inserted;
  3487. csInserted.ComparisonUnitArray1 = null;
  3488. csInserted.ComparisonUnitArray2 = unknown.ComparisonUnitArray2.Skip(countCommonAtBeginning).ToArray();
  3489. newSequence.Add(csInserted);
  3490. }
  3491. else if (remainingLeft != 0 && remainingRight != 0)
  3492. {
  3493. var first1 = unknown.ComparisonUnitArray1[0] as ComparisonUnitWord;
  3494. var first2 = unknown.ComparisonUnitArray2[0] as ComparisonUnitWord;
  3495. if (first1 != null && first2 != null)
  3496. {
  3497. // if operating at the word level and
  3498. // if the last word on the left != pPr && last word on right != pPr
  3499. // then create an unknown for the rest of the paragraph, and create an unknown for the rest of the unknown
  3500. // if the last word on the left != pPr and last word on right == pPr
  3501. // then create deleted for the left, and create an unknown for the rest of the unknown
  3502. // if the last word on the left == pPr and last word on right != pPr
  3503. // then create inserted for the right, and create an unknown for the rest of the unknown
  3504. // if the last word on the left == pPr and last word on right == pPr
  3505. // then create an unknown for the rest of the unknown
  3506. var remainingInLeft = unknown
  3507. .ComparisonUnitArray1
  3508. .Skip(countCommonAtBeginning)
  3509. .ToArray();
  3510. var remainingInRight = unknown
  3511. .ComparisonUnitArray2
  3512. .Skip(countCommonAtBeginning)
  3513. .ToArray();
  3514. var lastContentAtomLeft = unknown.ComparisonUnitArray1[countCommonAtBeginning - 1].DescendantContentAtoms().FirstOrDefault();
  3515. var lastContentAtomRight = unknown.ComparisonUnitArray2[countCommonAtBeginning - 1].DescendantContentAtoms().FirstOrDefault();
  3516. if (lastContentAtomLeft.ContentElement.Name != W.pPr && lastContentAtomRight.ContentElement.Name != W.pPr)
  3517. {
  3518. var split1 = SplitAtParagraphMark(remainingInLeft);
  3519. var split2 = SplitAtParagraphMark(remainingInRight);
  3520. if (split1.Count() == 1 && split2.Count() == 1)
  3521. {
  3522. CorrelatedSequence csUnknown2 = new CorrelatedSequence();
  3523. csUnknown2.CorrelationStatus = CorrelationStatus.Unknown;
  3524. csUnknown2.ComparisonUnitArray1 = split1.First();
  3525. csUnknown2.ComparisonUnitArray2 = split2.First();
  3526. newSequence.Add(csUnknown2);
  3527. return newSequence;
  3528. }
  3529. else if (split1.Count == 2 && split2.Count == 2)
  3530. {
  3531. CorrelatedSequence csUnknown2 = new CorrelatedSequence();
  3532. csUnknown2.CorrelationStatus = CorrelationStatus.Unknown;
  3533. csUnknown2.ComparisonUnitArray1 = split1.First();
  3534. csUnknown2.ComparisonUnitArray2 = split2.First();
  3535. newSequence.Add(csUnknown2);
  3536. CorrelatedSequence csUnknown3 = new CorrelatedSequence();
  3537. csUnknown3.CorrelationStatus = CorrelationStatus.Unknown;
  3538. csUnknown3.ComparisonUnitArray1 = split1.Skip(1).First();
  3539. csUnknown3.ComparisonUnitArray2 = split2.Skip(1).First();
  3540. newSequence.Add(csUnknown3);
  3541. return newSequence;
  3542. }
  3543. }
  3544. }
  3545. CorrelatedSequence csUnknown = new CorrelatedSequence();
  3546. csUnknown.CorrelationStatus = CorrelationStatus.Unknown;
  3547. csUnknown.ComparisonUnitArray1 = unknown.ComparisonUnitArray1.Skip(countCommonAtBeginning).ToArray();
  3548. csUnknown.ComparisonUnitArray2 = unknown.ComparisonUnitArray2.Skip(countCommonAtBeginning).ToArray();
  3549. newSequence.Add(csUnknown);
  3550. }
  3551. else if (remainingLeft == 0 && remainingRight == 0)
  3552. {
  3553. // nothing to do
  3554. }
  3555. return newSequence;
  3556. }
  3557. // if we get to here, then countCommonAtBeginning == 0
  3558. var countCommonAtEnd = unknown
  3559. .ComparisonUnitArray1
  3560. .Reverse()
  3561. .Take(lengthToCompare)
  3562. .Zip(unknown
  3563. .ComparisonUnitArray2
  3564. .Reverse()
  3565. .Take(lengthToCompare),
  3566. (pu1, pu2) =>
  3567. {
  3568. return new
  3569. {
  3570. Pu1 = pu1,
  3571. Pu2 = pu2,
  3572. };
  3573. })
  3574. .TakeWhile(pair => pair.Pu1.SHA1Hash == pair.Pu2.SHA1Hash)
  3575. .Count();
  3576. // never start a common section with a paragraph mark. However, it is OK to set two paragraph marks as equal.
  3577. while (true)
  3578. {
  3579. if (countCommonAtEnd <= 1)
  3580. break;
  3581. var firstCommon = unknown
  3582. .ComparisonUnitArray1
  3583. .Reverse()
  3584. .Take(countCommonAtEnd)
  3585. .LastOrDefault();
  3586. var firstCommonWord = firstCommon as ComparisonUnitWord;
  3587. if (firstCommonWord == null)
  3588. break;
  3589. // if the word contains more than one atom, then not a paragraph mark
  3590. if (firstCommonWord.Contents.Count() != 1)
  3591. break;
  3592. var firstCommonAtom = firstCommonWord.Contents.First() as ComparisonUnitAtom;
  3593. if (firstCommonAtom == null)
  3594. break;
  3595. if (firstCommonAtom.ContentElement.Name != W.pPr)
  3596. break;
  3597. countCommonAtEnd--;
  3598. }
  3599. bool isOnlyParagraphMark = false;
  3600. if (countCommonAtEnd == 1)
  3601. {
  3602. var firstCommon = unknown
  3603. .ComparisonUnitArray1
  3604. .Reverse()
  3605. .Take(countCommonAtEnd)
  3606. .LastOrDefault();
  3607. var firstCommonWord = firstCommon as ComparisonUnitWord;
  3608. if (firstCommonWord != null)
  3609. {
  3610. // if the word contains more than one atom, then not a paragraph mark
  3611. if (firstCommonWord.Contents.Count() == 1)
  3612. {
  3613. var firstCommonAtom = firstCommonWord.Contents.First() as ComparisonUnitAtom;
  3614. if (firstCommonAtom != null)
  3615. {
  3616. if (firstCommonAtom.ContentElement.Name == W.pPr)
  3617. isOnlyParagraphMark = true;
  3618. }
  3619. }
  3620. }
  3621. }
  3622. if (countCommonAtEnd == 2)
  3623. {
  3624. var firstCommon = unknown
  3625. .ComparisonUnitArray1
  3626. .Reverse()
  3627. .Take(countCommonAtEnd)
  3628. .LastOrDefault();
  3629. var secondCommon = unknown
  3630. .ComparisonUnitArray1
  3631. .Reverse()
  3632. .Take(countCommonAtEnd)
  3633. .FirstOrDefault();
  3634. var firstCommonWord = firstCommon as ComparisonUnitWord;
  3635. var secondCommonWord = secondCommon as ComparisonUnitWord;
  3636. if (firstCommonWord != null && secondCommonWord != null)
  3637. {
  3638. // if the word contains more than one atom, then not a paragraph mark
  3639. if (firstCommonWord.Contents.Count() == 1 && secondCommonWord.Contents.Count() == 1)
  3640. {
  3641. var firstCommonAtom = firstCommonWord.Contents.First() as ComparisonUnitAtom;
  3642. var secondCommonAtom = secondCommonWord.Contents.First() as ComparisonUnitAtom;
  3643. if (firstCommonAtom != null && secondCommonAtom != null)
  3644. {
  3645. if (secondCommonAtom.ContentElement.Name == W.pPr)
  3646. isOnlyParagraphMark = true;
  3647. }
  3648. }
  3649. }
  3650. }
  3651. if (!isOnlyParagraphMark && countCommonAtEnd != 0 && ((double)countCommonAtEnd / (double)lengthToCompare) < settings.DetailThreshold)
  3652. countCommonAtEnd = 0;
  3653. // If the following test is not there, the test below sets the end paragraph mark of the entire document equal to the end paragraph
  3654. // mark of the first paragraph in the other document, causing lines to be out of order.
  3655. // [InlineData("WC010-Para-Before-Table-Unmodified.docx", "WC010-Para-Before-Table-Mod.docx", 3)]
  3656. if (isOnlyParagraphMark)
  3657. countCommonAtEnd = 0;
  3658. if (countCommonAtEnd == 0)
  3659. return null;
  3660. // if countCommonAtEnd != 0, and if it contains a paragraph mark, then if there are comparison units in the same paragraph before the common at end (in either version)
  3661. // then we want to put all of those comparison units into a single unknown, where they must be resolved against each other. We don't want those comparison units to go into the middle unknown comparison unit.
  3662. if (countCommonAtEnd != 0)
  3663. {
  3664. int remainingInLeftParagraph = 0;
  3665. int remainingInRightParagraph = 0;
  3666. var commonEndSeq = unknown
  3667. .ComparisonUnitArray1
  3668. .Reverse()
  3669. .Take(countCommonAtEnd)
  3670. .Reverse()
  3671. .ToList();
  3672. var firstOfCommonEndSeq = commonEndSeq.First();
  3673. if (firstOfCommonEndSeq is ComparisonUnitWord)
  3674. {
  3675. // are there any paragraph marks in the common seq at end?
  3676. //if (commonEndSeq.Any(cu => cu.Contents.OfType<ComparisonUnitAtom>().First().ContentElement.Name == W.pPr))
  3677. if (commonEndSeq.Any(cu =>
  3678. {
  3679. var firstComparisonUnitAtom = cu.Contents.OfType<ComparisonUnitAtom>().FirstOrDefault();
  3680. if (firstComparisonUnitAtom == null)
  3681. return false;
  3682. return firstComparisonUnitAtom.ContentElement.Name == W.pPr;
  3683. }))
  3684. {
  3685. remainingInLeftParagraph = unknown
  3686. .ComparisonUnitArray1
  3687. .Reverse()
  3688. .Skip(countCommonAtEnd)
  3689. .TakeWhile(cu =>
  3690. {
  3691. if (!(cu is ComparisonUnitWord))
  3692. return false;
  3693. var firstComparisonUnitAtom = cu.Contents.OfType<ComparisonUnitAtom>().FirstOrDefault();
  3694. if (firstComparisonUnitAtom == null)
  3695. return true;
  3696. return firstComparisonUnitAtom.ContentElement.Name != W.pPr;
  3697. })
  3698. .Count();
  3699. remainingInRightParagraph = unknown
  3700. .ComparisonUnitArray2
  3701. .Reverse()
  3702. .Skip(countCommonAtEnd)
  3703. .TakeWhile(cu =>
  3704. {
  3705. if (!(cu is ComparisonUnitWord))
  3706. return false;
  3707. var firstComparisonUnitAtom = cu.Contents.OfType<ComparisonUnitAtom>().FirstOrDefault();
  3708. if (firstComparisonUnitAtom == null)
  3709. return true;
  3710. return firstComparisonUnitAtom.ContentElement.Name != W.pPr;
  3711. })
  3712. .Count();
  3713. }
  3714. }
  3715. var newSequence = new List<CorrelatedSequence>();
  3716. int beforeCommonParagraphLeft = unknown.ComparisonUnitArray1.Length - remainingInLeftParagraph - countCommonAtEnd;
  3717. int beforeCommonParagraphRight = unknown.ComparisonUnitArray2.Length - remainingInRightParagraph - countCommonAtEnd;
  3718. if (beforeCommonParagraphLeft != 0 && beforeCommonParagraphRight == 0)
  3719. {
  3720. CorrelatedSequence csDeleted = new CorrelatedSequence();
  3721. csDeleted.CorrelationStatus = CorrelationStatus.Deleted;
  3722. csDeleted.ComparisonUnitArray1 = unknown.ComparisonUnitArray1.Take(beforeCommonParagraphLeft).ToArray();
  3723. csDeleted.ComparisonUnitArray2 = null;
  3724. newSequence.Add(csDeleted);
  3725. }
  3726. else if (beforeCommonParagraphLeft == 0 && beforeCommonParagraphRight != 0)
  3727. {
  3728. CorrelatedSequence csInserted = new CorrelatedSequence();
  3729. csInserted.CorrelationStatus = CorrelationStatus.Inserted;
  3730. csInserted.ComparisonUnitArray1 = null;
  3731. csInserted.ComparisonUnitArray2 = unknown.ComparisonUnitArray2.Take(beforeCommonParagraphRight).ToArray();
  3732. newSequence.Add(csInserted);
  3733. }
  3734. else if (beforeCommonParagraphLeft != 0 && beforeCommonParagraphRight != 0)
  3735. {
  3736. CorrelatedSequence csUnknown = new CorrelatedSequence();
  3737. csUnknown.CorrelationStatus = CorrelationStatus.Unknown;
  3738. csUnknown.ComparisonUnitArray1 = unknown.ComparisonUnitArray1.Take(beforeCommonParagraphLeft).ToArray();
  3739. csUnknown.ComparisonUnitArray2 = unknown.ComparisonUnitArray2.Take(beforeCommonParagraphRight).ToArray();
  3740. newSequence.Add(csUnknown);
  3741. }
  3742. else if (beforeCommonParagraphLeft == 0 && beforeCommonParagraphRight == 0)
  3743. {
  3744. // nothing to do
  3745. }
  3746. if (remainingInLeftParagraph != 0 && remainingInRightParagraph == 0)
  3747. {
  3748. CorrelatedSequence csDeleted = new CorrelatedSequence();
  3749. csDeleted.CorrelationStatus = CorrelationStatus.Deleted;
  3750. csDeleted.ComparisonUnitArray1 = unknown.ComparisonUnitArray1.Skip(beforeCommonParagraphLeft).Take(remainingInLeftParagraph).ToArray();
  3751. csDeleted.ComparisonUnitArray2 = null;
  3752. newSequence.Add(csDeleted);
  3753. }
  3754. else if (remainingInLeftParagraph == 0 && remainingInRightParagraph != 0)
  3755. {
  3756. CorrelatedSequence csInserted = new CorrelatedSequence();
  3757. csInserted.CorrelationStatus = CorrelationStatus.Inserted;
  3758. csInserted.ComparisonUnitArray1 = null;
  3759. csInserted.ComparisonUnitArray2 = unknown.ComparisonUnitArray2.Skip(beforeCommonParagraphRight).Take(remainingInRightParagraph).ToArray();
  3760. newSequence.Add(csInserted);
  3761. }
  3762. else if (remainingInLeftParagraph != 0 && remainingInRightParagraph != 0)
  3763. {
  3764. CorrelatedSequence csUnknown = new CorrelatedSequence();
  3765. csUnknown.CorrelationStatus = CorrelationStatus.Unknown;
  3766. csUnknown.ComparisonUnitArray1 = unknown.ComparisonUnitArray1.Skip(beforeCommonParagraphLeft).Take(remainingInLeftParagraph).ToArray();
  3767. csUnknown.ComparisonUnitArray2 = unknown.ComparisonUnitArray2.Skip(beforeCommonParagraphRight).Take(remainingInRightParagraph).ToArray();
  3768. newSequence.Add(csUnknown);
  3769. }
  3770. else if (remainingInLeftParagraph == 0 && remainingInRightParagraph == 0)
  3771. {
  3772. // nothing to do
  3773. }
  3774. CorrelatedSequence csEqual = new CorrelatedSequence();
  3775. csEqual.CorrelationStatus = CorrelationStatus.Equal;
  3776. csEqual.ComparisonUnitArray1 = unknown.ComparisonUnitArray1.Skip(unknown.ComparisonUnitArray1.Length - countCommonAtEnd).ToArray();
  3777. csEqual.ComparisonUnitArray2 = unknown.ComparisonUnitArray2.Skip(unknown.ComparisonUnitArray2.Length - countCommonAtEnd).ToArray();
  3778. newSequence.Add(csEqual);
  3779. return newSequence;
  3780. }
  3781. return null;
  3782. #if false
  3783. var middleLeft = unknown
  3784. .ComparisonUnitArray1
  3785. .Skip(countCommonAtBeginning)
  3786. .SkipLast(remainingInLeftParagraph)
  3787. .SkipLast(countCommonAtEnd)
  3788. .ToArray();
  3789. var middleRight = unknown
  3790. .ComparisonUnitArray2
  3791. .Skip(countCommonAtBeginning)
  3792. .SkipLast(remainingInRightParagraph)
  3793. .SkipLast(countCommonAtEnd)
  3794. .ToArray();
  3795. if (middleLeft.Length > 0 && middleRight.Length == 0)
  3796. {
  3797. CorrelatedSequence cs = new CorrelatedSequence();
  3798. cs.CorrelationStatus = CorrelationStatus.Deleted;
  3799. cs.ComparisonUnitArray1 = middleLeft;
  3800. cs.ComparisonUnitArray2 = null;
  3801. newSequence.Add(cs);
  3802. }
  3803. else if (middleLeft.Length == 0 && middleRight.Length > 0)
  3804. {
  3805. CorrelatedSequence cs = new CorrelatedSequence();
  3806. cs.CorrelationStatus = CorrelationStatus.Inserted;
  3807. cs.ComparisonUnitArray1 = null;
  3808. cs.ComparisonUnitArray2 = middleRight;
  3809. newSequence.Add(cs);
  3810. }
  3811. else if (middleLeft.Length > 0 && middleRight.Length > 0)
  3812. {
  3813. CorrelatedSequence cs = new CorrelatedSequence();
  3814. cs.CorrelationStatus = CorrelationStatus.Unknown;
  3815. cs.ComparisonUnitArray1 = middleLeft;
  3816. cs.ComparisonUnitArray2 = middleRight;
  3817. newSequence.Add(cs);
  3818. }
  3819. var remainingInParaLeft = unknown
  3820. .ComparisonUnitArray1
  3821. .Skip(countCommonAtBeginning)
  3822. .Skip(middleLeft.Length)
  3823. .Take(remainingInLeftParagraph)
  3824. .ToArray();
  3825. var remainingInParaRight = unknown
  3826. .ComparisonUnitArray2
  3827. .Skip(countCommonAtBeginning)
  3828. .Skip(middleRight.Length)
  3829. .Take(remainingInRightParagraph)
  3830. .ToArray();
  3831. if (remainingInParaLeft.Length > 0 && remainingInParaRight.Length == 0)
  3832. {
  3833. CorrelatedSequence cs = new CorrelatedSequence();
  3834. cs.CorrelationStatus = CorrelationStatus.Deleted;
  3835. cs.ComparisonUnitArray1 = remainingInParaLeft;
  3836. cs.ComparisonUnitArray2 = null;
  3837. newSequence.Add(cs);
  3838. }
  3839. else if (remainingInParaLeft.Length == 0 && remainingInParaRight.Length > 0)
  3840. {
  3841. CorrelatedSequence cs = new CorrelatedSequence();
  3842. cs.CorrelationStatus = CorrelationStatus.Inserted;
  3843. cs.ComparisonUnitArray1 = null;
  3844. cs.ComparisonUnitArray2 = remainingInParaRight;
  3845. newSequence.Add(cs);
  3846. }
  3847. else if (remainingInParaLeft.Length > 0 && remainingInParaRight.Length > 0)
  3848. {
  3849. CorrelatedSequence cs = new CorrelatedSequence();
  3850. cs.CorrelationStatus = CorrelationStatus.Unknown;
  3851. cs.ComparisonUnitArray1 = remainingInParaLeft;
  3852. cs.ComparisonUnitArray2 = remainingInParaRight;
  3853. newSequence.Add(cs);
  3854. }
  3855. if (countCommonAtEnd != 0)
  3856. {
  3857. CorrelatedSequence cs = new CorrelatedSequence();
  3858. cs.CorrelationStatus = CorrelationStatus.Equal;
  3859. cs.ComparisonUnitArray1 = unknown
  3860. .ComparisonUnitArray1
  3861. .Skip(countCommonAtBeginning + middleLeft.Length + remainingInParaLeft.Length)
  3862. .ToArray();
  3863. cs.ComparisonUnitArray2 = unknown
  3864. .ComparisonUnitArray2
  3865. .Skip(countCommonAtBeginning + middleRight.Length + remainingInParaRight.Length)
  3866. .ToArray();
  3867. if (cs.ComparisonUnitArray1.Length != cs.ComparisonUnitArray2.Length)
  3868. throw new OpenXmlPowerToolsException("Internal error");
  3869. newSequence.Add(cs);
  3870. }
  3871. return newSequence;
  3872. #endif
  3873. }
  // Splits an array of comparison units in two at the first unit whose first descendant
  // content atom is a W.pPr element (the element the surrounding code treats as a paragraph mark).
  // Returns a one-item list holding the original array when no such unit exists; otherwise a
  // two-item list: the units before the split point, then the units from the split point onward.
  private static List<ComparisonUnit[]> SplitAtParagraphMark(ComparisonUnit[] cua)
  {
      var splitIndex = 0;
      while (splitIndex < cua.Length)
      {
          var leadingAtom = cua[splitIndex].DescendantContentAtoms().FirstOrDefault();
          var startsWithParagraphMark = leadingAtom != null && leadingAtom.ContentElement.Name == W.pPr;
          if (startsWithParagraphMark)
              break;
          splitIndex++;
      }

      var pieces = new List<ComparisonUnit[]>();

      // No paragraph mark found: hand back the input untouched as the only piece.
      if (splitIndex == cua.Length)
      {
          pieces.Add(cua);
          return pieces;
      }

      // The unit carrying the paragraph mark becomes the first unit of the second piece.
      pieces.Add(cua.Take(splitIndex).ToArray());
      pieces.Add(cua.Skip(splitIndex).ToArray());
      return pieces;
  }
  // Finds the last body-level paragraph that has a w:sectPr inside its w:pPr, appends that
  // sectPr to the end of w:body, and removes it from the paragraph properties.
  // Does nothing when no such paragraph exists.
  private static void MoveLastSectPrToChildOfBody(XDocument newXDoc)
  {
      var paragraphHoldingSectPr = newXDoc
          .Root
          .Elements(W.body)
          .Elements(W.p)
          .LastOrDefault(p => p.Elements(W.pPr).Elements(W.sectPr).Any());

      if (paragraphHoldingSectPr == null)
          return;

      var body = newXDoc.Root.Element(W.body);

      // Add first (LINQ to XML clones nodes that already have a parent), then drop the originals.
      body.Add(paragraphHoldingSectPr.Elements(W.pPr).Elements(W.sectPr));
      paragraphHoldingSectPr.Elements(W.pPr).Elements(W.sectPr).Remove();
  }
  // Running counter used by CoalesceRecurse for the w:id of the w:ins / w:del wrappers it creates.
  private static int s_MaxId = 0;

  // Fabricates the new content for a part from the flat list of comparison unit atoms.
  // Resets the revision id counter, then rebuilds the element tree starting at ancestor level 0.
  // Returns whatever CoalesceRecurse returns for level 0 (null when there is nothing to build).
  private static object ProduceNewWmlMarkupFromCorrelatedSequence(OpenXmlPart part,
      IEnumerable<ComparisonUnitAtom> comparisonUnitAtomList,
      WmlComparerSettings settings)
  {
      const int topLevel = 0;
      s_MaxId = 0;
      return CoalesceRecurse(part, comparisonUnitAtomList, topLevel, settings);
  }
  // Renumbers the "id" attribute of every WP.docPr element across all content parts,
  // counting up from 1 in document order, then writes every content part back.
  // Elements without an "id" attribute are left alone and do not consume a number.
  private static void FixUpDocPrIds(WordprocessingDocument wDoc)
  {
      var docPrName = WP.docPr;
      var nextId = 1;

      foreach (var contentPart in wDoc.ContentParts())
      {
          var xDoc = contentPart.GetXDocument();
          foreach (var docPr in xDoc.Descendants().Where(d => d.Name == docPrName))
          {
              var idAttribute = docPr.Attribute("id");
              if (idAttribute == null)
                  continue;
              idAttribute.Value = nextId.ToString();
              nextId++;
          }
      }

      foreach (var contentPart in wDoc.ContentParts())
          contentPart.PutXDocument();
  }
  // Renumbers the w:id attribute of every w:ins and w:del element across all content parts,
  // counting up from 0 in document order, then writes every content part back.
  // Revision marks without a w:id attribute are skipped and do not consume a number.
  private static void FixUpRevMarkIds(WordprocessingDocument wDoc)
  {
      var nextId = 0;

      foreach (var contentPart in wDoc.ContentParts())
      {
          var xDoc = contentPart.GetXDocument();
          var revisionMarks = xDoc.Descendants().Where(d => d.Name == W.ins || d.Name == W.del);
          foreach (var revisionMark in revisionMarks)
          {
              var idAttribute = revisionMark.Attribute(W.id);
              if (idAttribute == null)
                  continue;
              idAttribute.Value = nextId.ToString();
              nextId++;
          }
      }

      foreach (var contentPart in wDoc.ContentParts())
          contentPart.PutXDocument();
  }
  // Renumbers VML.shape elements across all content parts, counting up from 1 in document order.
  // Every shape consumes a number whether or not it has an "id" attribute. When present, the
  // shape's "id" is set to that number, and so is the "ShapeID" attribute of an O.OLEObject
  // sibling (a child of the shape's parent), if there is one. All content parts are then written back.
  private static void FixUpShapeIds(WordprocessingDocument wDoc)
  {
      var shapeName = VML.shape;
      var counter = 0;

      foreach (var contentPart in wDoc.ContentParts())
      {
          var xDoc = contentPart.GetXDocument();
          foreach (var shape in xDoc.Descendants().Where(d => d.Name == shapeName))
          {
              counter++;
              var idText = counter.ToString();

              var idAttribute = shape.Attribute("id");
              if (idAttribute != null)
                  idAttribute.Value = idText;

              // Keep an embedded OLE object pointing at the renumbered shape.
              var oleObject = shape.Parent.Element(O.OLEObject);
              if (oleObject == null)
                  continue;
              var shapeIdAttribute = oleObject.Attribute("ShapeID");
              if (shapeIdAttribute != null)
                  shapeIdAttribute.Value = idText;
          }
      }

      foreach (var contentPart in wDoc.ContentParts())
          contentPart.PutXDocument();
  }
  // Renumbers VML.group elements across all content parts, counting up from 1 in document order.
  // Every group consumes a number; its "id" attribute is overwritten only when it exists.
  // All content parts are then written back.
  private static void FixUpGroupIds(WordprocessingDocument wDoc)
  {
      var groupName = VML.group;
      var counter = 0;

      foreach (var contentPart in wDoc.ContentParts())
      {
          var xDoc = contentPart.GetXDocument();
          foreach (var vmlGroup in xDoc.Descendants().Where(d => d.Name == groupName))
          {
              counter++;
              var idAttribute = vmlGroup.Attribute("id");
              if (idAttribute != null)
                  idAttribute.Value = counter.ToString();
          }
      }

      foreach (var contentPart in wDoc.ContentParts())
          contentPart.PutXDocument();
  }
  // Renumbers VML.shapetype elements across all content parts, counting up from 1 in document order.
  // Every shapetype consumes a number. When present, its "id" attribute is set to that number, and
  // so is the "type" attribute of the first VML.shape child of the shapetype's parent, if any.
  // All content parts are then written back.
  private static void FixUpShapeTypeIds(WordprocessingDocument wDoc)
  {
      var shapeTypeName = VML.shapetype;
      var counter = 0;

      foreach (var contentPart in wDoc.ContentParts())
      {
          var xDoc = contentPart.GetXDocument();
          foreach (var shapeType in xDoc.Descendants().Where(d => d.Name == shapeTypeName))
          {
              counter++;
              var idText = counter.ToString();

              var idAttribute = shapeType.Attribute("id");
              if (idAttribute != null)
                  idAttribute.Value = idText;

              // Re-point the sibling shape at the renumbered shapetype.
              var siblingShape = shapeType.Parent.Element(VML.shape);
              if (siblingShape == null)
                  continue;
              var typeAttribute = siblingShape.Attribute("type");
              if (typeAttribute != null)
                  typeAttribute.Value = idText;
          }
      }

      foreach (var contentPart in wDoc.ContentParts())
          contentPart.PutXDocument();
  }
  // Rebuilds one level of the element tree from a flat list of comparison unit atoms.
  //
  // The atoms are grouped by the unid of their ancestor at the given level; each group is turned
  // back into that ancestor element (paragraph, run, text, drawing, math, table, ...), recursing
  // to level + 1 for the ancestor's children.
  //
  // part     - the part of the document being produced; passed through to the recursion and used
  //            to locate the package parts when related parts of a drawing have to be copied.
  // list     - the atoms to coalesce.
  // level    - index into each atom's AncestorElements / AncestorUnids arrays.
  // settings - supplies the author and date written on w:ins / w:del wrappers.
  //
  // Returns null when no atom has an ancestor at this level; otherwise a list whose items are
  // elements or (nested) sequences of elements, suitable to be passed as XElement content.
  private static object CoalesceRecurse(OpenXmlPart part, IEnumerable<ComparisonUnitAtom> list, int level, WmlComparerSettings settings)
  {
      // Materialized once: the groups are inspected for emptiness and then projected below.
      var grouped = list
          .GroupBy(ca =>
          {
              if (level >= ca.AncestorElements.Length)
                  return "";
              return ca.AncestorUnids[level];
          })
          .Where(g => g.Key != "")
          .ToList();

      // if there are no deeper children, then we're done.
      if (grouped.Count == 0)
          return null;

      // Debugging aid, enabled only when s_False is set: dumps the groups via DocxComparerUtil.NotePad.
      if (s_False)
      {
          var sb = new StringBuilder();
          foreach (var group in grouped)
          {
              sb.AppendFormat("Group Key: {0}", group.Key);
              sb.Append(Environment.NewLine);
              foreach (var groupChildItem in group)
              {
                  sb.Append(" ");
                  sb.Append(groupChildItem.ToString(0));
                  sb.Append(Environment.NewLine);
              }
              sb.Append(Environment.NewLine);
          }
          var sbs = sb.ToString();
          DocxComparerUtil.NotePad(sbs);
      }

      var elementList = grouped
          .Select(g =>
          {
              var ancestorBeingConstructed = g.First().AncestorElements[level]; // these will all be the same, by definition

              // need to group by corr stat
              // Key format is "<unid of ancestor at level + 1, or empty>|<correlation status>".
              // Anything at or below a w:txbxContent is always keyed as Equal.
              var groupedChildren = g
                  .GroupAdjacent(gc =>
                  {
                      var key = "";
                      if (level < (gc.AncestorElements.Length - 1))
                      {
                          key = gc.AncestorUnids[level + 1];
                      }
                      if (gc.AncestorElements.Skip(level).Any(ae => ae.Name == W.txbxContent))
                          key += "|" + CorrelationStatus.Equal.ToString();
                      else
                          key += "|" + gc.CorrelationStatus.ToString();
                      return key;
                  })
                  .ToList();

              if (ancestorBeingConstructed.Name == W.p)
              {
                  var newChildElements = groupedChildren
                      .Select(gc =>
                      {
                          var spl = gc.Key.Split('|');
                          // An empty unid means the atoms are direct content of this paragraph:
                          // copy them, tagging deleted / inserted ones with a status attribute.
                          if (spl[0] == "")
                              return (object)gc.Select(gcc =>
                              {
                                  var dup = new XElement(gcc.ContentElement);
                                  if (spl[1] == "Deleted")
                                      dup.Add(new XAttribute(PtOpenXml.Status, "Deleted"));
                                  else if (spl[1] == "Inserted")
                                      dup.Add(new XAttribute(PtOpenXml.Status, "Inserted"));
                                  return dup;
                              });
                          else
                          {
                              return CoalesceRecurse(part, gc, level + 1, settings);
                          }
                      })
                      .ToList();

                  var newPara = new XElement(W.p,
                      ancestorBeingConstructed.Attributes().Where(a => a.Name.Namespace != PtOpenXml.pt),
                      new XAttribute(PtOpenXml.Unid, g.Key),
                      newChildElements);
                  return newPara;
              }

              if (ancestorBeingConstructed.Name == W.r)
              {
                  var newChildElements = groupedChildren
                      .Select(gc =>
                      {
                          var spl = gc.Key.Split('|');
                          // Same rule as for paragraphs: an empty unid means direct content of this run.
                          if (spl[0] == "")
                              return (object)gc.Select(gcc =>
                              {
                                  var dup = new XElement(gcc.ContentElement);
                                  if (spl[1] == "Deleted")
                                      dup.Add(new XAttribute(PtOpenXml.Status, "Deleted"));
                                  else if (spl[1] == "Inserted")
                                      dup.Add(new XAttribute(PtOpenXml.Status, "Inserted"));
                                  return dup;
                              });
                          else
                          {
                              return CoalesceRecurse(part, gc, level + 1, settings);
                          }
                      })
                      .ToList();

                  XElement rPr = ancestorBeingConstructed.Element(W.rPr);
                  var newRun = new XElement(W.r,
                      ancestorBeingConstructed.Attributes().Where(a => a.Name.Namespace != PtOpenXml.pt),
                      rPr,
                      newChildElements);
                  return newRun;
              }

              if (ancestorBeingConstructed.Name == W.t)
              {
                  // Each adjacent run of atoms with the same status becomes one text element:
                  // w:delText for deleted text, w:t otherwise.
                  var newChildElements = groupedChildren
                      .Select(gc =>
                      {
                          var textOfTextElement = gc.Select(gce => gce.ContentElement.Value).StringConcatenate();
                          var del = gc.First().CorrelationStatus == CorrelationStatus.Deleted;
                          var ins = gc.First().CorrelationStatus == CorrelationStatus.Inserted;
                          if (del)
                              return (object)(new XElement(W.delText,
                                  new XAttribute(PtOpenXml.Status, "Deleted"),
                                  GetXmlSpaceAttribute(textOfTextElement),
                                  textOfTextElement));
                          else if (ins)
                              return (object)(new XElement(W.t,
                                  new XAttribute(PtOpenXml.Status, "Inserted"),
                                  GetXmlSpaceAttribute(textOfTextElement),
                                  textOfTextElement));
                          else
                              return (object)(new XElement(W.t,
                                  GetXmlSpaceAttribute(textOfTextElement),
                                  textOfTextElement));
                      })
                      .ToList();
                  return newChildElements;
              }

              if (ancestorBeingConstructed.Name == W.drawing)
              {
                  var newChildElements = groupedChildren
                      .Select(gc =>
                      {
                          var del = gc.First().CorrelationStatus == CorrelationStatus.Deleted;
                          var ins = gc.First().CorrelationStatus == CorrelationStatus.Inserted;

                          // Each atom is cloned and has its related parts copied exactly once.
                          // (Previously the group was re-enumerated inside its own projection, which
                          // for a group of n atoms processed the same clone n times.)
                          if (del)
                              return (object)gc.Select(gcc =>
                                  CloneDrawingWithRelatedParts(part, gc.First().Part, gcc.ContentElement, "Deleted"));
                          else if (ins)
                              return gc.Select(gcc =>
                                  CloneDrawingWithRelatedParts(part, gc.First().Part, gcc.ContentElement, "Inserted"));
                          else
                              return gc.Select(gcc => gcc.ContentElement);
                      })
                      .ToList();
                  return newChildElements;
              }

              if (ancestorBeingConstructed.Name == M.oMath || ancestorBeingConstructed.Name == M.oMathPara)
              {
                  // Math content is wrapped directly in w:del / w:ins revision elements.
                  var newChildElements = groupedChildren
                      .Select(gc =>
                      {
                          var del = gc.First().CorrelationStatus == CorrelationStatus.Deleted;
                          var ins = gc.First().CorrelationStatus == CorrelationStatus.Inserted;
                          if (del)
                          {
                              return gc.Select(gcc =>
                              {
                                  return new XElement(W.del,
                                      new XAttribute(W.author, settings.AuthorForRevisions),
                                      new XAttribute(W.id, s_MaxId++),
                                      new XAttribute(W.date, settings.DateTimeForRevisions),
                                      gcc.ContentElement);
                              });
                          }
                          else if (ins)
                          {
                              return gc.Select(gcc =>
                              {
                                  return new XElement(W.ins,
                                      new XAttribute(W.author, settings.AuthorForRevisions),
                                      new XAttribute(W.id, s_MaxId++),
                                      new XAttribute(W.date, settings.DateTimeForRevisions),
                                      gcc.ContentElement);
                              });
                          }
                          else
                          {
                              return gc.Select(gcc => gcc.ContentElement);
                          }
                      })
                      .ToList();
                  return newChildElements;
              }

              if (AllowableRunChildren.Contains(ancestorBeingConstructed.Name))
              {
                  // Deleted / inserted run children are re-created from the ancestor's name and
                  // non-PowerTools attributes only, plus a status attribute.
                  var newChildElements = groupedChildren
                      .Select(gc =>
                      {
                          var del = gc.First().CorrelationStatus == CorrelationStatus.Deleted;
                          var ins = gc.First().CorrelationStatus == CorrelationStatus.Inserted;
                          if (del)
                          {
                              return gc.Select(gcc =>
                              {
                                  var dup = new XElement(ancestorBeingConstructed.Name,
                                      ancestorBeingConstructed.Attributes().Where(a => a.Name.Namespace != PtOpenXml.pt),
                                      new XAttribute(PtOpenXml.Status, "Deleted"));
                                  return dup;
                              });
                          }
                          else if (ins)
                          {
                              return gc.Select(gcc =>
                              {
                                  var dup = new XElement(ancestorBeingConstructed.Name,
                                      ancestorBeingConstructed.Attributes().Where(a => a.Name.Namespace != PtOpenXml.pt),
                                      new XAttribute(PtOpenXml.Status, "Inserted"));
                                  return dup;
                              });
                          }
                          else
                          {
                              return gc.Select(gcc => gcc.ContentElement);
                          }
                      })
                      .ToList();
                  return newChildElements;
              }

              // Container elements: keep the listed property children, rebuild everything else recursively.
              if (ancestorBeingConstructed.Name == W.tbl)
                  return ReconstructElement(part, g, ancestorBeingConstructed, W.tblPr, W.tblGrid, null, level, settings);
              if (ancestorBeingConstructed.Name == W.tr)
                  return ReconstructElement(part, g, ancestorBeingConstructed, W.trPr, null, null, level, settings);
              if (ancestorBeingConstructed.Name == W.tc)
                  return ReconstructElement(part, g, ancestorBeingConstructed, W.tcPr, null, null, level, settings);
              if (ancestorBeingConstructed.Name == W.sdt)
                  return ReconstructElement(part, g, ancestorBeingConstructed, W.sdtPr, W.sdtEndPr, null, level, settings);
              if (ancestorBeingConstructed.Name == W.pict)
                  return ReconstructElement(part, g, ancestorBeingConstructed, VML.shapetype, null, null, level, settings);
              if (ancestorBeingConstructed.Name == VML.shape)
                  return ReconstructElement(part, g, ancestorBeingConstructed, W10.wrap, null, null, level, settings);
              if (ancestorBeingConstructed.Name == W._object)
                  return ReconstructElement(part, g, ancestorBeingConstructed, VML.shapetype, VML.shape, O.OLEObject, level, settings);
              if (ancestorBeingConstructed.Name == W.ruby)
                  return ReconstructElement(part, g, ancestorBeingConstructed, W.rubyPr, null, null, level, settings);
              return (object)ReconstructElement(part, g, ancestorBeingConstructed, null, null, null, level, settings);
          })
          .ToList();
      return elementList;
  }

  // Clones a drawing element, tags the clone with the given PtOpenXml.Status value, and copies the
  // parts it references from the source document's package into the new document's package.
  // Both package parts are looked up by the URI of the part being produced.
  private static XElement CloneDrawingWithRelatedParts(OpenXmlPart part, OpenXmlPart sourcePart, XElement drawing, string status)
  {
      var newDrawing = new XElement(drawing);
      newDrawing.Add(new XAttribute(PtOpenXml.Status, status));

      Package packageOfSourceContent = sourcePart.OpenXmlPackage.Package;
      Package packageOfNewContent = part.OpenXmlPackage.Package;
      PackagePart partInSourceDocument = packageOfSourceContent.GetPart(part.Uri);
      PackagePart partInNewDocument = packageOfNewContent.GetPart(part.Uri);
      return MoveRelatedPartsToDestination(partInSourceDocument, partInNewDocument, newDrawing);
  }
  // Copies every part referenced from contentElement out of the source package and into the
  // destination package, and rewrites the referencing attributes to point at the copies.
  //
  // For each descendant of contentElement (C.externalData elements excepted) that carries an
  // attribute named in ComparisonUnitWord.s_RelationshipAttributeNames:
  //   - the relationship with that id is looked up on partOfDeletedContent;
  //   - the target part is copied into partInNewDocument's package under a fresh GUID-based name
  //     (keeping the extension when the file name has the form "name.ext");
  //   - a relationship with a fresh id is created from partInNewDocument to the copy, and the
  //     attribute is set to that id;
  //   - if the copy's content type ends in "xml", the same processing is applied to it recursively.
  // Relationships whose target cannot be resolved to a part URI are skipped
  // (presumably external targets; confirm against the documents being handled).
  //
  // Returns contentElement, updated in place.
  private static XElement MoveRelatedPartsToDestination(PackagePart partOfDeletedContent, PackagePart partInNewDocument,
      XElement contentElement)
  {
      var elementsToUpdate = contentElement
          .Descendants()
          .Where(d => d.Attributes().Any(a => ComparisonUnitWord.s_RelationshipAttributeNames.Contains(a.Name)))
          .Where(d => d.Name != C.externalData)
          .ToList();
      foreach (var element in elementsToUpdate)
      {
          var attributesToUpdate = element
              .Attributes()
              .Where(a => ComparisonUnitWord.s_RelationshipAttributeNames.Contains(a.Name))
              .ToList();
          foreach (var att in attributesToUpdate)
          {
              var rId = (string)att;

              var relationshipForDeletedPart = partOfDeletedContent.GetRelationship(rId);
              if (relationshipForDeletedPart == null)
                  throw new FileFormatException("Invalid document");

              var targetString = relationshipForDeletedPart.TargetUri.ToString();
              Uri targetUri;
              try
              {
                  targetUri = PackUriHelper
                      .ResolvePartUri(
                          new Uri(partOfDeletedContent.Uri.ToString(), UriKind.RelativeOrAbsolute),
                          new Uri(targetString, UriKind.RelativeOrAbsolute));
              }
              catch (System.ArgumentException)
              {
                  // Not resolvable to a part inside the package: nothing to copy.
                  targetUri = null;
              }
              if (targetUri == null)
                  continue;

              var relatedPackagePart = partOfDeletedContent.Package.GetPart(targetUri);

              // Build a unique name in the same folder as the source part.
              var uriSplit = relatedPackagePart.Uri.ToString().Split('/');
              var last = uriSplit[uriSplit.Length - 1].Split('.');
              string uriString = null;
              if (last.Length == 2)
              {
                  uriString = uriSplit.PtSkipLast(1).Select(p => p + "/").StringConcatenate() +
                      "P" + Guid.NewGuid().ToString().Replace("-", "") + "." + last[1];
              }
              else
              {
                  uriString = uriSplit.PtSkipLast(1).Select(p => p + "/").StringConcatenate() +
                      "P" + Guid.NewGuid().ToString().Replace("-", "");
              }
              Uri uri = null;
              if (relatedPackagePart.Uri.IsAbsoluteUri)
                  uri = new Uri(uriString, UriKind.Absolute);
              else
                  uri = new Uri(uriString, UriKind.Relative);

              var newPart = partInNewDocument.Package.CreatePart(uri, relatedPackagePart.ContentType);
              using (var oldPartStream = relatedPackagePart.GetStream())
              using (var newPartStream = newPart.GetStream())
                  FileUtils.CopyStream(oldPartStream, newPartStream);

              var newRid = "R" + Guid.NewGuid().ToString().Replace("-", "");
              partInNewDocument.CreateRelationship(newPart.Uri, TargetMode.Internal, relationshipForDeletedPart.RelationshipType, newRid);
              att.Value = newRid;

              if (newPart.ContentType.EndsWith("xml", StringComparison.Ordinal))
              {
                  // Load completely and release the stream before recursing, so no stream on
                  // this part is held open while further parts are created and written.
                  XDocument newPartXDoc = null;
                  using (var stream = newPart.GetStream())
                      newPartXDoc = XDocument.Load(stream);

                  MoveRelatedPartsToDestination(relatedPackagePart, newPart, newPartXDoc.Root);

                  // FileMode.Create truncates the part first. Writing over the copied bytes without
                  // truncating would leave stale trailing bytes whenever the new serialization is
                  // shorter than the original, producing malformed XML.
                  using (var stream = newPart.GetStream(FileMode.Create, FileAccess.Write))
                      newPartXDoc.Save(stream);
              }
          }
      }
      return contentElement;
  }
  // Returns an xml:space="preserve" attribute when the text begins or ends with white space,
  // so that the white space survives when the text element is written; returns null otherwise.
  // A null result is simply ignored when passed as XElement content.
  // Null or empty text has no boundary characters to inspect and also yields null.
  private static XAttribute GetXmlSpaceAttribute(string textOfTextElement)
  {
      // Guard first: indexing an empty (or null) string below would throw.
      if (string.IsNullOrEmpty(textOfTextElement))
          return null;

      if (char.IsWhiteSpace(textOfTextElement[0]) ||
          char.IsWhiteSpace(textOfTextElement[textOfTextElement.Length - 1]))
          return new XAttribute(XNamespace.Xml + "space", "preserve");
      return null;
  }
  /// <summary>
  /// Builds a new element with the same name and attributes as
  /// <paramref name="ancestorBeingConstructed"/>. Its content is, in order: the child
  /// elements of the ancestor matching each non-null property name, then whatever
  /// CoalesceRecurse produces for the group at the next level.
  /// </summary>
  /// <param name="part">Passed through to CoalesceRecurse.</param>
  /// <param name="g">The group of atoms passed through to CoalesceRecurse.</param>
  /// <param name="ancestorBeingConstructed">Element whose name, attributes and property children are copied.</param>
  /// <param name="props1XName">Name of the first kind of property child to copy, or null for none.</param>
  /// <param name="props2XName">Name of the second kind of property child to copy, or null for none.</param>
  /// <param name="props3XName">Name of the third kind of property child to copy, or null for none.</param>
  /// <param name="level">Current level; CoalesceRecurse is invoked with level + 1.</param>
  /// <param name="settings">Passed through to CoalesceRecurse.</param>
  /// <returns>The newly constructed element.</returns>
  private static XElement ReconstructElement(OpenXmlPart part, IGrouping<string, ComparisonUnitAtom> g, XElement ancestorBeingConstructed, XName props1XName,
      XName props2XName, XName props3XName, int level, WmlComparerSettings settings)
  {
      // Recurse first so the child content is ready before the element is assembled.
      var rebuiltChildren = CoalesceRecurse(part, g, level + 1, settings);

      // A null property name means "no such property element"; null content is ignored by XElement.
      object firstProps = props1XName == null
          ? null
          : (object)ancestorBeingConstructed.Elements(props1XName);
      object secondProps = props2XName == null
          ? null
          : (object)ancestorBeingConstructed.Elements(props2XName);
      object thirdProps = props3XName == null
          ? null
          : (object)ancestorBeingConstructed.Elements(props3XName);

      return new XElement(
          ancestorBeingConstructed.Name,
          ancestorBeingConstructed.Attributes(),
          firstProps,
          secondProps,
          thirdProps,
          rebuiltChildren);
  }
  /// <summary>
  /// Checks whether two sequences of comparison units share no group hashes at all.
  /// When both sides contain at least four groups and no SHA1Hash value of a group on
  /// one side appears on the other, the whole of <paramref name="cu1"/> is reported as
  /// deleted and the whole of <paramref name="cu2"/> as inserted.
  /// </summary>
  /// <param name="cu1">Comparison units of the first source.</param>
  /// <param name="cu2">Comparison units of the second source.</param>
  /// <param name="settings">Not used by this method.</param>
  /// <returns>
  /// A two-item list (Deleted, then Inserted) when the sources look unrelated; otherwise null.
  /// </returns>
  private static List<CorrelatedSequence> DetectUnrelatedSources(ComparisonUnit[] cu1, ComparisonUnit[] cu2, WmlComparerSettings settings)
  {
      // The check only applies when each side has more than three groups.
      if (!cu1.OfType<ComparisonUnitGroup>().Skip(3).Any())
          return null;
      if (!cu2.OfType<ComparisonUnitGroup>().Skip(3).Any())
          return null;

      var groupHashes1 = cu1.OfType<ComparisonUnitGroup>().Select(grp => grp.SHA1Hash).ToList();
      var groupHashes2 = cu2.OfType<ComparisonUnitGroup>().Select(grp => grp.SHA1Hash).ToList();

      // Any shared hash means the sources are related; nothing to report.
      if (groupHashes1.Intersect(groupHashes2).Any())
          return null;

      return new List<CorrelatedSequence>
      {
          new CorrelatedSequence
          {
              CorrelationStatus = CorrelationStatus.Deleted,
              ComparisonUnitArray1 = cu1,
              ComparisonUnitArray2 = null,
          },
          new CorrelatedSequence
          {
              CorrelationStatus = CorrelationStatus.Inserted,
              ComparisonUnitArray1 = null,
              ComparisonUnitArray2 = cu2,
          },
      };
  }
  /// <summary>
  /// Repeatedly refines a list of correlated sequences until none has status Unknown.
  /// The list starts as a single Unknown sequence spanning both inputs. On each pass the
  /// first Unknown sequence is replaced, in place, by the sequences produced by the first
  /// of ProcessCorrelatedHashes, FindCommonAtBeginningAndEnd and DoLcsAlgorithm that
  /// returns a non-null result.
  /// </summary>
  /// <param name="cu1">Comparison units of the first source.</param>
  /// <param name="cu2">Comparison units of the second source.</param>
  /// <param name="settings">Comparer settings, passed through to the helper routines.</param>
  /// <returns>The list of correlated sequences, containing no Unknown entries.</returns>
  private static List<CorrelatedSequence> Lcs(ComparisonUnit[] cu1, ComparisonUnit[] cu2, WmlComparerSettings settings)
  {
      // Initial state: one Unknown sequence whose contents are the entire sequences on both sides.
      var csList = new List<CorrelatedSequence>
      {
          new CorrelatedSequence
          {
              CorrelationStatus = CorrelationStatus.Unknown,
              ComparisonUnitArray1 = cu1,
              ComparisonUnitArray2 = cu2,
          },
      };

      while (true)
      {
          if (s_False)
          {
              var sb = new StringBuilder();
              foreach (var item in csList)
                  sb.Append(item.ToString()).Append(Environment.NewLine);
              var sbs = sb.ToString();
              DocxComparerUtil.NotePad(sbs);
          }

          // Locate the first Unknown once; the index is reused for removal and insertion
          // instead of re-scanning the list with IndexOf and Remove.
          int indexOfUnknown = csList.FindIndex(z => z.CorrelationStatus == CorrelationStatus.Unknown);
          if (indexOfUnknown < 0)
              return csList;

          var unknown = csList[indexOfUnknown];

          // If the unknown consists of a single group of the same type on each side, some
          // Unids in the 'after' document can be set:
          //   - a pair of single tables: the table Unid
          //   - a pair of single rows: the table and row Unids
          //   - a pair of single cells: the table, row, and cell Unids
          //   - a pair of paragraphs: the paragraph (and all ancestor) Unids
          SetAfterUnids(unknown);

          if (s_False)
          {
              var sb = new StringBuilder();
              sb.Append(unknown.ToString());
              var sbs = sb.ToString();
              DocxComparerUtil.NotePad(sbs);
          }

          // Try each strategy in turn; the first non-null result wins.
          List<CorrelatedSequence> newSequence =
              ProcessCorrelatedHashes(unknown, settings)
              ?? FindCommonAtBeginningAndEnd(unknown, settings)
              ?? DoLcsAlgorithm(unknown, settings);

          // Replace the Unknown with its refinement, preserving the refinement's order.
          csList.RemoveAt(indexOfUnknown);
          csList.InsertRange(indexOfUnknown, newSequence);
      }
  }
  /// <summary>
  /// When an Unknown sequence holds exactly one group on each side and both groups have
  /// the same group type, copies Unid values from the first side onto the second side.
  /// The Unids are read from the ancestors of the first content atom of the first group,
  /// up to and including the ancestor whose element name corresponds to the group type.
  /// They are then written, position by position, onto the leading ancestors of every
  /// content atom of the second group. Ancestors named w:footnotes or w:endnotes on the
  /// second side are left untouched.
  /// </summary>
  /// <param name="unknown">The correlated sequence to inspect; it is not modified itself.</param>
  /// <exception cref="OpenXmlPowerToolsException">
  /// Thrown ("Internal error") when the group type has no associated element name, or
  /// when a required Unid attribute is missing.
  /// </exception>
  private static void SetAfterUnids(CorrelatedSequence unknown)
  {
      // Only a one-to-one pairing can be correlated here.
      if (unknown.ComparisonUnitArray1.Length != 1 || unknown.ComparisonUnitArray2.Length != 1)
          return;

      var beforeGroup = unknown.ComparisonUnitArray1[0] as ComparisonUnitGroup;
      var afterGroup = unknown.ComparisonUnitArray2[0] as ComparisonUnitGroup;
      if (beforeGroup == null || afterGroup == null)
          return;
      if (beforeGroup.ComparisonUnitGroupType != afterGroup.ComparisonUnitGroupType)
          return;

      var beforeAtoms = beforeGroup.DescendantContentAtoms();
      var afterAtoms = afterGroup.DescendantContentAtoms();

      // Element name at which the walk over the ancestors stops (inclusive).
      XName boundaryName;
      switch (beforeGroup.ComparisonUnitGroupType)
      {
          case ComparisonUnitGroupType.Paragraph:
              boundaryName = W.p;
              break;
          case ComparisonUnitGroupType.Table:
              boundaryName = W.tbl;
              break;
          case ComparisonUnitGroupType.Row:
              boundaryName = W.tr;
              break;
          case ComparisonUnitGroupType.Cell:
              boundaryName = W.tc;
              break;
          case ComparisonUnitGroupType.Textbox:
              boundaryName = W.txbxContent;
              break;
          default:
              throw new OpenXmlPowerToolsException("Internal error");
      }

      // Collect the Unids of the ancestors of the first atom, through the boundary element.
      var sourceUnids = new List<string>();
      foreach (var ancestor in beforeAtoms.First().AncestorElements)
      {
          var unidValue = (string)ancestor.Attribute(PtOpenXml.Unid);
          if (unidValue == null)
              throw new OpenXmlPowerToolsException("Internal error");
          sourceUnids.Add(unidValue);
          if (ancestor.Name == boundaryName)
              break;
      }

      // Stamp those Unids onto the matching ancestor positions of every atom on the other side.
      foreach (var atom in afterAtoms)
      {
          var position = 0;
          foreach (var ancestor in atom.AncestorElements.Take(sourceUnids.Count))
          {
              var replacementUnid = sourceUnids[position];
              position++;

              if (ancestor.Name == W.footnotes || ancestor.Name == W.endnotes)
                  continue;

              var unidAttribute = ancestor.Attribute(PtOpenXml.Unid);
              if (unidAttribute == null)
                  throw new OpenXmlPowerToolsException("Internal error");
              unidAttribute.Value = replacementUnid;
          }
      }
  }
  /// <summary>
  /// Attempts to split an Unknown sequence around a run of consecutive groups whose
  /// CorrelatedSHA1Hash values match on both sides. Among all runs, the one with the
  /// largest total DescendantContentAtomsCount (measured on the first side) is chosen.
  /// The run is accepted when it is longer than three groups, or when it is shorter
  /// and contains enough content atoms on both sides (more than 16 for a single group,
  /// more than 32 for two or three groups).
  /// </summary>
  /// <param name="unknown">The Unknown correlated sequence to examine.</param>
  /// <param name="settings">Not used by this method.</param>
  /// <returns>
  /// Null when the optimization does not apply or no acceptable run is found. Otherwise a
  /// list holding: an optional leading sequence (Deleted, Inserted or Unknown) for content
  /// before the run, one Unknown sequence per matched pair of groups, and an optional
  /// trailing sequence for content after the run.
  /// </returns>
  private static List<CorrelatedSequence> ProcessCorrelatedHashes(CorrelatedSequence unknown, WmlComparerSettings settings)
  {
      var left = unknown.ComparisonUnitArray1;
      var right = unknown.ComparisonUnitArray2;

      // Never attempt this optimization if there are fewer than 3 units on either side.
      if (Math.Min(left.Length, right.Length) < 3)
          return null;

      var leftHead = left.FirstOrDefault() as ComparisonUnitGroup;
      var rightHead = right.FirstOrDefault() as ComparisonUnitGroup;
      if (leftHead == null || rightHead == null)
          return null;

      // Both sides must start with a paragraph, table or row group.
      var leftHeadType = leftHead.ComparisonUnitGroupType;
      bool leftEligible =
          leftHeadType == ComparisonUnitGroupType.Paragraph ||
          leftHeadType == ComparisonUnitGroupType.Table ||
          leftHeadType == ComparisonUnitGroupType.Row;
      if (!leftEligible)
          return null;

      var rightHeadType = rightHead.ComparisonUnitGroupType;
      bool rightEligible =
          rightHeadType == ComparisonUnitGroupType.Paragraph ||
          rightHeadType == ComparisonUnitGroupType.Table ||
          rightHeadType == ComparisonUnitGroupType.Row;
      if (!rightEligible)
          return null;

      // The groups may not all be correlated: there may be changes that were not tracked,
      // or irregularities in the change tracking may cause a mismatch in the number of
      // paragraphs. So every pair of starting positions is examined, looking for the run
      // of correlated groups that carries the most content atoms.
      int bestLength = 0;
      int bestAtomCount = 0;
      int bestStart1 = -1;
      int bestStart2 = -1;
      for (int start1 = 0; start1 < left.Length; start1++)
      {
          for (int start2 = 0; start2 < right.Length; start2++)
          {
              int runLength = 0;
              int runAtomCount = 0;
              int pos1 = start1;
              int pos2 = start2;

              // Extend the run while both sides hold groups of the same type with
              // equal, non-null correlated hashes.
              while (pos1 < left.Length && pos2 < right.Length)
              {
                  var candidate1 = left[pos1] as ComparisonUnitGroup;
                  var candidate2 = right[pos2] as ComparisonUnitGroup;
                  if (candidate1 == null || candidate2 == null)
                      break;
                  if (candidate1.ComparisonUnitGroupType != candidate2.ComparisonUnitGroupType)
                      break;
                  if (candidate1.CorrelatedSHA1Hash == null || candidate2.CorrelatedSHA1Hash == null)
                      break;
                  if (candidate1.CorrelatedSHA1Hash != candidate2.CorrelatedSHA1Hash)
                      break;

                  runAtomCount += left[pos1].DescendantContentAtomsCount;
                  pos1++;
                  pos2++;
                  runLength++;
              }

              // Only a strictly larger atom count replaces the current best.
              if (runAtomCount > bestAtomCount)
              {
                  bestLength = runLength;
                  bestAtomCount = runAtomCount;
                  bestStart1 = start1;
                  bestStart2 = start2;
              }
          }
      }

      // Threshold: short runs must carry enough content on both sides to be trusted.
      bool acceptRun;
      if (bestLength > 3)
      {
          acceptRun = true;
      }
      else if (bestLength >= 1)
      {
          int minimumAtoms = bestLength == 1 ? 16 : 32;
          int atomsInRun1 = left
              .Skip(bestStart1)
              .Take(bestLength)
              .Sum(unit => unit.DescendantContentAtoms().Count());
          int atomsInRun2 = right
              .Skip(bestStart2)
              .Take(bestLength)
              .Sum(unit => unit.DescendantContentAtoms().Count());
          acceptRun = atomsInRun1 > minimumAtoms && atomsInRun2 > minimumAtoms;
      }
      else
      {
          acceptRun = false;
      }

      if (!acceptRun)
          return null;

      Func<CorrelationStatus, ComparisonUnit[], ComparisonUnit[], CorrelatedSequence> makeSequence =
          (status, units1, units2) => new CorrelatedSequence
          {
              CorrelationStatus = status,
              ComparisonUnitArray1 = units1,
              ComparisonUnitArray2 = units2,
          };

      var result = new List<CorrelatedSequence>();

      // Content preceding the run: Unknown if both sides have some, otherwise
      // Deleted (left only) or Inserted (right only); nothing if neither has any.
      var leading1 = left.Take(bestStart1).ToArray();
      var leading2 = right.Take(bestStart2).ToArray();
      if (leading1.Length > 0 && leading2.Length > 0)
          result.Add(makeSequence(CorrelationStatus.Unknown, leading1, leading2));
      else if (leading1.Length > 0)
          result.Add(makeSequence(CorrelationStatus.Deleted, leading1, null));
      else if (leading2.Length > 0)
          result.Add(makeSequence(CorrelationStatus.Inserted, null, leading2));

      // Each correlated pair of groups gets its own one-to-one Unknown sequence.
      for (int offset = 0; offset < bestLength; offset++)
      {
          result.Add(makeSequence(
              CorrelationStatus.Unknown,
              new ComparisonUnit[] { left[bestStart1 + offset] },
              new ComparisonUnit[] { right[bestStart2 + offset] }));
      }

      // Content following the run, classified the same way as the leading content.
      var trailing1 = left.Skip(bestStart1 + bestLength).ToArray();
      var trailing2 = right.Skip(bestStart2 + bestLength).ToArray();
      if (trailing1.Length > 0 && trailing2.Length > 0)
          result.Add(makeSequence(CorrelationStatus.Unknown, trailing1, trailing2));
      else if (trailing1.Length > 0)
          result.Add(makeSequence(CorrelationStatus.Deleted, trailing1, null));
      else if (trailing2.Length > 0)
          result.Add(makeSequence(CorrelationStatus.Inserted, null, trailing2));

      return result;
  }
  4771. private static List<CorrelatedSequence> DoLcsAlgorithm(CorrelatedSequence unknown, WmlComparerSettings settings)
  4772. {
  4773. var newListOfCorrelatedSequence = new List<CorrelatedSequence>();
  4774. var cul1 = unknown.ComparisonUnitArray1;
  4775. var cul2 = unknown.ComparisonUnitArray2;
  4776. // first thing to do - if we have an unknown with zero length on left or right side, create appropriate
  4777. // this is a code optimization that enables easier processing of cases elsewhere.
  4778. if (cul1.Length > 0 && cul2.Length == 0)
  4779. {
  4780. var deletedCorrelatedSequence = new CorrelatedSequence();
  4781. deletedCorrelatedSequence.CorrelationStatus = CorrelationStatus.Deleted;
  4782. deletedCorrelatedSequence.ComparisonUnitArray1 = cul1;
  4783. deletedCorrelatedSequence.ComparisonUnitArray2 = null;
  4784. newListOfCorrelatedSequence.Add(deletedCorrelatedSequence);
  4785. return newListOfCorrelatedSequence;
  4786. }
  4787. else if (cul1.Length == 0 && cul2.Length > 0)
  4788. {
  4789. var insertedCorrelatedSequence = new CorrelatedSequence();
  4790. insertedCorrelatedSequence.CorrelationStatus = CorrelationStatus.Inserted;
  4791. insertedCorrelatedSequence.ComparisonUnitArray1 = null;
  4792. insertedCorrelatedSequence.ComparisonUnitArray2 = cul2;
  4793. newListOfCorrelatedSequence.Add(insertedCorrelatedSequence);
  4794. return newListOfCorrelatedSequence;
  4795. }
  4796. else if (cul1.Length == 0 && cul2.Length == 0)
  4797. {
  4798. return newListOfCorrelatedSequence; // this will effectively remove the unknown with no data on either side from the current data model.
  4799. }
  4800. int currentLongestCommonSequenceLength = 0;
  4801. int currentI1 = -1;
  4802. int currentI2 = -1;
  4803. for (int i1 = 0; i1 < cul1.Length - currentLongestCommonSequenceLength; i1++)
  4804. {
  4805. for (int i2 = 0; i2 < cul2.Length - currentLongestCommonSequenceLength; i2++)
  4806. {
  4807. var thisSequenceLength = 0;
  4808. var thisI1 = i1;
  4809. var thisI2 = i2;
  4810. while (true)
  4811. {
  4812. if (cul1[thisI1].SHA1Hash == cul2[thisI2].SHA1Hash)
  4813. {
  4814. thisI1++;
  4815. thisI2++;
  4816. thisSequenceLength++;
  4817. if (thisI1 == cul1.Length || thisI2 == cul2.Length)
  4818. {
  4819. if (thisSequenceLength > currentLongestCommonSequenceLength)
  4820. {
  4821. currentLongestCommonSequenceLength = thisSequenceLength;
  4822. currentI1 = i1;
  4823. currentI2 = i2;
  4824. }
  4825. break;
  4826. }
  4827. continue;
  4828. }
  4829. else
  4830. {
  4831. if (thisSequenceLength > currentLongestCommonSequenceLength)
  4832. {
  4833. currentLongestCommonSequenceLength = thisSequenceLength;
  4834. currentI1 = i1;
  4835. currentI2 = i2;
  4836. }
  4837. break;
  4838. }
  4839. }
  4840. }
  4841. }
  4842. // never start a common section with a paragraph mark.
  4843. while (true)
  4844. {
  4845. if (currentLongestCommonSequenceLength <= 1)
  4846. break;
  4847. var firstCommon = cul1[currentI1];
  4848. var firstCommonWord = firstCommon as ComparisonUnitWord;
  4849. if (firstCommonWord == null)
  4850. break;
  4851. // if the word contains more than one atom, then not a paragraph mark
  4852. if (firstCommonWord.Contents.Count() != 1)
  4853. break;
  4854. var firstCommonAtom = firstCommonWord.Contents.First() as ComparisonUnitAtom;
  4855. if (firstCommonAtom == null)
  4856. break;
  4857. if (firstCommonAtom.ContentElement.Name != W.pPr)
  4858. break;
  4859. --currentLongestCommonSequenceLength;
  4860. if (currentLongestCommonSequenceLength == 0)
  4861. {
  4862. currentI1 = -1;
  4863. currentI2 = -1;
  4864. }
  4865. else
  4866. {
  4867. ++currentI1;
  4868. ++currentI2;
  4869. }
  4870. }
  4871. bool isOnlyParagraphMark = false;
  4872. if (currentLongestCommonSequenceLength == 1)
  4873. {
  4874. var firstCommon = cul1[currentI1];
  4875. var firstCommonWord = firstCommon as ComparisonUnitWord;
  4876. if (firstCommonWord != null)
  4877. {
  4878. // if the word contains more than one atom, then not a paragraph mark
  4879. if (firstCommonWord.Contents.Count() == 1)
  4880. {
  4881. var firstCommonAtom = firstCommonWord.Contents.First() as ComparisonUnitAtom;
  4882. if (firstCommonAtom != null)
  4883. {
  4884. if (firstCommonAtom.ContentElement.Name == W.pPr)
  4885. isOnlyParagraphMark = true;
  4886. }
  4887. }
  4888. }
  4889. }
  4890. // don't match just a single character
  4891. if (currentLongestCommonSequenceLength == 1)
  4892. {
  4893. var cuw2 = cul2[currentI2] as ComparisonUnitAtom;
  4894. if (cuw2 != null)
  4895. {
  4896. if (cuw2.ContentElement.Name == W.t && cuw2.ContentElement.Value == " ")
  4897. {
  4898. currentI1 = -1;
  4899. currentI2 = -1;
  4900. currentLongestCommonSequenceLength = 0;
  4901. }
  4902. }
  4903. }
  4904. // don't match only word break characters
  4905. if (currentLongestCommonSequenceLength > 0 && currentLongestCommonSequenceLength <= 3)
  4906. {
  4907. var commonSequence = cul1.Skip(currentI1).Take(currentLongestCommonSequenceLength).ToArray();
  4908. // if they are all ComparisonUnitWord objects
  4909. var oneIsNotWord = commonSequence.Any(cs => (cs as ComparisonUnitWord) == null);
  4910. var allAreWords = !oneIsNotWord;
  4911. if (allAreWords)
  4912. {
  4913. var contentOtherThanWordSplitChars = commonSequence
  4914. .Cast<ComparisonUnitWord>()
  4915. .Any(cs =>
  4916. {
  4917. var otherThanText = cs.DescendantContentAtoms().Any(dca => dca.ContentElement.Name != W.t);
  4918. if (otherThanText)
  4919. return true;
  4920. var otherThanWordSplit = cs
  4921. .DescendantContentAtoms()
  4922. .Any(dca =>
  4923. {
  4924. var charValue = dca.ContentElement.Value;
  4925. var isWordSplit = ((int)charValue[0] >= 0x4e00 && (int)charValue[0] <= 0x9fff);
  4926. if (!isWordSplit)
  4927. isWordSplit = settings.WordSeparators.Contains(charValue[0]);
  4928. if (isWordSplit)
  4929. return false;
  4930. return true;
  4931. });
  4932. return otherThanWordSplit;
  4933. });
  4934. if (!contentOtherThanWordSplitChars)
  4935. {
  4936. currentI1 = -1;
  4937. currentI2 = -1;
  4938. currentLongestCommonSequenceLength = 0;
  4939. }
  4940. }
  4941. }
  4942. // if we are only looking at text, and if the longest common subsequence is less than 15% of the whole, then forget it,
  4943. // don't find that LCS.
  4944. if (!isOnlyParagraphMark && currentLongestCommonSequenceLength > 0)
  4945. {
  4946. var anyButWord1 = cul1.Any(cu => (cu as ComparisonUnitWord) == null);
  4947. var anyButWord2 = cul2.Any(cu => (cu as ComparisonUnitWord) == null);
  4948. if (!anyButWord1 && !anyButWord2)
  4949. {
  4950. var maxLen = Math.Max(cul1.Length, cul2.Length);
  4951. if (((double)currentLongestCommonSequenceLength / (double)maxLen) < settings.DetailThreshold)
  4952. {
  4953. currentI1 = -1;
  4954. currentI2 = -1;
  4955. currentLongestCommonSequenceLength = 0;
  4956. }
  4957. }
  4958. }
  4959. if (currentI1 == -1 && currentI2 == -1)
  4960. {
  4961. var leftLength = unknown.ComparisonUnitArray1.Length;
  4962. var leftTables = unknown.ComparisonUnitArray1.OfType<ComparisonUnitGroup>().Where(l => l.ComparisonUnitGroupType == ComparisonUnitGroupType.Table).Count();
  4963. var leftRows = unknown.ComparisonUnitArray1.OfType<ComparisonUnitGroup>().Where(l => l.ComparisonUnitGroupType == ComparisonUnitGroupType.Row).Count();
  4964. var leftCells = unknown.ComparisonUnitArray1.OfType<ComparisonUnitGroup>().Where(l => l.ComparisonUnitGroupType == ComparisonUnitGroupType.Cell).Count();
  4965. var leftParagraphs = unknown.ComparisonUnitArray1.OfType<ComparisonUnitGroup>().Where(l => l.ComparisonUnitGroupType == ComparisonUnitGroupType.Paragraph).Count();
  4966. var leftTextboxes = unknown.ComparisonUnitArray1.OfType<ComparisonUnitGroup>().Where(l => l.ComparisonUnitGroupType == ComparisonUnitGroupType.Textbox).Count();
  4967. var leftWords = unknown.ComparisonUnitArray1.OfType<ComparisonUnitWord>().Count();
  4968. var rightLength = unknown.ComparisonUnitArray2.Length;
  4969. var rightTables = unknown.ComparisonUnitArray2.OfType<ComparisonUnitGroup>().Where(l => l.ComparisonUnitGroupType == ComparisonUnitGroupType.Table).Count();
  4970. var rightRows = unknown.ComparisonUnitArray2.OfType<ComparisonUnitGroup>().Where(l => l.ComparisonUnitGroupType == ComparisonUnitGroupType.Row).Count();
  4971. var rightCells = unknown.ComparisonUnitArray2.OfType<ComparisonUnitGroup>().Where(l => l.ComparisonUnitGroupType == ComparisonUnitGroupType.Cell).Count();
  4972. var rightParagraphs = unknown.ComparisonUnitArray2.OfType<ComparisonUnitGroup>().Where(l => l.ComparisonUnitGroupType == ComparisonUnitGroupType.Paragraph).Count();
  4973. var rightTextboxes = unknown.ComparisonUnitArray2.OfType<ComparisonUnitGroup>().Where(l => l.ComparisonUnitGroupType == ComparisonUnitGroupType.Textbox).Count();
  4974. var rightWords = unknown.ComparisonUnitArray2.OfType<ComparisonUnitWord>().Count();
  4975. // if either side has both words, rows and text boxes, then we need to separate out into separate unknown correlated sequences
  4976. // group adjacent based on whether word, row, or textbox
  4977. // in most cases, the count of groups will be the same, but they may differ
  4978. // if the first group on either side is word, then create a deleted or inserted corr sequ for it.
  4979. // then have counter on both sides pointing to the first matched pairs of rows
  4980. // create an unknown corr sequ for it.
  4981. // increment both counters
  4982. // if one is at end but the other is not, then tag the remaining content as inserted or deleted, and done.
  4983. // if both are at the end, then done
  4984. // return the new list of corr sequ
  4985. var leftOnlyWordsRowsTextboxes = leftLength == leftWords + leftRows + leftTextboxes;
  4986. var rightOnlyWordsRowsTextboxes = rightLength == rightWords + rightRows + rightTextboxes;
  4987. if ((leftWords > 0 || rightWords > 0) &&
  4988. (leftRows > 0 || rightRows > 0 || leftTextboxes > 0 || rightTextboxes > 0) &&
  4989. (leftOnlyWordsRowsTextboxes && rightOnlyWordsRowsTextboxes))
  4990. {
  4991. var leftGrouped = unknown
  4992. .ComparisonUnitArray1
  4993. .GroupAdjacent(cu =>
  4994. {
  4995. if (cu is ComparisonUnitWord)
  4996. {
  4997. return "Word";
  4998. }
  4999. else
  5000. {
  5001. var cug = cu as ComparisonUnitGroup;
  5002. if (cug.ComparisonUnitGroupType == ComparisonUnitGroupType.Row)
  5003. return "Row";
  5004. if (cug.ComparisonUnitGroupType == ComparisonUnitGroupType.Textbox)
  5005. return "Textbox";
  5006. throw new OpenXmlPowerToolsException("Internal error");
  5007. }
  5008. })
  5009. .ToArray();
  5010. var rightGrouped = unknown
  5011. .ComparisonUnitArray2
  5012. .GroupAdjacent(cu =>
  5013. {
  5014. if (cu is ComparisonUnitWord)
  5015. {
  5016. return "Word";
  5017. }
  5018. else
  5019. {
  5020. var cug = cu as ComparisonUnitGroup;
  5021. if (cug.ComparisonUnitGroupType == ComparisonUnitGroupType.Row)
  5022. return "Row";
  5023. if (cug.ComparisonUnitGroupType == ComparisonUnitGroupType.Textbox)
  5024. return "Textbox";
  5025. throw new OpenXmlPowerToolsException("Internal error");
  5026. }
  5027. })
  5028. .ToArray();
  5029. int iLeft = 0;
  5030. int iRight = 0;
  5031. // create an unknown corr sequ for it.
  5032. // increment both counters
  5033. // if one is at end but the other is not, then tag the remaining content as inserted or deleted, and done.
  5034. // if both are at the end, then done
  5035. // return the new list of corr sequ
  5036. while (true)
  5037. {
  5038. if (leftGrouped[iLeft].Key == rightGrouped[iRight].Key)
  5039. {
  5040. var unknownCorrelatedSequence = new CorrelatedSequence();
  5041. unknownCorrelatedSequence.ComparisonUnitArray1 = leftGrouped[iLeft].ToArray();
  5042. unknownCorrelatedSequence.ComparisonUnitArray2 = rightGrouped[iRight].ToArray();
  5043. unknownCorrelatedSequence.CorrelationStatus = CorrelationStatus.Unknown;
  5044. newListOfCorrelatedSequence.Add(unknownCorrelatedSequence);
  5045. ++iLeft;
  5046. ++iRight;
  5047. }
  5048. // have to decide which of the following two branches to do first based on whether the left contains a paragraph mark
  5049. // i.e. cant insert a string of deleted text right before a table.
  5050. else if (leftGrouped[iLeft].Key == "Word" &&
  5051. leftGrouped[iLeft].Select(lg => lg.DescendantContentAtoms()).SelectMany(m => m).Last().ContentElement.Name != W.pPr &&
  5052. rightGrouped[iRight].Key == "Row")
  5053. {
  5054. var insertedCorrelatedSequence = new CorrelatedSequence();
  5055. insertedCorrelatedSequence.ComparisonUnitArray1 = null;
  5056. insertedCorrelatedSequence.ComparisonUnitArray2 = rightGrouped[iRight].ToArray();
  5057. insertedCorrelatedSequence.CorrelationStatus = CorrelationStatus.Inserted;
  5058. newListOfCorrelatedSequence.Add(insertedCorrelatedSequence);
  5059. ++iRight;
  5060. }
  5061. else if (rightGrouped[iRight].Key == "Word" &&
  5062. rightGrouped[iRight].Select(lg => lg.DescendantContentAtoms()).SelectMany(m => m).Last().ContentElement.Name != W.pPr &&
  5063. leftGrouped[iLeft].Key == "Row")
  5064. {
  5065. var insertedCorrelatedSequence = new CorrelatedSequence();
  5066. insertedCorrelatedSequence.ComparisonUnitArray1 = null;
  5067. insertedCorrelatedSequence.ComparisonUnitArray2 = leftGrouped[iLeft].ToArray();
  5068. insertedCorrelatedSequence.CorrelationStatus = CorrelationStatus.Inserted;
  5069. newListOfCorrelatedSequence.Add(insertedCorrelatedSequence);
  5070. ++iLeft;
  5071. }
  5072. else if (leftGrouped[iLeft].Key == "Word" && rightGrouped[iRight].Key != "Word")
  5073. {
  5074. var deletedCorrelatedSequence = new CorrelatedSequence();
  5075. deletedCorrelatedSequence.ComparisonUnitArray1 = leftGrouped[iLeft].ToArray();
  5076. deletedCorrelatedSequence.ComparisonUnitArray2 = null;
  5077. deletedCorrelatedSequence.CorrelationStatus = CorrelationStatus.Deleted;
  5078. newListOfCorrelatedSequence.Add(deletedCorrelatedSequence);
  5079. ++iLeft;
  5080. }
  5081. else if (leftGrouped[iLeft].Key != "Word" && rightGrouped[iRight].Key == "Word")
  5082. {
  5083. var insertedCorrelatedSequence = new CorrelatedSequence();
  5084. insertedCorrelatedSequence.ComparisonUnitArray1 = null;
  5085. insertedCorrelatedSequence.ComparisonUnitArray2 = rightGrouped[iRight].ToArray();
  5086. insertedCorrelatedSequence.CorrelationStatus = CorrelationStatus.Inserted;
  5087. newListOfCorrelatedSequence.Add(insertedCorrelatedSequence);
  5088. ++iRight;
  5089. }
  5090. if (iLeft == leftGrouped.Length && iRight == rightGrouped.Length)
  5091. return newListOfCorrelatedSequence;
  5092. // if there is content on the left, but not content on the right
  5093. if (iRight == rightGrouped.Length)
  5094. {
  5095. for (int j = iLeft; j < leftGrouped.Length; j++)
  5096. {
  5097. var deletedCorrelatedSequence = new CorrelatedSequence();
  5098. deletedCorrelatedSequence.ComparisonUnitArray1 = leftGrouped[j].ToArray();
  5099. deletedCorrelatedSequence.ComparisonUnitArray2 = null;
  5100. deletedCorrelatedSequence.CorrelationStatus = CorrelationStatus.Deleted;
  5101. newListOfCorrelatedSequence.Add(deletedCorrelatedSequence);
  5102. }
  5103. return newListOfCorrelatedSequence;
  5104. }
  5105. // there is content on the right but not on the left
  5106. else if (iLeft == leftGrouped.Length)
  5107. {
  5108. for (int j = iRight; j < rightGrouped.Length; j++)
  5109. {
  5110. var insertedCorrelatedSequence = new CorrelatedSequence();
  5111. insertedCorrelatedSequence.ComparisonUnitArray1 = null;
  5112. insertedCorrelatedSequence.ComparisonUnitArray2 = rightGrouped[j].ToArray();
  5113. insertedCorrelatedSequence.CorrelationStatus = CorrelationStatus.Inserted;
  5114. newListOfCorrelatedSequence.Add(insertedCorrelatedSequence);
  5115. }
  5116. return newListOfCorrelatedSequence;
  5117. }
  5118. // else continue on next round.
  5119. }
  5120. }
  5121. // if both sides contain tables and paragraphs, then split into multiple unknown corr sequ
  5122. if (leftTables > 0 && rightTables > 0 &&
  5123. leftParagraphs > 0 && rightParagraphs > 0 &&
  5124. (leftLength > 1 || rightLength > 1))
  5125. {
  5126. var leftGrouped = unknown
  5127. .ComparisonUnitArray1
  5128. .GroupAdjacent(cu =>
  5129. {
  5130. var cug = cu as ComparisonUnitGroup;
  5131. if (cug.ComparisonUnitGroupType == ComparisonUnitGroupType.Table)
  5132. return "Table";
  5133. else
  5134. return "Para";
  5135. })
  5136. .ToArray();
  5137. var rightGrouped = unknown
  5138. .ComparisonUnitArray2
  5139. .GroupAdjacent(cu =>
  5140. {
  5141. var cug = cu as ComparisonUnitGroup;
  5142. if (cug.ComparisonUnitGroupType == ComparisonUnitGroupType.Table)
  5143. return "Table";
  5144. else
  5145. return "Para";
  5146. })
  5147. .ToArray();
  5148. int iLeft = 0;
  5149. int iRight = 0;
  5150. // create an unknown corr sequ for it.
  5151. // increment both counters
  5152. // if one is at end but the other is not, then tag the remaining content as inserted or deleted, and done.
  5153. // if both are at the end, then done
  5154. // return the new list of corr sequ
  5155. while (true)
  5156. {
  5157. if ((leftGrouped[iLeft].Key == "Table" && rightGrouped[iRight].Key == "Table") ||
  5158. (leftGrouped[iLeft].Key == "Para" && rightGrouped[iRight].Key == "Para"))
  5159. {
  5160. var unknownCorrelatedSequence = new CorrelatedSequence();
  5161. unknownCorrelatedSequence.ComparisonUnitArray1 = leftGrouped[iLeft].ToArray();
  5162. unknownCorrelatedSequence.ComparisonUnitArray2 = rightGrouped[iRight].ToArray();
  5163. unknownCorrelatedSequence.CorrelationStatus = CorrelationStatus.Unknown;
  5164. newListOfCorrelatedSequence.Add(unknownCorrelatedSequence);
  5165. ++iLeft;
  5166. ++iRight;
  5167. }
  5168. else if (leftGrouped[iLeft].Key == "Para" && rightGrouped[iRight].Key == "Table")
  5169. {
  5170. var deletedCorrelatedSequence = new CorrelatedSequence();
  5171. deletedCorrelatedSequence.ComparisonUnitArray1 = leftGrouped[iLeft].ToArray();
  5172. deletedCorrelatedSequence.ComparisonUnitArray2 = null;
  5173. deletedCorrelatedSequence.CorrelationStatus = CorrelationStatus.Deleted;
  5174. newListOfCorrelatedSequence.Add(deletedCorrelatedSequence);
  5175. ++iLeft;
  5176. }
  5177. else if (leftGrouped[iLeft].Key == "Table" && rightGrouped[iRight].Key == "Para")
  5178. {
  5179. var insertedCorrelatedSequence = new CorrelatedSequence();
  5180. insertedCorrelatedSequence.ComparisonUnitArray1 = null;
  5181. insertedCorrelatedSequence.ComparisonUnitArray2 = rightGrouped[iRight].ToArray();
  5182. insertedCorrelatedSequence.CorrelationStatus = CorrelationStatus.Inserted;
  5183. newListOfCorrelatedSequence.Add(insertedCorrelatedSequence);
  5184. ++iRight;
  5185. }
  5186. if (iLeft == leftGrouped.Length && iRight == rightGrouped.Length)
  5187. return newListOfCorrelatedSequence;
  5188. // if there is content on the left, but not content on the right
  5189. if (iRight == rightGrouped.Length)
  5190. {
  5191. for (int j = iLeft; j < leftGrouped.Length; j++)
  5192. {
  5193. var deletedCorrelatedSequence = new CorrelatedSequence();
  5194. deletedCorrelatedSequence.ComparisonUnitArray1 = leftGrouped[j].ToArray();
  5195. deletedCorrelatedSequence.ComparisonUnitArray2 = null;
  5196. deletedCorrelatedSequence.CorrelationStatus = CorrelationStatus.Deleted;
  5197. newListOfCorrelatedSequence.Add(deletedCorrelatedSequence);
  5198. }
  5199. return newListOfCorrelatedSequence;
  5200. }
  5201. // there is content on the right but not on the left
  5202. else if (iLeft == leftGrouped.Length)
  5203. {
  5204. for (int j = iRight; j < rightGrouped.Length; j++)
  5205. {
  5206. var insertedCorrelatedSequence = new CorrelatedSequence();
  5207. insertedCorrelatedSequence.ComparisonUnitArray1 = null;
  5208. insertedCorrelatedSequence.ComparisonUnitArray2 = rightGrouped[j].ToArray();
  5209. insertedCorrelatedSequence.CorrelationStatus = CorrelationStatus.Inserted;
  5210. newListOfCorrelatedSequence.Add(insertedCorrelatedSequence);
  5211. }
  5212. return newListOfCorrelatedSequence;
  5213. }
  5214. // else continue on next round.
  5215. }
  5216. }
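  5217. // If both sides consist of a single table, delegate to DoLcsAlgorithmForTable: it pairs the tables row by row when their row or structure hashes match, marks them as deleted/inserted when they contain merged cells and differ structurally, and otherwise returns null so we fall through.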
  5218. if (leftTables == 1 && leftLength == 1 &&
  5219. rightTables == 1 && rightLength == 1)
  5220. {
  5221. var result = DoLcsAlgorithmForTable(unknown, settings);
  5222. if (result != null)
  5223. return result;
  5224. }
  5225. // If both sides contain only paragraphs, tables, and textboxes, then flatten and iterate.
  5226. var leftOnlyParasTablesTextboxes = leftLength == leftTables + leftParagraphs + leftTextboxes;
  5227. var rightOnlyParasTablesTextboxes = rightLength == rightTables + rightParagraphs + rightTextboxes;
  5228. if (leftOnlyParasTablesTextboxes && rightOnlyParasTablesTextboxes)
  5229. {
  5230. // flatten paras and tables, and iterate
  5231. var left = unknown
  5232. .ComparisonUnitArray1
  5233. .Select(cu => cu.Contents)
  5234. .SelectMany(m => m)
  5235. .ToArray();
  5236. var right = unknown
  5237. .ComparisonUnitArray2
  5238. .Select(cu => cu.Contents)
  5239. .SelectMany(m => m)
  5240. .ToArray();
  5241. var unknownCorrelatedSequence = new CorrelatedSequence();
  5242. unknownCorrelatedSequence.CorrelationStatus = CorrelationStatus.Unknown;
  5243. unknownCorrelatedSequence.ComparisonUnitArray1 = left;
  5244. unknownCorrelatedSequence.ComparisonUnitArray2 = right;
  5245. newListOfCorrelatedSequence.Add(unknownCorrelatedSequence);
  5246. return newListOfCorrelatedSequence;
  5247. }
  5248. // if first of left is a row and first of right is a row
  5249. // then flatten the row to cells and iterate.
  5250. var firstLeft = unknown
  5251. .ComparisonUnitArray1
  5252. .FirstOrDefault() as ComparisonUnitGroup;
  5253. var firstRight = unknown
  5254. .ComparisonUnitArray2
  5255. .FirstOrDefault() as ComparisonUnitGroup;
  5256. if (firstLeft != null && firstRight != null)
  5257. {
  5258. if (firstLeft.ComparisonUnitGroupType == ComparisonUnitGroupType.Row &&
  5259. firstRight.ComparisonUnitGroupType == ComparisonUnitGroupType.Row)
  5260. {
  5261. ComparisonUnit[] leftContent = firstLeft.Contents.ToArray();
  5262. ComparisonUnit[] rightContent = firstRight.Contents.ToArray();
  5263. var lenLeft = leftContent.Length;
  5264. var lenRight = rightContent.Length;
  5265. if (lenLeft < lenRight)
  5266. leftContent = leftContent.Concat(Enumerable.Repeat<ComparisonUnit>(null, lenRight - lenLeft)).ToArray();
  5267. else if (lenRight < lenLeft)
  5268. rightContent = rightContent.Concat(Enumerable.Repeat<ComparisonUnit>(null, lenLeft - lenRight)).ToArray();
  5269. List<CorrelatedSequence> newCs = leftContent.Zip(rightContent, (l, r) =>
  5270. {
  5271. if (l != null && r != null)
  5272. {
  5273. var unknownCorrelatedSequence = new CorrelatedSequence();
  5274. unknownCorrelatedSequence.ComparisonUnitArray1 = new[] { l };
  5275. unknownCorrelatedSequence.ComparisonUnitArray2 = new[] { r };
  5276. unknownCorrelatedSequence.CorrelationStatus = CorrelationStatus.Unknown;
  5277. return new[] { unknownCorrelatedSequence };
  5278. }
  5279. if (l == null)
  5280. {
  5281. var insertedCorrelatedSequence = new CorrelatedSequence();
  5282. insertedCorrelatedSequence.ComparisonUnitArray1 = null;
  5283. insertedCorrelatedSequence.ComparisonUnitArray2 = r.Contents.ToArray();
  5284. insertedCorrelatedSequence.CorrelationStatus = CorrelationStatus.Inserted;
  5285. return new[] { insertedCorrelatedSequence };
  5286. }
  5287. else if (r == null)
  5288. {
  5289. var deletedCorrelatedSequence = new CorrelatedSequence();
  5290. deletedCorrelatedSequence.ComparisonUnitArray1 = l.Contents.ToArray();
  5291. deletedCorrelatedSequence.ComparisonUnitArray2 = null;
  5292. deletedCorrelatedSequence.CorrelationStatus = CorrelationStatus.Deleted;
  5293. return new[] { deletedCorrelatedSequence };
  5294. }
  5295. else
  5296. throw new OpenXmlPowerToolsException("Internal error");
  5297. })
  5298. .SelectMany(m => m)
  5299. .ToList();
  5300. foreach (var cs in newCs)
  5301. newListOfCorrelatedSequence.Add(cs);
  5302. var remainderLeft = unknown
  5303. .ComparisonUnitArray1
  5304. .Skip(1)
  5305. .ToArray();
  5306. var remainderRight = unknown
  5307. .ComparisonUnitArray2
  5308. .Skip(1)
  5309. .ToArray();
  5310. if (remainderLeft.Length > 0 && remainderRight.Length == 0)
  5311. {
  5312. var deletedCorrelatedSequence = new CorrelatedSequence();
  5313. deletedCorrelatedSequence.CorrelationStatus = CorrelationStatus.Deleted;
  5314. deletedCorrelatedSequence.ComparisonUnitArray1 = remainderLeft;
  5315. deletedCorrelatedSequence.ComparisonUnitArray2 = null;
  5316. newListOfCorrelatedSequence.Add(deletedCorrelatedSequence);
  5317. }
  5318. else if (remainderRight.Length > 0 && remainderLeft.Length == 0)
  5319. {
  5320. var insertedCorrelatedSequence = new CorrelatedSequence();
  5321. insertedCorrelatedSequence.CorrelationStatus = CorrelationStatus.Inserted;
  5322. insertedCorrelatedSequence.ComparisonUnitArray1 = null;
  5323. insertedCorrelatedSequence.ComparisonUnitArray2 = remainderRight;
  5324. newListOfCorrelatedSequence.Add(insertedCorrelatedSequence);
  5325. }
  5326. else if (remainderLeft.Length > 0 && remainderRight.Length > 0)
  5327. {
  5328. var unknownCorrelatedSequence2 = new CorrelatedSequence();
  5329. unknownCorrelatedSequence2.CorrelationStatus = CorrelationStatus.Unknown;
  5330. unknownCorrelatedSequence2.ComparisonUnitArray1 = remainderLeft;
  5331. unknownCorrelatedSequence2.ComparisonUnitArray2 = remainderRight;
  5332. newListOfCorrelatedSequence.Add(unknownCorrelatedSequence2);
  5333. }
  5334. if (s_False)
  5335. {
  5336. var sb = new StringBuilder();
  5337. foreach (var item in newListOfCorrelatedSequence)
  5338. sb.Append(item.ToString()).Append(Environment.NewLine);
  5339. var sbs = sb.ToString();
  5340. DocxComparerUtil.NotePad(sbs);
  5341. }
  5342. return newListOfCorrelatedSequence;
  5343. }
  5344. if (firstLeft.ComparisonUnitGroupType == ComparisonUnitGroupType.Cell &&
  5345. firstRight.ComparisonUnitGroupType == ComparisonUnitGroupType.Cell)
  5346. {
  5347. var left = firstLeft
  5348. .Contents
  5349. .ToArray();
  5350. var right = firstRight
  5351. .Contents
  5352. .ToArray();
  5353. var unknownCorrelatedSequence = new CorrelatedSequence();
  5354. unknownCorrelatedSequence.CorrelationStatus = CorrelationStatus.Unknown;
  5355. unknownCorrelatedSequence.ComparisonUnitArray1 = left;
  5356. unknownCorrelatedSequence.ComparisonUnitArray2 = right;
  5357. newListOfCorrelatedSequence.Add(unknownCorrelatedSequence);
  5358. var remainderLeft = unknown
  5359. .ComparisonUnitArray1
  5360. .Skip(1)
  5361. .ToArray();
  5362. var remainderRight = unknown
  5363. .ComparisonUnitArray2
  5364. .Skip(1)
  5365. .ToArray();
  5366. if (remainderLeft.Length > 0 && remainderRight.Length == 0)
  5367. {
  5368. var deletedCorrelatedSequence = new CorrelatedSequence();
  5369. deletedCorrelatedSequence.CorrelationStatus = CorrelationStatus.Deleted;
  5370. deletedCorrelatedSequence.ComparisonUnitArray1 = remainderLeft;
  5371. deletedCorrelatedSequence.ComparisonUnitArray2 = null;
  5372. newListOfCorrelatedSequence.Add(deletedCorrelatedSequence);
  5373. }
  5374. else if (remainderRight.Length > 0 && remainderLeft.Length == 0)
  5375. {
  5376. var insertedCorrelatedSequence = new CorrelatedSequence();
  5377. insertedCorrelatedSequence.CorrelationStatus = CorrelationStatus.Inserted;
  5378. insertedCorrelatedSequence.ComparisonUnitArray1 = null;
  5379. insertedCorrelatedSequence.ComparisonUnitArray2 = remainderRight;
  5380. newListOfCorrelatedSequence.Add(insertedCorrelatedSequence);
  5381. }
  5382. else if (remainderLeft.Length > 0 && remainderRight.Length > 0)
  5383. {
  5384. var unknownCorrelatedSequence2 = new CorrelatedSequence();
  5385. unknownCorrelatedSequence2.CorrelationStatus = CorrelationStatus.Unknown;
  5386. unknownCorrelatedSequence2.ComparisonUnitArray1 = remainderLeft;
  5387. unknownCorrelatedSequence2.ComparisonUnitArray2 = remainderRight;
  5388. newListOfCorrelatedSequence.Add(unknownCorrelatedSequence2);
  5389. }
  5390. return newListOfCorrelatedSequence;
  5391. }
  5392. }
  5393. if (unknown.ComparisonUnitArray1.Any() && unknown.ComparisonUnitArray2.Any())
  5394. {
  5395. var left = unknown.ComparisonUnitArray1.First() as ComparisonUnitWord;
  5396. var right = unknown.ComparisonUnitArray2.First() as ComparisonUnitGroup;
  5397. if (left != null &&
  5398. right != null &&
  5399. right.ComparisonUnitGroupType == ComparisonUnitGroupType.Row)
  5400. {
  5401. var insertedCorrelatedSequence3 = new CorrelatedSequence();
  5402. insertedCorrelatedSequence3.CorrelationStatus = CorrelationStatus.Inserted;
  5403. insertedCorrelatedSequence3.ComparisonUnitArray1 = null;
  5404. insertedCorrelatedSequence3.ComparisonUnitArray2 = unknown.ComparisonUnitArray2;
  5405. newListOfCorrelatedSequence.Add(insertedCorrelatedSequence3);
  5406. var deletedCorrelatedSequence3 = new CorrelatedSequence();
  5407. deletedCorrelatedSequence3.CorrelationStatus = CorrelationStatus.Deleted;
  5408. deletedCorrelatedSequence3.ComparisonUnitArray1 = unknown.ComparisonUnitArray1;
  5409. deletedCorrelatedSequence3.ComparisonUnitArray2 = null;
  5410. newListOfCorrelatedSequence.Add(deletedCorrelatedSequence3);
  5411. return newListOfCorrelatedSequence;
  5412. }
  5413. var left2 = unknown.ComparisonUnitArray1.First() as ComparisonUnitGroup;
  5414. var right2 = unknown.ComparisonUnitArray2.First() as ComparisonUnitWord;
  5415. if (right2 != null &&
  5416. left2 != null &&
  5417. left2.ComparisonUnitGroupType == ComparisonUnitGroupType.Row)
  5418. {
  5419. var deletedCorrelatedSequence3 = new CorrelatedSequence();
  5420. deletedCorrelatedSequence3.CorrelationStatus = CorrelationStatus.Deleted;
  5421. deletedCorrelatedSequence3.ComparisonUnitArray1 = unknown.ComparisonUnitArray1;
  5422. deletedCorrelatedSequence3.ComparisonUnitArray2 = null;
  5423. newListOfCorrelatedSequence.Add(deletedCorrelatedSequence3);
  5424. var insertedCorrelatedSequence3 = new CorrelatedSequence();
  5425. insertedCorrelatedSequence3.CorrelationStatus = CorrelationStatus.Inserted;
  5426. insertedCorrelatedSequence3.ComparisonUnitArray1 = null;
  5427. insertedCorrelatedSequence3.ComparisonUnitArray2 = unknown.ComparisonUnitArray2;
  5428. newListOfCorrelatedSequence.Add(insertedCorrelatedSequence3);
  5429. return newListOfCorrelatedSequence;
  5430. }
  5431. var lastContentAtomLeft = unknown.ComparisonUnitArray1.Select(cu => cu.DescendantContentAtoms().Last()).LastOrDefault();
  5432. var lastContentAtomRight = unknown.ComparisonUnitArray2.Select(cu => cu.DescendantContentAtoms().Last()).LastOrDefault();
  5433. if (lastContentAtomLeft != null && lastContentAtomRight != null)
  5434. {
  5435. if (lastContentAtomLeft.ContentElement.Name == W.pPr &&
  5436. lastContentAtomRight.ContentElement.Name != W.pPr)
  5437. {
  5438. var insertedCorrelatedSequence5 = new CorrelatedSequence();
  5439. insertedCorrelatedSequence5.CorrelationStatus = CorrelationStatus.Inserted;
  5440. insertedCorrelatedSequence5.ComparisonUnitArray1 = null;
  5441. insertedCorrelatedSequence5.ComparisonUnitArray2 = unknown.ComparisonUnitArray2;
  5442. newListOfCorrelatedSequence.Add(insertedCorrelatedSequence5);
  5443. var deletedCorrelatedSequence5 = new CorrelatedSequence();
  5444. deletedCorrelatedSequence5.CorrelationStatus = CorrelationStatus.Deleted;
  5445. deletedCorrelatedSequence5.ComparisonUnitArray1 = unknown.ComparisonUnitArray1;
  5446. deletedCorrelatedSequence5.ComparisonUnitArray2 = null;
  5447. newListOfCorrelatedSequence.Add(deletedCorrelatedSequence5);
  5448. return newListOfCorrelatedSequence;
  5449. }
  5450. else if (lastContentAtomLeft.ContentElement.Name != W.pPr &&
  5451. lastContentAtomRight.ContentElement.Name == W.pPr)
  5452. {
  5453. var deletedCorrelatedSequence5 = new CorrelatedSequence();
  5454. deletedCorrelatedSequence5.CorrelationStatus = CorrelationStatus.Deleted;
  5455. deletedCorrelatedSequence5.ComparisonUnitArray1 = unknown.ComparisonUnitArray1;
  5456. deletedCorrelatedSequence5.ComparisonUnitArray2 = null;
  5457. newListOfCorrelatedSequence.Add(deletedCorrelatedSequence5);
  5458. var insertedCorrelatedSequence5 = new CorrelatedSequence();
  5459. insertedCorrelatedSequence5.CorrelationStatus = CorrelationStatus.Inserted;
  5460. insertedCorrelatedSequence5.ComparisonUnitArray1 = null;
  5461. insertedCorrelatedSequence5.ComparisonUnitArray2 = unknown.ComparisonUnitArray2;
  5462. newListOfCorrelatedSequence.Add(insertedCorrelatedSequence5);
  5463. return newListOfCorrelatedSequence;
  5464. }
  5465. }
  5466. }
  5467. var deletedCorrelatedSequence4 = new CorrelatedSequence();
  5468. deletedCorrelatedSequence4.CorrelationStatus = CorrelationStatus.Deleted;
  5469. deletedCorrelatedSequence4.ComparisonUnitArray1 = unknown.ComparisonUnitArray1;
  5470. deletedCorrelatedSequence4.ComparisonUnitArray2 = null;
  5471. newListOfCorrelatedSequence.Add(deletedCorrelatedSequence4);
  5472. var insertedCorrelatedSequence4 = new CorrelatedSequence();
  5473. insertedCorrelatedSequence4.CorrelationStatus = CorrelationStatus.Inserted;
  5474. insertedCorrelatedSequence4.ComparisonUnitArray1 = null;
  5475. insertedCorrelatedSequence4.ComparisonUnitArray2 = unknown.ComparisonUnitArray2;
  5476. newListOfCorrelatedSequence.Add(insertedCorrelatedSequence4);
  5477. return newListOfCorrelatedSequence;
  5478. }
  5479. ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  5480. // here we have the longest common subsequence.
  5481. // but it may start in the middle of a paragraph.
  5482. // therefore need to dispose of the content from the beginning of the longest common subsequence to the beginning of the paragraph.
  5483. // this should be in a separate unknown region
  5484. // if countCommonAtEnd != 0, and if it contains a paragraph mark, then if there are comparison units in the same paragraph before the common at end (in either version)
  5485. // then we want to put all of those comparison units into a single unknown, where they must be resolved against each other. We don't want those
  5486. // comparison units to go into the middle unknown comparison unit.
  5487. int remainingInLeftParagraph = 0;
  5488. int remainingInRightParagraph = 0;
  5489. if (currentLongestCommonSequenceLength != 0)
  5490. {
  5491. var commonSeq = unknown
  5492. .ComparisonUnitArray1
  5493. .Skip(currentI1)
  5494. .Take(currentLongestCommonSequenceLength)
  5495. .ToList();
  5496. var firstOfCommonSeq = commonSeq.First();
  5497. if (firstOfCommonSeq is ComparisonUnitWord)
  5498. {
  5499. // are there any paragraph marks in the common seq at end?
  5500. if (commonSeq.Any(cu =>
  5501. {
  5502. var firstComparisonUnitAtom = cu.Contents.OfType<ComparisonUnitAtom>().FirstOrDefault();
  5503. if (firstComparisonUnitAtom == null)
  5504. return false;
  5505. return firstComparisonUnitAtom.ContentElement.Name == W.pPr;
  5506. }))
  5507. {
  5508. remainingInLeftParagraph = unknown
  5509. .ComparisonUnitArray1
  5510. .Take(currentI1)
  5511. .Reverse()
  5512. .TakeWhile(cu =>
  5513. {
  5514. if (!(cu is ComparisonUnitWord))
  5515. return false;
  5516. var firstComparisonUnitAtom = cu.Contents.OfType<ComparisonUnitAtom>().FirstOrDefault();
  5517. if (firstComparisonUnitAtom == null)
  5518. return true;
  5519. return firstComparisonUnitAtom.ContentElement.Name != W.pPr;
  5520. })
  5521. .Count();
  5522. remainingInRightParagraph = unknown
  5523. .ComparisonUnitArray2
  5524. .Take(currentI2)
  5525. .Reverse()
  5526. .TakeWhile(cu =>
  5527. {
  5528. if (!(cu is ComparisonUnitWord))
  5529. return false;
  5530. var firstComparisonUnitAtom = cu.Contents.OfType<ComparisonUnitAtom>().FirstOrDefault();
  5531. if (firstComparisonUnitAtom == null)
  5532. return true;
  5533. return firstComparisonUnitAtom.ContentElement.Name != W.pPr;
  5534. })
  5535. .Count();
  5536. }
  5537. }
  5538. }
  5539. var countBeforeCurrentParagraphLeft = currentI1 - remainingInLeftParagraph;
  5540. var countBeforeCurrentParagraphRight = currentI2 - remainingInRightParagraph;
  5541. if (countBeforeCurrentParagraphLeft > 0 && countBeforeCurrentParagraphRight == 0)
  5542. {
  5543. var deletedCorrelatedSequence = new CorrelatedSequence();
  5544. deletedCorrelatedSequence.CorrelationStatus = CorrelationStatus.Deleted;
  5545. deletedCorrelatedSequence.ComparisonUnitArray1 = cul1
  5546. .Take(countBeforeCurrentParagraphLeft)
  5547. .ToArray();
  5548. deletedCorrelatedSequence.ComparisonUnitArray2 = null;
  5549. newListOfCorrelatedSequence.Add(deletedCorrelatedSequence);
  5550. }
  5551. else if (countBeforeCurrentParagraphLeft == 0 && countBeforeCurrentParagraphRight > 0)
  5552. {
  5553. var insertedCorrelatedSequence = new CorrelatedSequence();
  5554. insertedCorrelatedSequence.CorrelationStatus = CorrelationStatus.Inserted;
  5555. insertedCorrelatedSequence.ComparisonUnitArray1 = null;
  5556. insertedCorrelatedSequence.ComparisonUnitArray2 = cul2
  5557. .Take(countBeforeCurrentParagraphRight)
  5558. .ToArray();
  5559. newListOfCorrelatedSequence.Add(insertedCorrelatedSequence);
  5560. }
  5561. else if (countBeforeCurrentParagraphLeft > 0 && countBeforeCurrentParagraphRight > 0)
  5562. {
  5563. var unknownCorrelatedSequence = new CorrelatedSequence();
  5564. unknownCorrelatedSequence.CorrelationStatus = CorrelationStatus.Unknown;
  5565. unknownCorrelatedSequence.ComparisonUnitArray1 = cul1
  5566. .Take(countBeforeCurrentParagraphLeft)
  5567. .ToArray();
  5568. unknownCorrelatedSequence.ComparisonUnitArray2 = cul2
  5569. .Take(countBeforeCurrentParagraphRight)
  5570. .ToArray();
  5571. newListOfCorrelatedSequence.Add(unknownCorrelatedSequence);
  5572. }
  5573. else if (countBeforeCurrentParagraphLeft == 0 && countBeforeCurrentParagraphRight == 0)
  5574. {
  5575. // nothing to do
  5576. }
  5577. if (remainingInLeftParagraph > 0 && remainingInRightParagraph == 0)
  5578. {
  5579. var deletedCorrelatedSequence = new CorrelatedSequence();
  5580. deletedCorrelatedSequence.CorrelationStatus = CorrelationStatus.Deleted;
  5581. deletedCorrelatedSequence.ComparisonUnitArray1 = cul1
  5582. .Skip(countBeforeCurrentParagraphLeft)
  5583. .Take(remainingInLeftParagraph)
  5584. .ToArray();
  5585. deletedCorrelatedSequence.ComparisonUnitArray2 = null;
  5586. newListOfCorrelatedSequence.Add(deletedCorrelatedSequence);
  5587. }
  5588. else if (remainingInLeftParagraph == 0 && remainingInRightParagraph > 0)
  5589. {
  5590. var insertedCorrelatedSequence = new CorrelatedSequence();
  5591. insertedCorrelatedSequence.CorrelationStatus = CorrelationStatus.Inserted;
  5592. insertedCorrelatedSequence.ComparisonUnitArray1 = null;
  5593. insertedCorrelatedSequence.ComparisonUnitArray2 = cul2
  5594. .Skip(countBeforeCurrentParagraphRight)
  5595. .Take(remainingInRightParagraph)
  5596. .ToArray();
  5597. newListOfCorrelatedSequence.Add(insertedCorrelatedSequence);
  5598. }
  5599. else if (remainingInLeftParagraph > 0 && remainingInRightParagraph > 0)
  5600. {
  5601. var unknownCorrelatedSequence = new CorrelatedSequence();
  5602. unknownCorrelatedSequence.CorrelationStatus = CorrelationStatus.Unknown;
  5603. unknownCorrelatedSequence.ComparisonUnitArray1 = cul1
  5604. .Skip(countBeforeCurrentParagraphLeft)
  5605. .Take(remainingInLeftParagraph)
  5606. .ToArray();
  5607. unknownCorrelatedSequence.ComparisonUnitArray2 = cul2
  5608. .Skip(countBeforeCurrentParagraphRight)
  5609. .Take(remainingInRightParagraph)
  5610. .ToArray();
  5611. newListOfCorrelatedSequence.Add(unknownCorrelatedSequence);
  5612. }
  5613. else if (remainingInLeftParagraph == 0 && remainingInRightParagraph == 0)
  5614. {
  5615. // nothing to do
  5616. }
  5617. var middleEqual = new CorrelatedSequence();
  5618. middleEqual.CorrelationStatus = CorrelationStatus.Equal;
  5619. middleEqual.ComparisonUnitArray1 = cul1
  5620. .Skip(currentI1)
  5621. .Take(currentLongestCommonSequenceLength)
  5622. .ToArray();
  5623. middleEqual.ComparisonUnitArray2 = cul2
  5624. .Skip(currentI2)
  5625. .Take(currentLongestCommonSequenceLength)
  5626. .ToArray();
  5627. newListOfCorrelatedSequence.Add(middleEqual);
  5628. int endI1 = currentI1 + currentLongestCommonSequenceLength;
  5629. int endI2 = currentI2 + currentLongestCommonSequenceLength;
  5630. var remaining1 = cul1
  5631. .Skip(endI1)
  5632. .ToArray();
  5633. var remaining2 = cul2
  5634. .Skip(endI2)
  5635. .ToArray();
  5636. // here is the point that we want to make a new unknown from this point to the end of the paragraph that contains the equal parts.
  5637. // this will never hurt anything, and will in many cases result in a better difference.
  5638. var leftCuw = middleEqual.ComparisonUnitArray1[middleEqual.ComparisonUnitArray1.Length - 1] as ComparisonUnitWord;
  5639. if (leftCuw != null)
  5640. {
  5641. var lastContentAtom = leftCuw.DescendantContentAtoms().LastOrDefault();
  5642. // if the middleEqual did not end with a paragraph mark
  5643. if (lastContentAtom != null && lastContentAtom.ContentElement.Name != W.pPr)
  5644. {
  5645. int idx1 = FindIndexOfNextParaMark(remaining1);
  5646. int idx2 = FindIndexOfNextParaMark(remaining2);
  5647. var unknownCorrelatedSequenceRemaining = new CorrelatedSequence();
  5648. unknownCorrelatedSequenceRemaining.CorrelationStatus = CorrelationStatus.Unknown;
  5649. unknownCorrelatedSequenceRemaining.ComparisonUnitArray1 = remaining1.Take(idx1).ToArray();
  5650. unknownCorrelatedSequenceRemaining.ComparisonUnitArray2 = remaining2.Take(idx2).ToArray();
  5651. newListOfCorrelatedSequence.Add(unknownCorrelatedSequenceRemaining);
  5652. var unknownCorrelatedSequenceAfter = new CorrelatedSequence();
  5653. unknownCorrelatedSequenceAfter.CorrelationStatus = CorrelationStatus.Unknown;
  5654. unknownCorrelatedSequenceAfter.ComparisonUnitArray1 = remaining1.Skip(idx1).ToArray();
  5655. unknownCorrelatedSequenceAfter.ComparisonUnitArray2 = remaining2.Skip(idx2).ToArray();
  5656. newListOfCorrelatedSequence.Add(unknownCorrelatedSequenceAfter);
  5657. return newListOfCorrelatedSequence;
  5658. }
  5659. }
  5660. var unknownCorrelatedSequence20 = new CorrelatedSequence();
  5661. unknownCorrelatedSequence20.CorrelationStatus = CorrelationStatus.Unknown;
  5662. unknownCorrelatedSequence20.ComparisonUnitArray1 = remaining1;
  5663. unknownCorrelatedSequence20.ComparisonUnitArray2 = remaining2;
  5664. newListOfCorrelatedSequence.Add(unknownCorrelatedSequence20);
  5665. return newListOfCorrelatedSequence;
  5666. }
  /// <summary>
  /// Finds the index of the first comparison unit that is a word whose last descendant
  /// content atom is a paragraph mark (a content element named <c>W.pPr</c>).
  /// </summary>
  /// <param name="cul">The comparison units to scan, in order.</param>
  /// <returns>
  /// The zero-based index of the first word ending in a paragraph mark, or
  /// <c>cul.Length</c> when no such word exists. The caller in this file splits the array
  /// at this index with <c>Take(idx)</c> / <c>Skip(idx)</c>, so the word carrying the
  /// paragraph mark lands in the second part.
  /// </returns>
  private static int FindIndexOfNextParaMark(ComparisonUnit[] cul)
  {
      for (int i = 0; i < cul.Length; i++)
      {
          // Only words are inspected. The 'as' cast yields null for any other kind of
          // comparison unit; skip those rather than dereferencing null.
          var cuw = cul[i] as ComparisonUnitWord;
          if (cuw == null)
              continue;

          // A word without any content atoms cannot end in a paragraph mark.
          var lastAtom = cuw.DescendantContentAtoms().LastOrDefault();
          if (lastAtom == null)
              continue;

          if (lastAtom.ContentElement.Name == W.pPr)
              return i;
      }

      // No paragraph mark found: the whole array belongs to the current paragraph.
      return cul.Length;
  }
  /// <summary>
  /// Attempts to break an unknown correlated sequence that holds one table on each side
  /// into finer-grained correlated sequences.
  /// </summary>
  /// <param name="unknown">
  /// The sequence to split; the first unit of each comparison unit array is treated as the table group.
  /// </param>
  /// <param name="settings">Comparer settings (not read by this method).</param>
  /// <returns>
  /// One Unknown sequence per pair of rows when the tables have the same number of rows with
  /// pairwise-equal <c>CorrelatedSHA1Hash</c>, or when either table contains merged cells and
  /// both have the same non-null <c>StructureSHA1Hash</c>; a Deleted sequence followed by an
  /// Inserted sequence (each flattened to the table contents) when either table contains merged
  /// cells but the structure hashes do not match; otherwise <c>null</c>.
  /// </returns>
  /// <exception cref="OpenXmlPowerToolsException">Either table has no descendant content atoms.</exception>
  private static List<CorrelatedSequence> DoLcsAlgorithmForTable(CorrelatedSequence unknown, WmlComparerSettings settings)
  {
      var leftTable = unknown.ComparisonUnitArray1.First() as ComparisonUnitGroup;
      var rightTable = unknown.ComparisonUnitArray2.First() as ComparisonUnitGroup;

      // Pairs the rows of both tables positionally, yielding one Unknown sequence per pair.
      Func<List<CorrelatedSequence>> correlateRowByRow = () =>
          leftTable.Contents
              .Zip(rightTable.Contents, (leftUnit, rightUnit) =>
              {
                  var leftRow = leftUnit as ComparisonUnitGroup;
                  var rightRow = rightUnit as ComparisonUnitGroup;
                  return new CorrelatedSequence
                  {
                      ComparisonUnitArray1 = new[] { leftRow },
                      ComparisonUnitArray2 = new[] { rightRow },
                      CorrelationStatus = CorrelationStatus.Unknown,
                  };
              })
              .ToList();

      // Same number of rows and every pair of rows shares its correlated hash:
      // each row can be compared against its counterpart, merged cells or not.
      if (leftTable.Contents.Count() == rightTable.Contents.Count())
      {
          var anyRowHashDiffers = leftTable.Contents
              .Zip(rightTable.Contents, (leftUnit, rightUnit) => new
              {
                  Left = leftUnit as ComparisonUnitGroup,
                  Right = rightUnit as ComparisonUnitGroup,
              })
              .Any(pair => pair.Left.CorrelatedSHA1Hash != pair.Right.CorrelatedSHA1Hash);
          if (!anyRowHashDiffers)
              return correlateRowByRow();
      }

      // Locate each w:tbl element through the ancestors of the table's first content atom,
      // taking the last matching ancestor in the list.
      var leftFirstAtom = leftTable.DescendantContentAtoms().FirstOrDefault();
      if (leftFirstAtom == null)
          throw new OpenXmlPowerToolsException("Internal error");
      var leftTblElement = leftFirstAtom
          .AncestorElements
          .LastOrDefault(e => e.Name == W.tbl);

      var rightFirstAtom = rightTable.DescendantContentAtoms().FirstOrDefault();
      if (rightFirstAtom == null)
          throw new OpenXmlPowerToolsException("Internal error");
      var rightTblElement = rightFirstAtom
          .AncestorElements
          .LastOrDefault(e => e.Name == W.tbl);

      Func<XName, bool> isMergeMarker = name => name == W.vMerge || name == W.gridSpan;
      var leftHasMergedCells = leftTblElement.Descendants().Any(d => isMergeMarker(d.Name));
      var rightHasMergedCells = rightTblElement.Descendants().Any(d => isMergeMarker(d.Name));

      // Neither table has merged cells: nothing special to do here, let the caller carry on.
      if (!leftHasMergedCells && !rightHasMergedCells)
          return null;

      // Identical structure hashes mean the tables have the same structure, so the rows
      // can still be correlated one to one.
      var sameStructure =
          leftTable.StructureSHA1Hash != null &&
          rightTable.StructureSHA1Hash != null &&
          leftTable.StructureSHA1Hash == rightTable.StructureSHA1Hash;
      if (sameStructure)
          return correlateRowByRow();

      // Otherwise flatten each side to its table contents and report the left table as
      // deleted and the right table as inserted.
      var deletedContent = new CorrelatedSequence
      {
          ComparisonUnitArray1 = unknown
              .ComparisonUnitArray1
              .SelectMany(table => table.Contents)
              .ToArray(),
          ComparisonUnitArray2 = null,
          CorrelationStatus = CorrelationStatus.Deleted,
      };
      var insertedContent = new CorrelatedSequence
      {
          ComparisonUnitArray1 = null,
          ComparisonUnitArray2 = unknown
              .ComparisonUnitArray2
              .SelectMany(table => table.Contents)
              .ToArray(),
          CorrelationStatus = CorrelationStatus.Inserted,
      };
      return new List<CorrelatedSequence> { deletedContent, insertedContent };
  }
  /// <summary>
  /// Names of non-text content elements that always form a comparison unit word of their own:
  /// GetComparisonUnitList gives every atom carrying one of these names a grouping key that
  /// no neighbouring atom shares.
  /// </summary>
  private static XName[] WordBreakElements =
  {
      // paragraph mark and in-run breaks / separators
      W.pPr, W.tab, W.br, W.continuationSeparator, W.cr,
      // date parts, graphics and note marks
      W.dayLong, W.dayShort, W.drawing, W.pict, W.endnoteRef, W.footnoteRef,
      W.monthLong, W.monthShort, W.noBreakHyphen, W._object, W.ptab, W.separator, W.sym,
      W.yearLong, W.yearShort,
      // math and note references
      M.oMathPara, M.oMath, W.footnoteReference, W.endnoteReference,
  };
  /// <summary>
  /// State carried from one atom to the next by the Rollup in GetComparisonUnitList.
  /// </summary>
  private class Atgbw
  {
      // The atom this state was produced for (null in the seed).
      public ComparisonUnitAtom ComparisonUnitAtomMember;

      // Word grouping key assigned to the atom; adjacent atoms with equal keys form one word.
      public int? Key;

      // Counter value handed on to the following atom.
      public int NextIndex;
  }
  /// <summary>
  /// Turns a flat list of atoms into a tree of comparison units: adjacent atoms are first
  /// grouped into words, then the words are nested into groups according to the grouping
  /// ancestors (see ComparisonGroupingElements) of the first atom of each word.
  /// </summary>
  /// <param name="comparisonUnitAtomList">Atoms in document order.</param>
  /// <param name="settings">Supplies the WordSeparators characters that split words.</param>
  /// <returns>The top-level comparison units.</returns>
  private static ComparisonUnit[] GetComparisonUnitList(ComparisonUnitAtom[] comparisonUnitAtomList, WmlComparerSettings settings)
  {
      var initialState = new Atgbw();
      initialState.Key = null;
      initialState.ComparisonUnitAtomMember = null;
      initialState.NextIndex = 0;

      var keyedAtoms = comparisonUnitAtomList
          .Rollup(initialState, (atom, previous, index) =>
          {
              // standsAlone: the atom must not share its key with either neighbour.
              bool standsAlone;
              if (atom.ContentElement.Name != W.t)
              {
                  standsAlone = WordBreakElements.Contains(atom.ContentElement.Name);
              }
              else
              {
                  char c = atom.ContentElement.Value[0];
                  if (c == '.' || c == ',')
                  {
                      // A period or comma next to a digit stays inside the surrounding word
                      // (e.g. "1,000.50"); anywhere else it is a word of its own.
                      bool digitBefore = false;
                      if (index > 0)
                      {
                          var before = comparisonUnitAtomList[index - 1].ContentElement;
                          digitBefore = before.Name == W.t && char.IsDigit(before.Value[0]);
                      }
                      bool digitAfter = false;
                      if (index < comparisonUnitAtomList.Length - 1)
                      {
                          var after = comparisonUnitAtomList[index + 1].ContentElement;
                          digitAfter = after.Name == W.t && char.IsDigit(after.Value[0]);
                      }
                      standsAlone = !digitBefore && !digitAfter;
                  }
                  else
                  {
                      // Characters in U+4E00..U+9FFF and configured separators are single-character words.
                      standsAlone = (c >= 0x4e00 && c <= 0x9fff) || settings.WordSeparators.Contains(c);
                  }
              }

              // A stand-alone atom skips one index before and one after its own key,
              // so neither neighbour can end up with the same key.
              int wordKey = standsAlone ? previous.NextIndex + 1 : previous.NextIndex;
              int followingIndex = standsAlone ? previous.NextIndex + 2 : previous.NextIndex;
              return new Atgbw()
              {
                  Key = wordKey,
                  ComparisonUnitAtomMember = atom,
                  NextIndex = followingIndex,
              };
          });

      if (s_False)
      {
          // debugging aid: dump the key assigned to every atom
          var dump = new StringBuilder();
          foreach (var keyed in keyedAtoms)
          {
              dump.Append(keyed.Key).Append(Environment.NewLine);
              dump.Append(" ").Append(keyed.ComparisonUnitAtomMember.ToString(0)).Append(Environment.NewLine);
          }
          DocxComparerUtil.NotePad(dump.ToString());
      }

      var words = keyedAtoms.GroupAdjacent(keyed => keyed.Key);

      if (s_False)
      {
          // debugging aid: dump the atoms of every word
          var dump = new StringBuilder();
          foreach (var word in words)
          {
              dump.Append("Group ===== ").Append(word.Key).Append(Environment.NewLine);
              foreach (var keyed in word)
                  dump.Append(" ").Append(keyed.ComparisonUnitAtomMember.ToString(0)).Append(Environment.NewLine);
          }
          DocxComparerUtil.NotePad(dump.ToString());
      }

      var wordsWithPath = words
          .Select(word =>
          {
              // "localName:unid" of every grouping ancestor of the word's first atom, outermost first.
              string[] path = word
                  .First()
                  .ComparisonUnitAtomMember
                  .AncestorElements
                  .Where(ancestor => ComparisonGroupingElements.Contains(ancestor.Name))
                  .Select(ancestor => ancestor.Name.LocalName + ":" + (string)ancestor.Attribute(PtOpenXml.Unid))
                  .ToArray();
              var unitWord = new ComparisonUnitWord(word.Select(keyed => keyed.ComparisonUnitAtomMember));
              return new WithHierarchicalGroupingKey()
              {
                  ComparisonUnitWord = unitWord,
                  HierarchicalGroupingArray = path,
              };
          })
          .ToArray();

      if (s_False)
      {
          // debugging aid: dump each word together with its grouping path
          var dump = new StringBuilder();
          foreach (var entry in wordsWithPath)
          {
              dump.Append("Grouping Array: " + entry.HierarchicalGroupingArray.Select(gam => gam + " - ").StringConcatenate() + Environment.NewLine);
              foreach (var unit in entry.ComparisonUnitWord.Contents)
                  dump.Append(" ").Append(unit.ToString(0)).Append(Environment.NewLine);
          }
          DocxComparerUtil.NotePad(dump.ToString());
      }

      ComparisonUnit[] comparisonUnits = GetHierarchicalComparisonUnits(wordsWithPath, 0).ToArray();

      if (s_False)
          DocxComparerUtil.NotePad(ComparisonUnit.ComparisonUnitListToString(comparisonUnits));

      return comparisonUnits;
  }
  /// <summary>
  /// Recursively nests words into groups. Adjacent words that share the same grouping key at
  /// <paramref name="level"/> become one ComparisonUnitGroup whose children are built from
  /// the next level; words with no key at this level are returned as they are.
  /// </summary>
  /// <param name="input">Words with their "localName:unid" grouping paths.</param>
  /// <param name="level">Index into each word's HierarchicalGroupingArray.</param>
  private static IEnumerable<ComparisonUnit> GetHierarchicalComparisonUnits(IEnumerable<WithHierarchicalGroupingKey> input, int level)
  {
      var units = new List<ComparisonUnit>();

      // "" marks words whose grouping path ends above this level.
      var runs = input.GroupAdjacent(word =>
          level < word.HierarchicalGroupingArray.Length ? word.HierarchicalGroupingArray[level] : "");

      foreach (var run in runs)
      {
          if (run.Key == "")
          {
              foreach (var word in run)
                  units.Add(word.ComparisonUnitWord);
              continue;
          }

          // The part of the key before ':' is the local name of the grouping element.
          ComparisonUnitGroupType? groupType = null;
          switch (run.Key.Split(':')[0])
          {
              case "p":
                  groupType = ComparisonUnitGroupType.Paragraph;
                  break;
              case "tbl":
                  groupType = ComparisonUnitGroupType.Table;
                  break;
              case "tr":
                  groupType = ComparisonUnitGroupType.Row;
                  break;
              case "tc":
                  groupType = ComparisonUnitGroupType.Cell;
                  break;
              case "txbxContent":
                  groupType = ComparisonUnitGroupType.Textbox;
                  break;
          }

          var children = GetHierarchicalComparisonUnits(run, level + 1);
          units.Add(new ComparisonUnitGroup(children, (ComparisonUnitGroupType)groupType, level));
      }

      return units;
  }
  /// <summary>
  /// Element names for which CreateComparisonUnitAtomListRecurse creates a single atom from
  /// the element itself instead of descending into its children.
  /// </summary>
  private static XName[] AllowableRunChildren =
  {
      W.br, W.drawing, W.cr,
      W.dayLong, W.dayShort,
      W.footnoteReference, W.endnoteReference,
      W.monthLong, W.monthShort,
      W.noBreakHyphen,
      // W._object is intentionally left out of this list;
      // CreateComparisonUnitAtomListRecurse tests for it explicitly next to this array.
      W.pgNum, W.ptab, W.softHyphen, W.sym, W.tab,
      W.yearLong, W.yearShort,
      M.oMathPara, M.oMath,
      W.fldChar, W.instrText,
  };
  /// <summary>
  /// Element names that CreateComparisonUnitAtomListRecurse drops: no atom is created for
  /// them and their children are not visited.
  /// </summary>
  private static XName[] ElementsToThrowAway =
  {
      W.bookmarkStart, W.bookmarkEnd,
      W.commentRangeStart, W.commentRangeEnd,
      W.lastRenderedPageBreak,
      W.proofErr,
      W.tblPr,
      W.sectPr,
      W.permEnd, W.permStart,
      W.footnoteRef, W.endnoteRef,
      W.separator, W.continuationSeparator,
  };
  // NOTE(review): not referenced in this part of the file; presumably the block-level
  // elements that receive a SHA1 hash attribute - confirm against the code that uses it.
  private static XName[] ElementsToHaveSha1Hash =
  {
      W.p, W.tbl, W.tr, W.tc,
      W.drawing, W.pict,
      W.txbxContent,
  };
  /// <summary>
  /// Element names the comparer does not support. VerifyNoInvalidContent throws
  /// NotSupportedException when any descendant of the content parent has one of these names.
  /// </summary>
  private static XName[] InvalidElements =
  {
      W.altChunk,
      W.customXml,
      W.customXmlDelRangeEnd, W.customXmlDelRangeStart,
      W.customXmlInsRangeEnd, W.customXmlInsRangeStart,
      W.customXmlMoveFromRangeEnd, W.customXmlMoveFromRangeStart,
      W.customXmlMoveToRangeEnd, W.customXmlMoveToRangeStart,
      W.moveFrom, W.moveFromRangeStart, W.moveFromRangeEnd,
      W.moveTo, W.moveToRangeStart, W.moveToRangeEnd,
      W.subDoc,
  };
  /// <summary>
  /// Describes one container element that atom creation descends into and that Coalesce
  /// rebuilds through ReconstructElement.
  /// </summary>
  private class RecursionInfo
  {
      // Children with these names are treated as properties: they are not descended into when
      // atoms are created and are copied from the original element when it is rebuilt.
      // null means the element has no such children.
      public XName[] ChildElementPropertyNames;

      // Name of the container element this entry applies to.
      public XName ElementName;
  }
  /// <summary>
  /// Container elements that are descended into when atoms are created, together with the
  /// names of their property children (null when there are none). Looked up by element name
  /// with FirstOrDefault, so the first matching entry wins.
  /// </summary>
  private static RecursionInfo[] RecursionElements =
  {
      new RecursionInfo { ElementName = W.del, ChildElementPropertyNames = null },
      new RecursionInfo { ElementName = W.ins, ChildElementPropertyNames = null },
      new RecursionInfo { ElementName = W.tbl, ChildElementPropertyNames = new[] { W.tblPr, W.tblGrid, W.tblPrEx } },
      new RecursionInfo { ElementName = W.tr, ChildElementPropertyNames = new[] { W.trPr, W.tblPrEx } },
      new RecursionInfo { ElementName = W.tc, ChildElementPropertyNames = new[] { W.tcPr, W.tblPrEx } },
      new RecursionInfo { ElementName = W.pict, ChildElementPropertyNames = new[] { VML.shapetype } },
      new RecursionInfo { ElementName = VML.group, ChildElementPropertyNames = null },
      new RecursionInfo { ElementName = VML.shape, ChildElementPropertyNames = null },
      new RecursionInfo { ElementName = VML.rect, ChildElementPropertyNames = null },
      new RecursionInfo { ElementName = VML.textbox, ChildElementPropertyNames = null },
      new RecursionInfo { ElementName = O._lock, ChildElementPropertyNames = null },
      new RecursionInfo { ElementName = W.txbxContent, ChildElementPropertyNames = null },
      new RecursionInfo { ElementName = W10.wrap, ChildElementPropertyNames = null },
      new RecursionInfo { ElementName = W.sdt, ChildElementPropertyNames = new[] { W.sdtPr, W.sdtEndPr } },
      new RecursionInfo { ElementName = W.sdtContent, ChildElementPropertyNames = null },
      new RecursionInfo { ElementName = W.hyperlink, ChildElementPropertyNames = null },
      new RecursionInfo { ElementName = W.fldSimple, ChildElementPropertyNames = null },
      new RecursionInfo { ElementName = VML.shapetype, ChildElementPropertyNames = null },
      new RecursionInfo { ElementName = W.smartTag, ChildElementPropertyNames = new[] { W.smartTagPr } },
      new RecursionInfo { ElementName = W.ruby, ChildElementPropertyNames = new[] { W.rubyPr } },
  };
  /// <summary>
  /// Prepares <paramref name="contentParent"/> and flattens it into an array of atoms.
  /// The element is modified in place: unids are added to its descendants and a trailing
  /// sectPr child is moved into the last paragraph.
  /// </summary>
  /// <exception cref="NotSupportedException">The content contains an unsupported element.</exception>
  internal static ComparisonUnitAtom[] CreateComparisonUnitAtomList(OpenXmlPart part, XElement contentParent, WmlComparerSettings settings)
  {
      // Order matters: validate first, tag every element with a unid, then relocate sectPr.
      VerifyNoInvalidContent(contentParent);
      AssignUnidToAllElements(contentParent);
      MoveLastSectPrIntoLastParagraph(contentParent);

      ComparisonUnitAtom[] atoms = CreateComparisonUnitAtomListInternal(part, contentParent, settings).ToArray();
      if (!s_False)
          return atoms;

      // debugging aid: show every atom in NotePad
      var dump = new StringBuilder();
      foreach (var atom in atoms)
          dump.Append(atom.ToString()).Append(Environment.NewLine);
      DocxComparerUtil.NotePad(dump.ToString());
      return atoms;
  }
  /// <summary>
  /// Throws NotSupportedException naming the first descendant (in document order) whose
  /// element name is listed in InvalidElements; returns normally when there is none.
  /// </summary>
  private static void VerifyNoInvalidContent(XElement contentParent)
  {
      foreach (var descendant in contentParent.Descendants())
      {
          if (InvalidElements.Contains(descendant.Name))
              throw new NotSupportedException("Document contains " + descendant.Name.LocalName);
      }
  }
  /// <summary>
  /// Rebuilds a w:document from a flat list of atoms by reconstructing the ancestor tree
  /// recorded on each atom, then post-processes the result with MoveLastSectPrToChildOfBody
  /// and WmlOrderElementsPerStandard.
  /// </summary>
  internal static XDocument Coalesce(ComparisonUnitAtom[] comparisonUnitAtomList)
  {
      var body = new XElement(W.body, CoalesceRecurse(comparisonUnitAtomList, 0));
      var document = new XElement(W.document,
          new XAttribute(XNamespace.Xmlns + "w", W.w.NamespaceName),
          new XAttribute(XNamespace.Xmlns + "pt14", PtOpenXml.pt.NamespaceName),
          body);
      var newXDoc = new XDocument(document);

      // little bit of cleanup
      MoveLastSectPrToChildOfBody(newXDoc);
      var orderedRoot = (XElement)WordprocessingMLUtil.WmlOrderElementsPerStandard(newXDoc.Root);
      newXDoc.Root.ReplaceWith(orderedRoot);
      return newXDoc;
  }
  /// <summary>
  /// Rebuilds the elements found at depth <paramref name="level"/> of the atoms' ancestor
  /// chains. Atoms are grouped by the unid of their ancestor at that depth; each group yields
  /// one element whose content is rebuilt from the next depth.
  /// </summary>
  /// <returns>A list of the rebuilt elements, typed as object so it can be passed as XElement content.</returns>
  private static object CoalesceRecurse(IEnumerable<ComparisonUnitAtom> list, int level)
  {
      // Per the algorithm, neither "Internal error" condition below ever evaluates to true. If one
      // does, the basic mechanism for breaking a hierarchical structure into a flat list and back is
      // broken: for a table we initially get all atoms of the entire table; when processing a row,
      // no atom has ancestors outside the row. Ditto for cells, and on down the tree.
      var atomsByAncestor = list.GroupBy(atom =>
      {
          if (atom.AncestorElements.Length <= level)
              throw new OpenXmlPowerToolsException("Internal error 4 - why do we have ComparisonUnitAtom objects with fewer ancestors than its siblings?");
          return (string)atom.AncestorElements[level].Attribute(PtOpenXml.Unid);
      });

      if (s_False)
      {
          // debugging aid: the text is only built, not displayed (inspect it in a debugger)
          var dump = new StringBuilder();
          foreach (var ancestorGroup in atomsByAncestor)
          {
              dump.AppendFormat("Group Key: {0}", ancestorGroup.Key).Append(Environment.NewLine);
              foreach (var atom in ancestorGroup)
                  dump.Append(" ").Append(atom.ToString(0)).Append(Environment.NewLine);
              dump.Append(Environment.NewLine);
          }
          var dumpText = dump.ToString();
      }

      var rebuilt = new List<XElement>();
      foreach (var siblings in atomsByAncestor)
      {
          var firstAtom = siblings.First();
          if (firstAtom.AncestorElements.Length <= level)
              throw new OpenXmlPowerToolsException("Internal error 3 - why do we have ComparisonUnitAtom objects with fewer ancestors than its siblings?");
          XElement ancestor = firstAtom.AncestorElements[level];
          XName ancestorName = ancestor.Name;

          if (ancestorName == W.p)
          {
              // Split the paragraph's atoms into runs of equal content-element name; the pPr
              // atoms supply the paragraph properties, everything else is rebuilt one level down.
              var sameNamedRuns = siblings.GroupAdjacent(a => a.ContentElement.Name.ToString());
              var paragraphProperties = sameNamedRuns
                  .Where(run => run.First().ContentElement.Name == W.pPr)
                  .Select(run => run.Select(a => a.ContentElement));
              var paragraphContent = sameNamedRuns
                  .Where(run => run.First().ContentElement.Name != W.pPr)
                  .Select(run => CoalesceRecurse(run, level + 1));
              rebuilt.Add(new XElement(W.p,
                  ancestor.Attributes(),
                  paragraphProperties,
                  paragraphContent));
              continue;
          }

          if (ancestorName == W.r)
          {
              var runContent = siblings
                  .GroupAdjacent(a => a.ContentElement.Name.ToString())
                  .Select(sameNamed =>
                  {
                      XName contentName = sameNamed.First().ContentElement.Name;
                      if (contentName != W.t && contentName != W.delText)
                          return (object)sameNamed.Select(a => a.ContentElement);

                      // Adjacent single-character text atoms are merged back into one text element.
                      string text = sameNamed.Select(a => a.ContentElement.Value).StringConcatenate();
                      return (object)new XElement(contentName,
                          GetXmlSpaceAttribute(text),
                          text);
                  });
              var runProperties = ancestor.Elements(W.rPr);
              rebuilt.Add(new XElement(W.r, runProperties, runContent));
              continue;
          }

          var recursionInfo = RecursionElements.FirstOrDefault(info => info.ElementName == ancestorName);
          if (recursionInfo != null)
          {
              rebuilt.Add(ReconstructElement(siblings, ancestor, recursionInfo.ChildElementPropertyNames, level));
              continue;
          }

          // Any other ancestor: copy its attributes and rebuild its content.
          rebuilt.Add(new XElement(ancestorName,
              ancestor.Attributes(),
              CoalesceRecurse(siblings, level + 1)));
      }
      return rebuilt;
  }
  /// <summary>
  /// Rebuilds a container element: a new element with the ancestor's name, holding the
  /// ancestor's property children (those named in <paramref name="childPropElementNames"/>,
  /// none when it is null) followed by the content rebuilt from the next level.
  /// Attributes of the ancestor are not copied.
  /// </summary>
  private static XElement ReconstructElement(IGrouping<string, ComparisonUnitAtom> g, XElement ancestorBeingConstructed, XName[] childPropElementNames, int level)
  {
      object rebuiltContent = CoalesceRecurse(g, level + 1);
      IEnumerable<XElement> propertyChildren = childPropElementNames == null
          ? null
          : ancestorBeingConstructed.Elements().Where(child => childPropElementNames.Contains(child.Name));
      return new XElement(ancestorBeingConstructed.Name, propertyChildren, rebuiltContent);
  }
  /// <summary>
  /// Moves the sectPr that is a direct child of <paramref name="contentParent"/> into the
  /// paragraph properties of the last paragraph, creating the pPr element when the
  /// paragraph has none. Does nothing when there is no such sectPr.
  /// </summary>
  /// <param name="contentParent">Element whose direct sectPr child is relocated; modified in place.</param>
  /// <exception cref="OpenXmlPowerToolsException">
  /// More than one sectPr child exists, or there is no paragraph to receive the sectPr.
  /// </exception>
  private static void MoveLastSectPrIntoLastParagraph(XElement contentParent)
  {
      var lastSectPrList = contentParent.Elements(W.sectPr).ToList();
      if (lastSectPrList.Count > 1)
          throw new OpenXmlPowerToolsException("Invalid document");
      var lastSectPr = lastSectPrList.FirstOrDefault();
      if (lastSectPr == null)
          return;

      // Prefer the last paragraph that is a direct child; otherwise take the last one anywhere below.
      var lastParagraph = contentParent.Elements(W.p).LastOrDefault()
          ?? contentParent.Descendants(W.p).LastOrDefault();
      if (lastParagraph == null)
          throw new OpenXmlPowerToolsException("Invalid document - no paragraph found to hold the section properties");

      var pPr = lastParagraph.Element(W.pPr);
      if (pPr == null)
      {
          pPr = new XElement(W.pPr);
          // Attach the new element itself. Passing the XName here would add its string form
          // as text and leave pPr detached, so the sectPr added below would be lost.
          lastParagraph.AddFirst(pPr);
      }

      // lastSectPr still has a parent at this point, so Add stores a copy; the original is removed next.
      pPr.Add(lastSectPr);
      contentParent.Elements(W.sectPr).Remove();
  }
  /// <summary>
  /// Collects, in document order, the atoms produced by walking <paramref name="contentParent"/>
  /// with CreateComparisonUnitAtomListRecurse.
  /// </summary>
  private static List<ComparisonUnitAtom> CreateComparisonUnitAtomListInternal(OpenXmlPart part, XElement contentParent, WmlComparerSettings settings)
  {
      // The recursive walk appends to this list as it goes.
      List<ComparisonUnitAtom> atoms = new List<ComparisonUnitAtom>();
      CreateComparisonUnitAtomListRecurse(part, contentParent, atoms, settings);
      return atoms;
  }
  /// <summary>
  /// Ancestor element names that GetComparisonUnitList uses to build each word's
  /// hierarchical grouping path; each corresponds to a ComparisonUnitGroupType.
  /// </summary>
  private static XName[] ComparisonGroupingElements = new XName[]
  {
      W.p, W.tbl, W.tr, W.tc, W.txbxContent,
  };
  /// <summary>
  /// Walks <paramref name="element"/> in document order and appends the atoms it contains
  /// to <paramref name="comparisonUnitAtomList"/>:
  /// body / footnote / endnote are descended into; a paragraph yields its content followed
  /// by one pPr atom; a run yields its non-rPr children; text yields one atom per character;
  /// AllowableRunChildren (and w:object) yield one atom each; RecursionElements are descended
  /// into without their property children; ElementsToThrowAway are skipped; anything else
  /// is descended into.
  /// </summary>
  private static void CreateComparisonUnitAtomListRecurse(OpenXmlPart part, XElement element, List<ComparisonUnitAtom> comparisonUnitAtomList, WmlComparerSettings settings)
  {
      if (element.Name == W.body || element.Name == W.footnote || element.Name == W.endnote)
      {
          foreach (var item in element.Elements())
              CreateComparisonUnitAtomListRecurse(part, item, comparisonUnitAtomList, settings);
          return;
      }

      if (element.Name == W.p)
      {
          var paraChildrenToProcess = element
              .Elements()
              .Where(e => e.Name != W.pPr);
          foreach (var item in paraChildrenToProcess)
              CreateComparisonUnitAtomListRecurse(part, item, comparisonUnitAtomList, settings);

          // Every paragraph ends with a pPr atom; an empty pPr stands in when the paragraph has none.
          var paraProps = element.Element(W.pPr) ?? new XElement(W.pPr);
          comparisonUnitAtomList.Add(new ComparisonUnitAtom(
              paraProps,
              GetAncestorChainForAtom(element),
              part,
              settings));
          return;
      }

      if (element.Name == W.r)
      {
          var runChildrenToProcess = element
              .Elements()
              .Where(e => e.Name != W.rPr);
          foreach (var item in runChildrenToProcess)
              CreateComparisonUnitAtomListRecurse(part, item, comparisonUnitAtomList, settings);
          return;
      }

      if (element.Name == W.t || element.Name == W.delText)
      {
          // The ancestor chain is the same for every character of this text element, so the tree
          // is walked once; each atom still receives its own array instance.
          var ancestors = GetAncestorChainForAtom(element);
          foreach (var ch in element.Value)
          {
              comparisonUnitAtomList.Add(new ComparisonUnitAtom(
                  new XElement(element.Name, ch),
                  (XElement[])ancestors.Clone(),
                  part,
                  settings));
          }
          return;
      }

      if (AllowableRunChildren.Contains(element.Name) || element.Name == W._object)
      {
          comparisonUnitAtomList.Add(new ComparisonUnitAtom(
              element,
              GetAncestorChainForAtom(element),
              part,
              settings));
          return;
      }

      var re = RecursionElements.FirstOrDefault(z => z.ElementName == element.Name);
      if (re != null)
      {
          AnnotateElementWithProps(part, element, comparisonUnitAtomList, re.ChildElementPropertyNames, settings);
          return;
      }

      if (ElementsToThrowAway.Contains(element.Name))
          return;

      AnnotateElementWithProps(part, element, comparisonUnitAtomList, null, settings);
  }

  /// <summary>
  /// Returns <paramref name="element"/> and its ancestors, outermost first, stopping below the
  /// first w:body, w:footnotes or w:endnotes ancestor (that element is not included).
  /// </summary>
  private static XElement[] GetAncestorChainForAtom(XElement element)
  {
      return element
          .AncestorsAndSelf()
          .TakeWhile(a => a.Name != W.body && a.Name != W.footnotes && a.Name != W.endnotes)
          .Reverse()
          .ToArray();
  }
  /// <summary>
  /// Descends into the children of <paramref name="element"/>, skipping those whose name is
  /// listed in <paramref name="childElementPropertyNames"/> (null means skip nothing).
  /// Despite its name, this method adds no annotation; it only recurses.
  /// </summary>
  private static void AnnotateElementWithProps(OpenXmlPart part, XElement element, List<ComparisonUnitAtom> comparisonUnitAtomList, XName[] childElementPropertyNames, WmlComparerSettings settings)
  {
      foreach (var child in element.Elements())
      {
          bool isPropertyChild = childElementPropertyNames != null
              && childElementPropertyNames.Contains(child.Name);
          if (isPropertyChild)
              continue;
          CreateComparisonUnitAtomListRecurse(part, child, comparisonUnitAtomList, settings);
      }
  }
  /// <summary>
  /// Gives every descendant of <paramref name="contentParent"/> that has no PtOpenXml.Unid
  /// attribute a new one holding a GUID as 32 hex digits without hyphens. Existing unids are
  /// kept, and <paramref name="contentParent"/> itself is not tagged.
  /// </summary>
  private static void AssignUnidToAllElements(XElement contentParent)
  {
      var untagged = contentParent
          .Descendants()
          .Where(e => e.Attribute(PtOpenXml.Unid) == null);
      foreach (var element in untagged)
      {
          // "N" is the hyphen-free GUID format.
          element.Add(new XAttribute(PtOpenXml.Unid, Guid.NewGuid().ToString("N")));
      }
  }
  6442. }
  /// <summary>
  /// A comparison unit word paired with the path of grouping ancestors it belongs to.
  /// </summary>
  internal class WithHierarchicalGroupingKey
  {
      // The word itself.
      public ComparisonUnitWord ComparisonUnitWord;

      // One "localName:unid" entry per grouping ancestor, outermost first.
      public string[] HierarchicalGroupingArray;
  }
  /// <summary>
  /// Base class of the comparison unit tree (atoms, words and groups).
  /// </summary>
  public abstract class ComparisonUnit
  {
      // Child units; may be null for units that have no children.
      public List<ComparisonUnit> Contents;
      public string SHA1Hash;
      public CorrelationStatus CorrelationStatus;

      /// <summary>
      /// Returns every unit below this one in depth-first, document order. The result is a
      /// freshly built list; it is empty when this unit has no Contents.
      /// </summary>
      public IEnumerable<ComparisonUnit> Descendants()
      {
          List<ComparisonUnit> comparisonUnitList = new List<ComparisonUnit>();
          DescendantsInternal(this, comparisonUnitList);
          return comparisonUnitList;
      }

      /// <summary>
      /// Returns the atoms among <see cref="Descendants"/>.
      /// </summary>
      public IEnumerable<ComparisonUnitAtom> DescendantContentAtoms()
      {
          return Descendants().OfType<ComparisonUnitAtom>();
      }

      private int? m_DescendantContentAtomsCount = null;

      /// <summary>
      /// Number of descendant atoms. Computed on first access and cached, so later changes
      /// to the tree are not reflected.
      /// </summary>
      public int DescendantContentAtomsCount
      {
          get
          {
              if (m_DescendantContentAtomsCount == null)
                  m_DescendantContentAtomsCount = this.DescendantContentAtoms().Count();
              return m_DescendantContentAtomsCount.Value;
          }
      }

      // Appends the descendants of comparisonUnit to comparisonUnitList, parents before children.
      private static void DescendantsInternal(ComparisonUnit comparisonUnit, List<ComparisonUnit> comparisonUnitList)
      {
          // Guard the unit itself, not only its children: a unit without Contents
          // simply has no descendants instead of causing a NullReferenceException.
          if (comparisonUnit.Contents == null)
              return;
          foreach (var cu in comparisonUnit.Contents)
          {
              comparisonUnitList.Add(cu);
              DescendantsInternal(cu, comparisonUnitList);
          }
      }

      /// <summary>
      /// Returns a debugging description of this unit, indented by <paramref name="indent"/> spaces.
      /// </summary>
      public abstract string ToString(int indent);

      // Debugging aid: a header line followed by each unit's description at indent 2.
      internal static string ComparisonUnitListToString(ComparisonUnit[] cul)
      {
          var sb = new StringBuilder();
          sb.Append("Dump Comparision Unit List To String" + Environment.NewLine);
          foreach (var item in cul)
          {
              sb.Append(item.ToString(2) + Environment.NewLine);
          }
          return sb.ToString();
      }
  }
  /// <summary>
  /// A word: a run of adjacent atoms that is compared as a single unit.
  /// </summary>
  internal class ComparisonUnitWord : ComparisonUnit
  {
      /// <summary>
      /// Builds the word from its atoms. The word's SHA1Hash is the hash of the concatenated
      /// SHA1Hash values of the atoms, in order.
      /// </summary>
      public ComparisonUnitWord(IEnumerable<ComparisonUnitAtom> comparisonUnitAtomList)
      {
          Contents = comparisonUnitAtomList.OfType<ComparisonUnit>().ToList();

          var concatenatedHashes = new StringBuilder();
          foreach (var unit in Contents)
              concatenatedHashes.Append(unit.SHA1Hash);
          SHA1Hash = PtUtils.SHA1HashStringForUTF8String(concatenatedHashes.ToString());
      }

      // NOTE(review): judging by the name, elements that may carry a relationship id
      // attribute; not used within this class - confirm against its users.
      public static XName[] s_ElementsWithRelationshipIds =
      {
          A.blip, A.hlinkClick, A.relIds,
          C.chart, C.externalData, C.userShapes,
          DGM.relIds,
          O.OLEObject,
          VML.fill, VML.imagedata, VML.stroke,
          W.altChunk, W.attachedTemplate, W.control, W.dataSource,
          W.embedBold, W.embedBoldItalic, W.embedItalic, W.embedRegular,
          W.footerReference, W.headerReference, W.headerSource, W.hyperlink,
          W.printerSettings, W.recipientData, W.saveThroughXslt, W.sourceFileName,
          W.src, W.subDoc,
          WNE.toolbarData,
      };

      // NOTE(review): judging by the name, the attributes that hold relationship ids;
      // not used within this class - confirm against its users.
      public static XName[] s_RelationshipAttributeNames =
      {
          R.embed, R.link, R.id,
          R.cs, R.dm, R.lo, R.qs,
          R.href, R.pict,
      };

      /// <summary>
      /// Debugging description: a header line with the first 8 characters of the word's hash,
      /// then one line per atom indented two further columns.
      /// </summary>
      public override string ToString(int indent)
      {
          var text = new StringBuilder();
          text.Append("".PadRight(indent))
              .Append("Word SHA1:")
              .Append(this.SHA1Hash.Substring(0, 8))
              .Append(Environment.NewLine);
          foreach (var atom in Contents)
              text.Append(atom.ToString(indent + 2)).Append(Environment.NewLine);
          return text.ToString();
      }
  }
  6557. public class ComparisonUnitAtom : ComparisonUnit
  6558. {
  6559. // AncestorElements are kept in order from the body to the leaf, because this is the order in which we need to access in order
  6560. // to reassemble the document. However, in many places in the code, it is necessary to find the nearest ancestor, i.e. cell
  6561. // so it is necessary to reverse the order when looking for it, i.e. look from the leaf back to the body element.
  6562. public XElement[] AncestorElements;
  6563. public string[] AncestorUnids;
  6564. public XElement ContentElement;
  6565. public XElement ContentElementBefore;
  6566. public ComparisonUnitAtom ComparisonUnitAtomBefore;
  6567. public OpenXmlPart Part;
  6568. public XElement RevTrackElement;
  6569. public ComparisonUnitAtom(XElement contentElement, XElement[] ancestorElements, OpenXmlPart part, WmlComparerSettings settings)
  6570. {
  6571. ContentElement = contentElement;
  6572. AncestorElements = ancestorElements;
  6573. Part = part;
  6574. RevTrackElement = GetRevisionTrackingElementFromAncestors(contentElement, AncestorElements);
  6575. if (RevTrackElement == null)
  6576. {
  6577. CorrelationStatus = CorrelationStatus.Equal;
  6578. }
  6579. else
  6580. {
  6581. if (RevTrackElement.Name == W.del)
  6582. CorrelationStatus = CorrelationStatus.Deleted;
  6583. else if (RevTrackElement.Name == W.ins)
  6584. CorrelationStatus = CorrelationStatus.Inserted;
  6585. }
  6586. string sha1Hash = (string)contentElement.Attribute(PtOpenXml.SHA1Hash);
  6587. if (sha1Hash != null)
  6588. {
  6589. SHA1Hash = sha1Hash;
  6590. }
  6591. else
  6592. {
  6593. var shaHashString = GetSha1HashStringForElement(ContentElement, settings);
  6594. SHA1Hash = PtUtils.SHA1HashStringForUTF8String(shaHashString);
  6595. }
  6596. }
  6597. private string GetSha1HashStringForElement(XElement contentElement, WmlComparerSettings settings)
  6598. {
  6599. var text = contentElement.Value;
  6600. if (settings.CaseInsensitive)
  6601. text = text.ToUpper(settings.CultureInfo);
  6602. if (settings.ConflateBreakingAndNonbreakingSpaces)
  6603. text = text.Replace(' ', '\x00a0');
  6604. return contentElement.Name.LocalName + text;
  6605. }
  6606. private static XElement GetRevisionTrackingElementFromAncestors(XElement contentElement, XElement[] ancestors)
  6607. {
  6608. XElement revTrackElement = null;
  6609. if (contentElement.Name == W.pPr)
  6610. {
  6611. revTrackElement = contentElement
  6612. .Elements(W.rPr)
  6613. .Elements()
  6614. .FirstOrDefault(e => e.Name == W.del || e.Name == W.ins);
  6615. return revTrackElement;
  6616. }
  6617. revTrackElement = ancestors.FirstOrDefault(a => a.Name == W.del || a.Name == W.ins);
  6618. return revTrackElement;
  6619. }
  6620. public override string ToString(int indent)
  6621. {
  6622. int xNamePad = 16;
  6623. var indentString = "".PadRight(indent);
  6624. var sb = new StringBuilder();
  6625. sb.Append(indentString);
  6626. string correlationStatus = "";
  6627. if (CorrelationStatus != CorrelationStatus.Nil)
  6628. correlationStatus = string.Format("[{0}] ", CorrelationStatus.ToString().PadRight(8));
  6629. if (ContentElement.Name == W.t || ContentElement.Name == W.delText)
  6630. {
  6631. sb.AppendFormat("Atom {0}: {1} {2} SHA1:{3} ", PadLocalName(xNamePad, this), ContentElement.Value, correlationStatus, this.SHA1Hash.Substring(0, 8));
  6632. AppendAncestorsDump(sb, this);
  6633. }
  6634. else
  6635. {
  6636. sb.AppendFormat("Atom {0}: {1} SHA1:{2} ", PadLocalName(xNamePad, this), correlationStatus, this.SHA1Hash.Substring(0, 8));
  6637. AppendAncestorsDump(sb, this);
  6638. }
  6639. return sb.ToString();
  6640. }
  6641. public string ToStringAncestorUnids(int indent)
  6642. {
  6643. int xNamePad = 16;
  6644. var indentString = "".PadRight(indent);
  6645. var sb = new StringBuilder();
  6646. sb.Append(indentString);
  6647. string correlationStatus = "";
  6648. if (CorrelationStatus != CorrelationStatus.Nil)
  6649. correlationStatus = string.Format("[{0}] ", CorrelationStatus.ToString().PadRight(8));
  6650. if (ContentElement.Name == W.t || ContentElement.Name == W.delText)
  6651. {
  6652. sb.AppendFormat("Atom {0}: {1} {2} SHA1:{3} ", PadLocalName(xNamePad, this), ContentElement.Value, correlationStatus, this.SHA1Hash.Substring(0, 8));
  6653. AppendAncestorsUnidsDump(sb, this);
  6654. }
  6655. else
  6656. {
  6657. sb.AppendFormat("Atom {0}: {1} SHA1:{2} ", PadLocalName(xNamePad, this), correlationStatus, this.SHA1Hash.Substring(0, 8));
  6658. AppendAncestorsUnidsDump(sb, this);
  6659. }
  6660. return sb.ToString();
  6661. }
  /// <summary>Returns the debug description of this atom with no leading indentation.</summary>
  public override string ToString()
  {
      const int noIndent = 0;
      return ToString(noIndent);
  }
  /// <summary>Returns the ancestor-unid debug description of this atom with no leading indentation.</summary>
  public string ToStringAncestorUnids()
  {
      const int noIndent = 0;
      return ToStringAncestorUnids(noIndent);
  }
  /// <summary>
  /// Returns the local name of the atom's content element followed by a space, right-padded
  /// with '-' to <paramref name="xNamePad"/> characters, plus one trailing space.
  /// </summary>
  /// <param name="xNamePad">Total width the name column is padded to (before the trailing space).</param>
  /// <param name="item">Atom whose content element name is formatted.</param>
  private static string PadLocalName(int xNamePad, ComparisonUnitAtom item)
  {
      var localName = item.ContentElement.Name.LocalName;
      var dashedColumn = string.Concat(localName, " ").PadRight(xNamePad, '-');
      return string.Concat(dashedColumn, " ");
  }
  /// <summary>
  /// Appends "Ancestors:" followed by the '/'-separated ancestor chain of <paramref name="sr"/>.
  /// Each segment is the ancestor's local name plus its bracketed unid prefix when it has one.
  /// </summary>
  /// <param name="sb">Builder that receives the text.</param>
  /// <param name="sr">Atom whose AncestorElements are listed.</param>
  private void AppendAncestorsDump(StringBuilder sb, ComparisonUnitAtom sr)
  {
      var segments =
          from ancestor in sr.AncestorElements
          select ancestor.Name.LocalName + GetUnid(ancestor) + "/";

      // Every segment ends in '/', so the last separator is trimmed off again.
      var path = segments.StringConcatenate().TrimEnd('/');
      sb.Append("Ancestors:" + path);
  }
  /// <summary>
  /// Appends "Ancestors:" followed by the '/'-separated ancestor chain of <paramref name="sr"/>,
  /// pairing each ancestor element positionally with the entry of AncestorUnids and showing the
  /// first eight characters of that unid in brackets.
  /// </summary>
  /// <param name="sb">Builder that receives the text.</param>
  /// <param name="sr">Atom whose AncestorElements and AncestorUnids are listed.</param>
  private void AppendAncestorsUnidsDump(StringBuilder sb, ComparisonUnitAtom sr)
  {
      // Zip stops at the shorter of the two sequences.
      var segments = sr.AncestorElements.Zip(
          sr.AncestorUnids,
          (element, unid) => element.Name.LocalName + "[" + unid.Substring(0, 8) + "]/");

      // Every segment ends in '/', so the last separator is trimmed off again.
      var path = segments.StringConcatenate().TrimEnd('/');
      sb.Append("Ancestors:" + path);
  }
  /// <summary>
  /// Returns the first eight characters of the element's PtOpenXml.Unid attribute wrapped in
  /// square brackets, or an empty string when the element has no such attribute.
  /// </summary>
  /// <param name="p">Element whose unid attribute is read.</param>
  private string GetUnid(XElement p)
  {
      var unidAttribute = p.Attribute(PtOpenXml.Unid);
      var unid = (string)unidAttribute;
      return unid != null
          ? "[" + unid.Substring(0, 8) + "]"
          : "";
  }
  /// <summary>
  /// Renders a list of atoms as text, one atom per line. Each line is the indentation, a
  /// one-based six-digit position in brackets, and the atom's own description.
  /// </summary>
  /// <param name="comparisonUnitAtomList">Atoms to render, in order.</param>
  /// <param name="indent">Number of spaces written in front of every line.</param>
  /// <returns>The concatenated lines, each terminated by Environment.NewLine.</returns>
  public static string ComparisonUnitAtomListToString(List<ComparisonUnitAtom> comparisonUnitAtomList, int indent)
  {
      // The padding is built per line, so an empty list never evaluates it.
      var lines = comparisonUnitAtomList.Select((atom, position) =>
          "".PadRight(indent)
          + string.Format("[{0:000000}] ", position + 1)
          + atom.ToString(0)
          + Environment.NewLine);

      var sb = new StringBuilder();
      foreach (var line in lines)
          sb.Append(line);
      return sb.ToString();
  }
  6711. }
  /// <summary>
  /// Kind of structural container a ComparisonUnitGroup is built for.
  /// </summary>
  internal enum ComparisonUnitGroupType
  {
      /// <summary>Paragraph (w:p).</summary>
      Paragraph = 0,

      /// <summary>Table (w:tbl).</summary>
      Table = 1,

      /// <summary>Table row (w:tr).</summary>
      Row = 2,

      /// <summary>Table cell (w:tc).</summary>
      Cell = 3,

      /// <summary>Text box content (w:txbxContent).</summary>
      Textbox = 4,
  }
  /// <summary>
  /// A comparison unit made of child comparison units that share one structural container
  /// (paragraph, table, row, cell or text box content). The hashes are read from attributes on
  /// that container element.
  /// </summary>
  internal class ComparisonUnitGroup : ComparisonUnit
  {
      public ComparisonUnitGroupType ComparisonUnitGroupType;
      public string CorrelatedSHA1Hash;
      public string StructureSHA1Hash;

      /// <summary>
      /// Creates a group over <paramref name="comparisonUnitList"/> and copies the SHA1,
      /// correlated SHA1 and structure SHA1 hash attributes from the container element.
      /// </summary>
      /// <param name="comparisonUnitList">Child units; must contain at least one element.</param>
      /// <param name="groupType">Kind of container, stored as-is on the group.</param>
      /// <param name="level">
      /// Zero-based index into the first atom's structural ancestors (w:tbl, w:tr, w:tc, w:p,
      /// w:txbxContent, in the order they appear in AncestorElements) selecting the container.
      /// </param>
      /// <exception cref="OpenXmlPowerToolsException">
      /// <paramref name="level"/> does not address one of the structural ancestors.
      /// </exception>
      public ComparisonUnitGroup(IEnumerable<ComparisonUnit> comparisonUnitList, ComparisonUnitGroupType groupType, int level)
      {
          // Materialize once and work from the copy, so a lazy sequence is not enumerated twice.
          Contents = comparisonUnitList.ToList();
          ComparisonUnitGroupType = groupType;

          var firstAtom = GetFirstComparisonUnitAtomOfGroup(Contents.First());

          // The container is selected purely by position among the structural ancestors;
          // groupType does not take part in the lookup.
          var structuralAncestors = firstAtom.AncestorElements
              .Where(ae => ae.Name == W.tbl || ae.Name == W.tr || ae.Name == W.tc || ae.Name == W.p || ae.Name == W.txbxContent)
              .ToArray();

          // Report a bad level as the internal error it is, rather than as a raw index failure.
          if (level < 0 || level >= structuralAncestors.Length)
              throw new OpenXmlPowerToolsException("Internal error: ComparisonUnitGroup");

          var ancestor = structuralAncestors[level];
          SHA1Hash = (string)ancestor.Attribute(PtOpenXml.SHA1Hash);
          CorrelatedSHA1Hash = (string)ancestor.Attribute(PtOpenXml.CorrelatedSHA1Hash);
          StructureSHA1Hash = (string)ancestor.Attribute(PtOpenXml.StructureSHA1Hash);
      }

      /// <summary>
      /// Follows the first child of nested groups until a ComparisonUnitWord is reached and
      /// returns that word's first atom.
      /// </summary>
      /// <param name="group">Unit to start from; a group or a word.</param>
      /// <exception cref="OpenXmlPowerToolsException">
      /// The unit reached is neither a group nor a word.
      /// </exception>
      public static ComparisonUnitAtom GetFirstComparisonUnitAtomOfGroup(ComparisonUnit group)
      {
          var current = group;
          while (true)
          {
              var nestedGroup = current as ComparisonUnitGroup;
              if (nestedGroup == null)
                  break;
              current = nestedGroup.Contents.First();
          }

          var word = current as ComparisonUnitWord;
          if (word == null)
              throw new OpenXmlPowerToolsException("Internal error: GetFirstComparisonUnitAtomOfGroup");
          return (ComparisonUnitAtom)word.Contents.First();
      }

      /// <summary>
      /// Returns a header line with the group type and SHA1 hash, followed by the description of
      /// every child rendered two spaces deeper.
      /// </summary>
      /// <param name="indent">Number of spaces written in front of the header line.</param>
      public override string ToString(int indent)
      {
          var sb = new StringBuilder();
          sb.Append("".PadRight(indent) + "Group Type: " + ComparisonUnitGroupType.ToString() + " SHA1:" + SHA1Hash + Environment.NewLine);
          foreach (var child in Contents)
              sb.Append(child.ToString(indent + 2));
          return sb.ToString();
      }
  }
  /// <summary>
  /// Correlation state carried by comparison units and correlated sequences.
  /// </summary>
  public enum CorrelationStatus
  {
      /// <summary>No status; atom descriptions omit the status tag for this value.</summary>
      Nil = 0,

      Normal = 1,

      /// <summary>Content present on both sides that is not known to be equal.</summary>
      Unknown = 2,

      /// <summary>Content present only in the second sequence.</summary>
      Inserted = 3,

      /// <summary>Content present only in the first sequence.</summary>
      Deleted = 4,

      /// <summary>Content present on both sides and equal.</summary>
      Equal = 5,

      Group = 6,
  }
  /// <summary>
  /// Simple holder for a hash string. Judging by the name it is meant to be attached as an
  /// annotation; the attaching code is outside this section.
  /// </summary>
  class PartSHA1HashAnnotation
  {
      /// <summary>The hash string supplied at construction.</summary>
      public string Hash;

      /// <summary>Stores <paramref name="hash"/> unchanged.</summary>
      public PartSHA1HashAnnotation(string hash)
      {
          this.Hash = hash;
      }
  }
  /// <summary>
  /// One step of a correlation result: a status plus the comparison units it covers in the
  /// first and/or second sequence. In DEBUG builds it also records where it was constructed.
  /// </summary>
  class CorrelatedSequence
  {
      public CorrelationStatus CorrelationStatus;

      // ComparisonUnitArray1 == null and ComparisonUnitArray2 holds a sequence: inserted content.
      // ComparisonUnitArray2 == null and ComparisonUnitArray1 holds a sequence: deleted content.
      // Both arrays hold a sequence: the status is either Unknown or Equal.
      public ComparisonUnit[] ComparisonUnitArray1;
      public ComparisonUnit[] ComparisonUnitArray2;

  #if DEBUG
      /// <summary>Source file of the code that constructed this instance (may be null).</summary>
      public string SourceFile;

      /// <summary>Source line of the code that constructed this instance (0 when unavailable).</summary>
      public int SourceLine;
  #endif

      public CorrelatedSequence()
      {
  #if DEBUG
          // Capture the stack once and read both values from the caller's frame (index 1;
          // index 0 is this constructor). GetFrame returns null when no such frame exists.
          var callerFrame = new System.Diagnostics.StackTrace(true).GetFrame(1);
          if (callerFrame != null)
          {
              SourceFile = callerFrame.GetFileName();
              SourceLine = callerFrame.GetFileLineNumber();
          }
  #endif
      }

      /// <summary>
      /// Returns a multi-line debug dump: the status and the units of each populated array.
      /// For Equal only the second array is listed. Arrays that are null are skipped.
      /// </summary>
      public override string ToString()
      {
          var sb = new StringBuilder();
          // NOTE(review): both indents are a single space in this source; the name
          // indentString4 suggests four spaces were intended - confirm against the original file.
          var indentString = " ";
          var indentString4 = " ";
          sb.Append("CorrelatedSequence =====" + Environment.NewLine);
  #if DEBUG
          sb.Append(indentString + "Created at Line: " + SourceLine.ToString() + Environment.NewLine);
  #endif
          sb.Append(indentString + "CorrelatedItem =====" + Environment.NewLine);
          sb.Append(indentString4 + "CorrelationStatus: " + CorrelationStatus.ToString() + Environment.NewLine);
          if (CorrelationStatus == CorrelationStatus.Equal)
          {
              AppendUnits(sb, indentString4 + "ComparisonUnitList =====", ComparisonUnitArray2);
          }
          else
          {
              AppendUnits(sb, indentString4 + "ComparisonUnitList1 =====", ComparisonUnitArray1);
              AppendUnits(sb, indentString4 + "ComparisonUnitList2 =====", ComparisonUnitArray2);
          }
          return sb.ToString();
      }

      // Writes the header line and one line per unit (indent 6); does nothing for a null array.
      private static void AppendUnits(StringBuilder sb, string header, ComparisonUnit[] units)
      {
          if (units == null)
              return;
          sb.Append(header + Environment.NewLine);
          foreach (var item in units)
              sb.Append(item.ToString(6) + Environment.NewLine);
      }
  }
  /// <summary>
  /// Debugging helpers for the comparer.
  /// </summary>
  internal class DocxComparerUtil
  {
      /// <summary>
      /// Writes <paramref name="str"/> to a new, uniquely named .txt file in the temp directory and
      /// opens it through ExecutableRunner.RunExecutable. Notepad++ is used when found at either
      /// of its two Program Files locations; otherwise the Windows Notepad path is used without
      /// checking that it exists.
      /// </summary>
      /// <param name="str">Text to show.</param>
      public static void NotePad(string str)
      {
          var tempPath = Path.GetTempPath();

          // "N" yields the 32 hex digits of the GUID without hyphens.
          var fileName = Guid.NewGuid().ToString("N") + ".txt";
          var textFile = new FileInfo(Path.Combine(tempPath, fileName));
          File.WriteAllText(textFile.FullName, str);

          var preferredEditors = new[]
          {
              @"C:\Program Files (x86)\Notepad++\notepad++.exe",
              @"C:\Program Files\Notepad++\notepad++.exe",
          };

          FileInfo editor = null;
          foreach (var candidatePath in preferredEditors)
          {
              var candidate = new FileInfo(candidatePath);
              if (candidate.Exists)
              {
                  editor = candidate;
                  break;
              }
          }
          if (editor == null)
              editor = new FileInfo(@"C:\Windows\System32\notepad.exe");

          // tempPath is passed as the third argument - presumably the working directory; confirm in ExecutableRunner.
          ExecutableRunner.RunExecutable(editor.FullName, textFile.FullName, tempPath);
      }
  }
  6865. #if false
  /// <summary>
  /// SHA1 helpers that return hashes as lowercase hexadecimal strings.
  /// </summary>
  public class PtpSHA1Util
  {
      /// <summary>Returns the SHA1 hash of the UTF-8 encoding of <paramref name="s"/> as lowercase hex.</summary>
      public static string SHA1HashStringForUTF8String(string s)
      {
          return SHA1HashStringForByteArray(Encoding.UTF8.GetBytes(s));
      }

      /// <summary>Returns the SHA1 hash of <paramref name="bytes"/> as lowercase hex.</summary>
      public static string SHA1HashStringForByteArray(byte[] bytes)
      {
          // The hash algorithm object is disposable; release it as soon as the hash is computed.
          using (var sha1 = SHA1.Create())
          {
              return HexStringFromBytes(sha1.ComputeHash(bytes));
          }
      }

      /// <summary>Formats every byte as two lowercase hexadecimal digits and concatenates them.</summary>
      public static string HexStringFromBytes(byte[] bytes)
      {
          var sb = new StringBuilder();
          foreach (byte b in bytes)
              sb.Append(b.ToString("x2"));
          return sb.ToString();
      }
  }
  /// <summary>
  /// Base64 formatting helper.
  /// </summary>
  public class Base64Util
  {
      /// <summary>
      /// Encodes <paramref name="byteArray"/> as base64 and breaks the text into lines of at most
      /// 76 characters. Every line, including the last, ends with Environment.NewLine; an empty
      /// array gives an empty string.
      /// </summary>
      /// <param name="byteArray">Bytes to encode.</param>
      public static string Convert76CharLineLength(byte[] byteArray)
      {
          const int lineLength = 76;
          var encoded = System.Convert.ToBase64String(byteArray);
          var wrapped = new StringBuilder();
          for (var start = 0; start < encoded.Length; start += lineLength)
          {
              // The final line may be shorter than a full line.
              var count = Math.Min(lineLength, encoded.Length - start);
              wrapped.Append(encoded, start, count);
              wrapped.Append(Environment.NewLine);
          }
          return wrapped.ToString();
      }
  }
  6927. #endif
  6928. }