WmlComparer.Private.Methods.ProduceDocument.cs 151 KB


  1. // Copyright (c) Microsoft. All rights reserved.
  2. // Licensed under the MIT license. See LICENSE file in the project root for full license information.
  3. using System;
  4. using System.Collections.Generic;
  5. using System.IO;
  6. using System.IO.Packaging;
  7. using System.Linq;
  8. using System.Text;
  9. using System.Xml.Linq;
  10. using DocumentFormat.OpenXml.Packaging;
  11. namespace OpenXmlPowerTools
  12. {
  13. public static partial class WmlComparer
  14. {
  /// <summary>
  /// Correlates the body content of <paramref name="wDoc1"/> and <paramref name="wDoc2"/>, rebuilds the
  /// body of a copy of <paramref name="wmlResult"/> with tracked-revision markup, and returns that copy.
  /// </summary>
  /// <param name="settings">Comparer settings handed to every processing step.</param>
  /// <param name="wmlResult">Document whose bytes seed the package that receives the revisions.</param>
  /// <param name="wDoc1">The "before" document; its body-level w:sectPr is carried over to the result.</param>
  /// <param name="wDoc2">The "after" document; missing styles are copied from it into the result.</param>
  /// <returns>A new <c>WmlDocument</c> named "Dummy.docx" containing the revised package bytes.</returns>
  private static WmlDocument ProduceDocumentWithTrackedRevisions(
      WmlComparerSettings settings,
      WmlDocument wmlResult,
      WordprocessingDocument wDoc1,
      WordprocessingDocument wDoc2)
  {
      // Remember the body-level sectPr of the first document so it can be re-applied to the new document.
      XElement savedSectPr = wDoc1.MainDocumentPart.GetXDocument().Root?.Element(W.body)?.Element(W.sectPr);

      // Annotate block-level content of both documents with hashes before building comparison units.
      XElement body1 = wDoc1.MainDocumentPart.GetXDocument().Root?.Element(W.body);
      AddSha1HashToBlockLevelContent(wDoc1.MainDocumentPart, body1, settings);

      XElement body2 = wDoc2.MainDocumentPart.GetXDocument().Root?.Element(W.body);
      AddSha1HashToBlockLevelContent(wDoc2.MainDocumentPart, body2, settings);

      ComparisonUnitAtom[] atoms1 = CreateComparisonUnitAtomList(
          wDoc1.MainDocumentPart,
          wDoc1.MainDocumentPart.GetXDocument().Root?.Element(W.body),
          settings);

      if (False)
      {
          TestUtil.NotePad(string.Concat(atoms1.Select(atom => atom + Environment.NewLine)));
      }

      ComparisonUnit[] units1 = GetComparisonUnitList(atoms1, settings);

      if (False)
      {
          TestUtil.NotePad(ComparisonUnit.ComparisonUnitListToString(units1));
      }

      ComparisonUnitAtom[] atoms2 = CreateComparisonUnitAtomList(
          wDoc2.MainDocumentPart,
          wDoc2.MainDocumentPart.GetXDocument().Root?.Element(W.body),
          settings);

      if (False)
      {
          TestUtil.NotePad(string.Concat(atoms2.Select(atom => atom + Environment.NewLine)));
      }

      ComparisonUnit[] units2 = GetComparisonUnitList(atoms2, settings);

      if (False)
      {
          TestUtil.NotePad(ComparisonUnit.ComparisonUnitListToString(units2));
      }

      if (False)
      {
          string bothLists =
              "ComparisonUnitList 1 =====" + Environment.NewLine + Environment.NewLine +
              ComparisonUnit.ComparisonUnitListToString(units1) +
              Environment.NewLine +
              "ComparisonUnitList 2 =====" + Environment.NewLine + Environment.NewLine +
              ComparisonUnit.ComparisonUnitListToString(units2);
          TestUtil.NotePad(bothLists);
      }

      List<CorrelatedSequence> correlated = Lcs(units1, units2, settings);

      if (False)
      {
          TestUtil.NotePad(string.Concat(correlated.Select(sequence => sequence + Environment.NewLine)));
      }

      // Deleted or inserted table rows get a w:del / w:ins element in their w:trPr, so that the row
      // itself is marked as deleted or inserted when the document is generated.
      MarkRowsAsDeletedOrInserted(settings, correlated);

      // Flatten to a single list of atoms, each carrying its status: Deleted, Inserted, or Equal.
      List<ComparisonUnitAtom> flatAtoms = FlattenToComparisonUnitAtomList(correlated, settings);

      if (False)
      {
          TestUtil.NotePad(string.Concat(flatAtoms.Select(atom => atom + Environment.NewLine)));
      }

      // The unid hack is deliberately deferred until every group has been flattened; at this point the
      // list contains only atoms marked equal, inserted, or deleted.
      // - The table id is hacked in the normal course of events.
      // - For a deleted row no hack is needed - the deleted row ID will do.
      // - For an inserted row no hack is needed - the inserted row ID will do as well.
      AssembleAncestorUnidsInOrderToRebuildXmlTreeProperly(flatAtoms);

      if (False)
      {
          TestUtil.NotePad(string.Concat(flatAtoms.Select(atom => atom.ToStringAncestorUnids() + Environment.NewLine)));
      }

      // Finally, generate the document with revisions.
      using (var resultStream = new MemoryStream())
      {
          byte[] seedBytes = wmlResult.DocumentByteArray;
          resultStream.Write(seedBytes, 0, seedBytes.Length);

          using (WordprocessingDocument revisedDoc = WordprocessingDocument.Open(resultStream, true))
          {
              XDocument mainXDoc = revisedDoc.MainDocumentPart.GetXDocument();

              // Namespace declarations and mc:* attributes of the existing root are re-used on the new root.
              List<XAttribute> rootNamespaceAttributes = mainXDoc
                  .Root?
                  .Attributes()
                  .Where(att => att.IsNamespaceDeclaration || att.Name.Namespace == MC.mc)
                  .ToList();

              // ======================================
              // Produce a new valid WordprocessingML body from the flattened atom list.
              object newBodyChildren = ProduceNewWmlMarkupFromCorrelatedSequence(
                  revisedDoc.MainDocumentPart,
                  flatAtoms,
                  settings);

              var rebuiltBody = new XElement(W.body, newBodyChildren);
              var rebuiltRoot = new XElement(W.document, rootNamespaceAttributes, rebuiltBody);
              var newXDoc = new XDocument();
              newXDoc.Add(rebuiltRoot);

              MarkContentAsDeletedOrInserted(newXDoc, settings);
              CoalesceAdjacentRunsWithIdenticalFormatting(newXDoc);
              IgnorePt14Namespace(newXDoc.Root);

              ProcessFootnoteEndnote(
                  settings,
                  flatAtoms,
                  wDoc1.MainDocumentPart,
                  wDoc2.MainDocumentPart,
                  newXDoc);

              RectifyFootnoteEndnoteIds(
                  wDoc1.MainDocumentPart,
                  wDoc2.MainDocumentPart,
                  revisedDoc.MainDocumentPart,
                  newXDoc,
                  settings);

              ConjoinDeletedInsertedParagraphMarks(revisedDoc.MainDocumentPart, newXDoc);
              FixUpRevisionIds(revisedDoc, newXDoc);

              // A little bit of cleanup.
              MoveLastSectPrToChildOfBody(newXDoc);
              var orderedRoot = (XElement) WordprocessingMLUtil.WmlOrderElementsPerStandard(newXDoc.Root);
              mainXDoc.Root?.ReplaceWith(orderedRoot);

              /**********************************************************************************************/
              // Temporary code that removes sections. Without it, some ITU documents produce validation errors.
              // Note: this is a no-go for documents with multiple sections (title pages, front matter, body
              // matter, or switches between portrait and landscape orientation, all of which need sections).
              // TODO: Revisit
              mainXDoc.Root?.Descendants(W.sectPr).Remove();

              // Move the w:sectPr saved from the source document into the newly generated document.
              if (savedSectPr != null)
              {
                  XDocument targetXDoc = revisedDoc.MainDocumentPart.GetXDocument();

                  // Copy everything except header/footer references.
                  var clonedSectPr = new XElement(W.sectPr,
                      savedSectPr.Attributes(),
                      savedSectPr.Element(W.type),
                      savedSectPr.Element(W.pgSz),
                      savedSectPr.Element(W.pgMar),
                      savedSectPr.Element(W.cols),
                      savedSectPr.Element(W.titlePg));

                  targetXDoc.Root?.Element(W.body)?.Add(clonedSectPr);
              }
              /**********************************************************************************************/

              revisedDoc.MainDocumentPart.PutXDocument();

              FixUpFootnotesEndnotesWithCustomMarkers(revisedDoc);
              FixUpRevMarkIds(revisedDoc);
              FixUpDocPrIds(revisedDoc);
              FixUpShapeIds(revisedDoc);
              FixUpShapeTypeIds(revisedDoc);
              AddFootnotesEndnotesStyles(revisedDoc);
              CopyMissingStylesFromOneDocToAnother(wDoc2, revisedDoc);
              DeleteFootnotePropertiesInSettings(revisedDoc);
          }

          // Write the cached XDocuments of both source documents back to their parts.
          foreach (OpenXmlPart sourcePart in wDoc1.ContentParts())
          {
              sourcePart.PutXDocument();
          }

          foreach (OpenXmlPart sourcePart in wDoc2.ContentParts())
          {
              sourcePart.PutXDocument();
          }

          return new WmlDocument("Dummy.docx", resultStream.ToArray());
      }
  }
  /// <summary>
  /// Adds a <c>PtOpenXml.SHA1Hash</c> attribute to every descendant of <paramref name="contentParent"/>
  /// whose name is in <c>ElementsToHaveSha1Hash</c>; tables and rows additionally receive a
  /// <c>PtOpenXml.StructureSHA1Hash</c> attribute.
  /// </summary>
  /// <param name="part">Part that owns the content; forwarded to the cloning helper.</param>
  /// <param name="contentParent">Element whose descendants are annotated.</param>
  /// <param name="settings">Comparer settings; forwarded to the cloning helper.</param>
  private static void AddSha1HashToBlockLevelContent(OpenXmlPart part, XElement contentParent, WmlComparerSettings settings)
  {
      // The default WordprocessingML namespace declaration is stripped from the serialized text before hashing.
      const string mainNamespaceDeclaration =
          " xmlns=\"http://schemas.openxmlformats.org/wordprocessingml/2006/main\"";

      string HashOf(XElement toHash)
      {
          string serialized = toHash
              .ToString(SaveOptions.DisableFormatting)
              .Replace(mainNamespaceDeclaration, "");
          return WmlComparerUtil.SHA1HashStringForUTF8String(serialized);
      }

      foreach (XElement candidate in contentParent.Descendants())
      {
          if (!ElementsToHaveSha1Hash.Contains(candidate.Name))
          {
              continue;
          }

          var hashingClone = (XElement) CloneBlockLevelContentForHashing(part, candidate, true, settings);
          candidate.Add(new XAttribute(PtOpenXml.SHA1Hash, HashOf(hashingClone)));

          bool isTableOrRow = candidate.Name == W.tbl || candidate.Name == W.tr;
          if (!isTableOrRow)
          {
              continue;
          }

          // This is a convenient place to look at why tables are being compared as different.
          var structureClone = (XElement) CloneForStructureHash(hashingClone);
          candidate.Add(new XAttribute(PtOpenXml.StructureSHA1Hash, HashOf(structureClone)));
      }
  }
  /// <summary>
  /// Repeatedly refines a list of correlated sequences until none has status <c>Unknown</c>.
  /// </summary>
  /// <param name="cu1">Comparison units of the first document.</param>
  /// <param name="cu2">Comparison units of the second document.</param>
  /// <param name="settings">Comparer settings forwarded to the correlation strategies.</param>
  /// <returns>The list of correlated sequences, in order, with no <c>Unknown</c> entries left.</returns>
  private static List<CorrelatedSequence> Lcs(ComparisonUnit[] cu1, ComparisonUnit[] cu2, WmlComparerSettings settings)
  {
      // Initial state: a single Unknown sequence that spans both inputs in full.
      var sequences = new List<CorrelatedSequence>
      {
          new CorrelatedSequence
          {
              CorrelationStatus = CorrelationStatus.Unknown,
              ComparisonUnitArray1 = cu1,
              ComparisonUnitArray2 = cu2
          }
      };

      for (;;)
      {
          if (False)
          {
              TestUtil.NotePad(string.Concat(sequences.Select(s => s + Environment.NewLine)));
          }

          CorrelatedSequence pending = sequences
              .FirstOrDefault(s => s.CorrelationStatus == CorrelationStatus.Unknown);

          if (pending == null)
          {
              return sequences;
          }

          // If the unknown consists of a single group of the same type on each side, some Unids in the
          // 'after' document can be set:
          // - a pair of single tables: the table Unid;
          // - a pair of single rows: the table and row Unids;
          // - a pair of single cells: the table, row, and cell Unids;
          // - a pair of paragraphs: the paragraph (and all ancestor) Unids.
          SetAfterUnids(pending);

          if (False)
          {
              TestUtil.NotePad(new StringBuilder().Append(pending).ToString());
          }

          // Try each strategy in turn; a strategy signals "not applicable" by returning null.
          List<CorrelatedSequence> replacement =
              ProcessCorrelatedHashes(pending, settings)
              ?? FindCommonAtBeginningAndEnd(pending, settings)
              ?? DoLcsAlgorithm(pending, settings);

          // Splice the replacement in where the unknown sequence was. Inserting the reversed items at a
          // fixed index leaves them in their original order.
          int position = sequences.IndexOf(pending);
          sequences.RemoveAt(position);
          replacement.Reverse();
          foreach (CorrelatedSequence piece in replacement)
          {
              sequences.Insert(position, piece);
          }
      }
  }
  /// <summary>
  /// For every row group in a Deleted or Inserted correlated sequence, adds a w:del or w:ins element
  /// (author, id, date) to the row's w:trPr, creating the w:trPr if the row has none.
  /// </summary>
  /// <param name="settings">Supplies the revision author and date.</param>
  /// <param name="correlatedSequence">Correlated sequences to scan.</param>
  /// <exception cref="OpenXmlPowerToolsException">
  /// A row group has no content atom, or its first content atom has no w:tr ancestor.
  /// </exception>
  private static void MarkRowsAsDeletedOrInserted(WmlComparerSettings settings, List<CorrelatedSequence> correlatedSequence)
  {
      foreach (CorrelatedSequence sequence in correlatedSequence)
      {
          bool isDeleted = sequence.CorrelationStatus == CorrelationStatus.Deleted;
          bool isInserted = sequence.CorrelationStatus == CorrelationStatus.Inserted;
          if (!isDeleted && !isInserted)
          {
              continue;
          }

          // Deleted content lives in the first array, inserted content in the second.
          ComparisonUnit[] affectedUnits = isInserted
              ? sequence.ComparisonUnitArray2
              : sequence.ComparisonUnitArray1;

          foreach (ComparisonUnit unit in affectedUnits)
          {
              // Only rows are expected here: tables are always flattened in the LCS process, so there is
              // no need to recurse into children. For a row it suffices to take its first content atom,
              // find the row ancestor, and tweak the w:trPr.
              if (!(unit is ComparisonUnitGroup rowGroup) ||
                  rowGroup.ComparisonUnitGroupType != ComparisonUnitGroupType.Row)
              {
                  continue;
              }

              ComparisonUnitAtom firstAtom = rowGroup.DescendantContentAtoms().FirstOrDefault();
              if (firstAtom == null)
              {
                  throw new OpenXmlPowerToolsException("Internal error");
              }

              // The innermost w:tr is the last matching element of the ancestor array.
              XElement row = firstAtom.AncestorElements.LastOrDefault(a => a.Name == W.tr);
              if (row == null)
              {
                  throw new OpenXmlPowerToolsException("Internal error");
              }

              XElement rowProperties = row.Element(W.trPr);
              if (rowProperties == null)
              {
                  rowProperties = new XElement(W.trPr);
                  row.AddFirst(rowProperties);
              }

              XName revisionName = isDeleted ? W.del : W.ins;
              rowProperties.Add(new XElement(revisionName,
                  new XAttribute(W.author, settings.AuthorForRevisions),
                  new XAttribute(W.id, _maxId++),
                  new XAttribute(W.date, settings.DateTimeForRevisions)));
          }
      }
  }
  /// <summary>
  /// Flattens correlated sequences into one list of new <c>ComparisonUnitAtom</c> instances, each
  /// stamped with the status (Equal, Deleted, Inserted) of the sequence it came from.
  /// </summary>
  /// <param name="correlatedSequence">Sequences to flatten, in document order.</param>
  /// <param name="settings">Comparer settings passed to each new atom.</param>
  /// <returns>The flattened atom list.</returns>
  /// <exception cref="OpenXmlPowerToolsException">A sequence has a status other than Equal, Deleted, or Inserted.</exception>
  private static List<ComparisonUnitAtom> FlattenToComparisonUnitAtomList(
      List<CorrelatedSequence> correlatedSequence,
      WmlComparerSettings settings)
  {
      // Re-creates every content atom under the given units with the given status.
      IEnumerable<ComparisonUnitAtom> Restamp(ComparisonUnit[] units, CorrelationStatus status)
      {
          return units
              .SelectMany(unit => unit.DescendantContentAtoms())
              .Select(atom => new ComparisonUnitAtom(atom.ContentElement, atom.AncestorElements, atom.Part, settings)
              {
                  CorrelationStatus = status
              });
      }

      var flattened = new List<ComparisonUnitAtom>();

      foreach (CorrelatedSequence sequence in correlatedSequence)
      {
          // Open question carried over from earlier revisions of this code: content was observed to be
          // dropped here; a check for a specific unid (e.g. "0ecb9184") was suggested as a debugging aid.
          var status = sequence.CorrelationStatus;

          if (status == CorrelationStatus.Equal)
          {
              IEnumerable<ComparisonUnitAtom> atomsBefore = sequence
                  .ComparisonUnitArray1
                  .SelectMany(unit => unit.DescendantContentAtoms());
              IEnumerable<ComparisonUnitAtom> atomsAfter = sequence
                  .ComparisonUnitArray2
                  .SelectMany(unit => unit.DescendantContentAtoms());

              // The result atom is built from the 'after' side and keeps a link to its 'before' counterpart.
              flattened.AddRange(atomsBefore.Zip(atomsAfter, (before, after) =>
                  new ComparisonUnitAtom(after.ContentElement, after.AncestorElements, after.Part, settings)
                  {
                      CorrelationStatus = CorrelationStatus.Equal,
                      ContentElementBefore = before.ContentElement,
                      ComparisonUnitAtomBefore = before
                  }));
          }
          else if (status == CorrelationStatus.Deleted)
          {
              flattened.AddRange(Restamp(sequence.ComparisonUnitArray1, CorrelationStatus.Deleted));
          }
          else if (status == CorrelationStatus.Inserted)
          {
              flattened.AddRange(Restamp(sequence.ComparisonUnitArray2, CorrelationStatus.Inserted));
          }
          else
          {
              throw new OpenXmlPowerToolsException("Internal error");
          }
      }

      if (False)
      {
          TestUtil.NotePad(string.Concat(flattened.Select(atom => atom + Environment.NewLine)));
      }

      return flattened;
  }
  /// <summary>
  /// Sets <c>AncestorUnids</c> on every atom of <paramref name="comparisonUnitAtomList"/> so that the
  /// flat atom list can later be coalesced back into a correctly nested XML tree.
  /// </summary>
  /// <remarks>
  /// This is the crux of the fix to the algorithm. The hierarchy that matters consists of paragraphs,
  /// txbx, txbxContent, tables, rows, cells, and content controls. Examples of hierarchy depth:
  /// - documents without tables or text boxes: 1;
  /// - atoms within a table: 4 (Table / Row / Cell / Paragraph);
  /// - atoms within a nested table: 7 (Table / Row / Cell / Table / Row / Cell / Paragraph);
  /// - atoms within a text box: 3 (Paragraph / txbxContent / Paragraph);
  /// - atoms within a table in a text box: 6 (Paragraph / txbxContent / Table / Row / Cell / Paragraph).
  ///
  /// The method works in three steps:
  /// 1. For every pPr atom that is Equal (or that is inside a text box, regardless of status), the Unid
  ///    attributes of the 'after' ancestors are overwritten with those of the 'before' ancestors, provided
  ///    both ancestor chains have the same length.
  /// 2. The list is walked backwards. Each paragraph mark (pPr) outside a text box supplies the ancestor
  ///    unids for itself and for all atoms that precede it, up to the previous paragraph mark. Deeper
  ///    ancestors of an atom contribute their own Unid. Text-box content is treated in this pass as
  ///    content of the containing paragraph, so its AncestorUnids are incomplete afterwards.
  /// 3. The list is walked backwards again, this time handling only paragraph marks inside text boxes and
  ///    the atoms that belong to them.
  ///
  /// Because two atoms that originally shared a paragraph can end up in different paragraphs of the
  /// reconstructed document, the reconstructed unids are kept per atom (in <c>AncestorUnids</c>) rather
  /// than on the shared ancestor elements.
  ///
  /// Assumptions inherited from the original design: a text box is never nested inside a text box, and a
  /// footnote / endnote never contains a text box.
  /// </remarks>
  /// <param name="comparisonUnitAtomList">Flattened atoms in document order; expected to be non-empty.</param>
  /// <exception cref="OpenXmlPowerToolsException">
  /// A paragraph mark has an ancestor without a Unid, or an atom is encountered before any paragraph
  /// mark has established the current ancestor unids.
  /// </exception>
  private static void AssembleAncestorUnidsInOrderToRebuildXmlTreeProperly(List<ComparisonUnitAtom> comparisonUnitAtomList)
  {
      if (False)
      {
          var sb = new StringBuilder();
          foreach (ComparisonUnitAtom item in comparisonUnitAtomList)
              sb.Append(item).Append(Environment.NewLine);
          string sbs = sb.ToString();
          TestUtil.NotePad(sbs);
      }

      // Step 1: align the 'after' ancestor unids with the 'before' ones for every pPr whose status is
      // Equal. The same is done for a pPr in a text box whatever its status, because insertion / deletion
      // of text boxes themselves is not supported: a text box from either document always ends up in the
      // result, possibly with inserted or deleted text.
      foreach (ComparisonUnitAtom cua in comparisonUnitAtomList)
      {
          if (cua.ContentElement.Name != W.pPr)
              continue;

          bool doSet = cua.CorrelationStatus == CorrelationStatus.Equal || IsInsideTextBoxContent(cua);
          if (!doSet)
              continue;

          ComparisonUnitAtom cuaBefore = cua.ComparisonUnitAtomBefore;
          if (cuaBefore == null)
              continue;

          XElement[] ancestorsAfter = cua.AncestorElements;
          XElement[] ancestorsBefore = cuaBefore.AncestorElements;
          if (ancestorsAfter.Length != ancestorsBefore.Length)
              continue;

          for (int i = 0; i < ancestorsAfter.Length; i++)
          {
              XAttribute afterUnidAtt = ancestorsAfter[i].Attribute(PtOpenXml.Unid);
              XAttribute beforeUnidAtt = ancestorsBefore[i].Attribute(PtOpenXml.Unid);
              if (afterUnidAtt != null && beforeUnidAtt != null)
                  afterUnidAtt.Value = beforeUnidAtt.Value;
          }
      }

      if (False)
      {
          var sb = new StringBuilder();
          foreach (ComparisonUnitAtom item in comparisonUnitAtomList)
              sb.Append(item).Append(Environment.NewLine);
          string sbs = sb.ToString();
          TestUtil.NotePad(sbs);
      }

      List<ComparisonUnitAtom> rComparisonUnitAtomList =
          ((IEnumerable<ComparisonUnitAtom>) comparisonUnitAtomList).Reverse().ToList();

      // The following is expected to succeed: the list has at least one element, and every atom has at
      // least one ancestor.
      XElement deepestAncestor = rComparisonUnitAtomList.First().AncestorElements.First();
      XName deepestAncestorName = deepestAncestor.Name;
      string deepestAncestorUnid = null;
      if (deepestAncestorName == W.footnote || deepestAncestorName == W.endnote)
      {
          // Within a footnote / endnote, every atom shares the note's Unid at level 0.
          deepestAncestorUnid = (string) deepestAncestor.Attribute(PtOpenXml.Unid);
      }

      // Unids generated for ancestors that carry no Unid attribute, keyed by element instance, so that
      // all atoms below the same ancestor receive the same generated value in both passes.
      var generatedUnids = new Dictionary<XElement, string>();

      // Step 2: paragraph marks outside text boxes. A pPr in a text box is processed like ordinary
      // content of the containing paragraph here; step 3 completes the text-box content.
      string[] currentAncestorUnids = null;
      foreach (ComparisonUnitAtom cua in rComparisonUnitAtomList)
      {
          if (cua.ContentElement.Name == W.pPr && !IsInsideTextBoxContent(cua))
          {
              // The ancestor unids of the paragraph mark apply to all content atoms of that paragraph.
              currentAncestorUnids = CollectRequiredAncestorUnids(cua);
              cua.AncestorUnids = currentAncestorUnids;
              if (deepestAncestorUnid != null)
                  cua.AncestorUnids[0] = deepestAncestorUnid;
              continue;
          }

          cua.AncestorUnids = AppendDeeperAncestorUnids(currentAncestorUnids, cua, generatedUnids);
          if (deepestAncestorUnid != null)
              cua.AncestorUnids[0] = deepestAncestorUnid;
      }

      if (False)
      {
          var sb = new StringBuilder();
          foreach (ComparisonUnitAtom item in comparisonUnitAtomList)
              sb.Append(item).Append(Environment.NewLine);
          string sbs = sb.ToString();
          TestUtil.NotePad(sbs);
      }

      // Step 3: paragraph marks inside text boxes and the atoms that belong to them.
      currentAncestorUnids = null;
      var skipUntilNextPpr = false;
      foreach (ComparisonUnitAtom cua in rComparisonUnitAtomList)
      {
          // An atom that is shallower than the current text-box paragraph has left the text box.
          if (currentAncestorUnids != null && cua.AncestorElements.Length < currentAncestorUnids.Length)
          {
              skipUntilNextPpr = true;
              currentAncestorUnids = null;
              continue;
          }

          if (cua.ContentElement.Name == W.pPr)
          {
              if (!IsInsideTextBoxContent(cua))
              {
                  // Ordinary paragraphs were fully handled in step 2.
                  skipUntilNextPpr = true;
                  currentAncestorUnids = null;
                  continue;
              }

              skipUntilNextPpr = false;
              currentAncestorUnids = CollectRequiredAncestorUnids(cua);
              cua.AncestorUnids = currentAncestorUnids;
              continue;
          }

          if (skipUntilNextPpr)
              continue;

          cua.AncestorUnids = AppendDeeperAncestorUnids(currentAncestorUnids, cua, generatedUnids);
      }

      if (False)
      {
          var sb = new StringBuilder();
          foreach (ComparisonUnitAtom item in comparisonUnitAtomList)
              sb.Append(item.ToStringAncestorUnids()).Append(Environment.NewLine);
          string sbs = sb.ToString();
          TestUtil.NotePad(sbs);
      }
  }

  // Returns true when any ancestor of the atom is a w:txbxContent element.
  private static bool IsInsideTextBoxContent(ComparisonUnitAtom cua)
  {
      return cua.AncestorElements.Any(ae => ae.Name == W.txbxContent);
  }

  // Returns the Unid of every ancestor of a paragraph mark; a missing Unid is an internal error.
  private static string[] CollectRequiredAncestorUnids(ComparisonUnitAtom cua)
  {
      return cua
          .AncestorElements
          .Select(ae =>
          {
              var thisUnid = (string) ae.Attribute(PtOpenXml.Unid);
              if (thisUnid == null)
                  throw new OpenXmlPowerToolsException("Internal error");
              return thisUnid;
          })
          .ToArray();
  }

  // Returns a new array: the current paragraph-level unids followed by the unids of the atom's ancestors
  // that lie deeper than the paragraph-level chain.
  private static string[] AppendDeeperAncestorUnids(
      string[] currentAncestorUnids,
      ComparisonUnitAtom cua,
      Dictionary<XElement, string> generatedUnids)
  {
      // No paragraph mark has been seen yet for this atom; previously this surfaced as a
      // NullReferenceException.
      if (currentAncestorUnids == null)
          throw new OpenXmlPowerToolsException("Internal error");

      IEnumerable<string> additionalAncestorUnids = cua
          .AncestorElements
          .Skip(currentAncestorUnids.Length)
          .Select(ae => GetOrGenerateAncestorUnid(ae, generatedUnids));

      return currentAncestorUnids
          .Concat(additionalAncestorUnids)
          .ToArray();
  }

  // Returns the element's Unid attribute value, or a generated GUID-based unid when the attribute is
  // absent. The original code computed the GUID but discarded it, so null was returned instead.
  // NOTE(review): generated values are cached per element so that atoms sharing an ancestor still
  // group together; confirm against the coalescing step that consumes AncestorUnids.
  private static string GetOrGenerateAncestorUnid(XElement ancestor, Dictionary<XElement, string> generatedUnids)
  {
      var unid = (string) ancestor.Attribute(PtOpenXml.Unid);
      if (unid != null)
          return unid;

      if (!generatedUnids.TryGetValue(ancestor, out unid))
      {
          unid = Guid.NewGuid().ToString().Replace("-", "");
          generatedUnids.Add(ancestor, unid);
      }

      return unid;
  }
  /// <summary>
  /// Fabricates the new body content from the flattened atom list by coalescing it from level 0.
  /// </summary>
  /// <param name="part">Part passed through to <c>CoalesceRecurse</c>.</param>
  /// <param name="comparisonUnitAtomList">Atoms with their <c>AncestorUnids</c> already assembled.</param>
  /// <param name="settings">Comparer settings passed through to <c>CoalesceRecurse</c>.</param>
  /// <returns>Whatever <c>CoalesceRecurse</c> returns for level 0; used by the caller as the children of w:body.</returns>
  private static object ProduceNewWmlMarkupFromCorrelatedSequence(
      OpenXmlPart part,
      IEnumerable<ComparisonUnitAtom> comparisonUnitAtomList,
      WmlComparerSettings settings)
  {
      // The shared id counter restarts at zero before the new markup is produced.
      _maxId = 0;

      return CoalesceRecurse(part, comparisonUnitAtomList, 0, settings);
  }
  /// <summary>
  /// Replaces the root of <paramref name="newXDoc"/> with the result of
  /// <c>MarkContentAsDeletedOrInsertedTransform</c> applied to that root.
  /// </summary>
  /// <param name="newXDoc">Document to transform in place; nothing is replaced when it has no root.</param>
  /// <param name="settings">Comparer settings supplying revision author and date.</param>
  private static void MarkContentAsDeletedOrInserted(XDocument newXDoc, WmlComparerSettings settings)
  {
      XElement currentRoot = newXDoc.Root;
      object markedRoot = MarkContentAsDeletedOrInsertedTransform(currentRoot, settings);

      if (currentRoot != null)
      {
          currentRoot.ReplaceWith(markedRoot);
      }
  }
  /// <summary>
  /// Recursively copies <paramref name="node"/>, turning <c>PtOpenXml.Status</c> annotations into
  /// revision markup: a run whose content is Deleted / Inserted is wrapped in w:del / w:ins, and a
  /// w:pPr with a status gets a w:del / w:ins element added to its w:rPr.
  /// </summary>
  /// <param name="node">Node to transform; non-element nodes are returned unchanged.</param>
  /// <param name="settings">Supplies the revision author and date.</param>
  /// <returns>The transformed copy of the element, or <paramref name="node"/> itself when it is not an element.</returns>
  /// <exception cref="OpenXmlPowerToolsException">
  /// A run contains more than one distinct status, or a w:pPr carries a status other than
  /// "Deleted" or "Inserted".
  /// </exception>
  private static object MarkContentAsDeletedOrInsertedTransform(XNode node, WmlComparerSettings settings)
  {
      var element = node as XElement;
      if (element == null)
      {
          return node;
      }

      // Same name and attributes, with every child node transformed recursively.
      XElement CopyWithTransformedChildren()
      {
          return new XElement(element.Name,
              element.Attributes(),
              element.Nodes().Select(child => MarkContentAsDeletedOrInsertedTransform(child, settings)));
      }

      // An empty w:del / w:ins element carrying author, a fresh id, and date.
      XElement NewRevisionMarker(XName markerName)
      {
          return new XElement(markerName,
              new XAttribute(W.author, settings.AuthorForRevisions),
              new XAttribute(W.id, _maxId++),
              new XAttribute(W.date, settings.DateTimeForRevisions));
      }

      if (element.Name == W.r)
      {
          // Distinct statuses found on the run's own content; text-box content is not descended into.
          List<string> runStatuses = element
              .DescendantsTrimmed(W.txbxContent)
              .Where(d => d.Name == W.t || d.Name == W.delText || AllowableRunChildren.Contains(d.Name))
              .Attributes(PtOpenXml.Status)
              .Select(a => (string) a)
              .Distinct()
              .ToList();

          if (runStatuses.Count > 1)
          {
              throw new OpenXmlPowerToolsException(
                  "Internal error - have both deleted and inserted text elements in the same run.");
          }

          if (runStatuses.Count == 0)
          {
              return CopyWithTransformedChildren();
          }

          string runStatus = runStatuses[0];
          if (runStatus == "Deleted" || runStatus == "Inserted")
          {
              // The marker (and its id) is created before the run's children are transformed, so an
              // outer revision receives its id before any revision nested inside the run.
              XElement marker = NewRevisionMarker(runStatus == "Deleted" ? W.del : W.ins);
              marker.Add(CopyWithTransformedChildren());
              return marker;
          }

          // Any other single status: the run is copied like an ordinary element below.
      }

      if (element.Name == W.pPr)
      {
          var paragraphStatus = (string) element.Attribute(PtOpenXml.Status);
          if (paragraphStatus == null)
          {
              return CopyWithTransformedChildren();
          }

          // With a status present, the pPr is cloned as-is (children are not transformed).
          var markedPPr = new XElement(element);

          XName markerName;
          if (paragraphStatus == "Deleted")
          {
              markerName = W.del;
          }
          else if (paragraphStatus == "Inserted")
          {
              markerName = W.ins;
          }
          else
          {
              throw new OpenXmlPowerToolsException("Internal error");
          }

          // The paragraph-mark revision goes into the pPr's rPr, which becomes the first child when new.
          XElement markProperties = markedPPr.Element(W.rPr);
          if (markProperties == null)
          {
              markProperties = new XElement(W.rPr);
              markedPPr.AddFirst(markProperties);
          }

          markProperties.Add(NewRevisionMarker(markerName));
          return markedPPr;
      }

      return CopyWithTransformedChildren();
  }
  // Replaces the children of every w:p found under the root (the walk is trimmed at w:txbxContent)
  // with the children of the paragraph returned by
  // WordprocessingMLUtil.CoalesceAdjacentRunsWithIdenticalFormatting.
  private static void CoalesceAdjacentRunsWithIdenticalFormatting(XDocument xDoc)
  {
      // The query is intentionally left lazy: paragraphs are located while the tree is being rewritten.
      foreach (XElement paragraph in xDoc.Root.DescendantsTrimmed(W.txbxContent).Where(d => d.Name == W.p))
      {
          XElement coalesced = WordprocessingMLUtil.CoalesceAdjacentRunsWithIdenticalFormatting(paragraph);
          paragraph.ReplaceNodes(coalesced.Nodes());
      }
  }
  // Makes sure the root element declares the "pt14" namespace prefix and lists "pt14" in its
  // mc:Ignorable attribute (the attribute is created when absent, appended to otherwise).
  private static void IgnorePt14Namespace(XElement root)
  {
      XName pt14Declaration = XNamespace.Xmlns + "pt14";
      if (root.Attribute(pt14Declaration) == null)
          root.Add(new XAttribute(pt14Declaration, PtOpenXml.pt.NamespaceName));

      XAttribute ignorableAttribute = root.Attribute(MC.Ignorable);
      if (ignorableAttribute == null)
      {
          root.Add(new XAttribute(MC.Ignorable, "pt14"));
          return;
      }

      // mc:Ignorable is a space separated list of prefixes; append ours only when it is not listed yet.
      string currentPrefixes = ignorableAttribute.Value;
      bool alreadyListed = currentPrefixes.Split(' ').Contains("pt14");
      if (!alreadyListed)
          ignorableAttribute.Value = currentPrefixes + " pt14";
  }
  // Rebuilds the content of the footnote / endnote behind every w:footnoteReference or
  // w:endnoteReference atom in listOfComparisonUnitAtoms, according to the atom's correlation status:
  //   * Equal    - the note is looked up in the 'before' and 'after' parts, both versions are compared
  //                with Lcs, and the children of the 'after' note are replaced with the merged markup.
  //   * Inserted - the 'after' note is treated as one inserted sequence and its children are replaced.
  //   * Deleted  - the 'before' note is treated as one deleted sequence and its children are replaced.
  // Any other status raises OpenXmlPowerToolsException("Internal error").
  // mainDocumentXDoc is not used by this method; it is kept for signature compatibility.
  private static void ProcessFootnoteEndnote(
      WmlComparerSettings settings,
      List<ComparisonUnitAtom> listOfComparisonUnitAtoms,
      MainDocumentPart mainDocumentPartBefore,
      MainDocumentPart mainDocumentPartAfter,
      XDocument mainDocumentXDoc)
  {
      FootnotesPart footnotesPartBefore = mainDocumentPartBefore.FootnotesPart;
      EndnotesPart endnotesPartBefore = mainDocumentPartBefore.EndnotesPart;
      FootnotesPart footnotesPartAfter = mainDocumentPartAfter.FootnotesPart;
      EndnotesPart endnotesPartAfter = mainDocumentPartAfter.EndnotesPart;

      // A missing part simply yields a null document here.
      XDocument footnotesPartBeforeXDoc = footnotesPartBefore?.GetXDocument();
      XDocument footnotesPartAfterXDoc = footnotesPartAfter?.GetXDocument();
      XDocument endnotesPartBeforeXDoc = endnotesPartBefore?.GetXDocument();
      XDocument endnotesPartAfterXDoc = endnotesPartAfter?.GetXDocument();

      List<ComparisonUnitAtom> possiblyModifiedFootnotesEndNotes = listOfComparisonUnitAtoms
          .Where(cua =>
              cua.ContentElement.Name == W.footnoteReference ||
              cua.ContentElement.Name == W.endnoteReference)
          .ToList();

      foreach (ComparisonUnitAtom fn in possiblyModifiedFootnotesEndNotes)
      {
          bool isFootnote = fn.ContentElement.Name == W.footnoteReference;

          string beforeId = null;
          if (fn.ContentElementBefore != null)
              beforeId = (string) fn.ContentElementBefore.Attribute(W.id);
          var afterId = (string) fn.ContentElement.Attribute(W.id);

          // Select the footnotes or the endnotes side once, for both documents.
          OpenXmlPart partToUseBefore = isFootnote ? (OpenXmlPart) footnotesPartBefore : endnotesPartBefore;
          OpenXmlPart partToUseAfter = isFootnote ? (OpenXmlPart) footnotesPartAfter : endnotesPartAfter;
          XDocument notesBeforeXDoc = isFootnote ? footnotesPartBeforeXDoc : endnotesPartBeforeXDoc;
          XDocument notesAfterXDoc = isFootnote ? footnotesPartAfterXDoc : endnotesPartAfterXDoc;

          if (fn.CorrelationStatus == CorrelationStatus.Equal)
          {
              XElement footnoteEndnoteBefore = FindNote(notesBeforeXDoc, beforeId);
              XElement footnoteEndnoteAfter = FindNote(notesAfterXDoc, afterId);

              AddSha1HashToBlockLevelContent(partToUseBefore, footnoteEndnoteBefore, settings);
              AddSha1HashToBlockLevelContent(partToUseAfter, footnoteEndnoteAfter, settings);

              ComparisonUnitAtom[] fncal1 = CreateComparisonUnitAtomList(partToUseBefore, footnoteEndnoteBefore, settings);
              ComparisonUnit[] fncus1 = GetComparisonUnitList(fncal1, settings);

              ComparisonUnitAtom[] fncal2 = CreateComparisonUnitAtomList(partToUseAfter, footnoteEndnoteAfter, settings);
              ComparisonUnit[] fncus2 = GetComparisonUnitList(fncal2, settings);

              if (!(fncus1.Length == 0 && fncus2.Length == 0))
              {
                  List<CorrelatedSequence> fnCorrelatedSequence = Lcs(fncus1, fncus2, settings);
                  DumpForDebugging(fnCorrelatedSequence);

                  // for any deleted or inserted rows, go into the w:trPr properties and add the appropriate
                  // w:ins or w:del element, so that the generated row is marked as deleted or inserted.
                  MarkRowsAsDeletedOrInserted(settings, fnCorrelatedSequence);

                  // flattened list of ComparisonUnitAtoms, each marked Deleted, Inserted, or Equal.
                  List<ComparisonUnitAtom> flattenedAtoms =
                      FlattenToComparisonUnitAtomList(fnCorrelatedSequence, settings);
                  DumpForDebugging(flattenedAtoms);

                  // NOTE(review): unlike the Inserted / Deleted branches, this branch only considers a
                  // run that is a direct child of the first paragraph when inserting the reference run.
                  XElement newContentElement = RebuildNoteContent(
                      partToUseAfter, flattenedAtoms, isFootnote, firstRunMustBeParagraphChild: true);
                  footnoteEndnoteAfter.ReplaceNodes(newContentElement.Nodes());
              }
          }
          else if (fn.CorrelationStatus == CorrelationStatus.Inserted)
          {
              XElement footnoteEndnoteAfter = FindNote(notesAfterXDoc, afterId);

              AddSha1HashToBlockLevelContent(partToUseAfter, footnoteEndnoteAfter, settings);

              ComparisonUnitAtom[] fncal2 = CreateComparisonUnitAtomList(partToUseAfter, footnoteEndnoteAfter, settings);
              ComparisonUnit[] fncus2 = GetComparisonUnitList(fncal2, settings);

              var insertedCorrSequ = new List<CorrelatedSequence>
              {
                  new CorrelatedSequence
                  {
                      ComparisonUnitArray1 = null,
                      ComparisonUnitArray2 = fncus2,
                      CorrelationStatus = CorrelationStatus.Inserted
                  }
              };
              DumpForDebugging(insertedCorrSequ);

              MarkRowsAsDeletedOrInserted(settings, insertedCorrSequ);

              List<ComparisonUnitAtom> flattenedAtoms =
                  FlattenToComparisonUnitAtomList(insertedCorrSequ, settings);

              XElement newContentElement = RebuildNoteContent(
                  partToUseAfter, flattenedAtoms, isFootnote, firstRunMustBeParagraphChild: false);
              footnoteEndnoteAfter.ReplaceNodes(newContentElement.Nodes());
          }
          else if (fn.CorrelationStatus == CorrelationStatus.Deleted)
          {
              // The note is looked up in the 'before' document using the id read from fn.ContentElement.
              // Fix: partToUseBefore is now the 'before' footnotes part for footnote references as well;
              // previously only the endnote case assigned it and the footnote case passed null below.
              XElement footnoteEndnoteBefore = FindNote(notesBeforeXDoc, afterId);

              AddSha1HashToBlockLevelContent(partToUseBefore, footnoteEndnoteBefore, settings);

              ComparisonUnitAtom[] fncal2 = CreateComparisonUnitAtomList(partToUseBefore, footnoteEndnoteBefore, settings);
              ComparisonUnit[] fncus2 = GetComparisonUnitList(fncal2, settings);

              var deletedCorrSequ = new List<CorrelatedSequence>
              {
                  new CorrelatedSequence
                  {
                      ComparisonUnitArray1 = fncus2,
                      ComparisonUnitArray2 = null,
                      CorrelationStatus = CorrelationStatus.Deleted
                  }
              };
              DumpForDebugging(deletedCorrSequ);

              MarkRowsAsDeletedOrInserted(settings, deletedCorrSequ);

              List<ComparisonUnitAtom> flattenedAtoms =
                  FlattenToComparisonUnitAtomList(deletedCorrSequ, settings);

              if (flattenedAtoms.Any())
              {
                  XElement newContentElement = RebuildNoteContent(
                      partToUseBefore, flattenedAtoms, isFootnote, firstRunMustBeParagraphChild: false);
                  footnoteEndnoteBefore.ReplaceNodes(newContentElement.Nodes());
              }
          }
          else
          {
              throw new OpenXmlPowerToolsException("Internal error");
          }
      }

      // Returns the first child of the notes root whose w:id equals noteId (null when there is none).
      XElement FindNote(XDocument notesXDoc, string noteId)
      {
          return notesXDoc
              .Root
              .Elements()
              .FirstOrDefault(fnn => (string) fnn.Attribute(W.id) == noteId);
      }

      // Debugging aid, disabled unless False is changed: shows the items, one per line, via TestUtil.NotePad.
      void DumpForDebugging<T>(IEnumerable<T> items)
      {
          if (False)
          {
              var sb = new StringBuilder();
              foreach (T item in items)
                  sb.Append(item).Append(Environment.NewLine);
              string sbs = sb.ToString();
              TestUtil.NotePad(sbs);
          }
      }

      // Produces the new markup for one note from its flattened atoms and returns the generated
      // w:footnote / w:endnote element, whose child nodes are the note's new content.
      XElement RebuildNoteContent(
          OpenXmlPart notePart,
          List<ComparisonUnitAtom> noteAtoms,
          bool noteIsFootnote,
          bool firstRunMustBeParagraphChild)
      {
          // hack = set the guid ID of the table, row, or cell from the 'before' document to be equal to the
          // 'after' document. This is done only after flattening, when all that is left is a list of atoms
          // marked as equal, inserted, or deleted.
          AssembleAncestorUnidsInOrderToRebuildXmlTreeProperly(noteAtoms);

          object newFootnoteEndnoteChildren =
              ProduceNewWmlMarkupFromCorrelatedSequence(notePart, noteAtoms, settings);
          var tempElement = new XElement(W.body, newFootnoteEndnoteChildren);

          // NOTE(review): only the footnote style / w:footnoteRef are tested here, also for endnotes,
          // so for endnotes this check never finds an existing reference run - confirm that is intended.
          bool hasFootnoteReference = tempElement.Descendants(W.r).Any(r =>
              (string) r.Elements(W.rPr).Elements(W.rStyle).Attributes(W.val).FirstOrDefault() == "FootnoteReference" ||
              r.Descendants(W.footnoteRef).Any());

          if (!hasFootnoteReference)
          {
              XElement firstPara = tempElement.Descendants(W.p).FirstOrDefault();
              XElement firstRun = null;
              if (firstPara != null)
              {
                  firstRun = firstRunMustBeParagraphChild
                      ? firstPara.Element(W.r)
                      : firstPara.Descendants(W.r).FirstOrDefault();
              }

              if (firstRun != null)
              {
                  // Put a reference-mark run in front of the first run of the note.
                  firstRun.AddBeforeSelf(
                      new XElement(W.r,
                          new XElement(W.rPr,
                              new XElement(W.rStyle,
                                  new XAttribute(W.val, noteIsFootnote ? "FootnoteReference" : "EndnoteReference"))),
                          new XElement(noteIsFootnote ? W.footnoteRef : W.endnoteRef)));
              }
          }

          var newTempElement = (XElement) WordprocessingMLUtil.WmlOrderElementsPerStandard(tempElement);
          XElement rebuiltNote = newTempElement
              .Descendants()
              .FirstOrDefault(d => d.Name == W.footnote || d.Name == W.endnote);
          if (rebuiltNote == null)
              throw new OpenXmlPowerToolsException("Internal error");

          return rebuiltNote;
      }
  }
  // Renumbers the footnote and endnote references of the main document (1, 2, 3, ... in document
  // order, separately for footnotes and endnotes) and rebuilds the notes parts of the document with
  // revisions to match:
  //   1. every note whose w:id is neither "-1" nor "0" is removed from the with-revisions parts;
  //   2. for every reference, the note with the old id is taken from the 'after' document, or from the
  //      'before' document when the 'after' document has none, cloned, given the new id, and appended;
  //   3. each with-revisions notes document is then run through MarkContentAsDeletedOrInserted,
  //      run coalescing, element ordering and IgnorePt14Namespace, and written back to its part.
  // Throws OpenXmlPowerToolsException("Internal error") when a referenced note cannot be found in
  // either source document, or when there is no with-revisions notes document to receive it.
  private static void RectifyFootnoteEndnoteIds(
      MainDocumentPart mainDocumentPartBefore,
      MainDocumentPart mainDocumentPartAfter,
      MainDocumentPart mainDocumentPartWithRevisions,
      XDocument mainDocumentXDoc,
      WmlComparerSettings settings)
  {
      FootnotesPart footnotesPartWithRevisions = mainDocumentPartWithRevisions.FootnotesPart;
      EndnotesPart endnotesPartWithRevisions = mainDocumentPartWithRevisions.EndnotesPart;

      // Any of these documents is null when the corresponding part does not exist.
      XDocument footnotesPartBeforeXDoc = mainDocumentPartBefore.FootnotesPart?.GetXDocument();
      XDocument footnotesPartAfterXDoc = mainDocumentPartAfter.FootnotesPart?.GetXDocument();
      XDocument footnotesPartWithRevisionsXDoc = footnotesPartWithRevisions?.GetXDocument();
      RemoveNumberedNotes(footnotesPartWithRevisionsXDoc, W.footnote);

      XDocument endnotesPartBeforeXDoc = mainDocumentPartBefore.EndnotesPart?.GetXDocument();
      XDocument endnotesPartAfterXDoc = mainDocumentPartAfter.EndnotesPart?.GetXDocument();
      XDocument endnotesPartWithRevisionsXDoc = endnotesPartWithRevisions?.GetXDocument();
      RemoveNumberedNotes(endnotesPartWithRevisionsXDoc, W.endnote);

      RenumberNotes(W.footnoteReference, footnotesPartAfterXDoc, footnotesPartBeforeXDoc, footnotesPartWithRevisionsXDoc);
      RenumberNotes(W.endnoteReference, endnotesPartAfterXDoc, endnotesPartBeforeXDoc, endnotesPartWithRevisionsXDoc);

      FinishNotesPart(footnotesPartWithRevisions, footnotesPartWithRevisionsXDoc);
      FinishNotesPart(endnotesPartWithRevisions, endnotesPartWithRevisionsXDoc);

      // Removes all notes except those with w:id "-1" or "0" (which are kept).
      void RemoveNumberedNotes(XDocument notesXDoc, XName noteName)
      {
          if (notesXDoc == null)
              return;

          notesXDoc
              .Root
              .Elements(noteName)
              .Where(e => (string) e.Attribute(W.id) != "-1" && (string) e.Attribute(W.id) != "0")
              .Remove();
      }

      // Null-tolerant lookup of a note by w:id; a missing document or root yields null instead of throwing.
      XElement FindNote(XDocument notesXDoc, string noteId)
      {
          return notesXDoc?
              .Root?
              .Elements()
              .FirstOrDefault(e => (string) e.Attribute(W.id) == noteId);
      }

      // Gives every reference of the given name a new sequential id and appends the matching,
      // re-identified clone of its note to the with-revisions notes document.
      void RenumberNotes(
          XName referenceName,
          XDocument notesAfterXDoc,
          XDocument notesBeforeXDoc,
          XDocument notesWithRevisionsXDoc)
      {
          var nextId = 1;
          foreach (XElement reference in mainDocumentXDoc.Descendants(referenceName))
          {
              var oldId = (string) reference.Attribute(W.id);
              string newId = (nextId++).ToString();
              reference.Attribute(W.id).Value = newId;

              // Prefer the note from the 'after' document; fall back to the 'before' document.
              XElement note = FindNote(notesAfterXDoc, oldId) ?? FindNote(notesBeforeXDoc, oldId);
              if (note == null || notesWithRevisionsXDoc?.Root == null)
                  throw new OpenXmlPowerToolsException("Internal error");

              var cloned = new XElement(note);
              cloned.Attribute(W.id).Value = newId;
              notesWithRevisionsXDoc.Root.Add(cloned);
          }
      }

      // Converts status annotations to revision markup, tidies the document and saves it to its part.
      void FinishNotesPart(OpenXmlPart notesPart, XDocument notesXDoc)
      {
          if (notesXDoc == null)
              return;

          MarkContentAsDeletedOrInserted(notesXDoc, settings);
          CoalesceAdjacentRunsWithIdenticalFormatting(notesXDoc);
          var newXDocRoot = (XElement) WordprocessingMLUtil.WmlOrderElementsPerStandard(notesXDoc.Root);
          notesXDoc.Root.ReplaceWith(newXDocRoot);
          IgnorePt14Namespace(notesXDoc.Root);
          notesPart.PutXDocument();
      }
  }
  // Applies ConjoinMultipleParagraphMarks to the new main document and to the footnotes and endnotes
  // parts (when present); the notes parts are written back after being processed.
  private static void ConjoinDeletedInsertedParagraphMarks(MainDocumentPart mainDocumentPart, XDocument newXDoc)
  {
      ConjoinMultipleParagraphMarks(newXDoc);

      var noteParts = new OpenXmlPart[] { mainDocumentPart.FootnotesPart, mainDocumentPart.EndnotesPart };
      foreach (OpenXmlPart notePart in noteParts)
      {
          if (notePart == null)
              continue;

          XDocument noteXDoc = notePart.GetXDocument();
          ConjoinMultipleParagraphMarks(noteXDoc);
          notePart.PutXDocument();
      }
  }
  // The comparison algorithm can report the paragraph mark of one and the same paragraph as both
  // inserted and deleted. Having the algorithm treat such marks as equal is not an option, because it
  // would then sometimes equate paragraph marks that should not be equated; instead the duplicates
  // are merged here, after the fact, by replacing the root with the result of ConjoinTransform.
  private static void ConjoinMultipleParagraphMarks(XDocument xDoc)
  {
      XElement currentRoot = xDoc.Root;
      object conjoinedRoot = ConjoinTransform(currentRoot);
      if (currentRoot != null)
          currentRoot.ReplaceWith(conjoinedRoot);
  }
  // Recursive copy of a node in which every w:p holding two or more w:pPr children is rebuilt with a
  // single w:pPr: a copy of the first one, stripped of the w:ins / w:del markers in its w:rPr and of
  // its pt14:Status attribute, followed by the paragraph's non-pPr child elements (copied as they are).
  // All other elements are rebuilt with transformed children; non-element nodes are returned unchanged.
  private static object ConjoinTransform(XNode node)
  {
      var element = node as XElement;
      if (element == null)
          return node;

      bool hasSeveralParagraphProperties =
          element.Name == W.p && element.Elements(W.pPr).Skip(1).Any();

      if (!hasSeveralParagraphProperties)
      {
          return new XElement(element.Name,
              element.Attributes(),
              element.Nodes().Select(ConjoinTransform));
      }

      var survivingPPr = new XElement(element.Elements(W.pPr).First());
      survivingPPr
          .Elements(W.rPr)
          .Elements()
          .Where(marker => marker.Name == W.ins || marker.Name == W.del)
          .Remove();
      survivingPPr.Attributes(PtOpenXml.Status).Remove();

      return new XElement(W.p,
          element.Attributes(),
          survivingPPr,
          element.Elements().Where(child => child.Name != W.pPr));
  }
  // Renumbers the w:id attribute of every w:ins / w:del element, starting at 1, walking the new main
  // document first, then the footnotes part, then the endnotes part (when they exist). The notes parts
  // are written back afterwards; newXDoc is only modified in memory.
  // Revision elements that carry no w:id attribute are skipped instead of causing a
  // NullReferenceException, consistent with FixUpRevMarkIds.
  private static void FixUpRevisionIds(WordprocessingDocument wDocWithRevisions, XDocument newXDoc)
  {
      FootnotesPart footnotesPart = wDocWithRevisions.MainDocumentPart.FootnotesPart;
      EndnotesPart endnotesPart = wDocWithRevisions.MainDocumentPart.EndnotesPart;

      // All tracked-revision elements of one document, in document order (lazy).
      IEnumerable<XElement> RevisionsIn(XDocument xDoc) =>
          xDoc.Descendants().Where(d => d.Name == W.ins || d.Name == W.del);

      IEnumerable<XElement> footnoteRevisions = footnotesPart != null
          ? RevisionsIn(footnotesPart.GetXDocument())
          : Enumerable.Empty<XElement>();

      IEnumerable<XElement> endnoteRevisions = endnotesPart != null
          ? RevisionsIn(endnotesPart.GetXDocument())
          : Enumerable.Empty<XElement>();

      IEnumerable<XElement> allRevisions = RevisionsIn(newXDoc)
          .Concat(footnoteRevisions)
          .Concat(endnoteRevisions);

      var nextId = 1;
      foreach (XElement revision in allRevisions)
      {
          XAttribute idAttribute = revision.Attribute(W.id);
          if (idAttribute != null)
              idAttribute.Value = (nextId++).ToString();
      }

      footnotesPart?.PutXDocument();
      endnotesPart?.PutXDocument();
  }
  // Finds the last body-level paragraph whose w:pPr contains a w:sectPr, appends a copy of that
  // w:sectPr to w:body and removes it from the paragraph. Does nothing when no such paragraph exists.
  private static void MoveLastSectPrToChildOfBody(XDocument newXDoc)
  {
      XElement paragraphHoldingSectPr = newXDoc
          .Root
          .Elements(W.body)
          .Elements(W.p)
          .LastOrDefault(p => p.Elements(W.pPr).Elements(W.sectPr).Any());

      if (paragraphHoldingSectPr == null)
          return;

      // Lazy query, evaluated once for the copy and once for the removal.
      IEnumerable<XElement> sectPrs = paragraphHoldingSectPr.Elements(W.pPr).Elements(W.sectPr);

      // Add() clones elements that already have a parent, so the originals still have to be removed.
      newXDoc.Root.Element(W.body).Add(sectPrs);
      sectPrs.Remove();
  }
  // Rewrites the main document so that the text of a custom footnote / endnote mark lives in the same
  // run as its reference. Markup of this shape (run properties and pt14:Unid attributes omitted)
  //
  //     <w:del w:author="Open-Xml-PowerTools" w:id="7" w:date="...">
  //       <w:r>
  //         <w:footnoteReference w:customMarkFollows="1" w:id="1" pt14:Status="Deleted" />
  //       </w:r>
  //     </w:del>
  //     <w:del w:author="Open-Xml-PowerTools" w:id="8" w:date="...">
  //       <w:r>
  //         <w:delText>1</w:delText>
  //       </w:r>
  //     </w:del>
  //
  // needs to become
  //
  //     <w:del w:author="Open-Xml-PowerTools" w:id="7" w:date="...">
  //       <w:r>
  //         <w:footnoteReference w:customMarkFollows="1" w:id="1" pt14:Status="Deleted" />
  //         <w:delText>1</w:delText>
  //       </w:r>
  //     </w:del>
  //
  // This is pretty random - a bug in Word prevents display of a document if the delText element does
  // not immediately follow the footnoteReference element, in the same run.
  private static void FixUpFootnotesEndnotesWithCustomMarkers(WordprocessingDocument wDocWithRevisions)
  {
      MainDocumentPart mainPart = wDocWithRevisions.MainDocumentPart;
      XElement currentRoot = mainPart.GetXDocument().Root;

      var cleanedRoot = (XElement) FootnoteEndnoteReferenceCleanupTransform(currentRoot);
      if (currentRoot != null)
          currentRoot.ReplaceWith(cleanedRoot);

      mainPart.PutXDocument();
  }
  // Recursive copy of a node that, for footnote / endnote references carrying w:customMarkFollows,
  // moves the text of the custom mark into the run that holds the reference:
  //   * reference inside w:del > w:r - the w:delText elements of the runs of the element following
  //     the w:del are moved into the reference's run, unless that run already has a w:delText;
  //   * reference inside w:ins > w:r - same, with w:t instead of w:delText.
  // The adjustment runs on the rebuilt copy of any element that has a w:del / w:ins child whose run
  // directly contains such a reference. Every element is rebuilt with recursively transformed
  // children, including elements that have w:del / w:ins children but need no adjustment at their
  // own level (these used to be returned without visiting their descendants). Non-element nodes are
  // returned unchanged.
  private static object FootnoteEndnoteReferenceCleanupTransform(XNode node)
  {
      if (!(node is XElement element))
          return node;

      var clone = new XElement(element.Name,
          element.Attributes(),
          element.Nodes().Select(n => FootnoteEndnoteReferenceCleanupTransform(n)));

      // Elements without w:del / w:ins children yield an empty query, so most elements stop here.
      bool hasFootnoteEndnoteReferencesThatNeedCleanedUp = element
          .Elements()
          .Where(e => e.Name == W.del || e.Name == W.ins)
          .Elements(W.r)
          .Elements()
          .Where(e => e.Name == W.footnoteReference || e.Name == W.endnoteReference)
          .Attributes(W.customMarkFollows)
          .Any();

      if (!hasFootnoteEndnoteReferencesThatNeedCleanedUp)
          return clone;

      // Materialized up front because the loop below adds and removes elements of the same tree.
      List<XElement> footnoteEndnoteReferencesToAdjust = clone
          .Descendants()
          .Where(d => d.Name == W.footnoteReference || d.Name == W.endnoteReference)
          .Where(d => d.Attribute(W.customMarkFollows) != null)
          .ToList();

      foreach (XElement fnenr in footnoteEndnoteReferencesToAdjust)
      {
          XElement par = fnenr.Parent;
          XElement gp = par?.Parent;
          if (par == null || gp == null || par.Name != W.r)
              continue;

          if (gp.Name == W.del)
              PullMarkTextIntoRun(par, gp, W.delText);
          else if (gp.Name == W.ins)
              PullMarkTextIntoRun(par, gp, W.t);
      }

      return clone;

      // Moves the textName elements found in the runs of the element that follows revisionElement
      // into referenceRun, unless referenceRun already contains such an element.
      void PullMarkTextIntoRun(XElement referenceRun, XElement revisionElement, XName textName)
      {
          if (referenceRun.Element(textName) != null)
              return;

          XElement following = revisionElement.ElementsAfterSelf().FirstOrDefault();
          if (following == null)
              return;

          List<XElement> markText = following.Elements(W.r).Elements(textName).ToList();
          if (markText.Count == 0)
              return;

          referenceRun.Add(markText); // the elements have a parent, so this adds clones to the reference's run
          markText.Remove();          // this leaves an empty run behind, which does not matter
      }
  }
  /// <summary>
  /// Renumbers the w:id attribute of every w:ins and w:del element found in the document's
  /// content parts, counting up from zero in the order the elements are encountered, and then
  /// calls PutXDocument on every content part.
  /// </summary>
  /// <param name="wDoc">Document whose revision-mark ids are rewritten.</param>
  private static void FixUpRevMarkIds(WordprocessingDocument wDoc)
  {
      var runningId = 0;
      foreach (OpenXmlPart contentPart in wDoc.ContentParts())
      {
          IEnumerable<XElement> revisionMarks = contentPart
              .GetXDocument()
              .Descendants()
              .Where(d => d.Name == W.ins || d.Name == W.del);

          foreach (XElement revisionMark in revisionMarks)
          {
              XAttribute id = revisionMark.Attribute(W.id);
              if (id == null)
              {
                  // A revision mark without an id is left alone and does not consume a number.
                  continue;
              }

              id.Value = runningId.ToString();
              runningId++;
          }
      }

      foreach (OpenXmlPart contentPart in wDoc.ContentParts())
      {
          contentPart.PutXDocument();
      }
  }
  /// <summary>
  /// Renumbers the unqualified "id" attribute of every wp:docPr element found in the document's
  /// content parts, counting up from one in the order the elements are encountered, and then
  /// calls PutXDocument on every content part.
  /// </summary>
  /// <param name="wDoc">Document whose docPr ids are rewritten.</param>
  private static void FixUpDocPrIds(WordprocessingDocument wDoc)
  {
      var runningId = 1;
      foreach (OpenXmlPart contentPart in wDoc.ContentParts())
      {
          foreach (XElement docPr in contentPart.GetXDocument().Descendants(WP.docPr))
          {
              XAttribute id = docPr.Attribute("id");
              if (id == null)
              {
                  // A docPr without an id is left alone and does not consume a number.
                  continue;
              }

              id.Value = runningId.ToString();
              runningId++;
          }
      }

      foreach (OpenXmlPart contentPart in wDoc.ContentParts())
      {
          contentPart.PutXDocument();
      }
  }
  /// <summary>
  /// Gives every v:shape element in the document's content parts a sequential number (starting
  /// at one), writes it to the shape's "id" attribute and to the "ShapeID" attribute of the first
  /// o:OLEObject child of the shape's parent, and then calls PutXDocument on every content part.
  /// </summary>
  /// <param name="wDoc">Document whose shape ids are rewritten.</param>
  private static void FixUpShapeIds(WordprocessingDocument wDoc)
  {
      var shapeCounter = 0;
      foreach (OpenXmlPart contentPart in wDoc.ContentParts())
      {
          foreach (XElement shape in contentPart.GetXDocument().Descendants(VML.shape))
          {
              // Every shape consumes a number, whether or not it carries an id attribute.
              shapeCounter++;
              string idText = shapeCounter.ToString();

              XAttribute id = shape.Attribute("id");
              if (id != null)
              {
                  id.Value = idText;
              }

              // Keep the OLE object that sits next to the shape pointing at the renumbered shape.
              XAttribute oleShapeId = shape.Parent.Element(O.OLEObject)?.Attribute("ShapeID");
              if (oleShapeId != null)
              {
                  oleShapeId.Value = idText;
              }
          }
      }

      foreach (OpenXmlPart contentPart in wDoc.ContentParts())
      {
          contentPart.PutXDocument();
      }
  }
  /// <summary>
  /// Gives every v:shapetype element in the document's content parts a sequential number
  /// (starting at one), writes it to the shapetype's "id" attribute and to the "type" attribute
  /// of the first v:shape child of the shapetype's parent, and then calls PutXDocument on every
  /// content part.
  /// </summary>
  /// <param name="wDoc">Document whose shapetype ids are rewritten.</param>
  private static void FixUpShapeTypeIds(WordprocessingDocument wDoc)
  {
      var shapeTypeCounter = 0;
      foreach (OpenXmlPart contentPart in wDoc.ContentParts())
      {
          foreach (XElement shapeType in contentPart.GetXDocument().Descendants(VML.shapetype))
          {
              // Every shapetype consumes a number, whether or not it carries an id attribute.
              shapeTypeCounter++;
              string idText = shapeTypeCounter.ToString();

              XAttribute id = shapeType.Attribute("id");
              if (id != null)
              {
                  id.Value = idText;
              }

              // NOTE(review): the sibling shape's type attribute receives the bare number, with no
              // leading '#'; confirm that this is the form consumers of the markup expect.
              XAttribute siblingShapeType = shapeType.Parent.Element(VML.shape)?.Attribute("type");
              if (siblingShapeType != null)
              {
                  siblingShapeType.Value = idText;
              }
          }
      }

      foreach (OpenXmlPart contentPart in wDoc.ContentParts())
      {
          contentPart.PutXDocument();
      }
  }
  /// <summary>
  /// Makes sure the style definitions part contains the styles that footnotes and endnotes rely
  /// on. When the main document contains a w:footnoteReference, the FootnoteText,
  /// FootnoteTextChar and FootnoteReference styles are appended if a style with that styleId is
  /// not already present; the same is done for the three Endnote styles when the main document
  /// contains a w:endnoteReference.
  /// </summary>
  /// <remarks>
  /// When the main document has neither kind of reference the styles part is not touched at all.
  /// Otherwise the styles part is written back with PutXDocument, whether or not a style had to
  /// be added.
  /// </remarks>
  /// <param name="wDocWithRevisions">Document whose styles part is completed.</param>
  private static void AddFootnotesEndnotesStyles(WordprocessingDocument wDocWithRevisions)
  {
      XDocument mainXDoc = wDocWithRevisions.MainDocumentPart.GetXDocument();
      bool hasFootnotes = mainXDoc.Descendants(W.footnoteReference).Any();
      bool hasEndnotes = mainXDoc.Descendants(W.endnoteReference).Any();
      if (!hasFootnotes && !hasEndnotes)
      {
          // Nothing can need adding, so do not load the styles part.
          return;
      }

      StyleDefinitionsPart styleDefinitionsPart = wDocWithRevisions.MainDocumentPart.StyleDefinitionsPart;
      XDocument sXDoc = styleDefinitionsPart.GetXDocument();

      // Appends the style described by markup unless a w:style with the given styleId exists.
      void EnsureStyle(string styleId, string markup)
      {
          bool alreadyDefined = sXDoc
              .Root
              .Elements(W.style)
              .Any(s => (string) s.Attribute(W.styleId) == styleId);
          if (!alreadyDefined)
          {
              sXDoc.Root.Add(XElement.Parse(markup));
          }
      }

      if (hasFootnotes)
      {
          EnsureStyle("FootnoteText",
              @"<w:style w:type=""paragraph""
                         w:styleId=""FootnoteText""
                         xmlns:w=""http://schemas.openxmlformats.org/wordprocessingml/2006/main"">
                  <w:name w:val=""footnote text""/>
                  <w:basedOn w:val=""Normal""/>
                  <w:link w:val=""FootnoteTextChar""/>
                  <w:uiPriority w:val=""99""/>
                  <w:semiHidden/>
                  <w:unhideWhenUsed/>
                  <w:pPr>
                    <w:spacing w:after=""0""
                               w:line=""240""
                               w:lineRule=""auto""/>
                  </w:pPr>
                  <w:rPr>
                    <w:sz w:val=""20""/>
                    <w:szCs w:val=""20""/>
                  </w:rPr>
                </w:style>");

          EnsureStyle("FootnoteTextChar",
              @"<w:style w:type=""character""
                         w:customStyle=""1""
                         w:styleId=""FootnoteTextChar""
                         xmlns:w=""http://schemas.openxmlformats.org/wordprocessingml/2006/main"">
                  <w:name w:val=""Footnote Text Char""/>
                  <w:basedOn w:val=""DefaultParagraphFont""/>
                  <w:link w:val=""FootnoteText""/>
                  <w:uiPriority w:val=""99""/>
                  <w:semiHidden/>
                  <w:rPr>
                    <w:sz w:val=""20""/>
                    <w:szCs w:val=""20""/>
                  </w:rPr>
                </w:style>");

          EnsureStyle("FootnoteReference",
              @"<w:style w:type=""character""
                         w:styleId=""FootnoteReference""
                         xmlns:w=""http://schemas.openxmlformats.org/wordprocessingml/2006/main"">
                  <w:name w:val=""footnote reference""/>
                  <w:basedOn w:val=""DefaultParagraphFont""/>
                  <w:uiPriority w:val=""99""/>
                  <w:semiHidden/>
                  <w:unhideWhenUsed/>
                  <w:rPr>
                    <w:vertAlign w:val=""superscript""/>
                  </w:rPr>
                </w:style>");
      }

      if (hasEndnotes)
      {
          EnsureStyle("EndnoteText",
              @"<w:style w:type=""paragraph""
                         w:styleId=""EndnoteText""
                         xmlns:w=""http://schemas.openxmlformats.org/wordprocessingml/2006/main"">
                  <w:name w:val=""endnote text""/>
                  <w:basedOn w:val=""Normal""/>
                  <w:link w:val=""EndnoteTextChar""/>
                  <w:uiPriority w:val=""99""/>
                  <w:semiHidden/>
                  <w:unhideWhenUsed/>
                  <w:pPr>
                    <w:spacing w:after=""0""
                               w:line=""240""
                               w:lineRule=""auto""/>
                  </w:pPr>
                  <w:rPr>
                    <w:sz w:val=""20""/>
                    <w:szCs w:val=""20""/>
                  </w:rPr>
                </w:style>");

          EnsureStyle("EndnoteTextChar",
              @"<w:style w:type=""character""
                         w:customStyle=""1""
                         w:styleId=""EndnoteTextChar""
                         xmlns:w=""http://schemas.openxmlformats.org/wordprocessingml/2006/main"">
                  <w:name w:val=""Endnote Text Char""/>
                  <w:basedOn w:val=""DefaultParagraphFont""/>
                  <w:link w:val=""EndnoteText""/>
                  <w:uiPriority w:val=""99""/>
                  <w:semiHidden/>
                  <w:rPr>
                    <w:sz w:val=""20""/>
                    <w:szCs w:val=""20""/>
                  </w:rPr>
                </w:style>");

          EnsureStyle("EndnoteReference",
              @"<w:style w:type=""character""
                         w:styleId=""EndnoteReference""
                         xmlns:w=""http://schemas.openxmlformats.org/wordprocessingml/2006/main"">
                  <w:name w:val=""endnote reference""/>
                  <w:basedOn w:val=""DefaultParagraphFont""/>
                  <w:uiPriority w:val=""99""/>
                  <w:semiHidden/>
                  <w:unhideWhenUsed/>
                  <w:rPr>
                    <w:vertAlign w:val=""superscript""/>
                  </w:rPr>
                </w:style>");
      }

      styleDefinitionsPart.PutXDocument();
  }
  /// <summary>
  /// Copies into the target document every w:style of the source document for which the target
  /// has no style with the same w:type and w:styleId. Copies are appended to the target's styles
  /// root with any w:default attribute removed, and the target's styles part is then written
  /// back with PutXDocument.
  /// </summary>
  /// <param name="wDocFrom">Document whose styles are read.</param>
  /// <param name="wDocTo">Document that receives the missing styles.</param>
  private static void CopyMissingStylesFromOneDocToAnother(WordprocessingDocument wDocFrom, WordprocessingDocument wDocTo)
  {
      XElement targetStyles = wDocTo.MainDocumentPart.StyleDefinitionsPart.GetXDocument().Root;
      XElement sourceStyles = wDocFrom.MainDocumentPart.StyleDefinitionsPart.GetXDocument().Root;

      foreach (XElement sourceStyle in sourceStyles.Elements(W.style))
      {
          var sourceType = (string) sourceStyle.Attribute(W.type);
          var sourceId = (string) sourceStyle.Attribute(W.styleId);

          // Styles appended on earlier iterations take part in this lookup as well.
          bool presentInTarget = targetStyles
              .Elements(W.style)
              .Any(candidate => (string) candidate.Attribute(W.type) == sourceType &&
                                (string) candidate.Attribute(W.styleId) == sourceId);

          if (!presentInTarget)
          {
              var copy = new XElement(sourceStyle);
              copy.Attribute(W._default)?.Remove();
              targetStyles.Add(copy);
          }
      }

      wDocTo.MainDocumentPart.StyleDefinitionsPart.PutXDocument();
  }
  /// <summary>
  /// Removes the w:footnotePr and w:endnotePr children of the settings root element and writes
  /// the settings part back with PutXDocument. Does nothing when the document has no settings
  /// part.
  /// </summary>
  /// <param name="wDocWithRevisions">Document whose settings part is cleaned.</param>
  private static void DeleteFootnotePropertiesInSettings(WordprocessingDocument wDocWithRevisions)
  {
      DocumentSettingsPart settingsPart = wDocWithRevisions.MainDocumentPart.DocumentSettingsPart;
      if (settingsPart == null)
      {
          return;
      }

      XElement settingsRoot = settingsPart.GetXDocument().Root;
      if (settingsRoot != null)
      {
          settingsRoot
              .Elements()
              .Where(child => child.Name == W.footnotePr || child.Name == W.endnotePr)
              .Remove();
      }

      // The part is written back even when there was no root element to clean.
      settingsPart.PutXDocument();
  }
  /// <summary>
  /// Builds a copy of an element that keeps only its structure: the element name, its
  /// attributes and, recursively, its child elements. Text, comments and every other kind of
  /// child node are left out.
  /// </summary>
  /// <param name="node">Node to copy.</param>
  /// <returns>The structural copy as an XElement, or null when node is not an element.</returns>
  private static object CloneForStructureHash(XNode node)
  {
      var source = node as XElement;
      if (source == null)
      {
          return null;
      }

      var skeleton = new XElement(source.Name, source.Attributes());
      foreach (XElement child in source.Elements())
      {
          skeleton.Add(CloneForStructureHash(child));
      }

      return skeleton;
  }
  /// <summary>
  /// Tries to split an unknown correlated sequence by peeling a run of equal comparison units
  /// off its beginning or, when the beginning yields nothing usable, off its end.
  /// </summary>
  /// <remarks>
  /// Two units are treated as equal when their SHA1Hash values are equal. A common run is
  /// rejected when its length divided by the length of the shorter array is below
  /// settings.DetailThreshold. The end of the sequence is examined only when no common run was
  /// accepted at the beginning.
  /// </remarks>
  /// <param name="unknown">Sequence whose two comparison-unit arrays are compared.</param>
  /// <param name="settings">Supplies DetailThreshold.</param>
  /// <returns>
  /// The sequences that replace <paramref name="unknown"/>, in order, or null when no common run
  /// was accepted at either end.
  /// </returns>
  private static List<CorrelatedSequence> FindCommonAtBeginningAndEnd(
      CorrelatedSequence unknown,
      WmlComparerSettings settings)
  {
      ComparisonUnit[] left = unknown.ComparisonUnitArray1;
      ComparisonUnit[] right = unknown.ComparisonUnitArray2;
      int lengthToCompare = Math.Min(left.Length, right.Length);

      // Creates a correlated sequence with the given status and unit arrays.
      CorrelatedSequence MakeSequence(CorrelationStatus status, ComparisonUnit[] units1, ComparisonUnit[] units2)
      {
          return new CorrelatedSequence
          {
              CorrelationStatus = status,
              ComparisonUnitArray1 = units1,
              ComparisonUnitArray2 = units2
          };
      }

      // Appends Deleted (only left units), Inserted (only right units) or Unknown (both);
      // appends nothing when both arrays are empty.
      void AddUnmatched(List<CorrelatedSequence> target, ComparisonUnit[] units1, ComparisonUnit[] units2)
      {
          if (units1.Length != 0 && units2.Length != 0)
          {
              target.Add(MakeSequence(CorrelationStatus.Unknown, units1, units2));
          }
          else if (units1.Length != 0)
          {
              target.Add(MakeSequence(CorrelationStatus.Deleted, units1, null));
          }
          else if (units2.Length != 0)
          {
              target.Add(MakeSequence(CorrelationStatus.Inserted, null, units2));
          }
      }

      // True when the unit is a word whose contents are exactly one atom; that atom is returned.
      bool TryGetSoleAtom(ComparisonUnit unit, out ComparisonUnitAtom atom)
      {
          atom = null;
          if (unit is ComparisonUnitWord word &&
              word.Contents.Count == 1 &&
              word.Contents.First() is ComparisonUnitAtom sole)
          {
              atom = sole;
              return true;
          }

          return false;
      }

      // True when the unit is a word consisting of nothing but a w:pPr atom, i.e. a paragraph mark.
      bool IsParagraphMarkWord(ComparisonUnit unit)
      {
          return TryGetSoleAtom(unit, out ComparisonUnitAtom atom) && atom.ContentElement.Name == W.pPr;
      }

      // True when the first atom among the unit's direct contents is a w:pPr.
      bool StartsWithParagraphMark(ComparisonUnit unit)
      {
          ComparisonUnitAtom firstAtom = unit.Contents.OfType<ComparisonUnitAtom>().FirstOrDefault();
          return firstAtom != null && firstAtom.ContentElement.Name == W.pPr;
      }

      // Counts, walking backwards from just before endExclusive, the consecutive words that do
      // not start with a paragraph mark; stops at the first unit that is not a word.
      int CountSameParagraphUnitsBefore(ComparisonUnit[] units, int endExclusive)
      {
          var count = 0;
          for (int i = endExclusive - 1; i >= 0; i--)
          {
              if (!(units[i] is ComparisonUnitWord) || StartsWithParagraphMark(units[i]))
              {
                  break;
              }

              count++;
          }

          return count;
      }

      // ---- common run at the beginning ----
      var countCommonAtBeginning = 0;
      while (countCommonAtBeginning < lengthToCompare &&
             left[countCommonAtBeginning].SHA1Hash == right[countCommonAtBeginning].SHA1Hash)
      {
          countCommonAtBeginning++;
      }

      if (countCommonAtBeginning != 0 &&
          countCommonAtBeginning / (double) lengthToCompare < settings.DetailThreshold)
      {
          countCommonAtBeginning = 0;
      }

      if (countCommonAtBeginning != 0)
      {
          var newSequence = new List<CorrelatedSequence>
          {
              MakeSequence(
                  CorrelationStatus.Equal,
                  left.Take(countCommonAtBeginning).ToArray(),
                  right.Take(countCommonAtBeginning).ToArray())
          };

          ComparisonUnit[] remainingInLeft = left.Skip(countCommonAtBeginning).ToArray();
          ComparisonUnit[] remainingInRight = right.Skip(countCommonAtBeginning).ToArray();

          if (remainingInLeft.Length != 0 && remainingInRight.Length != 0 &&
              left[0] is ComparisonUnitWord && right[0] is ComparisonUnitWord)
          {
              // Operating at the word level. If the first content atom of the last common unit is
              // not a paragraph mark on either side, the common run ended in the middle of a
              // paragraph: resolve the rest of that paragraph separately from what follows it.
              ComparisonUnitAtom lastCommonAtomLeft = left[countCommonAtBeginning - 1]
                  .DescendantContentAtoms()
                  .FirstOrDefault();
              ComparisonUnitAtom lastCommonAtomRight = right[countCommonAtBeginning - 1]
                  .DescendantContentAtoms()
                  .FirstOrDefault();

              if (lastCommonAtomLeft?.ContentElement.Name != W.pPr &&
                  lastCommonAtomRight?.ContentElement.Name != W.pPr)
              {
                  List<ComparisonUnit[]> split1 = SplitAtParagraphMark(remainingInLeft);
                  List<ComparisonUnit[]> split2 = SplitAtParagraphMark(remainingInRight);

                  if (split1.Count == 1 && split2.Count == 1)
                  {
                      newSequence.Add(MakeSequence(CorrelationStatus.Unknown, split1[0], split2[0]));
                      return newSequence;
                  }

                  if (split1.Count == 2 && split2.Count == 2)
                  {
                      newSequence.Add(MakeSequence(CorrelationStatus.Unknown, split1[0], split2[0]));
                      newSequence.Add(MakeSequence(CorrelationStatus.Unknown, split1[1], split2[1]));
                      return newSequence;
                  }
              }
          }

          AddUnmatched(newSequence, remainingInLeft, remainingInRight);
          return newSequence;
      }

      // ---- common run at the end (reached only when countCommonAtBeginning == 0) ----
      var countCommonAtEnd = 0;
      while (countCommonAtEnd < lengthToCompare &&
             left[left.Length - 1 - countCommonAtEnd].SHA1Hash ==
             right[right.Length - 1 - countCommonAtEnd].SHA1Hash)
      {
          countCommonAtEnd++;
      }

      // never start a common section with a paragraph mark. However, it is OK to set two paragraph marks as equal.
      while (countCommonAtEnd > 1 && IsParagraphMarkWord(left[left.Length - countCommonAtEnd]))
      {
          countCommonAtEnd--;
      }

      var isOnlyParagraphMark = false;
      if (countCommonAtEnd == 1)
      {
          isOnlyParagraphMark = IsParagraphMarkWord(left[left.Length - 1]);
      }
      else if (countCommonAtEnd == 2)
      {
          // Both common units must be single-atom words; only the last one has to be a paragraph mark.
          isOnlyParagraphMark =
              TryGetSoleAtom(left[left.Length - 2], out _) &&
              IsParagraphMarkWord(left[left.Length - 1]);
      }

      // If the paragraph-mark test is not there, the code below sets the end paragraph mark of the entire
      // document equal to the end paragraph mark of the first paragraph in the other document, causing
      // lines to be out of order.
      // [InlineData("WC010-Para-Before-Table-Unmodified.docx", "WC010-Para-Before-Table-Mod.docx", 3)]
      if (countCommonAtEnd == 0 ||
          isOnlyParagraphMark ||
          countCommonAtEnd / (double) lengthToCompare < settings.DetailThreshold)
      {
          return null;
      }

      // If the common run at the end contains a paragraph mark, the units that precede it in the
      // same paragraph (in either version) go into their own sequence so that they are resolved
      // against each other rather than being merged into the leading unknown sequence.
      int commonEndStartLeft = left.Length - countCommonAtEnd;
      int commonEndStartRight = right.Length - countCommonAtEnd;
      var remainingInLeftParagraph = 0;
      var remainingInRightParagraph = 0;
      if (left[commonEndStartLeft] is ComparisonUnitWord &&
          left.Skip(commonEndStartLeft).Any(StartsWithParagraphMark))
      {
          remainingInLeftParagraph = CountSameParagraphUnitsBefore(left, commonEndStartLeft);
          remainingInRightParagraph = CountSameParagraphUnitsBefore(right, commonEndStartRight);
      }

      int beforeCommonParagraphLeft = commonEndStartLeft - remainingInLeftParagraph;
      int beforeCommonParagraphRight = commonEndStartRight - remainingInRightParagraph;

      var endSequence = new List<CorrelatedSequence>();

      // Everything in front of the paragraph that holds the common run.
      AddUnmatched(
          endSequence,
          left.Take(beforeCommonParagraphLeft).ToArray(),
          right.Take(beforeCommonParagraphRight).ToArray());

      // The part of that paragraph that precedes the common run.
      AddUnmatched(
          endSequence,
          left.Skip(beforeCommonParagraphLeft).Take(remainingInLeftParagraph).ToArray(),
          right.Skip(beforeCommonParagraphRight).Take(remainingInRightParagraph).ToArray());

      // The common run itself.
      endSequence.Add(MakeSequence(
          CorrelationStatus.Equal,
          left.Skip(commonEndStartLeft).ToArray(),
          right.Skip(commonEndStartRight).ToArray()));

      return endSequence;
  }
  /// <summary>
  /// Splits an array of comparison units in front of the first unit whose first descendant
  /// content atom is a w:pPr element.
  /// </summary>
  /// <param name="cua">Units to split.</param>
  /// <returns>
  /// A one-element list holding <paramref name="cua"/> itself when no such unit exists; otherwise
  /// a two-element list holding the units before that unit and the units from that unit onwards.
  /// </returns>
  private static List<ComparisonUnit[]> SplitAtParagraphMark(ComparisonUnit[] cua)
  {
      var markIndex = 0;
      while (markIndex < cua.Length &&
             !(cua[markIndex].DescendantContentAtoms().FirstOrDefault()?.ContentElement.Name == W.pPr))
      {
          markIndex++;
      }

      var parts = new List<ComparisonUnit[]>();
      if (markIndex == cua.Length)
      {
          // No paragraph mark found: hand back the input array unchanged.
          parts.Add(cua);
          return parts;
      }

      parts.Add(cua.Take(markIndex).ToArray());
      parts.Add(cua.Skip(markIndex).ToArray());
      return parts;
  }
  // Rebuilds XML content from a flat sequence of comparison-unit atoms. Atoms are grouped by the
  // Unid of their ancestor at depth `level`; for each group the ancestor element is re-created
  // (by name-specific rules below) and its children are produced either directly from the atoms
  // or by recursing with level + 1.
  //
  // part:     passed through to recursive calls and to ReconstructElement; in the w:drawing
  //           branch its OpenXmlPackage and Uri are used to look up the package parts handed to
  //           MoveRelatedPartsToDestination.
  // list:     the atoms to coalesce.
  // level:    index into each atom's AncestorElements / AncestorUnids arrays.
  // settings: supplies AuthorForRevisions and DateTimeForRevisions for the w:ins / w:del
  //           wrappers created in the math branch; also passed through to nested calls.
  // returns:  null when no atom has an ancestor at `level`; otherwise a List<object> holding one
  //           entry per ancestor group (an XElement, or a list of child content, depending on
  //           the ancestor's name).
  2288. private static object CoalesceRecurse(
  2289. OpenXmlPart part,
  2290. IEnumerable<ComparisonUnitAtom> list,
  2291. int level,
  2292. WmlComparerSettings settings)
  2293. {
  // Group by the ancestor Unid at this depth. Atoms that have no ancestor this deep get the
  // key "" and are filtered out. NOTE: this is a deferred query; it is enumerated by Any()
  // below and again by the Select that builds elementList.
  2294. IEnumerable<IGrouping<string, ComparisonUnitAtom>> grouped = list.GroupBy(ca =>
  2295. {
  2296. if (level >= ca.AncestorElements.Length)
  2297. return "";
  2298. return ca.AncestorUnids[level];
  2299. })
  2300. .Where(g => g.Key != "");
  2301. // if there are no deeper children, then we're done.
  2302. if (!grouped.Any())
  2303. return null;
  // Diagnostic dump of the groups to TestUtil.NotePad.
  // NOTE(review): `False` is declared outside this chunk; presumably a constant false flag that
  // keeps this block disabled -- confirm.
  2304. if (False)
  2305. {
  2306. var sb = new StringBuilder();
  2307. foreach (IGrouping<string, ComparisonUnitAtom> group in grouped)
  2308. {
  2309. sb.AppendFormat("Group Key: {0}", @group.Key);
  2310. sb.Append(Environment.NewLine);
  2311. foreach (ComparisonUnitAtom groupChildItem in @group)
  2312. {
  2313. sb.Append(" ");
  2314. sb.Append(groupChildItem.ToString(0));
  2315. sb.Append(Environment.NewLine);
  2316. }
  2317. sb.Append(Environment.NewLine);
  2318. }
  2319. string sbs = sb.ToString();
  2320. TestUtil.NotePad(sbs);
  2321. }
  // One output entry per ancestor group; the lambda dispatches on the ancestor's element name.
  2322. List<object> elementList = grouped
  2323. .Select(g =>
  2324. {
  2325. XElement ancestorBeingConstructed =
  2326. g.First().AncestorElements[level]; // these will all be the same, by definition
  2327. // need to group by corr stat
  // Adjacent atoms are grouped on "<unid of the next-level ancestor>|<correlation status>".
  // The unid part stays "" when `level` is the atom's last ancestor index. Atoms with a
  // w:txbxContent ancestor at or below this level are keyed as Equal regardless of their
  // own correlation status.
  2328. List<IGrouping<string, ComparisonUnitAtom>> groupedChildren = g
  2329. .GroupAdjacent(gc =>
  2330. {
  2331. var key = "";
  2332. if (level < gc.AncestorElements.Length - 1)
  2333. {
  2334. key = gc.AncestorUnids[level + 1];
  2335. }
  2336. if (gc.AncestorElements.Skip(level).Any(ae => ae.Name == W.txbxContent))
  2337. key += "|" + CorrelationStatus.Equal.ToString();
  2338. else
  2339. key += "|" + gc.CorrelationStatus.ToString();
  2340. return key;
  2341. })
  2342. .ToList();
  // w:p -- groups whose unid part is "" are cloned from their content elements and tagged
  // with pt:Status when Deleted/Inserted; all other groups are built by recursion. The new
  // paragraph keeps the non-pt attributes and gets pt:Unid set to the group key.
  2343. if (ancestorBeingConstructed.Name == W.p)
  2344. {
  2345. List<object> newChildElements = groupedChildren
  2346. .Select(gc =>
  2347. {
  2348. string[] spl = gc.Key.Split('|');
  2349. if (spl[0] == "")
  2350. {
  2351. return (object) gc.Select(gcc =>
  2352. {
  2353. var dup = new XElement(gcc.ContentElement);
  2354. if (spl[1] == "Deleted")
  2355. dup.Add(new XAttribute(PtOpenXml.Status, "Deleted"));
  2356. else if (spl[1] == "Inserted")
  2357. dup.Add(new XAttribute(PtOpenXml.Status, "Inserted"));
  2358. return dup;
  2359. });
  2360. }
  2361. return CoalesceRecurse(part, gc, level + 1, settings);
  2362. })
  2363. .ToList();
  2364. var newPara = new XElement(W.p,
  2365. ancestorBeingConstructed.Attributes().Where(a => a.Name.Namespace != PtOpenXml.pt),
  2366. new XAttribute(PtOpenXml.Unid, g.Key),
  2367. newChildElements);
  2368. return newPara;
  2369. }
  // w:r -- same child handling as w:p. The new run keeps the non-pt attributes and the
  // original run's w:rPr (if any); no pt:Unid attribute is added here.
  2370. if (ancestorBeingConstructed.Name == W.r)
  2371. {
  2372. List<object> newChildElements = groupedChildren
  2373. .Select(gc =>
  2374. {
  2375. string[] spl = gc.Key.Split('|');
  2376. if (spl[0] == "")
  2377. {
  2378. return (object) gc.Select(gcc =>
  2379. {
  2380. var dup = new XElement(gcc.ContentElement);
  2381. if (spl[1] == "Deleted")
  2382. dup.Add(new XAttribute(PtOpenXml.Status, "Deleted"));
  2383. else if (spl[1] == "Inserted")
  2384. dup.Add(new XAttribute(PtOpenXml.Status, "Inserted"));
  2385. return dup;
  2386. });
  2387. }
  2388. return CoalesceRecurse(part, gc, level + 1, settings);
  2389. })
  2390. .ToList();
  2391. XElement rPr = ancestorBeingConstructed.Element(W.rPr);
  2392. var newRun = new XElement(W.r,
  2393. ancestorBeingConstructed.Attributes().Where(a => a.Name.Namespace != PtOpenXml.pt),
  2394. rPr,
  2395. newChildElements);
  2396. return newRun;
  2397. }
  // w:t -- the text of each adjacent group is concatenated into one element: w:delText
  // (pt:Status="Deleted") for deleted groups, w:t (pt:Status="Inserted") for inserted groups,
  // plain w:t otherwise. The list of new text elements is returned directly rather than
  // wrapped in a parent element.
  2398. if (ancestorBeingConstructed.Name == W.t)
  2399. {
  2400. List<object> newChildElements = groupedChildren
  2401. .Select(gc =>
  2402. {
  2403. string textOfTextElement = gc.Select(gce => gce.ContentElement.Value).StringConcatenate();
  2404. bool del = gc.First().CorrelationStatus == CorrelationStatus.Deleted;
  2405. bool ins = gc.First().CorrelationStatus == CorrelationStatus.Inserted;
  2406. if (del)
  2407. return (object) new XElement(W.delText,
  2408. new XAttribute(PtOpenXml.Status, "Deleted"),
  2409. GetXmlSpaceAttribute(textOfTextElement),
  2410. textOfTextElement);
  2411. if (ins)
  2412. return (object) new XElement(W.t,
  2413. new XAttribute(PtOpenXml.Status, "Inserted"),
  2414. GetXmlSpaceAttribute(textOfTextElement),
  2415. textOfTextElement);
  2416. return (object) new XElement(W.t,
  2417. GetXmlSpaceAttribute(textOfTextElement),
  2418. textOfTextElement);
  2419. })
  2420. .ToList();
  2421. return newChildElements;
  2422. }
  // w:drawing -- deleted/inserted drawings are cloned, tagged with pt:Status, and passed to
  // MoveRelatedPartsToDestination together with the package parts found at part.Uri in the
  // atom's package and in the new document's package. Other drawings pass through as their
  // original content elements.
  // NOTE(review): the inner gc.Select re-walks the whole group for every outer atom, and both
  // package lookups use part.Uri (the destination part's URI) -- confirm this is intended.
  // The returned sequences are deferred, so the move happens when they are enumerated.
  2423. if (ancestorBeingConstructed.Name == W.drawing)
  2424. {
  2425. List<object> newChildElements = groupedChildren
  2426. .Select(gc =>
  2427. {
  2428. bool del = gc.First().CorrelationStatus == CorrelationStatus.Deleted;
  2429. if (del)
  2430. {
  2431. return (object) gc.Select(gcc =>
  2432. {
  2433. var newDrawing = new XElement(gcc.ContentElement);
  2434. newDrawing.Add(new XAttribute(PtOpenXml.Status, "Deleted"));
  2435. OpenXmlPart openXmlPartOfDeletedContent = gc.First().Part;
  2436. OpenXmlPart openXmlPartInNewDocument = part;
  2437. return gc.Select(gce =>
  2438. {
  2439. Package packageOfDeletedContent = openXmlPartOfDeletedContent.OpenXmlPackage.Package;
  2440. Package packageOfNewContent = openXmlPartInNewDocument.OpenXmlPackage.Package;
  2441. PackagePart partInDeletedDocument = packageOfDeletedContent.GetPart(part.Uri);
  2442. PackagePart partInNewDocument = packageOfNewContent.GetPart(part.Uri);
  2443. return MoveRelatedPartsToDestination(
  2444. partInDeletedDocument,
  2445. partInNewDocument,
  2446. newDrawing);
  2447. });
  2448. });
  2449. }
  2450. bool ins = gc.First().CorrelationStatus == CorrelationStatus.Inserted;
  2451. if (ins)
  2452. {
  2453. return gc.Select(gcc =>
  2454. {
  2455. var newDrawing = new XElement(gcc.ContentElement);
  2456. newDrawing.Add(new XAttribute(PtOpenXml.Status, "Inserted"));
  2457. OpenXmlPart openXmlPartOfInsertedContent = gc.First().Part;
  2458. OpenXmlPart openXmlPartInNewDocument = part;
  2459. return gc.Select(gce =>
  2460. {
  2461. Package packageOfSourceContent = openXmlPartOfInsertedContent.OpenXmlPackage.Package;
  2462. Package packageOfNewContent = openXmlPartInNewDocument.OpenXmlPackage.Package;
  2463. PackagePart partInDeletedDocument = packageOfSourceContent.GetPart(part.Uri);
  2464. PackagePart partInNewDocument = packageOfNewContent.GetPart(part.Uri);
  2465. return MoveRelatedPartsToDestination(
  2466. partInDeletedDocument,
  2467. partInNewDocument,
  2468. newDrawing);
  2469. });
  2470. });
  2471. }
  2472. return gc.Select(gcc => gcc.ContentElement);
  2473. })
  2474. .ToList();
  2475. return newChildElements;
  2476. }
  // m:oMath / m:oMathPara -- deleted or inserted math content is wrapped whole in w:del / w:ins
  // carrying the author, a fresh id taken from _maxId, and the revision date from settings.
  // The wrapping Selects are deferred, so _maxId is incremented each time the returned
  // sequences are enumerated, not when this list is built.
  2477. if (ancestorBeingConstructed.Name == M.oMath || ancestorBeingConstructed.Name == M.oMathPara)
  2478. {
  2479. List<IEnumerable<XElement>> newChildElements = groupedChildren
  2480. .Select(gc =>
  2481. {
  2482. bool del = gc.First().CorrelationStatus == CorrelationStatus.Deleted;
  2483. if (del)
  2484. {
  2485. return gc.Select(gcc =>
  2486. new XElement(W.del,
  2487. new XAttribute(W.author, settings.AuthorForRevisions),
  2488. new XAttribute(W.id, _maxId++),
  2489. new XAttribute(W.date, settings.DateTimeForRevisions),
  2490. gcc.ContentElement));
  2491. }
  2492. bool ins = gc.First().CorrelationStatus == CorrelationStatus.Inserted;
  2493. if (ins)
  2494. {
  2495. return gc.Select(gcc =>
  2496. new XElement(W.ins,
  2497. new XAttribute(W.author, settings.AuthorForRevisions),
  2498. new XAttribute(W.id, _maxId++),
  2499. new XAttribute(W.date, settings.DateTimeForRevisions),
  2500. gcc.ContentElement));
  2501. }
  2502. return gc.Select(gcc => gcc.ContentElement);
  2503. })
  2504. .ToList();
  2505. return newChildElements;
  2506. }
  // Any other element named in AllowableRunChildren -- for deleted/inserted atoms a new empty
  // element is created with the ancestor's name and non-pt attributes plus pt:Status (the
  // atom's own content element is not copied); unchanged atoms pass through as-is.
  2507. if (AllowableRunChildren.Contains(ancestorBeingConstructed.Name))
  2508. {
  2509. List<IEnumerable<XElement>> newChildElements = groupedChildren
  2510. .Select(gc =>
  2511. {
  2512. bool del = gc.First().CorrelationStatus == CorrelationStatus.Deleted;
  2513. bool ins = gc.First().CorrelationStatus == CorrelationStatus.Inserted;
  2514. if (del)
  2515. {
  2516. return gc.Select(gcc =>
  2517. {
  2518. var dup = new XElement(ancestorBeingConstructed.Name,
  2519. ancestorBeingConstructed.Attributes().Where(a => a.Name.Namespace != PtOpenXml.pt),
  2520. new XAttribute(PtOpenXml.Status, "Deleted"));
  2521. return dup;
  2522. });
  2523. }
  2524. if (ins)
  2525. {
  2526. return gc.Select(gcc =>
  2527. {
  2528. var dup = new XElement(ancestorBeingConstructed.Name,
  2529. ancestorBeingConstructed.Attributes().Where(a => a.Name.Namespace != PtOpenXml.pt),
  2530. new XAttribute(PtOpenXml.Status, "Inserted"));
  2531. return dup;
  2532. });
  2533. }
  2534. return gc.Select(gcc => gcc.ContentElement);
  2535. })
  2536. .ToList();
  2537. return newChildElements;
  2538. }
  // Container elements -- delegate to ReconstructElement, naming up to three child elements
  // that are copied from the original ancestor ahead of the recursively built children.
  2539. if (ancestorBeingConstructed.Name == W.tbl)
  2540. return ReconstructElement(part, g, ancestorBeingConstructed, W.tblPr, W.tblGrid, null, level, settings);
  2541. if (ancestorBeingConstructed.Name == W.tr)
  2542. return ReconstructElement(part, g, ancestorBeingConstructed, W.trPr, null, null, level, settings);
  2543. if (ancestorBeingConstructed.Name == W.tc)
  2544. return ReconstructElement(part, g, ancestorBeingConstructed, W.tcPr, null, null, level, settings);
  2545. if (ancestorBeingConstructed.Name == W.sdt)
  2546. return ReconstructElement(part, g, ancestorBeingConstructed, W.sdtPr, W.sdtEndPr, null, level, settings);
  2547. if (ancestorBeingConstructed.Name == W.pict)
  2548. return ReconstructElement(part, g, ancestorBeingConstructed, VML.shapetype, null, null, level, settings);
  2549. if (ancestorBeingConstructed.Name == VML.shape)
  2550. return ReconstructElement(part, g, ancestorBeingConstructed, W10.wrap, null, null, level, settings);
  2551. if (ancestorBeingConstructed.Name == W._object)
  2552. return ReconstructElement(part, g, ancestorBeingConstructed, VML.shapetype, VML.shape, O.OLEObject, level,
  2553. settings);
  2554. if (ancestorBeingConstructed.Name == W.ruby)
  2555. return ReconstructElement(part, g, ancestorBeingConstructed, W.rubyPr, null, null, level, settings);
  // Fallback for every other ancestor: rebuild it with no copied property children.
  2556. return (object) ReconstructElement(part, g, ancestorBeingConstructed, null, null, null, level, settings);
  2557. })
  2558. .ToList();
  2559. return elementList;
  2560. }
  // Re-creates a container element from its original: same name, all of the original's
  // attributes, then up to three kinds of child elements copied from the original (selected by
  // name), and finally the children rebuilt by CoalesceRecurse one level deeper.
  //
  // part:                     passed through to CoalesceRecurse.
  // g:                        the atoms that live under the element being rebuilt.
  // ancestorBeingConstructed: the original element whose name, attributes and selected
  //                           children are carried over.
  // props1XName..props3XName: names of child elements to copy, in that order; null skips a slot.
  // level:                    depth of ancestorBeingConstructed; recursion continues at level + 1.
  // settings:                 passed through to CoalesceRecurse.
  // returns:                  the newly built element.
  private static XElement ReconstructElement(
      OpenXmlPart part,
      IGrouping<string, ComparisonUnitAtom> g,
      XElement ancestorBeingConstructed,
      XName props1XName,
      XName props2XName,
      XName props3XName,
      int level,
      WmlComparerSettings settings)
  {
      // Build the nested content first, exactly as the element's other pieces depend on nothing
      // it produces.
      object rebuiltChildren = CoalesceRecurse(part, g, level + 1, settings);

      // A null name means "no such property child to carry over".
      object CopiedChildren(XName childName)
      {
          return childName != null
              ? ancestorBeingConstructed.Elements(childName)
              : null;
      }

      object[] content =
      {
          ancestorBeingConstructed.Attributes(),
          CopiedChildren(props1XName),
          CopiedChildren(props2XName),
          CopiedChildren(props3XName),
          rebuiltChildren
      };

      return new XElement(ancestorBeingConstructed.Name, content);
  }
  // When an unknown correlated sequence pairs exactly one group on each side and both groups
  // have the same group type, copies the pt:Unid values of the left side's ancestor chain onto
  // the corresponding ancestors of every atom on the right side. The chain runs from the
  // outermost ancestor through the first ancestor whose name matches the group type
  // (w:p, w:tbl, w:tr, w:tc or w:txbxContent).
  //
  // Does nothing when either side does not hold exactly one unit, when either unit is not a
  // ComparisonUnitGroup, or when the group types differ.
  // Throws OpenXmlPowerToolsException("Internal error") when the group type has no mapped
  // element name or a required pt:Unid attribute is missing.
  private static void SetAfterUnids(CorrelatedSequence unknown)
  {
      if (unknown.ComparisonUnitArray1.Length != 1 || unknown.ComparisonUnitArray2.Length != 1)
          return;
      if (!(unknown.ComparisonUnitArray1[0] is ComparisonUnitGroup leftGroup))
          return;
      if (!(unknown.ComparisonUnitArray2[0] is ComparisonUnitGroup rightGroup))
          return;
      if (leftGroup.ComparisonUnitGroupType != rightGroup.ComparisonUnitGroupType)
          return;

      IEnumerable<ComparisonUnitAtom> leftAtoms = leftGroup.DescendantContentAtoms();
      IEnumerable<ComparisonUnitAtom> rightAtoms = rightGroup.DescendantContentAtoms();

      // Name of the element at which the ancestor chain is cut off (inclusive).
      XName boundaryName;
      switch (leftGroup.ComparisonUnitGroupType)
      {
          case ComparisonUnitGroupType.Paragraph:
              boundaryName = W.p;
              break;
          case ComparisonUnitGroupType.Table:
              boundaryName = W.tbl;
              break;
          case ComparisonUnitGroupType.Row:
              boundaryName = W.tr;
              break;
          case ComparisonUnitGroupType.Cell:
              boundaryName = W.tc;
              break;
          case ComparisonUnitGroupType.Textbox:
              boundaryName = W.txbxContent;
              break;
          default:
              boundaryName = null;
              break;
      }

      if (boundaryName == null)
          throw new OpenXmlPowerToolsException("Internal error");

      // Ancestors of the first left-hand atom, up to and including the boundary element
      // (or all of them when no ancestor carries the boundary name).
      var chain = new List<XElement>();
      foreach (XElement ancestor in leftAtoms.First().AncestorElements)
      {
          chain.Add(ancestor);
          if (ancestor.Name == boundaryName)
              break;
      }

      // Every ancestor in the chain must already carry a pt:Unid.
      var unidList = new string[chain.Count];
      for (var k = 0; k < chain.Count; k++)
      {
          var sourceUnid = (string) chain[k].Attribute(PtOpenXml.Unid);
          if (sourceUnid == null)
              throw new OpenXmlPowerToolsException("Internal error");
          unidList[k] = sourceUnid;
      }

      foreach (ComparisonUnitAtom rightAtom in rightAtoms)
      {
          XElement[] targets = rightAtom.AncestorElements;
          int pairCount = Math.Min(targets.Length, unidList.Length);

          for (var k = 0; k < pairCount; k++)
          {
              XElement target = targets[k];
              XAttribute targetUnid = target.Attribute(PtOpenXml.Unid);

              // Footnote / endnote containers are left alone.
              if (target.Name == W.footnotes || target.Name == W.endnotes)
                  continue;

              if (targetUnid == null)
                  throw new OpenXmlPowerToolsException("Internal error");

              targetUnid.Value = unidList[k];
          }
      }
  }
  // Tries to split an unknown correlated sequence around the best run of consecutive groups
  // whose CorrelatedSHA1Hash values match pairwise on both sides.
  //
  // The search only runs when each side has at least three units and both sides start with a
  // Paragraph, Table or Row group. "Best" means the run with the largest total
  // DescendantContentAtomsCount (the earliest such run wins ties). The split is performed only
  // if the run is long enough: more than 3 units, or 2-3 units with more than 32 atoms on each
  // side, or a single unit with more than 16 atoms on each side.
  //
  // unknown:  the sequence to examine.
  // settings: not used by this method.
  // returns:  null when the optimisation does not apply; otherwise the replacement list:
  //           an optional leading Deleted/Inserted/Unknown sequence, one Unknown sequence per
  //           matched pair, and an optional trailing Deleted/Inserted/Unknown sequence.
  private static List<CorrelatedSequence> ProcessCorrelatedHashes(CorrelatedSequence unknown, WmlComparerSettings settings)
  {
      // never attempt this optimization if there are less than 3 groups
      if (Math.Min(unknown.ComparisonUnitArray1.Length, unknown.ComparisonUnitArray2.Length) < 3)
          return null;

      if (!(unknown.ComparisonUnitArray1.FirstOrDefault() is ComparisonUnitGroup leadGroup1) ||
          !(unknown.ComparisonUnitArray2.FirstOrDefault() is ComparisonUnitGroup leadGroup2))
      {
          return null;
      }

      bool IsBlockLevel(ComparisonUnitGroupType type)
      {
          return type == ComparisonUnitGroupType.Paragraph ||
                 type == ComparisonUnitGroupType.Table ||
                 type == ComparisonUnitGroupType.Row;
      }

      if (!IsBlockLevel(leadGroup1.ComparisonUnitGroupType) || !IsBlockLevel(leadGroup2.ComparisonUnitGroupType))
          return null;

      // Not every paragraph is necessarily correlated (untracked changes, or tracking anomalies
      // that alter the paragraph count), so search every start pair for the heaviest run of
      // pairwise-correlated groups and later isolate each matched pair in its own sequence.
      ComparisonUnit[] cul1 = unknown.ComparisonUnitArray1;
      ComparisonUnit[] cul2 = unknown.ComparisonUnitArray2;

      bool HashesCorrelate(ComparisonUnit left, ComparisonUnit right)
      {
          return left is ComparisonUnitGroup leftGroup &&
                 right is ComparisonUnitGroup rightGroup &&
                 leftGroup.ComparisonUnitGroupType == rightGroup.ComparisonUnitGroupType &&
                 leftGroup.CorrelatedSHA1Hash != null &&
                 rightGroup.CorrelatedSHA1Hash != null &&
                 leftGroup.CorrelatedSHA1Hash == rightGroup.CorrelatedSHA1Hash;
      }

      int bestLength = 0;
      int bestAtomCount = 0;
      int bestStart1 = -1;
      int bestStart2 = -1;

      for (var start1 = 0; start1 < cul1.Length; start1++)
      {
          for (var start2 = 0; start2 < cul2.Length; start2++)
          {
              // Extend the run for as long as both sides have units left and they correlate.
              int runLength = 0;
              int runAtomCount = 0;
              while (start1 + runLength < cul1.Length &&
                     start2 + runLength < cul2.Length &&
                     HashesCorrelate(cul1[start1 + runLength], cul2[start2 + runLength]))
              {
                  runAtomCount += cul1[start1 + runLength].DescendantContentAtomsCount;
                  runLength++;
              }

              if (runAtomCount > bestAtomCount)
              {
                  bestLength = runLength;
                  bestAtomCount = runAtomCount;
                  bestStart1 = start1;
                  bestStart2 = start2;
              }
          }
      }

      // Threshold: short runs must carry enough atoms on both sides to be worth splitting on.
      bool doCorrelation;
      if (bestLength > 3)
      {
          doCorrelation = true;
      }
      else if (bestLength >= 1)
      {
          int atomsInRun1 = cul1
              .Skip(bestStart1)
              .Take(bestLength)
              .Sum(unit => unit.DescendantContentAtoms().Count());
          int atomsInRun2 = cul2
              .Skip(bestStart2)
              .Take(bestLength)
              .Sum(unit => unit.DescendantContentAtoms().Count());
          int minimumAtoms = bestLength == 1 ? 16 : 32;
          doCorrelation = atomsInRun1 > minimumAtoms && atomsInRun2 > minimumAtoms;
      }
      else
      {
          doCorrelation = false;
      }

      if (!doCorrelation)
          return null;

      var result = new List<CorrelatedSequence>();

      // Emits the units outside the matched run: Deleted when only the left side has units,
      // Inserted when only the right side has units, Unknown when both do, nothing when neither.
      void AddUnmatchedRun(int from1, int count1, int from2, int count2)
      {
          if (count1 == 0 && count2 == 0)
              return;

          CorrelationStatus status;
          if (count2 == 0)
              status = CorrelationStatus.Deleted;
          else if (count1 == 0)
              status = CorrelationStatus.Inserted;
          else
              status = CorrelationStatus.Unknown;

          result.Add(new CorrelatedSequence
          {
              CorrelationStatus = status,
              ComparisonUnitArray1 = count1 == 0 ? null : cul1.Skip(from1).Take(count1).ToArray(),
              ComparisonUnitArray2 = count2 == 0 ? null : cul2.Skip(from2).Take(count2).ToArray()
          });
      }

      // Everything before the matched run.
      AddUnmatchedRun(0, bestStart1, 0, bestStart2);

      // Each matched pair becomes its own single-unit Unknown sequence.
      for (var offset = 0; offset < bestLength; offset++)
      {
          result.Add(new CorrelatedSequence
          {
              CorrelationStatus = CorrelationStatus.Unknown,
              ComparisonUnitArray1 = new ComparisonUnit[] { cul1[bestStart1 + offset] },
              ComparisonUnitArray2 = new ComparisonUnit[] { cul2[bestStart2 + offset] }
          });
      }

      // Everything after the matched run.
      int end1 = bestStart1 + bestLength;
      int end2 = bestStart2 + bestLength;
      AddUnmatchedRun(end1, cul1.Length - end1, end2, cul2.Length - end2);

      return result;
  }
  2871. }
  2872. }