MarkupSimplifier.cs 28 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754
  1. // Copyright (c) Microsoft. All rights reserved.
  2. // Licensed under the MIT license. See LICENSE file in the project root for full license information.
  3. using System;
  4. using System.Collections.Generic;
  5. using System.Linq;
  6. using System.Xml.Linq;
  7. using System.Xml.Schema;
  8. using DocumentFormat.OpenXml.Packaging;
  9. namespace OpenXmlPowerTools
  10. {
  /// <summary>
  /// Part of <see cref="WmlDocument"/> that exposes markup simplification as an instance method.
  /// </summary>
  public partial class WmlDocument
  {
      /// <summary>
      /// Passes this document and <paramref name="settings"/> to
      /// <see cref="MarkupSimplifier.SimplifyMarkup(WmlDocument, SimplifyMarkupSettings)"/>.
      /// </summary>
      /// <param name="settings">Flags selecting which simplifications are applied.</param>
      /// <returns>The document returned by the simplifier.</returns>
      public WmlDocument SimplifyMarkup(SimplifyMarkupSettings settings) =>
          MarkupSimplifier.SimplifyMarkup(this, settings);
  }
  /// <summary>
  /// Switches that select which simplifications <see cref="MarkupSimplifier"/> performs.
  /// Every switch defaults to <c>false</c>.
  /// </summary>
  public class SimplifyMarkupSettings
  {
      /// <summary>Calls <c>RevisionAccepter.AcceptRevisions</c> on the document before the parts are processed.</summary>
      public bool AcceptRevisions;

      /// <summary>Rebuilds each processed part through the normalizer and adds a fixed set of namespace declarations to its root.</summary>
      public bool NormalizeXml;

      /// <summary>Removes <c>w:bookmarkStart</c> and <c>w:bookmarkEnd</c> elements.</summary>
      public bool RemoveBookmarks;

      /// <summary>
      /// Removes comment range markers, comment references, annotation references and the
      /// "CommentReference" run style, and deletes the comments parts of the main document part.
      /// </summary>
      public bool RemoveComments;

      /// <summary>Replaces each <c>w:sdt</c> with the child elements of its <c>w:sdtContent</c>.</summary>
      public bool RemoveContentControls;

      /// <summary>Removes <c>w:endnoteReference</c> and <c>w:footnoteReference</c> elements.</summary>
      public bool RemoveEndAndFootNotes;

      /// <summary>Unwraps <c>w:fldSimple</c> and removes <c>w:fldData</c>, <c>w:fldChar</c> and <c>w:instrText</c>.</summary>
      public bool RemoveFieldCodes;

      /// <summary>Removes the bookmark start/end whose id matches the "_GoBack" bookmark found in the main document part.</summary>
      public bool RemoveGoBackBookmark;

      /// <summary>Replaces each <c>w:hyperlink</c> with its child elements.</summary>
      public bool RemoveHyperlinks;

      /// <summary>Removes <c>w:lastRenderedPageBreak</c> elements.</summary>
      public bool RemoveLastRenderedPageBreak;

      /// <summary>
      /// Strips data that differs between otherwise identical documents (total editing time, revision,
      /// created/modified dates, the "_GoBack" bookmark) and switches <see cref="RemoveRsidInfo"/> on.
      /// </summary>
      public bool RemoveMarkupForDocumentComparison;

      /// <summary>Removes <c>w:permStart</c> and <c>w:permEnd</c> elements.</summary>
      public bool RemovePermissions;

      /// <summary>Removes <c>w:proofErr</c> and <c>w:noProof</c> elements.</summary>
      public bool RemoveProof;

      /// <summary>Removes <c>w:rsids</c> from the settings part and rsid elements/attributes from processed parts.</summary>
      public bool RemoveRsidInfo;

      /// <summary>Replaces each <c>w:smartTag</c> with its child elements.</summary>
      public bool RemoveSmartTags;

      /// <summary>Removes <c>w:softHyphen</c> elements.</summary>
      public bool RemoveSoftHyphens;

      /// <summary>Removes <c>w:webHidden</c> elements.</summary>
      public bool RemoveWebHidden;

      /// <summary>Replaces a <c>w:tab</c> that is a direct child of a run with a <c>w:t</c> holding one space.</summary>
      public bool ReplaceTabsWithSpaces;
  }
  39. public static class MarkupSimplifier
  40. {
  /// <summary>
  /// Simplifies the markup of an in-memory document and returns the modified document.
  /// </summary>
  /// <param name="doc">Document to simplify.</param>
  /// <param name="settings">Flags selecting which simplifications are applied.</param>
  /// <returns>The document obtained from the memory stream after simplification.</returns>
  public static WmlDocument SimplifyMarkup(WmlDocument doc, SimplifyMarkupSettings settings)
  {
      using (var memoryDocument = new OpenXmlMemoryStreamDocument(doc))
      {
          // The package is disposed before the modified document is retrieved.
          using (WordprocessingDocument package = memoryDocument.GetWordprocessingDocument())
          {
              SimplifyMarkup(package, settings);
          }

          WmlDocument simplified = memoryDocument.GetModifiedWmlDocument();
          return simplified;
      }
  }
  /// <summary>
  /// Simplifies the markup of an open package in place.
  /// </summary>
  /// <remarks>
  /// When <c>RemoveMarkupForDocumentComparison</c> is set, this method also sets
  /// <c>RemoveRsidInfo</c> on the caller's <paramref name="settings"/> object.
  /// </remarks>
  /// <param name="doc">Package whose parts are rewritten.</param>
  /// <param name="settings">Flags selecting which simplifications are applied.</param>
  public static void SimplifyMarkup(WordprocessingDocument doc, SimplifyMarkupSettings settings)
  {
      // Document-level clean-up that is not tied to a single content part.
      if (settings.RemoveMarkupForDocumentComparison)
      {
          settings.RemoveRsidInfo = true;
          RemoveElementsForDocumentComparison(doc);
      }

      if (settings.RemoveRsidInfo)
      {
          RemoveRsidInfoInSettings(doc);
      }

      if (settings.AcceptRevisions)
      {
          RevisionAccepter.AcceptRevisions(doc);
      }

      // Content parts first, then the two style parts when present.
      foreach (OpenXmlPart contentPart in doc.ContentParts())
      {
          SimplifyMarkupForPart(contentPart, settings);
      }

      var mainPart = doc.MainDocumentPart;

      var styleDefinitions = mainPart.StyleDefinitionsPart;
      if (styleDefinitions != null)
      {
          SimplifyMarkupForPart(styleDefinitions, settings);
      }

      var stylesWithEffects = mainPart.StylesWithEffectsPart;
      if (stylesWithEffects != null)
      {
          SimplifyMarkupForPart(stylesWithEffects, settings);
      }

      if (!settings.RemoveComments)
      {
          return;
      }

      // The comment markers were removed from the content above; now drop the comment parts themselves.
      WordprocessingCommentsPart comments = mainPart.WordprocessingCommentsPart;
      if (comments != null)
      {
          mainPart.DeletePart(comments);
      }

      WordprocessingCommentsExPart commentsEx = mainPart.WordprocessingCommentsExPart;
      if (commentsEx != null)
      {
          mainPart.DeletePart(commentsEx);
      }
  }
  /// <summary>
  /// Removes every <c>w:rsids</c> element from the document settings part, if that part exists.
  /// </summary>
  /// <param name="doc">Package whose settings part is rewritten.</param>
  private static void RemoveRsidInfoInSettings(WordprocessingDocument doc)
  {
      DocumentSettingsPart settingsPart = doc.MainDocumentPart.DocumentSettingsPart;
      if (settingsPart != null)
      {
          XDocument settingsXml = settingsPart.GetXDocument();
          settingsXml.Descendants(W.rsids).Remove();
          settingsPart.PutXDocument();
      }
  }
  /// <summary>
  /// Removes data that would make two otherwise identical documents compare as different:
  /// total editing time, revision number, created/modified dates and the "_GoBack" bookmark.
  /// </summary>
  /// <param name="doc">Package that is modified in place.</param>
  private static void RemoveElementsForDocumentComparison(WordprocessingDocument doc)
  {
      // Extended (app) properties: total editing time.
      OpenXmlPart appPropsPart = doc.ExtendedFilePropertiesPart;
      if (appPropsPart != null)
      {
          appPropsPart.GetXDocument().Descendants(EP.TotalTime).Remove();
          appPropsPart.PutXDocument();
      }

      // Core properties: revision counter and timestamps.
      OpenXmlPart corePropsPart = doc.CoreFilePropertiesPart;
      if (corePropsPart != null)
      {
          XDocument coreProps = corePropsPart.GetXDocument();
          foreach (XName volatileName in new[] { CP.revision, DCTERMS.created, DCTERMS.modified })
          {
              coreProps.Descendants(volatileName).Remove();
          }

          corePropsPart.PutXDocument();
      }

      // Main document: the "_GoBack" bookmark start(s) and the end markers sharing their id.
      XDocument mainXDoc = doc.MainDocumentPart.GetXDocument();
      List<XElement> goBackStarts =
          (from start in mainXDoc.Descendants(W.bookmarkStart)
           where (string)start.Attribute(W.name) == "_GoBack"
           select start).ToList();

      foreach (XElement goBackStart in goBackStarts)
      {
          IEnumerable<XElement> matchingEnds =
              from end in mainXDoc.Descendants(W.bookmarkEnd)
              where (int)end.Attribute(W.id) == (int)goBackStart.Attribute(W.id)
              select end;
          matchingEnds.Remove();
      }

      goBackStarts.Remove();
      doc.MainDocumentPart.PutXDocument();
  }
  /// <summary>
  /// Returns a copy of <paramref name="element"/> in which every paragraph has been passed through
  /// <c>WordprocessingMLUtil.CoalesceAdjacentRunsWithIdenticalFormatting</c>.
  /// </summary>
  /// <param name="element">Root of the tree to transform.</param>
  /// <returns>The transformed tree.</returns>
  public static XElement MergeAdjacentSuperfluousRuns(XElement element) =>
      (XElement)MergeAdjacentRunsTransform(element);
  /// <summary>
  /// Returns a copy of <paramref name="element"/> in which the text of each run has been split so
  /// that every resulting run holds one character.
  /// </summary>
  /// <param name="element">Root of the tree to transform.</param>
  /// <returns>The transformed tree.</returns>
  public static XElement TransformElementToSingleCharacterRuns(XElement element) =>
      (XElement)SingleCharacterRunTransform(element);
  /// <summary>
  /// Rewrites a part so that its runs are single-character runs.
  /// </summary>
  /// <remarks>
  /// Rsid information is removed first because it is no longer valid once the runs have been split.
  /// </remarks>
  /// <param name="part">Part that is rewritten in place.</param>
  public static void TransformPartToSingleCharacterRuns(OpenXmlPart part)
  {
      XElement originalRoot = part.GetXElement();

      var withoutRsid = (XElement)RemoveRsidTransform(originalRoot);
      var singleCharacterRoot = (XElement)SingleCharacterRunTransform(withoutRsid);

      // Swap the new tree in for the part's current root, then store it.
      originalRoot.ReplaceWith(singleCharacterRoot);
      part.PutXElement();
  }
  /// <summary>
  /// Rewrites every content part of <paramref name="doc"/> into single-character runs.
  /// </summary>
  /// <param name="doc">Package that is modified in place.</param>
  /// <exception cref="OpenXmlPowerToolsException">The document contains tracked revisions.</exception>
  public static void TransformToSingleCharacterRuns(WordprocessingDocument doc)
  {
      bool hasTrackedRevisions = RevisionAccepter.HasTrackedRevisions(doc);
      if (hasTrackedRevisions)
      {
          throw new OpenXmlPowerToolsException(
              "Transforming a document to single character runs is not supported for " +
              "a document with tracked revisions.");
      }

      foreach (OpenXmlPart contentPart in doc.ContentParts())
      {
          TransformPartToSingleCharacterRuns(contentPart);
      }
  }
  /// <summary>
  /// Recursive transform that unwraps smart tags and/or content controls, depending on the settings.
  /// </summary>
  /// <param name="node">Node to transform.</param>
  /// <param name="simplifyMarkupSettings">Supplies <c>RemoveSmartTags</c> and <c>RemoveContentControls</c>.</param>
  /// <returns>
  /// The node itself when it is not an element; a sequence of transformed children when the element
  /// is unwrapped; otherwise a new element with the same name and attributes and transformed content.
  /// </returns>
  private static object RemoveCustomXmlAndContentControlsTransform(
      XNode node,
      SimplifyMarkupSettings simplifyMarkupSettings)
  {
      if (!(node is XElement element))
      {
          return node;
      }

      XName name = element.Name;

      // w:smartTag: keep only its (transformed) child elements.
      if (simplifyMarkupSettings.RemoveSmartTags && name == W.smartTag)
      {
          IEnumerable<XElement> smartTagChildren = element.Elements();
          return smartTagChildren.Select(child =>
              RemoveCustomXmlAndContentControlsTransform(child, simplifyMarkupSettings));
      }

      // w:sdt: keep only the (transformed) child elements of its w:sdtContent.
      if (simplifyMarkupSettings.RemoveContentControls && name == W.sdt)
      {
          IEnumerable<XElement> sdtContentChildren = element.Elements(W.sdtContent).Elements();
          return sdtContentChildren.Select(child =>
              RemoveCustomXmlAndContentControlsTransform(child, simplifyMarkupSettings));
      }

      return new XElement(
          name,
          element.Attributes(),
          element.Nodes().Select(child =>
              RemoveCustomXmlAndContentControlsTransform(child, simplifyMarkupSettings)));
  }
  /// <summary>
  /// Recursive transform that drops <c>w:rsid</c> elements and all rsid attributes.
  /// </summary>
  /// <param name="node">Node to transform.</param>
  /// <returns>
  /// <c>null</c> for a <c>w:rsid</c> element, the node itself when it is not an element, otherwise a new
  /// element without rsid attributes and with transformed content.
  /// </returns>
  private static object RemoveRsidTransform(XNode node)
  {
      if (!(node is XElement element))
      {
          return node;
      }

      if (element.Name == W.rsid)
      {
          return null;
      }

      bool IsRsidAttribute(XAttribute attribute)
      {
          XName attributeName = attribute.Name;
          return attributeName == W.rsid ||
                 attributeName == W.rsidDel ||
                 attributeName == W.rsidP ||
                 attributeName == W.rsidR ||
                 attributeName == W.rsidRDefault ||
                 attributeName == W.rsidRPr ||
                 attributeName == W.rsidSect ||
                 attributeName == W.rsidTr;
      }

      return new XElement(
          element.Name,
          element.Attributes().Where(attribute => !IsRsidAttribute(attribute)),
          element.Nodes().Select(RemoveRsidTransform));
  }
  /// <summary>
  /// Recursive transform that hands every paragraph to
  /// <c>WordprocessingMLUtil.CoalesceAdjacentRunsWithIdenticalFormatting</c> and rebuilds all other elements.
  /// </summary>
  /// <param name="node">Node to transform.</param>
  /// <returns>The node itself when it is not an element; otherwise the transformed element.</returns>
  private static object MergeAdjacentRunsTransform(XNode node)
  {
      if (!(node is XElement element))
      {
          return node;
      }

      // Paragraphs are delegated as a whole; the recursion does not descend into them here.
      if (element.Name == W.p)
      {
          return WordprocessingMLUtil.CoalesceAdjacentRunsWithIdenticalFormatting(element);
      }

      return new XElement(
          element.Name,
          element.Attributes(),
          element.Nodes().Select(MergeAdjacentRunsTransform));
  }
  /// <summary>
  /// Recursive transform that removes <c>w:r</c>, <c>w:rPr</c> and <c>w:pPr</c> elements that have no child elements.
  /// </summary>
  /// <param name="node">Node to transform.</param>
  /// <returns>
  /// <c>null</c> for an element that is removed, the node itself when it is not an element, otherwise a
  /// new element with the same name and attributes and transformed content.
  /// </returns>
  private static object RemoveEmptyRunsAndRunPropertiesTransform(XNode node)
  {
      if (!(node is XElement element))
      {
          return node;
      }

      XName name = element.Name;
      bool isCandidate = name == W.r || name == W.rPr || name == W.pPr;
      if (isCandidate && !element.Elements().Any())
      {
          return null;
      }

      return new XElement(
          name,
          element.Attributes(),
          element.Nodes().Select(RemoveEmptyRunsAndRunPropertiesTransform));
  }
  /// <summary>
  /// Recursive transform that merges adjacent <c>w:instrText</c> children of a run into one element.
  /// </summary>
  /// <param name="node">Node to transform.</param>
  /// <returns>
  /// The node itself when it is not an element; a new <c>w:r</c> with merged instruction text for a run
  /// that contains <c>w:instrText</c>; otherwise a new element with transformed content.
  /// </returns>
  private static object MergeAdjacentInstrText(
      XNode node)
  {
      if (!(node is XElement element))
      {
          return node;
      }

      bool isRunWithInstrText = element.Name == W.r && element.Elements(W.instrText).Any();
      if (!isRunWithInstrText)
      {
          return new XElement(
              element.Name,
              element.Attributes(),
              element.Nodes().Select(MergeAdjacentInstrText));
      }

      // Turns one group of adjacent children into the content that replaces it.
      object MergeGroup(IGrouping<bool, XElement> group)
      {
          // Children that are not w:instrText are carried over unchanged.
          if (!group.Key)
          {
              return group;
          }

          // If .doc files are converted to .docx by the Binary to Open XML Translator,
          // the w:instrText elements might be empty, in which case the merged text is
          // an empty string.
          string mergedText = group.Select(instrText => (string)instrText).StringConcatenate();
          return string.IsNullOrEmpty(mergedText)
              ? new XElement(W.instrText)
              : new XElement(W.instrText,
                  XmlUtil.GetXmlSpaceAttribute(mergedText),
                  mergedText);
      }

      IEnumerable<IGrouping<bool, XElement>> adjacentGroups = element
          .Elements()
          .GroupAdjacent(child => child.Name == W.instrText);

      return new XElement(W.r, adjacentGroups.Select(group => MergeGroup(group)));
  }
  // lastRenderedPageBreak, permEnd, permStart, proofErr, noProof, softHyphen:
  //   Removed when the corresponding setting is on.
  // fldSimple, fldData, fldChar, instrText:
  //   For hyperlinks, generate same in XHtml. Other than hyperlinks, do the following:
  //   - collapse fldSimple (keep its transformed child elements)
  //   - remove fldData, fldChar, instrText.

  /// <summary>
  /// Recursive transform that removes or unwraps individual elements according to the settings.
  /// </summary>
  /// <param name="node">Node to transform.</param>
  /// <param name="settings">Flags selecting which elements are removed or unwrapped.</param>
  /// <param name="parameters">Supplies the id of the "_GoBack" bookmark, when one was found.</param>
  /// <returns>
  /// The node itself when it is not an element; <c>null</c> for a removed element; a sequence of
  /// child elements for an unwrapped element; otherwise a new element with transformed content.
  /// </returns>
  private static object SimplifyMarkupTransform(
      XNode node,
      SimplifyMarkupSettings settings,
      SimplifyMarkupParameters parameters)
  {
      if (!(node is XElement element))
      {
          return node;
      }

      XName name = element.Name;

      if (settings.RemovePermissions && (name == W.permEnd || name == W.permStart))
      {
          return null;
      }

      if (settings.RemoveProof && (name == W.proofErr || name == W.noProof))
      {
          return null;
      }

      if (settings.RemoveSoftHyphens && name == W.softHyphen)
      {
          return null;
      }

      if (settings.RemoveLastRenderedPageBreak && name == W.lastRenderedPageBreak)
      {
          return null;
      }

      bool isBookmarkMarker = name == W.bookmarkStart || name == W.bookmarkEnd;

      if (settings.RemoveBookmarks && isBookmarkMarker)
      {
          return null;
      }

      // Only the bookmark whose id equals the recorded "_GoBack" id is dropped here.
      if (settings.RemoveGoBackBookmark &&
          isBookmarkMarker &&
          (int)element.Attribute(W.id) == parameters.GoBackId)
      {
          return null;
      }

      if (settings.RemoveWebHidden && name == W.webHidden)
      {
          return null;
      }

      // A tab directly inside a run becomes a text element holding one preserved space.
      if (settings.ReplaceTabsWithSpaces && name == W.tab && element.Parent?.Name == W.r)
      {
          return new XElement(W.t, new XAttribute(XNamespace.Xml + "space", "preserve"), " ");
      }

      if (settings.RemoveComments)
      {
          if (name == W.commentRangeStart ||
              name == W.commentRangeEnd ||
              name == W.commentReference ||
              name == W.annotationRef)
          {
              return null;
          }

          if (name == W.rStyle && (string)element.Attribute(W.val) == "CommentReference")
          {
              return null;
          }
      }

      if (settings.RemoveEndAndFootNotes && (name == W.endnoteReference || name == W.footnoteReference))
      {
          return null;
      }

      if (settings.RemoveFieldCodes)
      {
          if (name == W.fldSimple)
          {
              return element.Elements().Select(child => SimplifyMarkupTransform(child, settings, parameters));
          }

          if (name == W.fldData || name == W.fldChar || name == W.instrText)
          {
              return null;
          }
      }

      // The children of a hyperlink are returned as they are, without being transformed in this pass.
      if (settings.RemoveHyperlinks && name == W.hyperlink)
      {
          return element.Elements();
      }

      return new XElement(
          name,
          element.Attributes(),
          element.Nodes().Select(child => SimplifyMarkupTransform(child, settings, parameters)));
  }
  /// <summary>
  /// Builds a normalized copy of <paramref name="source"/>.
  /// </summary>
  /// <param name="source">Document to normalize.</param>
  /// <param name="schema">
  /// Optional schema set. When it is not <c>null</c> the document is validated against it, which
  /// throws on errors and attaches schema information used for typed normalization.
  /// </param>
  /// <returns>A new document with the same declaration and a normalized root element.</returns>
  private static XDocument Normalize(XDocument source, XmlSchemaSet schema)
  {
      bool havePsvi = schema != null;
      if (havePsvi)
      {
          // validate, throw errors, add PSVI information
          source.Validate(schema, null, true);
      }

      // Comments, processing instructions and text nodes directly under the document are dropped.
      // Only white space text nodes are allowed there, so removing all text nodes is safe.
      XNode NormalizeTopLevelNode(XNode child)
      {
          if (child is XComment || child is XProcessingInstruction || child is XText)
          {
              return null;
          }

          if (child is XElement rootElement)
          {
              return NormalizeElement(rootElement, havePsvi);
          }

          return child;
      }

      return new XDocument(
          source.Declaration,
          source.Nodes().Select(child => NormalizeTopLevelNode(child)));
  }
  391. // TODO: Check whether this can be removed.
  392. //private static bool DeepEqualsWithNormalization(XDocument doc1, XDocument doc2, XmlSchemaSet schemaSet)
  393. //{
  394. // XDocument d1 = Normalize(doc1, schemaSet);
  395. // XDocument d2 = Normalize(doc2, schemaSet);
  396. // return XNode.DeepEquals(d1, d2);
  397. //}
  /// <summary>
  /// Returns the attributes of <paramref name="element"/> in normalized form: namespace declarations
  /// and schema-location attributes are dropped, and the rest are ordered by namespace name and then
  /// local name.
  /// </summary>
  /// <param name="element">Element whose attributes are normalized.</param>
  /// <param name="havePsvi">
  /// <c>true</c> when schema information is attached to the tree; attribute values are then rewritten
  /// from their typed value according to the schema type code.
  /// </param>
  /// <returns>A lazily evaluated sequence of attributes.</returns>
  private static IEnumerable<XAttribute> NormalizeAttributes(XElement element, bool havePsvi)
  {
      return element
          .Attributes()
          .Where(a => !a.IsNamespaceDeclaration &&
                      a.Name != XSI.schemaLocation &&
                      a.Name != XSI.noNamespaceSchemaLocation)
          .OrderBy(a => a.Name.NamespaceName)
          .ThenBy(a => a.Name.LocalName)
          .Select(a =>
          {
              if (havePsvi)
              {
                  XmlTypeCode? typeCode = a.GetSchemaInfo()?.SchemaType?.TypeCode;
                  switch (typeCode)
                  {
                      case XmlTypeCode.Boolean:
                          return new XAttribute(a.Name, (bool)a);
                      case XmlTypeCode.DateTime:
                          return new XAttribute(a.Name, (DateTime)a);
                      case XmlTypeCode.Decimal:
                          return new XAttribute(a.Name, (decimal)a);
                      case XmlTypeCode.Double:
                          return new XAttribute(a.Name, (double)a);
                      case XmlTypeCode.Float:
                          return new XAttribute(a.Name, (float)a);
                      case XmlTypeCode.HexBinary:
                      case XmlTypeCode.Language:
                          // Lower-case with the invariant culture so that the normalized value does
                          // not depend on the current thread culture (e.g. "I" under tr-TR).
                          return new XAttribute(a.Name, ((string)a).ToLowerInvariant());
                  }
              }

              // No schema information, or a type without special handling: keep the attribute.
              return a;
          });
  }
  /// <summary>
  /// Normalizes one child node of an element.
  /// </summary>
  /// <param name="node">Node to normalize.</param>
  /// <param name="havePsvi"><c>true</c> when schema information is attached to the tree.</param>
  /// <returns>
  /// <c>null</c> for comments and processing instructions, a normalized element for elements, and the
  /// node itself for anything else (text and CDATA).
  /// </returns>
  private static XNode NormalizeNode(XNode node, bool havePsvi)
  {
      switch (node)
      {
          // trim comments and processing instructions from normalized tree
          case XComment _:
          case XProcessingInstruction _:
              return null;

          case XElement childElement:
              return NormalizeElement(childElement, havePsvi);

          // Only thing left is XCData and XText; they are returned as they are.
          default:
              return node;
      }
  }
  /// <summary>
  /// Builds a normalized copy of <paramref name="element"/>.
  /// </summary>
  /// <remarks>
  /// Attributes go through <c>NormalizeAttributes</c>. When schema information is available and the
  /// element has one of the handled simple types, its content is rewritten from the typed value;
  /// otherwise every child node is normalized recursively.
  /// </remarks>
  /// <param name="element">Element to normalize.</param>
  /// <param name="havePsvi"><c>true</c> when schema information is attached to the tree.</param>
  /// <returns>A new element with the same name.</returns>
  private static XElement NormalizeElement(XElement element, bool havePsvi)
  {
      // Without schema information no type code is available and the default branch is taken.
      XmlTypeCode? typeCode = havePsvi
          ? element.GetSchemaInfo()?.SchemaType?.TypeCode
          : null;

      object content;
      switch (typeCode)
      {
          case XmlTypeCode.Boolean:
              content = (bool)element;
              break;
          case XmlTypeCode.DateTime:
              content = (DateTime)element;
              break;
          case XmlTypeCode.Decimal:
              content = (decimal)element;
              break;
          case XmlTypeCode.Double:
              content = (double)element;
              break;
          case XmlTypeCode.Float:
              content = (float)element;
              break;
          case XmlTypeCode.HexBinary:
          case XmlTypeCode.Language:
              // Lower-case with the invariant culture so that the normalized value does not
              // depend on the current thread culture (e.g. "I" under tr-TR).
              content = ((string)element).ToLowerInvariant();
              break;
          default:
              content = element.Nodes().Select(n => NormalizeNode(n, havePsvi));
              break;
      }

      return new XElement(element.Name,
          NormalizeAttributes(element, havePsvi),
          content);
  }
  /// <summary>
  /// Applies the simplifications selected in <paramref name="settings"/> to one part and stores the result.
  /// </summary>
  /// <remarks>
  /// Content controls / smart tags and rsid information are removed once up front. The remaining
  /// transforms are repeated until a full pass leaves the tree unchanged, because one transform can
  /// create work for another (for example runs that become empty).
  /// </remarks>
  /// <param name="part">Part that is rewritten.</param>
  /// <param name="settings">Flags selecting which simplifications are applied.</param>
  private static void SimplifyMarkupForPart(OpenXmlPart part, SimplifyMarkupSettings settings)
  {
      var parameters = new SimplifyMarkupParameters();
      if (part.ContentType == "application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml")
      {
          var doc = (WordprocessingDocument)part.OpenXmlPackage;
          if (settings.RemoveGoBackBookmark)
          {
              // Record the id of the "_GoBack" bookmark so that its start and end can be matched later.
              XElement goBackBookmark = doc
                  .MainDocumentPart
                  .GetXDocument()
                  .Descendants(W.bookmarkStart)
                  .FirstOrDefault(bm => (string)bm.Attribute(W.name) == "_GoBack");
              if (goBackBookmark != null)
              {
                  parameters.GoBackId = (int)goBackBookmark.Attribute(W.id);
              }
          }
      }

      XDocument xdoc = part.GetXDocument();
      XElement newRoot = xdoc.Root;

      // Need to do this first to enable simplifying hyperlinks.
      if (settings.RemoveContentControls || settings.RemoveSmartTags)
      {
          newRoot = (XElement)RemoveCustomXmlAndContentControlsTransform(newRoot, settings);
      }

      // This may touch many elements, so needs to be its own transform.
      if (settings.RemoveRsidInfo)
      {
          newRoot = (XElement)RemoveRsidTransform(newRoot);
      }

      // Every setting that SimplifyMarkupTransform acts on must be listed here. RemoveSoftHyphens
      // and RemoveLastRenderedPageBreak used to be missing, so those two settings had no effect
      // unless another listed setting was also switched on. The settings do not change inside the
      // loop, so the decision is made once.
      bool runSimplifyMarkupTransform =
          settings.RemoveComments ||
          settings.RemoveEndAndFootNotes ||
          settings.ReplaceTabsWithSpaces ||
          settings.RemoveFieldCodes ||
          settings.RemovePermissions ||
          settings.RemoveProof ||
          settings.RemoveSoftHyphens ||
          settings.RemoveLastRenderedPageBreak ||
          settings.RemoveBookmarks ||
          settings.RemoveWebHidden ||
          settings.RemoveGoBackBookmark ||
          settings.RemoveHyperlinks;

      var prevNewRoot = new XDocument(newRoot);
      while (true)
      {
          if (runSimplifyMarkupTransform)
          {
              newRoot = (XElement)SimplifyMarkupTransform(newRoot, settings, parameters);
          }

          // Remove runs and run properties that have become empty due to previous transforms.
          newRoot = (XElement)RemoveEmptyRunsAndRunPropertiesTransform(newRoot);

          // Merge adjacent runs that have identical run properties.
          newRoot = (XElement)MergeAdjacentRunsTransform(newRoot);

          // Merge adjacent instrText elements.
          newRoot = (XElement)MergeAdjacentInstrText(newRoot);

          // Separate run children into separate runs
          newRoot = (XElement)SeparateRunChildrenIntoSeparateRuns(newRoot);

          // Stop once a whole pass no longer changes the tree.
          if (XNode.DeepEquals(prevNewRoot.Root, newRoot))
          {
              break;
          }

          prevNewRoot = new XDocument(newRoot);
      }

      if (settings.NormalizeXml)
      {
          XAttribute[] nsAttrs =
          {
              new XAttribute(XNamespace.Xmlns + "wpc", WPC.wpc),
              new XAttribute(XNamespace.Xmlns + "mc", MC.mc),
              new XAttribute(XNamespace.Xmlns + "o", O.o),
              new XAttribute(XNamespace.Xmlns + "r", R.r),
              new XAttribute(XNamespace.Xmlns + "m", M.m),
              new XAttribute(XNamespace.Xmlns + "v", VML.vml),
              new XAttribute(XNamespace.Xmlns + "wp14", WP14.wp14),
              new XAttribute(XNamespace.Xmlns + "wp", WP.wp),
              new XAttribute(XNamespace.Xmlns + "w10", W10.w10),
              new XAttribute(XNamespace.Xmlns + "w", W.w),
              new XAttribute(XNamespace.Xmlns + "w14", W14.w14),
              new XAttribute(XNamespace.Xmlns + "w15", W15.w15),
              new XAttribute(XNamespace.Xmlns + "w16se", W16SE.w16se),
              new XAttribute(XNamespace.Xmlns + "wpg", WPG.wpg),
              new XAttribute(XNamespace.Xmlns + "wpi", WPI.wpi),
              new XAttribute(XNamespace.Xmlns + "wne", WNE.wne),
              new XAttribute(XNamespace.Xmlns + "wps", WPS.wps),
              new XAttribute(MC.Ignorable, "w14 wp14 w15 w16se")
          };

          // Normalization drops namespace declarations; add the fixed set back on the new root.
          XDocument newXDoc = Normalize(new XDocument(newRoot), null);
          newRoot = newXDoc.Root;
          if (newRoot != null)
          {
              foreach (XAttribute nsAttr in nsAttrs)
              {
                  if (newRoot.Attribute(nsAttr.Name) == null)
                  {
                      newRoot.Add(nsAttr);
                  }
              }
          }

          part.PutXDocument(newXDoc);
      }
      else
      {
          part.PutXDocument(new XDocument(newRoot));
      }
  }
  /// <summary>
  /// Recursive transform that splits every run into one run per child element.
  /// </summary>
  /// <param name="node">Node to transform.</param>
  /// <returns>
  /// The node itself when it is not an element; for a <c>w:r</c>, a sequence of new runs that each hold
  /// the run's first <c>w:rPr</c> (if any) and one of its other child elements; otherwise a new element
  /// with transformed content.
  /// </returns>
  private static object SeparateRunChildrenIntoSeparateRuns(XNode node)
  {
      if (!(node is XElement element))
      {
          return node;
      }

      if (element.Name != W.r)
      {
          return new XElement(
              element.Name,
              element.Attributes(),
              element.Nodes().Select(SeparateRunChildrenIntoSeparateRuns));
      }

      XElement runProperties = element.Element(W.rPr);
      return
          from child in element.Elements()
          where child.Name != W.rPr
          select new XElement(W.r, runProperties, child);
  }
  /// <summary>
  /// Recursive transform that splits each run so that every resulting run holds either one character
  /// of text or one non-text child element.
  /// </summary>
  /// <remarks>
  /// Text is split per Unicode character, not per UTF-16 code unit: a surrogate pair stays together in
  /// one run. Splitting it would produce two runs that each contain a lone surrogate.
  /// </remarks>
  /// <param name="node">Node to transform.</param>
  /// <returns>
  /// The node itself when it is not an element; for a <c>w:r</c>, a sequence of sequences of new runs;
  /// otherwise a new element with transformed content.
  /// </returns>
  private static object SingleCharacterRunTransform(XNode node)
  {
      if (node is XElement element)
      {
          if (element.Name == W.r)
          {
              return element.Elements()
                  .Where(e => e.Name != W.rPr)
                  .GroupAdjacent(sr => sr.Name == W.t)
                  .Select(g =>
                  {
                      if (g.Key)
                      {
                          // Adjacent w:t elements are concatenated and then split per character.
                          string s = g.Select(t => (string)t).StringConcatenate();
                          return SplitIntoUnicodeCharacters(s).Select(c =>
                              new XElement(W.r,
                                  element.Elements(W.rPr),
                                  new XElement(W.t,
                                      // String overload, as already used for w:instrText in this class.
                                      XmlUtil.GetXmlSpaceAttribute(c),
                                      c)));
                      }

                      // Every non-text child gets its own run with a copy of the run properties.
                      return g.Select(sr =>
                          new XElement(W.r,
                              element.Elements(W.rPr),
                              new XElement(sr.Name,
                                  sr.Attributes(),
                                  sr.Nodes().Select(SingleCharacterRunTransform))));
                  });
          }

          return new XElement(element.Name,
              element.Attributes(),
              element.Nodes().Select(SingleCharacterRunTransform));
      }

      return node;
  }

  /// <summary>
  /// Splits <paramref name="text"/> into one string per UTF-16 code unit, except that a high surrogate
  /// immediately followed by a low surrogate is returned as a single two-character string.
  /// </summary>
  private static IEnumerable<string> SplitIntoUnicodeCharacters(string text)
  {
      for (int i = 0; i < text.Length; i++)
      {
          bool isSurrogatePair =
              char.IsHighSurrogate(text[i]) &&
              i + 1 < text.Length &&
              char.IsLowSurrogate(text[i + 1]);

          if (isSurrogatePair)
          {
              yield return text.Substring(i, 2);
              i++; // the low surrogate has been consumed as well
          }
          else
          {
              yield return text[i].ToString();
          }
      }
  }
  /// <summary>
  /// Values computed once per part and handed to the recursive simplify transform.
  /// </summary>
  private class SimplifyMarkupParameters
  {
      /// <summary>
      /// Id of the "_GoBack" bookmark, or <c>null</c> when none was recorded.
      /// </summary>
      public int? GoBackId { get; set; }
  }
  648. }
  649. }