MarkupSimplifier.cs 28 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692
  1. // Copyright (c) Microsoft. All rights reserved.
  2. // Licensed under the MIT license. See LICENSE file in the project root for full license information.
  3. using System;
  4. using System.Collections.Generic;
  5. using System.Diagnostics.CodeAnalysis;
  6. using System.Linq;
  7. using System.Xml.Linq;
  8. using System.Xml.Schema;
  9. using DocumentFormat.OpenXml.Packaging;
  10. namespace OpenXmlPowerTools
  11. {
  public partial class WmlDocument
  {
      /// <summary>
      /// Instance-level convenience wrapper: forwards this document and
      /// <paramref name="settings"/> to the static MarkupSimplifier.SimplifyMarkup
      /// overload and returns whatever document that call produces.
      /// </summary>
      /// <param name="settings">Switches selecting which simplifications to apply.</param>
      /// <returns>The document returned by MarkupSimplifier.SimplifyMarkup.</returns>
      public WmlDocument SimplifyMarkup(SimplifyMarkupSettings settings)
      {
          WmlDocument simplified = MarkupSimplifier.SimplifyMarkup(this, settings);
          return simplified;
      }
  }
  /// <summary>
  /// Switches that select which simplifications MarkupSimplifier applies.
  /// Every switch defaults to <c>false</c>.
  /// </summary>
  public class SimplifyMarkupSettings
  {
      // ---- Whole-document operations ----

      /// <summary>Accept tracked revisions (via RevisionAccepter) before the parts are simplified.</summary>
      public bool AcceptRevisions;

      /// <summary>
      /// After simplification, rebuild each part's XML through the normalizer (comments and
      /// processing instructions dropped, attributes sorted) and add a fixed set of
      /// namespace declarations to the root element.
      /// </summary>
      public bool NormalizeXml;

      // ---- Element-level removals ----

      /// <summary>Request removal of all w:bookmarkStart and w:bookmarkEnd elements.</summary>
      public bool RemoveBookmarks;

      /// <summary>
      /// Request removal of comment range/reference markup and the "CommentReference"
      /// run style, and deletion of the comments parts of the main document part.
      /// </summary>
      public bool RemoveComments;

      /// <summary>Replace each w:sdt with the children of its w:sdtContent.</summary>
      public bool RemoveContentControls;

      /// <summary>Request removal of w:endnoteReference and w:footnoteReference elements.</summary>
      public bool RemoveEndAndFootNotes;

      /// <summary>
      /// Request unwrapping of w:fldSimple and removal of w:fldData, w:fldChar and w:instrText.
      /// </summary>
      public bool RemoveFieldCodes;

      /// <summary>
      /// Request removal of the bookmark start/end whose id matches the "_GoBack"
      /// bookmark found in the main document part.
      /// </summary>
      public bool RemoveGoBackBookmark;

      /// <summary>Request replacement of each w:hyperlink with its child elements.</summary>
      public bool RemoveHyperlinks;

      /// <summary>Request removal of w:lastRenderedPageBreak elements.</summary>
      public bool RemoveLastRenderedPageBreak;

      /// <summary>
      /// Prepare the document for comparison: forces <see cref="RemoveRsidInfo"/> to
      /// <c>true</c> and removes total-time, revision, created and modified properties
      /// as well as the "_GoBack" bookmark.
      /// </summary>
      public bool RemoveMarkupForDocumentComparison;

      /// <summary>Request removal of w:permStart and w:permEnd elements.</summary>
      public bool RemovePermissions;

      /// <summary>Request removal of w:proofErr and w:noProof elements.</summary>
      public bool RemoveProof;

      /// <summary>
      /// Remove w:rsids from the settings part and rsid elements/attributes from the
      /// processed parts.
      /// </summary>
      public bool RemoveRsidInfo;

      /// <summary>Replace each w:smartTag with its child elements.</summary>
      public bool RemoveSmartTags;

      /// <summary>Request removal of w:softHyphen elements.</summary>
      public bool RemoveSoftHyphens;

      /// <summary>Request removal of w:webHidden elements.</summary>
      public bool RemoveWebHidden;

      /// <summary>
      /// Request replacement of each w:tab that is a direct child of a run with a
      /// w:t element containing a single space.
      /// </summary>
      public bool ReplaceTabsWithSpaces;
  }
  40. [SuppressMessage("ReSharper", "InconsistentNaming")]
  41. public static class MarkupSimplifier
  42. {
  /// <summary>
  /// Simplifies an in-memory document: opens <paramref name="doc"/> through an
  /// OpenXmlMemoryStreamDocument, runs the package-based overload on it, and
  /// returns the modified document read back from the memory stream wrapper.
  /// </summary>
  /// <param name="doc">The document to simplify.</param>
  /// <param name="settings">Switches selecting which simplifications to apply.</param>
  /// <returns>The result of GetModifiedWmlDocument() on the memory stream wrapper.</returns>
  public static WmlDocument SimplifyMarkup(WmlDocument doc, SimplifyMarkupSettings settings)
  {
      using (OpenXmlMemoryStreamDocument memoryDocument = new OpenXmlMemoryStreamDocument(doc))
      {
          // The package is disposed before the modified document is read back.
          using (WordprocessingDocument package = memoryDocument.GetWordprocessingDocument())
          {
              SimplifyMarkup(package, settings);
          }

          WmlDocument simplified = memoryDocument.GetModifiedWmlDocument();
          return simplified;
      }
  }
  /// <summary>
  /// Simplifies the markup of an open package in place, as selected by
  /// <paramref name="settings"/>.
  /// </summary>
  /// <remarks>
  /// When RemoveMarkupForDocumentComparison is set, this method also sets
  /// RemoveRsidInfo to <c>true</c> on the settings object that was passed in.
  /// </remarks>
  /// <param name="doc">The package to modify.</param>
  /// <param name="settings">Switches selecting which simplifications to apply.</param>
  public static void SimplifyMarkup(WordprocessingDocument doc, SimplifyMarkupSettings settings)
  {
      // Comparison mode implies rsid removal and strips comparison-irrelevant markup.
      if (settings.RemoveMarkupForDocumentComparison)
      {
          settings.RemoveRsidInfo = true;
          RemoveElementsForDocumentComparison(doc);
      }

      if (settings.RemoveRsidInfo)
      {
          RemoveRsidInfoInSettings(doc);
      }

      if (settings.AcceptRevisions)
      {
          RevisionAccepter.AcceptRevisions(doc);
      }

      foreach (OpenXmlPart contentPart in doc.ContentParts())
      {
          SimplifyMarkupForPart(contentPart, settings);
      }

      // The two style parts are processed after the content parts, when present.
      MainDocumentPart mainPart = doc.MainDocumentPart;

      OpenXmlPart styleDefinitions = mainPart.StyleDefinitionsPart;
      if (styleDefinitions != null)
      {
          SimplifyMarkupForPart(styleDefinitions, settings);
      }

      OpenXmlPart stylesWithEffects = mainPart.StylesWithEffectsPart;
      if (stylesWithEffects != null)
      {
          SimplifyMarkupForPart(stylesWithEffects, settings);
      }

      if (!settings.RemoveComments)
      {
          return;
      }

      // With the comment markup gone from the content, drop the comment parts themselves.
      WordprocessingCommentsPart comments = mainPart.WordprocessingCommentsPart;
      if (comments != null)
      {
          mainPart.DeletePart(comments);
      }

      WordprocessingCommentsExPart commentsEx = mainPart.WordprocessingCommentsExPart;
      if (commentsEx != null)
      {
          mainPart.DeletePart(commentsEx);
      }
  }
  /// <summary>
  /// Removes every w:rsids element from the document settings part and writes the
  /// part back. Does nothing when the main document part has no settings part.
  /// </summary>
  private static void RemoveRsidInfoInSettings(WordprocessingDocument doc)
  {
      DocumentSettingsPart settingsPart = doc.MainDocumentPart.DocumentSettingsPart;
      if (settingsPart != null)
      {
          XDocument settingsXml = settingsPart.GetXDocument();
          IEnumerable<XElement> rsidLists = settingsXml.Descendants(W.rsids);
          rsidLists.Remove();
          settingsPart.PutXDocument();
      }
  }
  /// <summary>
  /// Strips markup that would otherwise show up as differences when two documents are
  /// compared: the TotalTime extended property; the revision, created and modified
  /// core properties; and the "_GoBack" bookmark (start and matching end elements)
  /// in the main document part.
  /// </summary>
  /// <remarks>
  /// Bookmark ids are read with nullable casts, so bookmark elements without a w:id
  /// attribute are skipped instead of raising ArgumentNullException. The matching
  /// bookmarkEnd elements are found in a single pass over the document.
  /// </remarks>
  /// <param name="doc">The package to modify in place.</param>
  private static void RemoveElementsForDocumentComparison(WordprocessingDocument doc)
  {
      OpenXmlPart part = doc.ExtendedFilePropertiesPart;
      if (part != null)
      {
          XDocument appPropsXDoc = part.GetXDocument();
          appPropsXDoc.Descendants(EP.TotalTime).Remove();
          part.PutXDocument();
      }

      part = doc.CoreFilePropertiesPart;
      if (part != null)
      {
          XDocument corePropsXDoc = part.GetXDocument();
          corePropsXDoc.Descendants(CP.revision).Remove();
          corePropsXDoc.Descendants(DCTERMS.created).Remove();
          corePropsXDoc.Descendants(DCTERMS.modified).Remove();
          part.PutXDocument();
      }

      XDocument mainXDoc = doc.MainDocumentPart.GetXDocument();
      List<XElement> goBackStarts = mainXDoc
          .Descendants(W.bookmarkStart)
          .Where(b => (string) b.Attribute(W.name) == "_GoBack")
          .ToList();

      // Collect the ids of all _GoBack starts once; starts without an id cannot be
      // paired with an end element and contribute nothing here.
      var goBackIds = new HashSet<int>();
      foreach (XElement start in goBackStarts)
      {
          int? startId = (int?) start.Attribute(W.id);
          if (startId != null)
              goBackIds.Add(startId.Value);
      }

      if (goBackIds.Count > 0)
      {
          // Extensions.Remove snapshots the query before detaching nodes, so it is
          // safe to remove while the source is a live Descendants() query.
          mainXDoc
              .Descendants(W.bookmarkEnd)
              .Where(be =>
              {
                  int? endId = (int?) be.Attribute(W.id);
                  return endId != null && goBackIds.Contains(endId.Value);
              })
              .Remove();
      }

      goBackStarts.Remove();
      doc.MainDocumentPart.PutXDocument();
  }
  /// <summary>
  /// Public entry point for the adjacent-run merge transform: applies
  /// MergeAdjacentRunsTransform to <paramref name="element"/> and returns the
  /// result cast to XElement.
  /// </summary>
  public static XElement MergeAdjacentSuperfluousRuns(XElement element)
  {
      object merged = MergeAdjacentRunsTransform(element);
      return (XElement) merged;
  }
  /// <summary>
  /// Public entry point for the single-character-run transform: applies
  /// SingleCharacterRunTransform to <paramref name="element"/> and returns the
  /// result cast to XElement.
  /// </summary>
  /// <remarks>
  /// The underlying transform returns a sequence (not an XElement) when handed a
  /// w:r element directly, in which case the cast here fails.
  /// </remarks>
  public static XElement TransformElementToSingleCharacterRuns(XElement element)
  {
      object transformed = SingleCharacterRunTransform(element);
      return (XElement) transformed;
  }
  /// <summary>
  /// Rewrites a part so that text runs hold a single character each, then writes
  /// the part back.
  /// </summary>
  /// <param name="part">The part whose XML is replaced in place.</param>
  public static void TransformPartToSingleCharacterRuns(OpenXmlPart part)
  {
      XDocument partXml = part.GetXDocument();

      // Rsid info is no longer valid once runs are split, so it is stripped first.
      XElement withoutRsid = (XElement) RemoveRsidTransform(partXml.Root);
      XElement singleCharacterRoot = (XElement) SingleCharacterRunTransform(withoutRsid);

      XElement currentRoot = partXml.Elements().First();
      currentRoot.ReplaceWith(singleCharacterRoot);
      part.PutXDocument();
  }
  /// <summary>
  /// Applies TransformPartToSingleCharacterRuns to every content part of
  /// <paramref name="doc"/>.
  /// </summary>
  /// <exception cref="OpenXmlPowerToolsException">
  /// Thrown when RevisionAccepter reports that the document has tracked revisions.
  /// </exception>
  public static void TransformToSingleCharacterRuns(WordprocessingDocument doc)
  {
      bool hasTrackedRevisions = RevisionAccepter.HasTrackedRevisions(doc);
      if (hasTrackedRevisions)
      {
          throw new OpenXmlPowerToolsException(
              "Transforming a document to single character runs is not supported for " +
              "a document with tracked revisions.");
      }

      foreach (OpenXmlPart contentPart in doc.ContentParts())
      {
          TransformPartToSingleCharacterRuns(contentPart);
      }
  }
  /// <summary>
  /// Recursive transform that unwraps smart tags and content controls.
  /// </summary>
  /// <remarks>
  /// A w:smartTag (when RemoveSmartTags is set) is replaced by its transformed child
  /// elements. A w:sdt (when RemoveContentControls is set) is replaced by the
  /// transformed child elements of its w:sdtContent. Any other element is rebuilt with
  /// the same name and attributes and transformed child nodes; non-element nodes are
  /// returned as they are.
  /// </remarks>
  /// <returns>
  /// The node itself, a new XElement, or a lazy sequence of transformed children.
  /// </returns>
  private static object RemoveCustomXmlAndContentControlsTransform(
      XNode node, SimplifyMarkupSettings simplifyMarkupSettings)
  {
      var element = node as XElement;
      if (element == null)
          return node;

      bool unwrapSmartTag = simplifyMarkupSettings.RemoveSmartTags && element.Name == W.smartTag;
      if (unwrapSmartTag)
      {
          IEnumerable<XElement> tagChildren = element.Elements();
          return tagChildren.Select(child =>
              RemoveCustomXmlAndContentControlsTransform(child, simplifyMarkupSettings));
      }

      bool unwrapContentControl = simplifyMarkupSettings.RemoveContentControls && element.Name == W.sdt;
      if (unwrapContentControl)
      {
          IEnumerable<XElement> controlContent = element.Elements(W.sdtContent).Elements();
          return controlContent.Select(child =>
              RemoveCustomXmlAndContentControlsTransform(child, simplifyMarkupSettings));
      }

      return new XElement(
          element.Name,
          element.Attributes(),
          element.Nodes().Select(child =>
              RemoveCustomXmlAndContentControlsTransform(child, simplifyMarkupSettings)));
  }
  /// <summary>
  /// Recursive transform that strips revision-save-id information: w:rsid elements
  /// are dropped and the rsid attributes (rsid, rsidDel, rsidP, rsidR, rsidRDefault,
  /// rsidRPr, rsidSect, rsidTr) are omitted from every rebuilt element.
  /// </summary>
  /// <returns>
  /// <c>null</c> for a w:rsid element, a rebuilt XElement for any other element, and
  /// the node itself for non-element nodes.
  /// </returns>
  private static object RemoveRsidTransform(XNode node)
  {
      var element = node as XElement;
      if (element == null)
          return node;

      if (element.Name == W.rsid)
          return null;

      IEnumerable<XAttribute> keptAttributes = element
          .Attributes()
          .Where(a => !(a.Name == W.rsid ||
                        a.Name == W.rsidDel ||
                        a.Name == W.rsidP ||
                        a.Name == W.rsidR ||
                        a.Name == W.rsidRDefault ||
                        a.Name == W.rsidRPr ||
                        a.Name == W.rsidSect ||
                        a.Name == W.rsidTr));

      IEnumerable<object> transformedChildren = element.Nodes().Select(child => RemoveRsidTransform(child));

      return new XElement(element.Name, keptAttributes, transformedChildren);
  }
  /// <summary>
  /// Recursive transform that hands every w:p element to
  /// WordprocessingMLUtil.CoalesceAdjacentRunsWithIdenticalFormatting and rebuilds
  /// all other elements around their transformed children.
  /// </summary>
  /// <remarks>
  /// This method does not itself descend into a paragraph; whatever the coalescing
  /// helper returns for the paragraph is used as is.
  /// </remarks>
  private static object MergeAdjacentRunsTransform(XNode node)
  {
      var element = node as XElement;
      if (element == null)
          return node;

      if (element.Name != W.p)
      {
          return new XElement(
              element.Name,
              element.Attributes(),
              element.Nodes().Select(child => MergeAdjacentRunsTransform(child)));
      }

      return WordprocessingMLUtil.CoalesceAdjacentRunsWithIdenticalFormatting(element);
  }
  /// <summary>
  /// Recursive transform that drops w:r, w:rPr and w:pPr elements that have no child
  /// elements and rebuilds every other element around its transformed children.
  /// </summary>
  /// <returns>
  /// <c>null</c> for a removed element, a rebuilt XElement for a kept element, and the
  /// node itself for non-element nodes.
  /// </returns>
  private static object RemoveEmptyRunsAndRunPropertiesTransform(
      XNode node)
  {
      var element = node as XElement;
      if (element == null)
          return node;

      XName name = element.Name;
      bool isRunOrProperties = (name == W.r) || (name == W.rPr) || (name == W.pPr);
      if (isRunOrProperties && !element.Elements().Any())
          return null;

      return new XElement(
          name,
          element.Attributes(),
          element.Nodes().Select(child => RemoveEmptyRunsAndRunPropertiesTransform(child)));
  }
  /// <summary>
  /// Recursive transform that, inside every run containing w:instrText, replaces each
  /// stretch of adjacent w:instrText elements with one w:instrText holding their
  /// concatenated text.
  /// </summary>
  /// <remarks>
  /// The rebuilt run is created from its child elements only; the attributes of the
  /// original w:r are not copied. Children that are not w:instrText are carried over
  /// unchanged. xml:space="preserve" is added when the merged text starts or ends
  /// with a space.
  /// </remarks>
  private static object MergeAdjacentInstrText(
      XNode node)
  {
      var element = node as XElement;
      if (element == null)
          return node;

      bool isRunWithInstrText = (element.Name == W.r) && element.Elements(W.instrText).Any();
      if (!isRunWithInstrText)
      {
          return new XElement(
              element.Name,
              element.Attributes(),
              element.Nodes().Select(child => MergeAdjacentInstrText(child)));
      }

      IEnumerable<IGrouping<bool, XElement>> chunks =
          element.Elements().GroupAdjacent(child => child.Name == W.instrText);

      return new XElement(W.r,
          chunks.Select(chunk =>
          {
              // Chunks of other elements pass through untouched.
              if (!chunk.Key)
                  return (object) chunk;

              // If .doc files are converted to .docx by the Binary to Open XML Translator,
              // the w:instrText elements might be empty, in which case the merged text
              // is an empty string.
              string mergedText = chunk.Select(instr => (string) instr).StringConcatenate();
              if (string.IsNullOrEmpty(mergedText))
                  return new XElement(W.instrText);

              bool hasEdgeSpace =
                  (mergedText[0] == ' ') || (mergedText[mergedText.Length - 1] == ' ');
              XAttribute preserveSpace = hasEdgeSpace
                  ? new XAttribute(XNamespace.Xml + "space", "preserve")
                  : null;

              return new XElement(W.instrText, preserveSpace, mergedText);
          }));
  }
  /// <summary>
  /// Recursive transform that applies the element-level simplifications selected in
  /// <paramref name="settings"/> to one node.
  /// </summary>
  /// <remarks>
  /// Removed outright (when the matching switch is set): permStart/permEnd,
  /// proofErr/noProof, softHyphen, lastRenderedPageBreak, bookmarkStart/bookmarkEnd,
  /// webHidden, comment range/reference elements, the "CommentReference" rStyle,
  /// endnote/footnote references, and fldData/fldChar/instrText.
  /// Unwrapped (replaced by their transformed child elements): fldSimple and hyperlink.
  /// A w:tab directly inside a run is replaced by a w:t holding one preserved space.
  ///
  /// Attribute reads are null-safe: a w:rStyle without w:val and a bookmark without
  /// w:id are treated as non-matching instead of throwing. The "_GoBack" check is
  /// skipped entirely when no GoBack id is known.
  /// </remarks>
  /// <param name="node">The node to transform.</param>
  /// <param name="settings">Switches selecting which simplifications to apply.</param>
  /// <param name="parameters">Per-part data; supplies the id of the "_GoBack" bookmark.</param>
  /// <returns>
  /// <c>null</c> for a removed element, a sequence for an unwrapped element, a new
  /// XElement otherwise, or the node itself when it is not an element.
  /// </returns>
  private static object SimplifyMarkupTransform(
      XNode node,
      SimplifyMarkupSettings settings,
      SimplifyMarkupParameters parameters)
  {
      var element = node as XElement;
      if (element == null) return node;

      XName name = element.Name;

      if (settings.RemovePermissions &&
          ((name == W.permEnd) || (name == W.permStart)))
          return null;

      if (settings.RemoveProof &&
          ((name == W.proofErr) || (name == W.noProof)))
          return null;

      if (settings.RemoveSoftHyphens && (name == W.softHyphen))
          return null;

      if (settings.RemoveLastRenderedPageBreak && (name == W.lastRenderedPageBreak))
          return null;

      bool isBookmark = (name == W.bookmarkStart) || (name == W.bookmarkEnd);

      if (settings.RemoveBookmarks && isBookmark)
          return null;

      // Only compare ids when a _GoBack id is actually known; otherwise a bookmark
      // with a missing id would compare equal to the missing GoBack id.
      if (settings.RemoveGoBackBookmark &&
          isBookmark &&
          (parameters.GoBackId != null) &&
          ((int?) element.Attribute(W.id) == parameters.GoBackId))
          return null;

      if (settings.RemoveWebHidden && (name == W.webHidden))
          return null;

      if (settings.ReplaceTabsWithSpaces &&
          (name == W.tab) &&
          (element.Parent != null && element.Parent.Name == W.r))
          return new XElement(W.t, new XAttribute(XNamespace.Xml + "space", "preserve"), " ");

      if (settings.RemoveComments)
      {
          if ((name == W.commentRangeStart) ||
              (name == W.commentRangeEnd) ||
              (name == W.commentReference) ||
              (name == W.annotationRef))
              return null;

          // The string cast yields null for a missing w:val instead of throwing.
          if ((name == W.rStyle) &&
              ((string) element.Attribute(W.val) == "CommentReference"))
              return null;
      }

      if (settings.RemoveEndAndFootNotes &&
          ((name == W.endnoteReference) || (name == W.footnoteReference)))
          return null;

      if (settings.RemoveFieldCodes)
      {
          if (name == W.fldSimple)
              return element.Elements().Select(e => SimplifyMarkupTransform(e, settings, parameters));

          if ((name == W.fldData) ||
              (name == W.fldChar) ||
              (name == W.instrText))
              return null;
      }

      // Unwrap hyperlinks the same way as fldSimple: the children are transformed in
      // this pass rather than being copied through unchanged.
      if (settings.RemoveHyperlinks && (name == W.hyperlink))
          return element.Elements().Select(e => SimplifyMarkupTransform(e, settings, parameters));

      return new XElement(name,
          element.Attributes(),
          element.Nodes().Select(n => SimplifyMarkupTransform(n, settings, parameters)));
  }
  /// <summary>
  /// Builds a normalized copy of <paramref name="source"/>.
  /// </summary>
  /// <remarks>
  /// When <paramref name="schema"/> is supplied the source is validated first (with
  /// no validation handler, and with schema info added to the tree), and typed values
  /// are then normalized by type. Comments, processing instructions and text nodes
  /// that are direct children of the document are dropped; elements go through
  /// NormalizeElement; any other top-level node is copied.
  /// </remarks>
  /// <param name="source">The document to normalize; it is not rebuilt in place.</param>
  /// <param name="schema">Optional schema set; <c>null</c> skips validation.</param>
  private static XDocument Normalize(XDocument source, XmlSchemaSet schema)
  {
      bool havePsvi = schema != null;
      if (havePsvi)
      {
          // validate, throw errors, add PSVI information
          source.Validate(schema, null, true);
      }

      var topLevelNodes = new List<XNode>();
      foreach (XNode child in source.Nodes())
      {
          // Only white space text nodes are allowed as children of a document, so
          // all text nodes can go, together with comments and processing instructions.
          if (child is XComment || child is XProcessingInstruction || child is XText)
              continue;

          var childElement = child as XElement;
          if (childElement != null)
              topLevelNodes.Add(NormalizeElement(childElement, havePsvi));
          else
              topLevelNodes.Add(child);
      }

      return new XDocument(source.Declaration, topLevelNodes);
  }
  345. // TODO: Check whether this can be removed.
  346. //private static bool DeepEqualsWithNormalization(XDocument doc1, XDocument doc2, XmlSchemaSet schemaSet)
  347. //{
  348. // XDocument d1 = Normalize(doc1, schemaSet);
  349. // XDocument d2 = Normalize(doc2, schemaSet);
  350. // return XNode.DeepEquals(d1, d2);
  351. //}
  /// <summary>
  /// Returns the attributes of <paramref name="element"/> in normalized form:
  /// namespace declarations and the xsi schema-location attributes are omitted, and
  /// the rest are ordered by namespace name, then local name.
  /// </summary>
  /// <remarks>
  /// When <paramref name="havePsvi"/> is true, attributes whose schema type is
  /// boolean, dateTime, decimal, double or float are re-created from their typed
  /// value, and hexBinary/language values are lower-cased. Lower-casing uses the
  /// invariant culture so the output does not depend on the current thread culture.
  /// </remarks>
  /// <param name="element">The element whose attributes are normalized.</param>
  /// <param name="havePsvi">Whether schema info is available on the tree.</param>
  /// <returns>A lazily evaluated sequence of attributes.</returns>
  private static IEnumerable<XAttribute> NormalizeAttributes(XElement element, bool havePsvi)
  {
      return element.Attributes()
          .Where(a => !a.IsNamespaceDeclaration &&
                      (a.Name != Xsi.schemaLocation) &&
                      (a.Name != Xsi.noNamespaceSchemaLocation))
          .OrderBy(a => a.Name.NamespaceName)
          .ThenBy(a => a.Name.LocalName)
          .Select(a =>
          {
              if (havePsvi)
              {
                  IXmlSchemaInfo schemaInfo = a.GetSchemaInfo();
                  XmlSchemaType schemaType = schemaInfo != null ? schemaInfo.SchemaType : null;
                  XmlTypeCode? typeCode = schemaType != null ? schemaType.TypeCode : (XmlTypeCode?) null;
                  switch (typeCode)
                  {
                      case XmlTypeCode.Boolean:
                          return new XAttribute(a.Name, (bool) a);
                      case XmlTypeCode.DateTime:
                          return new XAttribute(a.Name, (DateTime) a);
                      case XmlTypeCode.Decimal:
                          return new XAttribute(a.Name, (decimal) a);
                      case XmlTypeCode.Double:
                          return new XAttribute(a.Name, (double) a);
                      case XmlTypeCode.Float:
                          return new XAttribute(a.Name, (float) a);
                      case XmlTypeCode.HexBinary:
                      case XmlTypeCode.Language:
                          // Machine-readable data: never lower-case with the current culture.
                          return new XAttribute(a.Name,
                              ((string) a).ToLowerInvariant());
                  }
              }

              // Untyped (or unhandled type): keep the attribute as it is.
              return a;
          });
  }
  /// <summary>
  /// Normalizes one child node: comments and processing instructions are dropped
  /// (<c>null</c> is returned), elements go through NormalizeElement, and any other
  /// node is returned unchanged.
  /// </summary>
  private static XNode NormalizeNode(XNode node, bool havePsvi)
  {
      bool isTrimmed = node is XComment || node is XProcessingInstruction;
      if (isTrimmed)
          return null;

      var asElement = node as XElement;

      // Anything that is not an element at this point is returned as is.
      return asElement == null ? node : NormalizeElement(asElement, havePsvi);
  }
  /// <summary>
  /// Builds a normalized copy of <paramref name="element"/>: attributes come from
  /// NormalizeAttributes and child nodes from NormalizeNode.
  /// </summary>
  /// <remarks>
  /// When <paramref name="havePsvi"/> is true and the element's schema type is
  /// boolean, dateTime, decimal, double or float, the content is re-created from the
  /// typed value; hexBinary/language content is lower-cased using the invariant
  /// culture, so the output does not depend on the current thread culture.
  /// </remarks>
  /// <param name="element">The element to normalize.</param>
  /// <param name="havePsvi">Whether schema info is available on the tree.</param>
  private static XElement NormalizeElement(XElement element, bool havePsvi)
  {
      // Lazily evaluated; consumed when the new element is constructed.
      IEnumerable<XAttribute> attributes = NormalizeAttributes(element, havePsvi);

      if (havePsvi)
      {
          IXmlSchemaInfo schemaInfo = element.GetSchemaInfo();
          XmlSchemaType schemaType = schemaInfo != null ? schemaInfo.SchemaType : null;
          XmlTypeCode? typeCode = schemaType != null ? schemaType.TypeCode : (XmlTypeCode?) null;
          switch (typeCode)
          {
              case XmlTypeCode.Boolean:
                  return new XElement(element.Name, attributes, (bool) element);
              case XmlTypeCode.DateTime:
                  return new XElement(element.Name, attributes, (DateTime) element);
              case XmlTypeCode.Decimal:
                  return new XElement(element.Name, attributes, (decimal) element);
              case XmlTypeCode.Double:
                  return new XElement(element.Name, attributes, (double) element);
              case XmlTypeCode.Float:
                  return new XElement(element.Name, attributes, (float) element);
              case XmlTypeCode.HexBinary:
              case XmlTypeCode.Language:
                  // Machine-readable data: never lower-case with the current culture.
                  return new XElement(element.Name, attributes,
                      ((string) element).ToLowerInvariant());
          }
      }

      // Untyped, or a type without special handling: normalize the child nodes.
      return new XElement(element.Name,
          attributes,
          element.Nodes().Select(n => NormalizeNode(n, havePsvi)));
  }
  /// <summary>
  /// Simplifies the XML of one part according to <paramref name="settings"/> and
  /// writes the result back to the part.
  /// </summary>
  /// <remarks>
  /// Order of work: unwrap content controls / smart tags, strip rsid info, then
  /// repeat the element-level pipeline (simplify, drop empty runs and properties,
  /// merge adjacent runs, merge adjacent instrText, separate run children) until a
  /// pass leaves the tree unchanged. Optionally the result is normalized and a fixed
  /// set of namespace declarations is added to the root.
  ///
  /// The element-level simplification now also runs when only RemoveSoftHyphens or
  /// RemoveLastRenderedPageBreak is set; previously those two switches had no effect
  /// unless some other switch was enabled as well. A "_GoBack" bookmark without a
  /// w:id attribute no longer raises an exception.
  /// </remarks>
  /// <param name="part">The part to rewrite.</param>
  /// <param name="settings">Switches selecting which simplifications to apply.</param>
  private static void SimplifyMarkupForPart(OpenXmlPart part, SimplifyMarkupSettings settings)
  {
      var parameters = new SimplifyMarkupParameters();
      if (settings.RemoveGoBackBookmark &&
          part.ContentType == "application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml")
      {
          var doc = (WordprocessingDocument) part.OpenXmlPackage;
          XElement goBackBookmark = doc
              .MainDocumentPart
              .GetXDocument()
              .Descendants(W.bookmarkStart)
              .FirstOrDefault(bm => (string) bm.Attribute(W.name) == "_GoBack");
          if (goBackBookmark != null)
              parameters.GoBackId = (int?) goBackBookmark.Attribute(W.id);
      }

      XDocument xdoc = part.GetXDocument();
      XElement newRoot = xdoc.Root;

      // Need to do this first to enable simplifying hyperlinks.
      if (settings.RemoveContentControls || settings.RemoveSmartTags)
          newRoot = (XElement) RemoveCustomXmlAndContentControlsTransform(newRoot, settings);

      // This may touch many elements, so needs to be its own transform.
      if (settings.RemoveRsidInfo)
          newRoot = (XElement) RemoveRsidTransform(newRoot);

      // The settings do not change while looping, so decide once.
      bool runSimplifyTransform = NeedsSimplifyMarkupTransform(settings);

      var prevNewRoot = new XDocument(newRoot);
      while (true)
      {
          if (runSimplifyTransform)
              newRoot = (XElement) SimplifyMarkupTransform(newRoot, settings, parameters);

          // Remove runs and run properties that have become empty due to previous transforms.
          newRoot = (XElement) RemoveEmptyRunsAndRunPropertiesTransform(newRoot);

          // Merge adjacent runs that have identical run properties.
          newRoot = (XElement) MergeAdjacentRunsTransform(newRoot);

          // Merge adjacent instrText elements.
          newRoot = (XElement) MergeAdjacentInstrText(newRoot);

          // Separate run children into separate runs
          newRoot = (XElement) SeparateRunChildrenIntoSeparateRuns(newRoot);

          // Stop once a full pass leaves the tree unchanged.
          if (XNode.DeepEquals(prevNewRoot.Root, newRoot))
              break;

          prevNewRoot = new XDocument(newRoot);
      }

      if (settings.NormalizeXml)
      {
          XAttribute[] nsAttrs =
          {
              new XAttribute(XNamespace.Xmlns + "wpc", WPC.wpc),
              new XAttribute(XNamespace.Xmlns + "mc", MC.mc),
              new XAttribute(XNamespace.Xmlns + "o", O.o),
              new XAttribute(XNamespace.Xmlns + "r", R.r),
              new XAttribute(XNamespace.Xmlns + "m", M.m),
              new XAttribute(XNamespace.Xmlns + "v", VML.vml),
              new XAttribute(XNamespace.Xmlns + "wp14", WP14.wp14),
              new XAttribute(XNamespace.Xmlns + "wp", WP.wp),
              new XAttribute(XNamespace.Xmlns + "w10", W10.w10),
              new XAttribute(XNamespace.Xmlns + "w", W.w),
              new XAttribute(XNamespace.Xmlns + "w14", W14.w14),
              new XAttribute(XNamespace.Xmlns + "w15", W15.w15),
              new XAttribute(XNamespace.Xmlns + "w16se", W16SE.w16se),
              new XAttribute(XNamespace.Xmlns + "wpg", WPG.wpg),
              new XAttribute(XNamespace.Xmlns + "wpi", WPI.wpi),
              new XAttribute(XNamespace.Xmlns + "wne", WNE.wne),
              new XAttribute(XNamespace.Xmlns + "wps", WPS.wps),
              new XAttribute(MC.Ignorable, "w14 wp14 w15 w16se"),
          };

          XDocument newXDoc = Normalize(new XDocument(newRoot), null);
          newRoot = newXDoc.Root;
          if (newRoot != null)
          {
              // Normalization strips namespace declarations; add the fixed set to the root.
              foreach (XAttribute nsAttr in nsAttrs)
              {
                  if (newRoot.Attribute(nsAttr.Name) == null)
                      newRoot.Add(nsAttr);
              }
          }

          part.PutXDocument(newXDoc);
      }
      else
      {
          part.PutXDocument(new XDocument(newRoot));
      }
  }

  // True when at least one switch handled by SimplifyMarkupTransform is enabled.
  private static bool NeedsSimplifyMarkupTransform(SimplifyMarkupSettings settings)
  {
      return settings.RemoveComments ||
             settings.RemoveEndAndFootNotes ||
             settings.ReplaceTabsWithSpaces ||
             settings.RemoveFieldCodes ||
             settings.RemovePermissions ||
             settings.RemoveProof ||
             settings.RemoveSoftHyphens ||
             settings.RemoveLastRenderedPageBreak ||
             settings.RemoveBookmarks ||
             settings.RemoveWebHidden ||
             settings.RemoveGoBackBookmark ||
             settings.RemoveHyperlinks;
  }
  /// <summary>
  /// Recursive transform that splits every w:r into one run per child element, each
  /// new run carrying a copy of the original run's w:rPr (when it has one).
  /// </summary>
  /// <remarks>
  /// The new runs are built from the w:rPr and the single child only; attributes of
  /// the original w:r are not copied. A run with no children other than w:rPr yields
  /// an empty sequence.
  /// </remarks>
  /// <returns>
  /// A lazy sequence of runs for a w:r, a rebuilt XElement for any other element, and
  /// the node itself for non-element nodes.
  /// </returns>
  private static object SeparateRunChildrenIntoSeparateRuns(XNode node)
  {
      var element = node as XElement;
      if (element == null)
          return node;

      if (element.Name != W.r)
      {
          return new XElement(
              element.Name,
              element.Attributes(),
              element.Nodes().Select(child => SeparateRunChildrenIntoSeparateRuns(child)));
      }

      XElement runProperties = element.Element(W.rPr);
      return element
          .Elements()
          .Where(child => child.Name != W.rPr)
          .Select(child => new XElement(W.r, runProperties, child));
  }
  /// <summary>
  /// Recursive transform that splits every w:r so that each resulting run holds
  /// either one character of text or one non-text child element. Each new run gets a
  /// copy of the original run's w:rPr elements.
  /// </summary>
  /// <remarks>
  /// Adjacent w:t elements of a run are concatenated and then split per Unicode code
  /// point: a UTF-16 surrogate pair stays together in one w:t. Splitting per
  /// <c>char</c> would put each half of a pair in its own run, and a lone surrogate
  /// is not valid XML text. A run holding a single space gets xml:space="preserve".
  /// </remarks>
  /// <returns>
  /// A lazy sequence of run sequences for a w:r, a rebuilt XElement for any other
  /// element, and the node itself for non-element nodes.
  /// </returns>
  private static object SingleCharacterRunTransform(XNode node)
  {
      var element = node as XElement;
      if (element == null) return node;

      if (element.Name != W.r)
      {
          return new XElement(element.Name,
              element.Attributes(),
              element.Nodes().Select(n => SingleCharacterRunTransform(n)));
      }

      return element.Elements()
          .Where(e => e.Name != W.rPr)
          .GroupAdjacent(sr => sr.Name == W.t)
          .Select(g =>
          {
              if (g.Key)
              {
                  string s = g.Select(t => (string) t).StringConcatenate();
                  return SplitIntoCodePoints(s).Select(c =>
                      new XElement(W.r,
                          element.Elements(W.rPr),
                          new XElement(W.t,
                              c == " " ? new XAttribute(XNamespace.Xml + "space", "preserve") : null,
                              c)));
              }

              // Non-text children each get their own run; their content is transformed too.
              return g.Select(sr =>
                  new XElement(W.r,
                      element.Elements(W.rPr),
                      new XElement(sr.Name,
                          sr.Attributes(),
                          sr.Nodes().Select(n => SingleCharacterRunTransform(n)))));
          });
  }

  // Yields the text one code point at a time, keeping well-formed surrogate pairs together.
  private static IEnumerable<string> SplitIntoCodePoints(string text)
  {
      for (int i = 0; i < text.Length; i++)
      {
          bool isPair = char.IsHighSurrogate(text[i]) &&
                        (i + 1 < text.Length) &&
                        char.IsLowSurrogate(text[i + 1]);
          if (isPair)
          {
              yield return text.Substring(i, 2);
              i++; // skip the low surrogate that was just emitted
          }
          else
          {
              yield return text.Substring(i, 1);
          }
      }
  }
  /// <summary>
  /// Names from the XML Schema instance namespace that the attribute normalizer
  /// filters out.
  /// </summary>
  private static class Xsi
  {
      private static readonly XNamespace xsi =
          XNamespace.Get("http://www.w3.org/2001/XMLSchema-instance");

      /// <summary>The xsi:schemaLocation attribute name.</summary>
      public static readonly XName schemaLocation = xsi.GetName("schemaLocation");

      /// <summary>The xsi:noNamespaceSchemaLocation attribute name.</summary>
      public static readonly XName noNamespaceSchemaLocation = xsi.GetName("noNamespaceSchemaLocation");
  }
  /// <summary>
  /// Exception type declared by MarkupSimplifier for internal errors.
  /// Provides the standard exception constructors.
  /// </summary>
  public class InternalException : Exception
  {
      /// <summary>Creates the exception with the default message.</summary>
      public InternalException()
      {
      }

      /// <summary>Creates the exception with the given message.</summary>
      public InternalException(string message) : base(message)
      {
      }

      /// <summary>Creates the exception with a message and the exception that caused it.</summary>
      public InternalException(string message, Exception innerException) : base(message, innerException)
      {
      }
  }
  /// <summary>
  /// Exception type declared by MarkupSimplifier for invalid settings.
  /// Provides the standard exception constructors.
  /// </summary>
  public class InvalidSettingsException : Exception
  {
      /// <summary>Creates the exception with the default message.</summary>
      public InvalidSettingsException()
      {
      }

      /// <summary>Creates the exception with the given message.</summary>
      public InvalidSettingsException(string message) : base(message)
      {
      }

      /// <summary>Creates the exception with a message and the exception that caused it.</summary>
      public InvalidSettingsException(string message, Exception innerException) : base(message, innerException)
      {
      }
  }
  /// <summary>
  /// Per-part values computed once in SimplifyMarkupForPart and read by
  /// SimplifyMarkupTransform.
  /// </summary>
  private class SimplifyMarkupParameters
  {
      /// <summary>
      /// The w:id of the "_GoBack" bookmark in the main document part, or
      /// <c>null</c> when it was not looked up or not found.
      /// </summary>
      public Nullable<int> GoBackId { get; set; }
  }
  597. }
  598. }