WmlToXml.cs 91 KB


  1. // Copyright (c) Microsoft. All rights reserved.
  2. // Licensed under the MIT license. See LICENSE file in the project root for full license information.
  3. // Portions Copyright (c) Eric White Inc. All rights reserved.
  4. // Published at http://EricWhite.com
  5. // Resource Center and Documentation: http://ericwhite.com/
  6. // Developer: Eric White
  7. // Blog: http://www.ericwhite.com
  8. // Twitter: @EricWhiteDev
  9. // Email: eric@ericwhite.com
  10. using System;
  11. using System.Collections.Generic;
  12. using System.Linq;
  13. using System.Text;
  14. using System.Text.RegularExpressions;
  15. using System.IO;
  16. using System.Xml.Linq;
  17. using DocumentFormat.OpenXml.Packaging;
  18. using System.Drawing;
  19. namespace OpenXmlPowerTools
  20. {
  /// <summary>
  /// One rule that associates a content type name with the criteria used to recognize it.
  /// </summary>
  /// <remarks>
  /// NOTE(review): the matching criteria (style name, regexes, lambda) are evaluated elsewhere in
  /// the file; how they combine cannot be confirmed from this declaration alone.
  /// </remarks>
  public class ContentTypeRule
  {
      /// <summary>Name of the content type; regex extensions are looked up by this name.</summary>
      public string ContentType;

      /// <summary>Style name criterion — presumably an exact style name match; verify against the matcher.</summary>
      public string StyleName;

      /// <summary>Style name criterion expressed as a regular expression.</summary>
      public Regex StyleNameRegex;

      /// <summary>
      /// Regular expressions attached to the rule. May be extended at run time by
      /// <c>WmlToXmlSettings.ContentTypeRegexExtension</c>.
      /// </summary>
      public Regex[] RegexArray;

      /// <summary>Custom predicate receiving the element, this rule, the document, and the settings.</summary>
      public Func<XElement, ContentTypeRule, WordprocessingDocument, WmlToXmlSettings, bool> MatchLambda;

      /// <summary>Defaults to <c>true</c>. Presumably controls whether run-level rules are applied — TODO confirm.</summary>
      public bool ApplyRunContentTypes = true;

      /// <summary>Document types associated with the rule — presumably those it applies to; verify.</summary>
      public string[] DocumentTypeCollection;
  }
  /// <summary>
  /// Severity classification attached to validation rules and validation errors.
  /// </summary>
  public enum ValidationErrorType
  {
      /// <summary>Reported as an error.</summary>
      Error = 0,

      /// <summary>Reported as a warning.</summary>
      Warning = 1,

      /// <summary>Presumably marks a rule as not relevant for a document type — TODO confirm.</summary>
      NotApplicable = 2,
  }
  /// <summary>
  /// Pairs a document type name with the severity a validation rule carries for that document type.
  /// </summary>
  public class ValidationRuleDocumentTypeInfo
  {
      /// <summary>Name of the document type.</summary>
      public string DocumentType;

      /// <summary>Severity used for the rule for this document type.</summary>
      public ValidationErrorType ValidationErrorType;
  }
  /// <summary>
  /// A validation rule whose lambda produces a list of <see cref="WmlToXmlValidationError"/>.
  /// </summary>
  public class GlobalValidationRule
  {
      /// <summary>Rule names. NOTE(review): presumably parallel to the descriptions and titles — confirm.</summary>
      public string[] RuleNames;

      /// <summary>Rule descriptions.</summary>
      public string[] RuleDescriptions;

      /// <summary>Rule titles.</summary>
      public string[] RuleTitles;

      /// <summary>
      /// The rule implementation. Receives this rule, two documents, an element, and the settings,
      /// and returns the validation errors found. NOTE(review): the roles of the two documents and
      /// the element are not visible here — confirm against the caller.
      /// </summary>
      public Func<GlobalValidationRule, WordprocessingDocument, WordprocessingDocument, XElement, WmlToXmlSettings, List<WmlToXmlValidationError>> GlobalRuleLambda;

      // if DocumentTypeInfoCollection == null, then this rule runs for all document types, and with severity level of error
      public ValidationRuleDocumentTypeInfo[] DocumentTypeInfoCollection;

      /// <summary>Message text associated with the rule.</summary>
      public string Message;
  }
  /// <summary>
  /// A validation rule for a single piece of block-level content; its lambda produces a list of
  /// <see cref="WmlToXmlValidationError"/>.
  /// </summary>
  public class BlockLevelContentValidationRule
  {
      /// <summary>Rule names. NOTE(review): presumably parallel to the descriptions and titles — confirm.</summary>
      public string[] RuleNames;

      /// <summary>Rule descriptions.</summary>
      public string[] RuleDescriptions;

      /// <summary>Rule titles.</summary>
      public string[] RuleTitles;

      /// <summary>Style name pattern — presumably restricts the rule to matching styles; verify against the caller.</summary>
      public Regex StyleNameRegex;

      /// <summary>
      /// The rule implementation. Receives an element, this rule, the document, a second element,
      /// and the settings, and returns the validation errors found. NOTE(review): the roles of the
      /// two elements are not visible here — confirm against the caller.
      /// </summary>
      public Func<XElement, BlockLevelContentValidationRule, WordprocessingDocument, XElement, WmlToXmlSettings, List<WmlToXmlValidationError>> BlockLevelContentRuleLambda;

      /// <summary>Per-document-type severity information for the rule.</summary>
      public ValidationRuleDocumentTypeInfo[] DocumentTypeInfoCollection;

      /// <summary>Message text associated with the rule.</summary>
      public string Message;
  }
  /// <summary>
  /// A single finding produced by a validation rule.
  /// </summary>
  public class WmlToXmlValidationError
  {
      /// <summary>Name of the rule that produced the finding.</summary>
      public string RuleName;

      /// <summary>Severity of the finding.</summary>
      public ValidationErrorType ErrorType;

      /// <summary>Title of the finding.</summary>
      public string ErrorTitle;

      /// <summary>Message text of the finding.</summary>
      public string ErrorMessage;

      // This string is the same as the unid that is in the source document. It should be
      // sufficient to identify and find any invalid paragraph, table, row, cell, or anything
      // else in the source document.
      public string BlockLevelContentIdentifier;
  }
  /// <summary>
  /// Payload passed to <c>WmlToXmlSettings.ProgressFunction</c> when a processing phase is reported.
  /// </summary>
  public class WmlToXmlProgressInfo
  {
      /// <summary>Count of content processed so far (reported as 0 at phase start).</summary>
      public int ContentCount;

      /// <summary>Total count of content (reported as 0 at phase start).</summary>
      public int ContentTotal;

      /// <summary>Human-readable progress message, e.g. the name of the phase being started.</summary>
      public string InProgressMessage;
  }
  /// <summary>
  /// Carries information for a transform; currently only a default language value.
  /// </summary>
  public class TransformInfo
  {
      /// <summary>Default language — per the name, taken from the styles part; TODO confirm where it is populated.</summary>
      public string DefaultLangFromStylesPart;
  }
  /// <summary>
  /// Counters stored per content type in <c>WmlToXmlSettings.ContentTypeCount</c>.
  /// </summary>
  public class WmlToXmlContentTypeMetrics
  {
      /// <summary>Presumably the number of times the content type was assigned — TODO confirm.</summary>
      public int Count;

      /// <summary>Presumably the number of times the content type's rule was tested — TODO confirm.</summary>
      public int Tests;
  }
  /// <summary>
  /// Settings object consumed by <c>WmlToXml.ApplyContentTypes</c> and
  /// <c>WmlToXml.ProduceContentTypeXml</c>.
  /// </summary>
  /// <remarks>
  /// <c>DocumentType</c> is not set by either constructor and must be assigned before use; both
  /// entry points throw when it is null or empty.
  /// </remarks>
  public class WmlToXmlSettings
  {
      // ----- content type rule sets -----
      public List<ContentTypeRule> GlobalContentTypeRules;
      public List<ContentTypeRule> DocumentTypeContentTypeRules;
      public List<ContentTypeRule> DocumentContentTypeRules;
      public List<ContentTypeRule> RunContentTypeRules;

      // ----- validation rule sets -----
      public List<GlobalValidationRule> GlobalValidationRules;
      public List<BlockLevelContentValidationRule> BlockLevelContentValidationRules;

      /// <summary>Settings passed on when list item information is assembled.</summary>
      public ListItemRetrieverSettings ListItemRetrieverSettings;

      public bool? InjectCommentForContentTypes;

      /// <summary>
      /// Hierarchy definition; its <c>DocumentType</c> child whose <c>DocumentType</c> attribute
      /// equals <see cref="DocumentType"/> is used to nest the produced XML. Only the first
      /// constructor assigns it.
      /// </summary>
      public XElement ContentTypeHierarchyDefinition;

      public Func<XElement, WmlToXmlSettings, bool> ContentTypeHierarchyLambda;
      public Dictionary<string, Func<string, OpenXmlPart, XElement, WmlToXmlSettings, object>> XmlGenerationLambdas;

      // ----- image handling -----
      public DirectoryInfo ImageBase;
      public bool WriteImageFiles = true;

      /// <summary>Optional callback invoked with a <see cref="WmlToXmlProgressInfo"/> as phases start.</summary>
      public Action<WmlToXmlProgressInfo> ProgressFunction;

      /// <summary>
      /// Optional document of <c>Extension</c> elements whose <c>Regex</c> entries are appended to
      /// the <c>RegexArray</c> of the rule with the matching <c>ContentType</c>.
      /// </summary>
      public XDocument ContentTypeRegexExtension;

      public string DefaultLang;

      /// <summary>Name of the document type being processed. Required.</summary>
      public string DocumentType;

      public Action<XDocument, XDocument, WmlToXmlSettings, OpenXmlPart> ApplyContentTypesCustom;

      /// <summary>Per-content-type metrics; starts out as an empty dictionary.</summary>
      public Dictionary<string, WmlToXmlContentTypeMetrics> ContentTypeCount = new Dictionary<string, WmlToXmlContentTypeMetrics>();

      /// <summary>Arbitrary caller-owned data.</summary>
      public object UserData;

      /// <summary>
      /// Creates settings with an explicit hierarchy definition and a freshly constructed
      /// <see cref="ListItemRetrieverSettings"/>.
      /// </summary>
      public WmlToXmlSettings(
          List<ContentTypeRule> globalContentTypeRules,
          List<ContentTypeRule> documentTypeContentTypeRules,
          List<ContentTypeRule> documentContentTypeRules,
          List<ContentTypeRule> runContentTypeRules,
          List<GlobalValidationRule> globalValidationRules,
          List<BlockLevelContentValidationRule> blockLevelContentValidationRules,
          XElement contentTypeHierarchyDefinition,
          Func<XElement, WmlToXmlSettings, bool> contentTypeHierarchyLambda,
          Dictionary<string, Func<string, OpenXmlPart, XElement, WmlToXmlSettings, object>> xmlGenerationLambdas,
          DirectoryInfo imageBase,
          XDocument contentTypeRegexExtension)
      {
          this.GlobalContentTypeRules = globalContentTypeRules;
          this.DocumentTypeContentTypeRules = documentTypeContentTypeRules;
          this.DocumentContentTypeRules = documentContentTypeRules;
          this.RunContentTypeRules = runContentTypeRules;
          this.GlobalValidationRules = globalValidationRules;
          this.BlockLevelContentValidationRules = blockLevelContentValidationRules;
          this.ListItemRetrieverSettings = new ListItemRetrieverSettings();
          this.ContentTypeHierarchyDefinition = contentTypeHierarchyDefinition;
          this.ContentTypeHierarchyLambda = contentTypeHierarchyLambda;
          this.XmlGenerationLambdas = xmlGenerationLambdas;
          this.ImageBase = imageBase;
          this.ContentTypeRegexExtension = contentTypeRegexExtension;
      }

      /// <summary>
      /// Creates settings with caller-supplied <see cref="ListItemRetrieverSettings"/>. This
      /// overload leaves <see cref="ContentTypeHierarchyDefinition"/> unassigned (null).
      /// </summary>
      public WmlToXmlSettings(
          List<ContentTypeRule> globalContentTypeRules,
          List<ContentTypeRule> documentTypeContentTypeRules,
          List<ContentTypeRule> documentContentTypeRules,
          List<ContentTypeRule> runContentTypeRules,
          List<GlobalValidationRule> globalValidationRules,
          List<BlockLevelContentValidationRule> blockLevelContentValidationRules,
          Func<XElement, WmlToXmlSettings, bool> contentTypeHierarchyLambda,
          Dictionary<string, Func<string, OpenXmlPart, XElement, WmlToXmlSettings, object>> xmlGenerationLambdas,
          ListItemRetrieverSettings listItemRetrieverSettings,
          DirectoryInfo imageBase,
          XDocument contentTypeRegexExtension)
      {
          this.GlobalContentTypeRules = globalContentTypeRules;
          this.DocumentTypeContentTypeRules = documentTypeContentTypeRules;
          this.DocumentContentTypeRules = documentContentTypeRules;
          this.RunContentTypeRules = runContentTypeRules;
          this.GlobalValidationRules = globalValidationRules;
          this.BlockLevelContentValidationRules = blockLevelContentValidationRules;
          this.ListItemRetrieverSettings = listItemRetrieverSettings;
          this.ContentTypeHierarchyLambda = contentTypeHierarchyLambda;
          this.XmlGenerationLambdas = xmlGenerationLambdas;
          this.ImageBase = imageBase;
          this.ContentTypeRegexExtension = contentTypeRegexExtension;
      }
  }
  160. public static class WmlToXml
  161. {
  /// <summary>
  /// Applies content types to an in-memory document and returns the modified document.
  /// </summary>
  /// <param name="document">The source document; it is opened through a memory-stream copy.</param>
  /// <param name="settings">Rules and options forwarded to the <c>WordprocessingDocument</c> overload.</param>
  /// <returns>The document as modified by unid assignment and content type application.</returns>
  public static WmlDocument ApplyContentTypes(WmlDocument document, WmlToXmlSettings settings)
  {
      WmlDocument modified;
      using (var memoryDoc = new OpenXmlMemoryStreamDocument(document))
      {
          // The inner using closes the package before the modified document is read back.
          using (WordprocessingDocument package = memoryDoc.GetWordprocessingDocument())
          {
              WmlToXmlUtil.AssignUnidToBlc(package);
              ApplyContentTypes(package, settings);
          }
          modified = memoryDoc.GetModifiedWmlDocument();
      }
      return modified;
  }
  /// <summary>
  /// Prepares an open document and applies the content type rule sets to it.
  /// </summary>
  /// <remarks>
  /// Steps, in order: merge any regex extensions into the matching rules, simplify markup,
  /// assemble formatting, read the default paragraph/character/table style ids from the styles
  /// part, assemble list item information, then apply the rule sets. Progress is reported through
  /// <c>settings.ProgressFunction</c> (when set) before the three long-running phases.
  /// </remarks>
  /// <param name="wDoc">The open document; it is modified in place.</param>
  /// <param name="settings">Rules and options. <c>DocumentType</c> must be non-empty.</param>
  /// <exception cref="OpenXmlPowerToolsException">
  /// <c>DocumentType</c> is null or empty, or a regex extension names a content type for which no
  /// rule exists.
  /// </exception>
  public static void ApplyContentTypes(WordprocessingDocument wDoc, WmlToXmlSettings settings)
  {
#if false
<Extensions>
  <Extension ContentType='Introduction'>
    <RegexExtension>
      <Regex>.*Infroduction.*</Regex>
      <Regex>.*Entroduction.*</Regex>
    </RegexExtension>
  </Extension>
</Extensions>
#endif
      if (string.IsNullOrEmpty(settings.DocumentType))
          throw new OpenXmlPowerToolsException("DocumentType must be set");

      if (settings.ContentTypeRegexExtension != null)
      {
          // Any of the rule lists may legitimately be null; treat a missing list as empty
          // rather than letting Concat throw.
          var allRules = (settings.DocumentContentTypeRules ?? Enumerable.Empty<ContentTypeRule>())
              .Concat(settings.DocumentTypeContentTypeRules ?? Enumerable.Empty<ContentTypeRule>())
              .Concat(settings.GlobalContentTypeRules ?? Enumerable.Empty<ContentTypeRule>())
              .ToList();

          foreach (var ext in settings.ContentTypeRegexExtension.Root.Elements("Extension"))
          {
              var ct = (string)ext.Attribute("ContentType");
              var ruleToUpdate = allRules.FirstOrDefault(r => r.ContentType == ct);
              if (ruleToUpdate == null)
                  throw new OpenXmlPowerToolsException("ContentTypeRegexExtension refers to content type that does not exist");

              // A rule may have been defined without any regexes.
              var mergedRegexes = (ruleToUpdate.RegexArray ?? new Regex[0]).ToList();

              // The rule objects are shared through the settings, so this method can run many
              // times against the same rules. Skip patterns that are already present (same
              // pattern, default options) so the array does not grow on every call.
              foreach (var pattern in ext.Elements("RegexExtension").Elements("Regex").Select(z => z.Value))
              {
                  bool alreadyPresent = mergedRegexes.Any(existing =>
                      existing.Options == RegexOptions.None && existing.ToString() == pattern);
                  if (!alreadyPresent)
                      mergedRegexes.Add(new Regex(pattern));
              }
              ruleToUpdate.RegexArray = mergedRegexes.ToArray();
          }
      }

      // Reports the start of a phase; a no-op when no progress callback was supplied.
      Action<string> reportPhase = phaseName =>
      {
          if (settings.ProgressFunction == null)
              return;
          settings.ProgressFunction(new WmlToXmlProgressInfo()
          {
              ContentCount = 0,
              ContentTotal = 0,
              InProgressMessage = phaseName + Environment.NewLine,
          });
      };

      reportPhase("Simplify markup");
      SimplifyMarkupSettings markupSimplifierSettings = new SimplifyMarkupSettings()
      {
          AcceptRevisions = true,
          NormalizeXml = true,
          RemoveBookmarks = false,
          RemoveComments = true,
          RemoveContentControls = false,
          RemoveEndAndFootNotes = false,
          RemoveFieldCodes = false,
          RemoveGoBackBookmark = true,
          RemoveHyperlinks = false,
          RemoveLastRenderedPageBreak = true,
          RemoveMarkupForDocumentComparison = false,
          RemovePermissions = true,
          RemoveProof = true,
          RemoveRsidInfo = true,
          RemoveSmartTags = true,
          RemoveSoftHyphens = false,
          RemoveWebHidden = true,
          ReplaceTabsWithSpaces = false,
      };
      MarkupSimplifier.SimplifyMarkup(wDoc, markupSimplifierSettings);

      reportPhase("Assemble formatting");
      FormattingAssemblerSettings formattingAssemblerSettings = new FormattingAssemblerSettings();
      formattingAssemblerSettings.RemoveStyleNamesFromParagraphAndRunProperties = false;
      formattingAssemblerSettings.RestrictToSupportedLanguages = false;
      formattingAssemblerSettings.RestrictToSupportedNumberingFormats = false;
      FormattingAssembler.AssembleFormatting(wDoc, formattingAssemblerSettings);

      ContentTypeApplierInfo ctai = new ContentTypeApplierInfo();
      XDocument sXDoc = wDoc.MainDocumentPart.StyleDefinitionsPart.GetXDocument();

      // Finds the first w:style flagged as default for the given style type, or null.
      Func<string, XElement> findDefaultStyle = styleType => sXDoc
          .Root
          .Elements(W.style)
          .FirstOrDefault(st => st.Attribute(W._default).ToBoolean() == true &&
              (string)st.Attribute(W.type) == styleType);

      // Each name is assigned only when a default style exists, leaving the initial value otherwise.
      XElement defaultParagraphStyle = findDefaultStyle("paragraph");
      if (defaultParagraphStyle != null)
          ctai.DefaultParagraphStyleName = (string)defaultParagraphStyle.Attribute(W.styleId);

      XElement defaultCharacterStyle = findDefaultStyle("character");
      if (defaultCharacterStyle != null)
          ctai.DefaultCharacterStyleName = (string)defaultCharacterStyle.Attribute(W.styleId);

      XElement defaultTableStyle = findDefaultStyle("table");
      if (defaultTableStyle != null)
          ctai.DefaultTableStyleName = (string)defaultTableStyle.Attribute(W.styleId);

      reportPhase("Assemble list item information");
      AssembleListItemInformation(wDoc, settings.ListItemRetrieverSettings);

      ApplyContentTypesForRuleSet(settings, ctai, wDoc);
  }
  /// <summary>
  /// Produces the content type XML for an in-memory document.
  /// </summary>
  /// <param name="document">The source document; it is opened through a memory-stream copy.</param>
  /// <param name="settings">Options forwarded to the <c>WordprocessingDocument</c> overload.</param>
  /// <returns>The <c>ContentTypeXml</c> element produced by the <c>WordprocessingDocument</c> overload.</returns>
  public static XElement ProduceContentTypeXml(WmlDocument document, WmlToXmlSettings settings)
  {
      using (var memoryDoc = new OpenXmlMemoryStreamDocument(document))
      using (WordprocessingDocument package = memoryDoc.GetWordprocessingDocument())
      {
          return ProduceContentTypeXml(package, settings);
      }
  }
  /// <summary>
  /// Produces the hierarchical <c>ContentTypeXml</c> element for an open document.
  /// </summary>
  /// <remarks>
  /// Assigns levels to the content, annotates runs that belong to fields, transforms each
  /// level-1 body element (attaching its children headings), and finally nests the result
  /// according to the hierarchy definition in the settings.
  /// </remarks>
  /// <param name="wDoc">The open document; its main part is annotated and rewritten.</param>
  /// <param name="settings">Options. <c>DocumentType</c> must be non-empty.</param>
  /// <returns>
  /// The nested <c>ContentTypeXml</c> element, or an empty <c>ContentTypeXml</c> element when the
  /// document contains no paragraph.
  /// </returns>
  /// <exception cref="OpenXmlPowerToolsException">
  /// <c>DocumentType</c> is null or empty, or the document has no body element.
  /// </exception>
  public static XElement ProduceContentTypeXml(WordprocessingDocument wDoc, WmlToXmlSettings settings)
  {
      if (string.IsNullOrEmpty(settings.DocumentType))
          throw new OpenXmlPowerToolsException("DocumentType must be set");

      var mainPart = wDoc.MainDocumentPart;
      var mainXDoc = mainPart.GetXDocument();
#if false
<w:styles xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships" xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" xmlns:w14="http://schemas.microsoft.com/office/word/2010/wordml" xmlns:w15="http://schemas.microsoft.com/office/word/2012/wordml" xmlns:w16se="http://schemas.microsoft.com/office/word/2015/wordml/symex" mc:Ignorable="w14 w15 w16se">
  <w:docDefaults>
    <w:rPrDefault>
      <w:rPr>
        <w:rFonts w:ascii="Georgia" w:eastAsiaTheme="minorHAnsi" w:hAnsi="Georgia" w:cs="Times New Roman"/>
        <w:lang w:val="en-US" w:eastAsia="en-US" w:bidi="ar-SA"/>
      </w:rPr>
    </w:rPrDefault>
    <w:pPrDefault/>
  </w:docDefaults>
#endif
      AssignLevelsToContent(mainXDoc, settings);

      // If there is no content, return an empty document.
      var firstParagraph = mainXDoc.Descendants(W.p).FirstOrDefault();
      if (firstParagraph == null)
          return new XElement("ContentTypeXml");

      // Called for its side effect: all paragraphs get initialized with ListItemInfo.
      ListItemRetriever.RetrieveListItem(wDoc, firstParagraph);

      // Annotate runs associated with fields, so that hyperlinks stored as fields can be retrieved.
      FieldRetriever.AnnotateWithFieldInfo(mainPart);
      AnnotateRunsThatUseFieldsForNumbering(mainXDoc);

      var rewrittenRoot = (XElement)AnnotateRunsThatUseFldSimple(mainXDoc.Root);
      mainXDoc.Root.ReplaceWith(rewrittenRoot);
      mainPart.PutXDocument();

      // The root was replaced, so the field annotations are applied again before re-reading.
      FieldRetriever.AnnotateWithFieldInfo(mainPart);
      mainXDoc = mainPart.GetXDocument();

      var body = mainXDoc.Root.Descendants(W.body).FirstOrDefault();
      if (body == null)
          throw new OpenXmlPowerToolsException("Internal error: invalid document");

      // Only body children that were given a level take part in the output.
      var leveledContent = body.Elements()
          .Where(e => e.Attribute(PtOpenXml.Level) != null)
          .ToList();
      var topLevelContent = leveledContent
          .Where(h => (int)h.Attribute(PtOpenXml.Level) == 1)
          .ToList();

      var flatXml = new XElement("ContentTypeXml");
      foreach (var heading in topLevelContent)
      {
          var childrenHeadings = GetChildrenHeadings(mainPart, leveledContent, heading, settings);
          var transformed = (XElement)ProduceXmlTransform(mainPart, heading, settings);
          if (transformed == null)
              continue;
          transformed.Add(childrenHeadings);
          flatXml.Add(transformed);
      }

      return HierarchyPerSettings(flatXml, settings);
  }
  /// <summary>
  /// Converts the flat list of elements under <paramref name="contentTypeXml"/> into a nested
  /// tree, as permitted by the hierarchy definition for <c>settings.DocumentType</c>.
  /// </summary>
  /// <remarks>
  /// Each flat element is given a temporary <c>PtOpenXml.IndentLevel</c> attribute by walking a
  /// stack of currently open ancestors: an element that the current definition allows as a child
  /// is nested one level deeper; otherwise ancestors are popped until one allows it. An element
  /// that no open ancestor allows is placed at level one. The elements of
  /// <paramref name="contentTypeXml"/> are modified (the attribute is added to them); the
  /// attribute is removed from the returned tree only.
  /// </remarks>
  /// <param name="contentTypeXml">Element whose children are the flat content elements.</param>
  /// <param name="settings">Supplies the hierarchy definition and the document type.</param>
  /// <returns>A new <c>ContentTypeXml</c> element containing the nested content.</returns>
  /// <exception cref="OpenXmlPowerToolsException">
  /// The hierarchy definition is missing, has no entry for the document type, has no root
  /// element, or does not mention the name of one of the content elements.
  /// </exception>
  private static XElement HierarchyPerSettings(XElement contentTypeXml, WmlToXmlSettings settings)
  {
#if false
<Root>
  <DocumentType DocumentType="AuthoritativeText">
    <ContentTypeXml IsRoot="true">
      <VolumeContainer />
    </ContentTypeXml>
#endif
      // One of the WmlToXmlSettings constructors never assigns the hierarchy definition; fail
      // with a meaningful message instead of a NullReferenceException.
      if (settings.ContentTypeHierarchyDefinition == null)
          throw new OpenXmlPowerToolsException("Invalid content type hierarchy definition - ContentTypeHierarchyDefinition must be set");

      var hierarchyDefinition = settings
          .ContentTypeHierarchyDefinition
          .Elements("DocumentType")
          .FirstOrDefault(e => (string)e.Attribute("DocumentType") == settings.DocumentType);
      if (hierarchyDefinition == null)
          throw new OpenXmlPowerToolsException("Invalid content type hierarchy definition - no hierarchy definition for specified document type");

      HashSet<XName> hierarchyElements = new HashSet<XName>(hierarchyDefinition.DescendantsAndSelf().Select(d => d.Name));

      // Casting a missing attribute to bool throws; bool? lets definitions without an IsRoot
      // attribute appear before the root element.
      var rootElement = hierarchyDefinition
          .Elements()
          .FirstOrDefault(e => (bool?)e.Attribute("IsRoot") == true);
      if (rootElement == null)
          throw new OpenXmlPowerToolsException("Invalid content type hierarchy definition - no root element");

      // True when parentDefinition has a child definition named like the candidate (only the
      // first such child is considered) and every attribute on that definition is present on
      // the candidate with the same value. A definition without attributes matches by name alone.
      Func<XElement, XElement, bool> allowsAsChild = (parentDefinition, candidate) =>
      {
          var childDefinition = parentDefinition.Element(candidate.Name);
          if (childDefinition == null)
              return false;
          return childDefinition.Attributes().All(required =>
          {
              var actual = candidate.Attribute(required.Name);
              return actual != null && actual.Value == required.Value;
          });
      };

      // The stack holds the chain of open ancestors; the root definition is always at the bottom.
      Stack<XElement> stack = new Stack<XElement>();
      stack.Push(rootElement);
      var currentlyLookingAt = hierarchyDefinition.Element(rootElement.Name);

      foreach (var item in contentTypeXml.Elements())
      {
          if (!hierarchyElements.Contains(item.Name))
              throw new OpenXmlPowerToolsException(string.Format("Invalid Content Type Hierarchy Definition - missing def for {0}", item.Name));

          if (allowsAsChild(currentlyLookingAt, item))
          {
              item.Add(new XAttribute(PtOpenXml.IndentLevel, stack.Count));
              stack.Push(item);
              currentlyLookingAt = FindCurrentlyLookingAt(hierarchyDefinition, item);
              continue;
          }

          // Not allowed under the current element: climb until some ancestor allows it.
          while (true)
          {
              if (stack.Count == 1)
              {
                  // have encountered an unexpected hierarchy element. have gone up the stack, and no element up the stack allows for this as a child element.
                  // Therefore, put it at level one, and let the Narrdoc transform generate invalid narrdoc.
                  item.Add(new XAttribute(PtOpenXml.IndentLevel, stack.Count));
                  break;
              }

              stack.Pop();
              currentlyLookingAt = FindCurrentlyLookingAt(hierarchyDefinition, stack.Peek());
              if (allowsAsChild(currentlyLookingAt, item))
              {
                  item.Add(new XAttribute(PtOpenXml.IndentLevel, stack.Count));
                  stack.Push(item);
                  currentlyLookingAt = FindCurrentlyLookingAt(hierarchyDefinition, item);
                  break;
              }
          }
      }

      var hierarchicalContentTypeXml = new XElement("ContentTypeXml",
          HierarchyPerSettingsTransform(contentTypeXml.Elements(), 1));
      hierarchicalContentTypeXml.DescendantsAndSelf().Attributes(PtOpenXml.IndentLevel).Remove();
      return hierarchicalContentTypeXml;
  }
  /// <summary>
  /// Finds the definition, among the direct children of <paramref name="hierarchyDefinition"/>,
  /// that describes <paramref name="item"/>.
  /// </summary>
  /// <remarks>
  /// Candidates are the children with the same name as the item, tried from the one with the
  /// most attributes to the one with the fewest. A candidate matches when each of its attributes
  /// is present on the item with an equal value; a candidate without attributes always matches.
  /// </remarks>
  /// <param name="hierarchyDefinition">The hierarchy definition for the current document type.</param>
  /// <param name="item">The element to find a definition for.</param>
  /// <returns>The first matching definition.</returns>
  /// <exception cref="OpenXmlPowerToolsException">No candidate matches.</exception>
  private static XElement FindCurrentlyLookingAt(XElement hierarchyDefinition, XElement item)
  {
      var mostSpecificFirst = hierarchyDefinition
          .Elements(item.Name)
          .OrderByDescending(e => e.Attributes().Count());

      foreach (var candidate in mostSpecificFirst)
      {
          bool everyAttributeMatches = true;
          foreach (var required in candidate.Attributes())
          {
              var actual = item.Attribute(required.Name);
              if (actual == null || actual.Value != required.Value)
              {
                  everyAttributeMatches = false;
                  break;
              }
          }
          if (everyAttributeMatches)
              return candidate;
      }

      throw new OpenXmlPowerToolsException("Internal error");
  }
  /// <summary>
  /// Recursively nests a flat sequence of elements according to their
  /// <c>PtOpenXml.IndentLevel</c> attribute.
  /// </summary>
  /// <remarks>
  /// The sequence is cut into consecutive groups: a new group starts at every element whose
  /// indent level equals <paramref name="level"/> (and at the very first element, whatever its
  /// level). The first element of each group becomes a new element carrying copies of its
  /// attributes and child elements; the rest of the group is nested beneath it by recursing with
  /// <c>level + 1</c>.
  /// </remarks>
  /// <param name="list">The flat elements, in document order.</param>
  /// <param name="level">The indent level that starts a new group at this depth.</param>
  /// <returns>A <c>List&lt;XElement&gt;</c> of nested elements, or <c>null</c> when the sequence is empty.</returns>
  /// <exception cref="OpenXmlPowerToolsException">An element has no indent level attribute.</exception>
  private static object HierarchyPerSettingsTransform(IEnumerable<XElement> list, int level)
  {
      var groups = new List<List<XElement>>();
      foreach (var element in list)
      {
          var indent = element.Attribute(PtOpenXml.IndentLevel);
          if (indent == null)
              throw new OpenXmlPowerToolsException(string.Format("Invalid Content Type Hierarchy Definition - missing def for {0}", element.Name));

          // Elements that precede the first element at this level form a leading group of their own.
          if ((int)indent == level || groups.Count == 0)
              groups.Add(new List<XElement>());
          groups[groups.Count - 1].Add(element);
      }

      // Nothing to nest: null is simply ignored when passed as XElement content.
      if (groups.Count == 0)
          return null;

      var nested = new List<XElement>(groups.Count);
      foreach (var group in groups)
      {
          var head = group[0];
          nested.Add(new XElement(head.Name,
              head.Attributes(),
              head.Elements(),
              HierarchyPerSettingsTransform(group.Skip(1), level + 1)));
      }
      return nested;
  }
  535. // this is where we need to do the same type of run annotation as for complex fields, but for simple fields.
  536. // I think that we may need to split up the run following the simple field
  537. #if false
  538. <w:p pt:StyleName="Caption" pt:ContentType="Caption" pt:Level="2">
  539. <w:r pt:ContentType="Span">
  540. <w:t xml:space="preserve">Table </w:t>
  541. </w:r>
  542. <w:r>
  543. <w:fldChar w:fldCharType="begin" />
  544. </w:r>
  545. <w:r>
  546. <w:instrText xml:space="preserve"> STYLEREF 1 \s </w:instrText>
  547. </w:r>
  548. <w:r>
  549. <w:fldChar w:fldCharType="separate" />
  550. </w:r>
  551. <w:r pt:ContentType="Span">
  552. <w:t>1</w:t>
  553. </w:r>
  554. <w:r>
  555. <w:fldChar w:fldCharType="end" />
  556. </w:r>
  557. <w:r pt:ContentType="Span">
  558. <w:t>.</w:t>
  559. </w:r>
  560. <w:r>
  561. <w:fldChar w:fldCharType="begin" />
  562. </w:r>
  563. <w:r>
  564. <w:instrText xml:space="preserve"> SEQ Table \* ARABIC </w:instrText>
  565. </w:r>
  566. <w:r>
  567. <w:fldChar w:fldCharType="separate" />
  568. </w:r>
  569. <w:r pt:ContentType="Span">
  570. <w:t>1</w:t>
  571. </w:r>
  572. <w:r>
  573. <w:fldChar w:fldCharType="end" />
  574. </w:r>
  575. <w:r pt:ContentType="Span">
  576. <w:t>Type the title here</w:t>
  577. </w:r>
  578. </w:p>
  579. #endif
  /// <summary>
  /// Marks the runs that make up a SEQ/STYLEREF-based caption number (for example "Table 1.1")
  /// with a <c>PtOpenXml.ListItemRun</c> attribute holding the number text.
  /// </summary>
  /// <remarks>
  /// Works from the field annotations cached on the document root (a dictionary keyed by field
  /// id); returns immediately when there are none. For each SEQ or STYLEREF field:
  /// the paragraph containing the field's last result run is located; the last direct-child run
  /// holding a <c>w:fldChar</c> of any SEQ/STYLEREF field marks the end of the number; leading
  /// '.' and ' ' characters of the following text run are moved to the text run preceding that
  /// field end; and every text-bearing direct-child run up to and including the field end is
  /// annotated. A paragraph that already has an annotated run is skipped, so it is processed
  /// once even when it contains several fields.
  /// </remarks>
  /// <param name="mainXDoc">The main document part's XML; modified in place.</param>
  /// <exception cref="OpenXmlPowerToolsException">A field result run is not inside a paragraph.</exception>
  private static void AnnotateRunsThatUseFieldsForNumbering(XDocument mainXDoc)
  {
      var cachedAnnotationInformation = mainXDoc.Root.Annotation<Dictionary<int, List<XElement>>>();
      if (cachedAnnotationInformation == null)
          return;

      // Matches the trailing number of the caption text, e.g. "1.2" in "Table 1.2".
      Regex trailingNumberRegex = new Regex("[A-F0-9.]+$");

      // Whether a field is SEQ or STYLEREF is asked for every fldChar run of every candidate
      // paragraph; the instruction text is not changed by this method, so remember the answer.
      var numberingFieldById = new Dictionary<int, bool>();
      Func<int, bool> isNumberingField = fieldId =>
      {
          bool known;
          if (numberingFieldById.TryGetValue(fieldId, out known))
              return known;
          var instrText = FieldRetriever.InstrText(mainXDoc.Root, fieldId).TrimStart('{').TrimEnd('}');
          var fieldInfo = FieldRetriever.ParseField(instrText);
          bool isNumbering =
              string.Equals(fieldInfo.FieldType, "SEQ", StringComparison.OrdinalIgnoreCase) ||
              string.Equals(fieldInfo.FieldType, "STYLEREF", StringComparison.OrdinalIgnoreCase);
          numberingFieldById[fieldId] = isNumbering;
          return isNumbering;
      };

      foreach (var item in cachedAnnotationInformation)
      {
          if (!isNumberingField(item.Key))
              continue;

          // Text-bearing runs that are part of this field's result.
          var runsForField = mainXDoc
              .Root
              .Descendants()
              .Where(d =>
              {
                  Stack<FieldRetriever.FieldElementTypeInfo> stack = d.Annotation<Stack<FieldRetriever.FieldElementTypeInfo>>();
                  if (stack == null)
                      return false;
                  return stack.Any(stackItem => stackItem.Id == item.Key && stackItem.FieldElementType == FieldRetriever.FieldElementTypeEnum.Result);
              })
              .Select(d => d.AncestorsAndSelf(W.r).FirstOrDefault())
              .Where(z9 => z9 != null)
              .GroupAdjacent(o => o)
              .Select(g => g.First())
              .Where(r => r.Element(W.t) != null)
              .ToList();
          if (!runsForField.Any())
              continue;

          var lastRun = runsForField.Last();
          var para = lastRun
              .Ancestors(W.p)
              .FirstOrDefault();
          if (para == null)
              throw new OpenXmlPowerToolsException("Internal error - invalid document");

          // if already processed
          if (para.Descendants(W.r).Any(r => r.Attribute(PtOpenXml.ListItemRun) != null))
              continue;

          // The last direct-child run carrying a fldChar of any SEQ/STYLEREF field.
          var lastFldCharRun = para
              .Elements(W.r)
              .LastOrDefault(r =>
              {
                  if (r.Element(W.fldChar) == null)
                      return false;
                  Stack<FieldRetriever.FieldElementTypeInfo> stack = r.Annotation<Stack<FieldRetriever.FieldElementTypeInfo>>();
                  if (stack == null)
                      return false;
                  return stack.Any(stackItem => isNumberingField(stackItem.Id));
              });

          // The field's runs may not be direct children of the paragraph (for example when they
          // are nested in another element); there is then nothing to anchor the annotation to.
          if (lastFldCharRun == null)
              continue;

          var elementAfter = lastFldCharRun
              .ElementsAfterSelf(W.r)
              .FirstOrDefault();
          // elementAfter may be null - that is ok - the rest of the routine works properly in this case.

          var listItemText = para
              .Elements(W.r)
              .TakeWhile(e => e != elementAfter)
              .Select(r1 => r1.Descendants(W.t).Select(t => (string)t).StringConcatenate())
              .StringConcatenate()
              .Trim();

          var nextRun = lastFldCharRun
              .ElementsAfterSelf(W.r)
              .FirstOrDefault(nr => nr.Element(W.t) != null);

          // The w:t of the nearest preceding run that has text; null when no such run exists.
          var lastFldCharRunText = lastFldCharRun
              .ElementsBeforeSelf(W.r)
              .Reverse()
              .Select(r => r.Element(W.t))
              .FirstOrDefault(t => t != null);

          // Word keeps the separator (". ") in the run after the field; move it to the text
          // before the field end so that it becomes part of the list item. Without a text run
          // to receive it, the separator stays where it is rather than being dropped.
          string sepCharsString = "";
          if (nextRun != null && lastFldCharRunText != null)
          {
              var nextRunTextElement = nextRun.Element(W.t);
              var nextRunText = nextRunTextElement.Value;
              int sepLength = nextRunText
                  .TakeWhile(ch => ch == '.' || ch == ' ')
                  .Count();
              sepCharsString = nextRunText.Substring(0, sepLength);
              nextRunTextElement.Value = nextRunText.Substring(sepLength);
              lastFldCharRunText.Value = lastFldCharRunText.Value + sepCharsString;
          }

          Match m = trailingNumberRegex.Match(listItemText);
          if (!m.Success)
              continue;

          string matchedValue = (m.Value + sepCharsString)
              .TrimStart('.')
              .TrimEnd('.', ' ');
          foreach (var run in para.Elements(W.r).TakeWhile(e => e != elementAfter).Where(e => e.Element(W.t) != null))
              run.Add(new XAttribute(PtOpenXml.ListItemRun, matchedValue));
      }
  }
  746. #if false
  747. <w:p pt14:StyleName="Caption">
  748. <w:r>
  749. <w:t xml:space="preserve">Box </w:t>
  750. </w:r>
  751. <w:fldSimple w:instr=" SEQ Box \* ARABIC ">
  752. <w:r>
  753. <w:t>1</w:t>
  754. </w:r>
  755. </w:fldSimple>
  756. <w:r>
  757. <w:t>. Type the title here</w:t>
  758. </w:r>
  759. </w:p>
  760. #endif
  // Recursively copies the tree rooted at node. Any paragraph that directly contains a w:fldSimple
  // whose instruction starts with "SEQ" (e.g. a caption such as "Box 1. Title") is rebuilt so that:
  //   - the elements before the field are cloned, and those with text get a ListItemRun attribute,
  //   - the field is replaced by its child runs, each tagged with ListItemRun,
  //   - the first run after the field is split into its leading ". " part (tagged with ListItemRun)
  //     and the remaining text (untagged),
  //   - the remaining runs after the field are appended unchanged.
  // The ListItemRun value is the text of the field result (the sequence number).
  // Non-element nodes are returned as is.
  // NOTE(review): only w:r siblings that follow the field are carried over; other following
  // siblings (bookmarks, hyperlinks, further fields) are dropped, as in the previous implementation.
  private static object AnnotateRunsThatUseFldSimple(XNode node)
  {
      var element = node as XElement;
      if (element == null)
          return node;

      // A field without a w:instr attribute is treated as "not a SEQ field" instead of throwing.
      Func<XElement, bool> isSeqField = fs =>
      {
          var instrText = (string)fs.Attribute(W.instr);
          return instrText != null && instrText.Trim().StartsWith("SEQ", StringComparison.Ordinal);
      };

      XElement fldSimple = null;
      if (element.Name == W.p)
          fldSimple = element.Elements(W.fldSimple).FirstOrDefault(isSeqField);

      if (fldSimple == null)
      {
          // Not a caption paragraph: copy this element and keep looking further down the tree.
          return new XElement(element.Name,
              element.Attributes(),
              element.Nodes().Select(n => AnnotateRunsThatUseFldSimple(n)));
      }

      var listItemNum = fldSimple.Elements(W.r).Elements(W.t).Select(t => (string)t).StringConcatenate();

      // Everything in front of the SEQ field belongs to the list item (e.g. the "Box " label).
      var runsBefore = fldSimple
          .ElementsBeforeSelf()
          .Select(e =>
          {
              var newE = new XElement(e); // clone
              if (e.Value != "" && e.Attribute(PtOpenXml.ListItemRun) == null)
                  newE.Add(new XAttribute(PtOpenXml.ListItemRun, listItemNum));
              return newE;
          })
          .ToList();

      // Unwrap the field: its child elements become direct children of the paragraph.
      // An existing ListItemRun attribute is filtered out so the new one does not collide with it.
      var fldSimpleRuns = fldSimple.Elements().Select(e =>
          new XElement(e.Name,
              e.Attributes().Where(a => a.Name != PtOpenXml.ListItemRun),
              new XAttribute(PtOpenXml.ListItemRun, listItemNum),
              e.Elements()));

      // We have to do some funny business here because Word puts the ". " as part of the text
      // following the fldSimple, and we want that text to be part of the list item.
      var runAfter = fldSimple.ElementsAfterSelf(W.r).FirstOrDefault();
      XElement runAfterListItemElement = null;
      XElement runAfterRemainderElement = null;
      if (runAfter != null)
      {
          var runAfterText = runAfter.Elements(W.t).Select(t => (string)t).StringConcatenate();
          var runAfterTextTrimmed = runAfterText.TrimStart('.', ' ');
          var leadingLength = runAfterText.Length - runAfterTextTrimmed.Length;
          if (leadingLength != 0)
          {
              runAfterListItemElement = new XElement(W.r,
                  runAfter.Attributes().Where(a => a.Name != PtOpenXml.ListItemRun),
                  new XAttribute(PtOpenXml.ListItemRun, listItemNum),
                  runAfter.Elements(W.rPr),
                  new XElement(W.t, runAfterText.Substring(0, leadingLength)));
          }
          runAfterRemainderElement = new XElement(W.r,
              runAfter.Attributes(),
              runAfter.Elements(W.rPr),
              new XElement(W.t, runAfterText.Substring(leadingLength)));
      }

      // null content (no run after the field) is ignored by the XElement constructor.
      return new XElement(W.p,
          element.Attributes(),
          runsBefore,
          fldSimpleRuns,
          runAfterListItemElement,
          runAfterRemainderElement,
          fldSimple.ElementsAfterSelf(W.r).Skip(1));
  }
  // Produces the XML for a single endnote or footnote. blockLevelContentContainer is the w:endnote
  // or w:footnote element; the result is the content type XML for its contents, intended to be
  // inserted in situ in the ContentTypeXml. The returned sequence is lazily evaluated.
  public static object ProduceContentTypeXmlForBlockLevelContentContainer(WordprocessingDocument wDoc, WmlToXmlSettings settings, OpenXmlPart part, XElement blockLevelContentContainer)
  {
      AssignLevelsToContentForEndFootNote(blockLevelContentContainer, settings);

      // Call RetrieveListItem so that all paragraphs are initialized with ListItemInfo.
      // The returned list item itself is not needed here.
      ListItemRetriever.RetrieveListItem(wDoc, blockLevelContentContainer.Descendants(W.p).FirstOrDefault());

      // Only direct children that were assigned a level take part in the output.
      var leveledContent = new List<XElement>();
      foreach (var child in blockLevelContentContainer.Elements())
      {
          if (child.Attribute(PtOpenXml.Level) != null)
              leveledContent.Add(child);
      }

      var topLevelContent = leveledContent
          .Where(item => (int)item.Attribute(PtOpenXml.Level) == 1)
          .ToList();

      return topLevelContent.Select(item =>
      {
          var nestedContent = GetChildrenHeadings(part, leveledContent, item, settings);
          var transformed = (XElement)ProduceXmlTransform(part, item, settings);
          if (transformed == null)
              return transformed;
          transformed.Add(nestedContent);
          return transformed;
      });
  }
  // Returns (lazily) the transformed XML for the direct children of parent: the items that follow
  // parent in contentList, up to the first item whose level is not deeper than parent's, and whose
  // level is exactly one more than parent's. Each child's own children are attached recursively.
  private static object GetChildrenHeadings(OpenXmlPart part, List<XElement> contentList, XElement parent, WmlToXmlSettings settings)
  {
      Func<XElement, int> levelOf = item => (int)item.Attribute(PtOpenXml.Level);

      // Everything nested under parent, at any depth.
      var nestedItems = contentList
          .SkipWhile(item => item != parent)
          .Skip(1)
          .TakeWhile(item => levelOf(item) > levelOf(parent));

      // Of those, only the items one level down are direct children.
      var directChildren = nestedItems
          .Where(item => levelOf(item) == levelOf(parent) + 1);

      return directChildren.Select(child =>
      {
          var grandChildren = GetChildrenHeadings(part, contentList, child, settings);
          var transformed = (XElement)ProduceXmlTransform(part, child, settings);
          if (transformed == null)
              return transformed;
          transformed.Add(grandChildren);
          return transformed;
      });
  }
  // Transforms one node of the annotated WordprocessingML tree into the output XML by dispatching
  // to the generation lambdas in settings.XmlGenerationLambdas.
  //   - non-element nodes are returned unchanged
  //   - w:t and w:fldSimple are replaced by the transform of their child nodes
  //   - w:r without a content type is handled by the "Run" lambda (required)
  //   - w:hyperlink is handled by the "Hyperlink" lambda (required)
  //   - an element with a content type is handled by the lambda registered for that content type;
  //     if none is registered, a default <ContentType><Content>...</Content></ContentType> is built
  //   - any other element yields null
  // Throws ArgumentOutOfRangeException if settings.XmlGenerationLambdas is null, and
  // OpenXmlPowerToolsException if the "Run" or "Hyperlink" lambda is needed but missing.
  public static object ProduceXmlTransform(OpenXmlPart part, XNode node, WmlToXmlSettings settings)
  {
      var element = node as XElement;
      if (element == null)
      {
#if false
          // The following code inserts an XML comment for unicode characters above 256
          // This could be made more efficient - group characters together and create fewer XText nodes.
          // As it is, it is pretty slow, so should be used only for debugging.
          var xt = node as XText;
          if (xt != null)
          {
              var newContent = xt.Value.Select(c =>
              {
                  var ic = (int)c;
                  if (ic < 256)
                      return (object)new XText(c.ToString());
                  return new[] {
                      (object)new XText(c.ToString()),
                      new XComment(ic.ToString("X")),
                  };
              })
              .ToList();
              return newContent;
          }
#endif
          return node;
      }

      // Same exception type as before, but the text is now passed as the message rather than
      // as the parameter name.
      if (settings.XmlGenerationLambdas == null)
          throw new ArgumentOutOfRangeException(nameof(settings), "Xml Generation Lambdas are required");

      var contentType = (string)element.Attribute(PtOpenXml.ContentType);

      if (element.Name == W.t || element.Name == W.fldSimple)
          return element.Nodes().Select(z => ProduceXmlTransform(part, z, settings));

      if (contentType == null && element.Name == W.r)
      {
          if (!settings.XmlGenerationLambdas.ContainsKey("Run"))
              throw new OpenXmlPowerToolsException("Entry for Run content type in XML generation lambdas is required");
          var runLambda = settings.XmlGenerationLambdas["Run"];
          return runLambda(contentType, part, element, settings);
      }

      if (element.Name == W.hyperlink)
      {
          if (!settings.XmlGenerationLambdas.ContainsKey("Hyperlink"))
              throw new OpenXmlPowerToolsException("Entry for Hyperlink content type in XML generation lambdas is required");
          var hyperlinkLambda = settings.XmlGenerationLambdas["Hyperlink"];
          return hyperlinkLambda(contentType, part, element, settings);
      }

      // ignore any other elements
      if (contentType == null)
          return null;

      if (settings.XmlGenerationLambdas.ContainsKey(contentType))
      {
          var lambda = settings.XmlGenerationLambdas[contentType];
          var newElement = lambda(contentType, part, element, settings);

          // Lang and Unid can only be attached when the lambda produced a single element.
          var generated = newElement as XElement;
          if (generated != null)
          {
              string lang = (string)element.Elements(W.pPr).Elements(W.rPr).Elements(W.lang).Attributes(W.val).FirstOrDefault()
                  ?? settings.DefaultLang;

              // TODO we are not generating lang if English, but this needs revised after analysis
              if (lang != null && !lang.StartsWith("en", StringComparison.Ordinal))
                  generated.Add(new XAttribute("Lang", lang));

              // Carry the source Unid over unless the lambda already set one.
              var sourceUnid = element.Attribute(PtOpenXml.Unid);
              if (sourceUnid != null && generated.Attribute("Unid") == null)
                  generated.Add(new XAttribute("Unid", sourceUnid.Value));
          }
          return newElement;
      }

      // if there is no rule for this content type, then generate the default, for now.
      // todo this is not ideal in my mind. Need to think about this more. Maybe every content type
      // must have a generation lambda.
      return new XElement(contentType, new XElement("Content",
          element.Elements().Select(rce => ProduceXmlTransform(part, rce, settings))));
  }
  // Adds a Level attribute to every paragraph, table, row and cell in the document that has a
  // content type. When GetIndentLevel yields a level, it is used (unless a Level attribute is
  // already present) and following content without a level of its own is placed one level below it.
  private static void AssignLevelsToContent(XDocument mainXDoc, WmlToXmlSettings settings)
  {
      // Snapshot the candidates first; attributes are added to them below.
      var typedBlocks = new List<XElement>();
      foreach (var candidate in mainXDoc.Root.Descendants())
      {
          bool isBlockLevel = candidate.Name == W.p || candidate.Name == W.tbl
              || candidate.Name == W.tr || candidate.Name == W.tc;
          if (isBlockLevel && candidate.Attribute(PtOpenXml.ContentType) != null)
              typedBlocks.Add(candidate);
      }

      int inheritedLevel = 1;
      foreach (var block in typedBlocks)
      {
          int? ownLevel = GetIndentLevel(block, settings);
          if (ownLevel.HasValue)
          {
              if (block.Attribute(PtOpenXml.Level) == null)
                  block.Add(new XAttribute(PtOpenXml.Level, ownLevel.Value));
              inheritedLevel = ownLevel.Value + 1;
          }
          else
          {
              block.Add(new XAttribute(PtOpenXml.Level, inheritedLevel));
          }
      }
  }
  // Marks every paragraph, table, row and cell inside an endnote/footnote container that has a
  // content type with Level = 1. The settings argument is not used.
  private static void AssignLevelsToContentForEndFootNote(XElement blockLevelContentContainer, WmlToXmlSettings settings)
  {
      var typedBlocks =
          (from d in blockLevelContentContainer.Descendants()
           where d.Name == W.p || d.Name == W.tbl || d.Name == W.tr || d.Name == W.tc
           where d.Attribute(PtOpenXml.ContentType) != null
           select d)
          .ToList();

      typedBlocks.ForEach(block => block.Add(new XAttribute(PtOpenXml.Level, 1)));
  }
  // Returns 1 when settings.ContentTypeHierarchyLambda accepts the block level content,
  // otherwise 2. The current implementation never returns null.
  private static int? GetIndentLevel(XElement blockLevelContent, WmlToXmlSettings settings)
  {
      var acceptedByHierarchyLambda = settings.ContentTypeHierarchyLambda(blockLevelContent, settings);
      return acceptedByHierarchyLambda ? 1 : 2;
  }
  // Applies the content type rules to the main document part, then to the endnotes part and the
  // footnotes part when the document has them.
  // Within each part: Document rules first, then DocumentType rules, then Global rules. First one that matches, wins.
  private static void ApplyContentTypesForRuleSet(WmlToXmlSettings settings, ContentTypeApplierInfo ctai, WordprocessingDocument wDoc)
  {
      ApplyRulesToPart(settings, ctai, wDoc, wDoc.MainDocumentPart);

      OpenXmlPart notesPart = wDoc.MainDocumentPart.EndnotesPart;
      if (notesPart != null)
          ApplyRulesToPart(settings, ctai, wDoc, notesPart);

      notesPart = wDoc.MainDocumentPart.FootnotesPart;
      if (notesPart != null)
          ApplyRulesToPart(settings, ctai, wDoc, notesPart);
  }
  // Applies the block level content type rules to one part (main document, endnotes or footnotes).
  //
  // For every paragraph, table, row and cell that does not yet have a content type, the rules are
  // tried in this order: Document rules, DocumentType rules, Global rules. First one that matches, wins.
  // A rule matches when all of its configured criteria pass, evaluated in this order:
  // StyleName (case-insensitive), StyleNameRegex, RegexArray (any pattern), MatchLambda.
  // Test and match counts are recorded in settings.ContentTypeCount, progress is reported through
  // settings.ProgressFunction, and finally the "pt" namespace is declared and marked ignorable on
  // the part's root and the part is written back.
  //
  // Throws ContentApplierException if the document has no style definitions part or the part has no root.
  private static void ApplyRulesToPart(WmlToXmlSettings settings, ContentTypeApplierInfo ctai, WordprocessingDocument wDoc, OpenXmlPart part)
  {
      var partXDoc = part.GetXDocument();

      // Fail with a descriptive error instead of a NullReferenceException; same message as
      // AddContentTypeToBlockContent uses for this condition.
      if (wDoc.MainDocumentPart.StyleDefinitionsPart == null)
          throw new ContentApplierException("Document does not have styles definition part");
      var styleXDoc = wDoc.MainDocumentPart.StyleDefinitionsPart.GetXDocument();

      settings.ApplyContentTypesCustom?.Invoke(partXDoc, styleXDoc, settings, part); // this applies content types that are easy to find
                                                                                     // the function should add the ContentType attribute to paragraphs, which will then cause
                                                                                     // rules to not run for the paragraph

      // in the following, filter for blc that does not have content type already set by ApplyContentTypesCustom
      // (deliberately left lazy, as before: the filter is evaluated while the loop below runs)
      var blockContent = partXDoc.Descendants()
          .Where(d => (d.Name == W.p || d.Name == W.tbl || d.Name == W.tr || d.Name == W.tc) && d.Attribute(PtOpenXml.ContentType) == null);

      int totalCount = 0;
      if (settings.ProgressFunction != null)
      {
          totalCount = blockContent.Count();
          string message;
          if (part is MainDocumentPart)
              message = "Apply rules to main document part";
          else if (part is EndnotesPart)
              message = "Apply rules to endnotes part";
          else
              message = "Apply rules to footnotes part";
          WmlToXmlProgressInfo pi = new WmlToXmlProgressInfo()
          {
              ContentTotal = totalCount,
              ContentCount = 0,
              InProgressMessage = message + Environment.NewLine,
          };
          settings.ProgressFunction(pi);
      }

      var count = 0;
      foreach (var blc in blockContent)
      {
          if (settings.ProgressFunction != null)
          {
              ++count;
              // report every item at the start, then only every tenth item, and always the last one
              if (count < 50 || (count) % 10 == 0 || count == totalCount)
              {
                  var msg = string.Format(" {0} of {1}", count, totalCount);
                  // trailing backspaces let a console consumer overwrite the counter in place
                  msg += "".PadRight(msg.Length, '\b');
                  WmlToXmlProgressInfo pi2 = new WmlToXmlProgressInfo()
                  {
                      ContentTotal = totalCount,
                      ContentCount = count,
                      InProgressMessage = msg,
                  };
                  settings.ProgressFunction(pi2);
              }
          }

          // null for rows and cells, and for paragraphs/tables when no default style name is configured
          string styleOfBlc = ResolveBlockStyleName(blc, styleXDoc, ctai);

          ///////////////////////////////////////////////////////////////////////////////////////////
          // The following is useful to get a list of all content types and the code gen list
          //var contentTypeList = settings
          //    .DocumentContentTypeRules
          //    .Concat(settings.DocumentTypeContentTypeRules)
          //    .Concat(settings.GlobalContentTypeRules)
          //    .Select(ct => ct.ContentType)
          //    .Distinct()
          //    .OrderBy(n => n)
          //    .ToList();
          //var contentTypeCodeGenList = settings
          //    .XmlGenerationLambdas
          //    .Select(xgl => xgl.Key)
          //    .OrderBy(n => n)
          //    .ToList();
          //var rulesWithoutGenCode = contentTypeList
          //    .Except(contentTypeCodeGenList)
          //    .ToList();
          //var codeGenWithoutRules = contentTypeCodeGenList
          //    .Except(contentTypeList)
          //    .ToList();
          //var s10 = codeGenWithoutRules.Select(m => m + Environment.NewLine).StringConcatenate();
          //Console.WriteLine(s10);
          //var s9 = contentTypeList.Select(m => m + Environment.NewLine).StringConcatenate();
          //Console.WriteLine(s9);

          // Apply the Document rules first, then apply the DocumentType rules, then apply the Global rules. First one that matches, wins.
          foreach (var rule in settings.DocumentContentTypeRules.Concat(settings.DocumentTypeContentTypeRules).Concat(settings.GlobalContentTypeRules))
          {
              // a rule restricted to certain document types is skipped for all other types
              if (rule.DocumentTypeCollection != null)
              {
                  if (!rule.DocumentTypeCollection.Any(dt => dt == settings.DocumentType))
                      continue;
              }

              if (settings.ContentTypeCount.ContainsKey(rule.ContentType))
                  settings.ContentTypeCount[rule.ContentType].Tests = settings.ContentTypeCount[rule.ContentType].Tests + 1;
              else
                  settings.ContentTypeCount.Add(rule.ContentType, new WmlToXmlContentTypeMetrics() { Count = 0, Tests = 1 });

              // Style names are identifiers: compare ordinally and case-insensitively. This is also
              // null-safe when the block has no style name.
              bool stylePass = rule.StyleName == null
                  || string.Equals(rule.StyleName, styleOfBlc, StringComparison.OrdinalIgnoreCase);

              bool styleRegexPass = stylePass
                  && (rule.StyleNameRegex == null
                      || (styleOfBlc != null && rule.StyleNameRegex.IsMatch(styleOfBlc)));

              bool regexPass = false;
              if (styleRegexPass)
              {
                  regexPass = rule.RegexArray == null;
                  if (rule.RegexArray != null)
                  {
                      // clone the blc because OpenXmlRegex.Match replaces content, mucks with the run, probably should not if it only is used to find content.
                      var clonedBlc = new XElement(blc);

                      // following removes the subtitle created by a soft break, so that the pattern matches appropriately.
                      clonedBlc = RemoveContentAfterBR(clonedBlc);

                      // remove list item runs (runs carrying the ListItemRun attribute, e.g. the "1.1"
                      // of a numbered heading) so that they are not matched in the content
                      clonedBlc.Elements(W.r).Where(r => r.Attribute(PtOpenXml.ListItemRun) != null).Remove();

                      for (int i = 0; i < rule.RegexArray.Length; i++)
                      {
                          if (OpenXmlRegex.Match(new[] { clonedBlc }, rule.RegexArray[i]) != 0)
                          {
                              regexPass = true;
                              break;
                          }
                      }
                  }
              }

              // the lambda is only consulted when every cheaper criterion has passed
              bool matchLambdaPass = regexPass
                  && (rule.MatchLambda == null || rule.MatchLambda(blc, rule, wDoc, settings));

              if (matchLambdaPass)
              {
                  if (settings.ContentTypeCount.ContainsKey(rule.ContentType))
                      settings.ContentTypeCount[rule.ContentType].Count = settings.ContentTypeCount[rule.ContentType].Count + 1;
                  else
                      settings.ContentTypeCount.Add(rule.ContentType, new WmlToXmlContentTypeMetrics() { Count = 1, Tests = 1 });
                  AddContentTypeToBlockContent(settings, part, blc, rule.ContentType);
                  if (rule.ApplyRunContentTypes)
                      ApplyRunContentTypes(settings, ctai, wDoc, blc, settings.RunContentTypeRules, part, partXDoc);
                  break;
              }
              else
              {
                  // NOTE(review): this runs the run-level rules once per non-matching block rule, so
                  // run-level test counts grow with the number of block rules, and run content types
                  // may already be applied before a later rule with ApplyRunContentTypes == false
                  // matches. Behavior kept as is - confirm whether this is intended.
                  ApplyRunContentTypes(settings, ctai, wDoc, blc, settings.RunContentTypeRules, part, partXDoc);
              }
          }
      }

      var root = part.GetXDocument().Root;
      if (root == null)
          throw new ContentApplierException("Internal error");

      // make sure the pt prefix is declared on the root ...
      var ptNamespace = root.Attribute(XNamespace.Xmlns + "pt");
      if (ptNamespace == null)
      {
          root.Add(new XAttribute(XNamespace.Xmlns + "pt", PtOpenXml.pt.NamespaceName));
      }

      // ... and listed in mc:Ignorable
      var ignorable = (string)root.Attribute(MC.Ignorable);
      if (ignorable != null)
      {
          var list = ignorable.Split(' ');
          if (!list.Contains("pt"))
          {
              ignorable += " pt";
              root.Attribute(MC.Ignorable).Value = ignorable;
          }
      }
      else
      {
          root.Add(new XAttribute(MC.Ignorable, "pt"));
      }

      if (settings.ProgressFunction != null)
      {
          WmlToXmlProgressInfo pi = new WmlToXmlProgressInfo()
          {
              ContentTotal = totalCount,
              ContentCount = totalCount,
              InProgressMessage = Environment.NewLine + "  Done" + Environment.NewLine,
          };
          settings.ProgressFunction(pi);
      }

      part.PutXDocument();

      // comments may have been injected into the comments part while processing the main part
      var mainPart = part as MainDocumentPart;
      if (mainPart != null)
      {
          if (mainPart.WordprocessingCommentsPart != null)
              mainPart.WordprocessingCommentsPart.PutXDocument();
      }
  }

  // Returns the style name (w:name, not the style id) of a paragraph or table, falling back to the
  // default paragraph/table style name from ctai when the block has no style or the style is not
  // found in styleXDoc. Returns null for any other element (rows, cells).
  private static string ResolveBlockStyleName(XElement blc, XDocument styleXDoc, ContentTypeApplierInfo ctai)
  {
      XName propertiesName;
      XName styleReferenceName;
      string styleType;
      string defaultStyleName;
      if (blc.Name == W.p)
      {
          propertiesName = W.pPr;
          styleReferenceName = W.pStyle;
          styleType = "paragraph";
          defaultStyleName = ctai.DefaultParagraphStyleName;
      }
      else if (blc.Name == W.tbl)
      {
          propertiesName = W.tblPr;
          styleReferenceName = W.tblStyle;
          styleType = "table";
          defaultStyleName = ctai.DefaultTableStyleName;
      }
      else
      {
          return null;
      }

      string styleName = null;
      var styleId = (string)blc.Elements(propertiesName).Elements(styleReferenceName).Attributes(W.val).FirstOrDefault();
      if (styleId != null)
      {
          styleName = (string)styleXDoc
              .Root
              .Elements(W.style)
              .Where(s => (string)s.Attribute(W.styleId) == styleId && (string)s.Attribute(W.type) == styleType)
              .Elements(W.name)
              .Attributes(W.val)
              .FirstOrDefault();
      }
      return styleName ?? defaultStyleName;
  }
  // For a paragraph, returns a new paragraph with the same attributes that keeps only the child
  // elements in front of the first child that directly contains a w:br; that child and everything
  // after it are left out. Any other element is returned unchanged.
  private static XElement RemoveContentAfterBR(XElement clonedBlc)
  {
      if (clonedBlc.Name == W.p)
      {
          var childrenBeforeBreak = clonedBlc
              .Elements()
              .TakeWhile(child => child.Element(W.br) == null);
          return new XElement(clonedBlc.Name, clonedBlc.Attributes(), childrenBeforeBreak);
      }
      return clonedBlc;
  }
  // Assigns content types to the run level content of one block:
  //   - w:r and w:sdt: the run rules are tried in order and the first matching rule wins. A rule
  //     matches when its StyleName (if any) equals the run style and its MatchLambda (if any)
  //     returns true. StyleNameRegex and RegexArray are not allowed on run rules.
  //   - runs inside a w:hyperlink get the "Hyperlink" content type
  //   - w:bookmarkStart gets the "Anchor" content type
  // Test and match counts are recorded in settings.ContentTypeCount. mainXDoc is not used.
  private static void ApplyRunContentTypes(WmlToXmlSettings settings, ContentTypeApplierInfo ctai, WordprocessingDocument wDoc,
      XElement blockLevelContent, List<ContentTypeRule> runContentTypeRuleList, OpenXmlPart part, XDocument mainXDoc)
  {
      var runLevelContent = blockLevelContent.Descendants()
          .Where(d => d.Name == W.r || d.Name == W.hyperlink || d.Name == W.sdt || d.Name == W.bookmarkStart);

      foreach (var rlc in runLevelContent)
      {
          if (rlc.Name == W.hyperlink)
          {
              foreach (var hyperlinkRun in rlc.Descendants(W.r))
                  AddContentTypeToRunContent(settings, part, hyperlinkRun, "Hyperlink");
              continue;
          }

          if (rlc.Name == W.bookmarkStart)
          {
              AddContentTypeToRunContent(settings, part, rlc, "Anchor");
              continue;
          }

          if (rlc.Name != W.r && rlc.Name != W.sdt)
              continue;

          var runStyle = (string)rlc.Elements(W.rPr).Elements(W.rStyle).Attributes(W.val).FirstOrDefault();
          if (runStyle == null)
              runStyle = ctai.DefaultCharacterStyleName;

          foreach (var rule in runContentTypeRuleList)
          {
              // every rule that is looked at counts as one test
              if (settings.ContentTypeCount.ContainsKey(rule.ContentType))
                  settings.ContentTypeCount[rule.ContentType].Tests = settings.ContentTypeCount[rule.ContentType].Tests + 1;
              else
                  settings.ContentTypeCount.Add(rule.ContentType, new WmlToXmlContentTypeMetrics() { Count = 0, Tests = 1 });

              bool styleMismatch = rule.StyleName != null && rule.StyleName != runStyle;
              if (styleMismatch)
                  continue;

              // these are only detected for rules that get past the style name check
              if (rule.StyleNameRegex != null)
                  throw new OpenXmlPowerToolsException("Invalid Run ContentType Rule - StyleNameRegex not allowed");
              if (rule.RegexArray != null)
                  throw new OpenXmlPowerToolsException("Invalid Run ContentType Rule - Regex not allowed");

              // a rule without a lambda matches on style name alone
              if (rule.MatchLambda != null && !rule.MatchLambda(rlc, rule, wDoc, settings))
                  continue;

              if (settings.ContentTypeCount.ContainsKey(rule.ContentType))
                  settings.ContentTypeCount[rule.ContentType].Count = settings.ContentTypeCount[rule.ContentType].Count + 1;
              else
                  settings.ContentTypeCount.Add(rule.ContentType, new WmlToXmlContentTypeMetrics() { Count = 1, Tests = 1 });
              AddContentTypeToRunContent(settings, part, rlc, rule.ContentType);
              break;
          }
      }
  }
  // Namespace declarations (plus mc:Ignorable) placed on the root w:comments element when a new
  // comments part is created. The array is never reassigned, so it is readonly.
  private static readonly XAttribute[] NamespaceAttributes =
  {
      new XAttribute(XNamespace.Xmlns + "wpc", WPC.wpc),
      new XAttribute(XNamespace.Xmlns + "mc", MC.mc),
      new XAttribute(XNamespace.Xmlns + "o", O.o),
      new XAttribute(XNamespace.Xmlns + "r", R.r),
      new XAttribute(XNamespace.Xmlns + "m", M.m),
      new XAttribute(XNamespace.Xmlns + "v", VML.vml),
      new XAttribute(XNamespace.Xmlns + "wp14", WP14.wp14),
      new XAttribute(XNamespace.Xmlns + "wp", WP.wp),
      new XAttribute(XNamespace.Xmlns + "w10", W10.w10),
      new XAttribute(XNamespace.Xmlns + "w", W.w),
      new XAttribute(XNamespace.Xmlns + "w14", W14.w14),
      new XAttribute(XNamespace.Xmlns + "w15", W15.w15),
      new XAttribute(XNamespace.Xmlns + "w16se", W16SE.w16se),
      new XAttribute(XNamespace.Xmlns + "wpg", WPG.wpg),
      new XAttribute(XNamespace.Xmlns + "wpi", WPI.wpi),
      new XAttribute(XNamespace.Xmlns + "wne", WNE.wne),
      new XAttribute(XNamespace.Xmlns + "wps", WPS.wps),
      new XAttribute(XNamespace.Xmlns + "pt", PtOpenXml.pt),
      // prefixes a consumer may ignore if it does not understand them
      new XAttribute(MC.Ignorable, "w14 wp14 w15 w16se pt"),
  };
  // Adds the ContentType attribute to a block level element. When settings.InjectCommentForContentTypes
  // is true and part is the main document part, a Word comment whose text is the content type is
  // also created:
  //   - a w:comment is appended to the comments part (the part is created if it does not exist)
  //   - a run with a w:commentReference is inserted into the paragraph
  //   - the comment related styles are added to the styles part if missing
  // Throws ContentApplierException if a comment is injected and the document has no styles part.
  public static void AddContentTypeToBlockContent(WmlToXmlSettings settings, OpenXmlPart part, XElement blc, string contentType)
  {
      // add the attribute to the block content
      blc.Add(new XAttribute(PtOpenXml.ContentType, contentType));

      var mainPart = part as MainDocumentPart;
      bool injectComment = settings.InjectCommentForContentTypes != null && (bool)settings.InjectCommentForContentTypes;
      if (!injectComment || mainPart == null)
          return;

      // Open (or create) the comments document and pick the next free comment id.
      int commentNumber = 1;
      XDocument commentsXDoc;
      if (mainPart.WordprocessingCommentsPart == null)
      {
          part.AddNewPart<WordprocessingCommentsPart>();
          commentsXDoc = mainPart.WordprocessingCommentsPart.GetXDocument();
          commentsXDoc.Declaration.Standalone = "yes";
          commentsXDoc.Declaration.Encoding = "UTF-8";
          commentsXDoc.Add(new XElement(W.comments, NamespaceAttributes));
      }
      else
      {
          commentsXDoc = mainPart.WordprocessingCommentsPart.GetXDocument();
          commentsXDoc.Declaration.Standalone = "yes";
          commentsXDoc.Declaration.Encoding = "UTF-8";
          var existingIds = commentsXDoc.Root.Elements(W.comment).Select(c => (int)c.Attribute(W.id));
          if (existingIds.Any())
              commentNumber = existingIds.Max() + 1;
      }

      // The comment itself: a CommentText paragraph holding the annotation reference run followed
      // by a run with the content type as its text.
      var annotationRefRun = new XElement(W.r,
          new XElement(W.rPr,
              new XElement(W.rStyle,
                  new XAttribute(W.val, "CommentReference"))),
          new XElement(W.annotationRef));
      var commentTextRun = new XElement(W.r,
          new XElement(W.t,
              new XText(contentType)));
      var commentElement = new XElement(W.comment,
          new XAttribute(W.id, commentNumber),
          new XElement(W.p,
              new XElement(W.pPr,
                  new XElement(W.pStyle,
                      new XAttribute(W.val, "CommentText"))),
              annotationRefRun,
              commentTextRun));
      commentsXDoc.Root.Add(commentElement);

      // The run that anchors the comment in the document body.
      var commentRun = new XElement(W.r,
          new XElement(W.rPr,
              new XElement(W.rStyle, new XAttribute(W.val, "CommentReference"))),
          new XElement(W.commentReference,
              new XAttribute(W.id, commentNumber)));

      // for now, only do the work of inserting a comment if it is easy. For content types for
      // tables, rows and cells, not inserting a comment.
      var firstRun = blc
          .DescendantsTrimmed(W.txbxContent)
          .FirstOrDefault(d => d.Name == W.r);
      if (firstRun != null)
      {
          if (firstRun.Parent.Name == W.p)
              firstRun.AddBeforeSelf(commentRun);
      }
      else if (blc.Name == W.p)
      {
          blc.Add(commentRun);
      }

      if (mainPart.StyleDefinitionsPart == null)
          throw new ContentApplierException("Document does not have styles definition part");

      // Styles referenced by the comment markup above, added in this order when missing.
      var commentStyles = new[]
      {
@"<w:style w:type=""paragraph""
  w:styleId=""CommentText""
  xmlns:w=""http://schemas.openxmlformats.org/wordprocessingml/2006/main"">
  <w:name w:val=""annotation text""/>
  <w:basedOn w:val=""Normal""/>
  <w:link w:val=""CommentTextChar""/>
  <w:semiHidden/>
  <w:rPr>
    <w:sz w:val=""20""/>
    <w:szCs w:val=""20""/>
  </w:rPr>
</w:style>
",
@"<w:style w:type=""paragraph""
  w:styleId=""CommentSubject""
  xmlns:w=""http://schemas.openxmlformats.org/wordprocessingml/2006/main"">
  <w:name w:val=""annotation subject""/>
  <w:basedOn w:val=""CommentText""/>
  <w:next w:val=""CommentText""/>
  <w:semiHidden/>
  <w:rPr>
    <w:b/>
    <w:bCs/>
  </w:rPr>
</w:style>
",
@"<w:style w:type=""character""
  w:styleId=""CommentReference""
  xmlns:w=""http://schemas.openxmlformats.org/wordprocessingml/2006/main"">
  <w:name w:val=""annotation reference""/>
  <w:basedOn w:val=""DefaultParagraphFont""/>
  <w:uiPriority w:val=""99""/>
  <w:semiHidden/>
  <w:unhideWhenUsed/>
  <w:rsid w:val=""00872729""/>
  <w:rPr>
    <w:sz w:val=""16""/>
    <w:szCs w:val=""16""/>
  </w:rPr>
</w:style>
",
@"<w:style w:type=""character""
  w:customStyle=""1""
  w:styleId=""CommentTextChar""
  xmlns:w=""http://schemas.openxmlformats.org/wordprocessingml/2006/main"">
  <w:name w:val=""Comment Text Char""/>
  <w:basedOn w:val=""DefaultParagraphFont""/>
  <w:link w:val=""CommentText""/>
  <w:semiHidden/>
  <w:rsid w:val=""00A43CEC""/>
  <w:rPr>
    <w:lang w:val=""en-GB""
      w:eastAsia=""zh-CN""/>
  </w:rPr>
</w:style>
",
      };

      XDocument stylesXDoc = mainPart.StyleDefinitionsPart.GetXDocument();
      foreach (var styleXml in commentStyles)
          AddIfMissing(stylesXDoc, styleXml);
      mainPart.StyleDefinitionsPart.PutXDocument();
  }
  /// <summary>
  /// Tags a run-level content element with a content type and, when
  /// <c>settings.InjectCommentForContentTypes</c> is true and <paramref name="part"/> is the
  /// main document part, records the content type as a Word comment.
  /// </summary>
  /// <param name="settings">Conversion settings; only <c>InjectCommentForContentTypes</c> is read here.</param>
  /// <param name="part">The part that contains <paramref name="rlc"/>; comments are only injected for a <c>MainDocumentPart</c>.</param>
  /// <param name="rlc">The run-level content element that receives the <c>PtOpenXml.ContentType</c> attribute.</param>
  /// <param name="contentType">The content type name to record.</param>
  /// <exception cref="ContentApplierException">
  /// Thrown when a comment is being injected and the main document part has no style definitions part.
  /// </exception>
  private static void AddContentTypeToRunContent(WmlToXmlSettings settings, OpenXmlPart part, XElement rlc, string contentType)
  {
      // If there is already a content type for this run-level content, there is nothing to do. First one wins.
      if (rlc.Attribute(PtOpenXml.ContentType) != null)
          return;

      // Add the attribute to the run-level content.
      rlc.Add(new XAttribute(PtOpenXml.ContentType, contentType));

      bool injectComment = settings.InjectCommentForContentTypes != null && (bool)settings.InjectCommentForContentTypes;
      if (!injectComment)
          return;

      var mainPart = part as MainDocumentPart;
      if (mainPart == null)
          return;

      // Find (or create) the comments part and pick the next free comment id.
      int commentNumber = 1;
      XDocument newComments;
      if (mainPart.WordprocessingCommentsPart != null)
      {
          newComments = mainPart.WordprocessingCommentsPart.GetXDocument();
          newComments.Declaration.Standalone = "yes";
          newComments.Declaration.Encoding = "UTF-8";

          // Materialize once so the ids are not re-queried for both the emptiness check and Max().
          List<int> ids = newComments.Root
              .Elements(W.comment)
              .Select(f => (int)f.Attribute(W.id))
              .ToList();
          if (ids.Count > 0)
              commentNumber = ids.Max() + 1;
      }
      else
      {
          mainPart.AddNewPart<WordprocessingCommentsPart>();
          newComments = mainPart.WordprocessingCommentsPart.GetXDocument();
          newComments.Declaration.Standalone = "yes";
          newComments.Declaration.Encoding = "UTF-8";
          newComments.Add(new XElement(W.comments, NamespaceAttributes));
      }

      // The comment body: an annotation-reference run followed by a run holding the content type text.
      XElement newElement = new XElement(W.comment,
          new XAttribute(W.id, commentNumber),
          new XElement(W.p,
              new XElement(W.pPr,
                  new XElement(W.pStyle,
                      new XAttribute(W.val, "CommentText"))),
              new XElement(W.r,
                  new XElement(W.rPr,
                      new XElement(W.rStyle,
                          new XAttribute(W.val, "CommentReference"))),
                  new XElement(W.annotationRef)),
              new XElement(W.r,
                  new XElement(W.t,
                      new XText(contentType)))));
      newComments.Root.Add(newElement);

      // The run that anchors the comment in the document body.
      XElement commentRun = new XElement(W.r,
          new XElement(W.rPr,
              new XElement(W.rStyle, new XAttribute(W.val, "CommentReference"))),
          new XElement(W.commentReference,
              new XAttribute(W.id, commentNumber)));

      // For now, only insert the comment reference if it is easy, i.e. when the run-level
      // content sits directly inside a paragraph. A detached element (no parent) is skipped
      // instead of dereferencing a null parent.
      // NOTE(review): when the parent is not a paragraph, the w:comment added above has no
      // matching w:commentReference in the document - confirm whether that is intended.
      if (rlc.Parent != null && rlc.Parent.Name == W.p)
          rlc.AddBeforeSelf(commentRun);

      if (mainPart.StyleDefinitionsPart == null)
      {
          throw new ContentApplierException("Document does not have styles definition part");
      }

      // Make sure the styles referenced by the injected comment markup exist.
      XDocument stylesXDoc = mainPart.StyleDefinitionsPart.GetXDocument();
      string[] commentStyles =
      {
          @"<w:style w:type=""paragraph""
                     w:styleId=""CommentText""
                     xmlns:w=""http://schemas.openxmlformats.org/wordprocessingml/2006/main"">
              <w:name w:val=""annotation text""/>
              <w:basedOn w:val=""Normal""/>
              <w:link w:val=""CommentTextChar""/>
              <w:semiHidden/>
              <w:rPr>
                <w:sz w:val=""20""/>
                <w:szCs w:val=""20""/>
              </w:rPr>
            </w:style>
",
          @"<w:style w:type=""paragraph""
                     w:styleId=""CommentSubject""
                     xmlns:w=""http://schemas.openxmlformats.org/wordprocessingml/2006/main"">
              <w:name w:val=""annotation subject""/>
              <w:basedOn w:val=""CommentText""/>
              <w:next w:val=""CommentText""/>
              <w:semiHidden/>
              <w:rPr>
                <w:b/>
                <w:bCs/>
              </w:rPr>
            </w:style>
",
          @"<w:style w:type=""character""
                     w:styleId=""CommentReference""
                     xmlns:w=""http://schemas.openxmlformats.org/wordprocessingml/2006/main"">
              <w:name w:val=""annotation reference""/>
              <w:basedOn w:val=""DefaultParagraphFont""/>
              <w:uiPriority w:val=""99""/>
              <w:semiHidden/>
              <w:unhideWhenUsed/>
              <w:rsid w:val=""00872729""/>
              <w:rPr>
                <w:sz w:val=""16""/>
                <w:szCs w:val=""16""/>
              </w:rPr>
            </w:style>
",
          @"<w:style w:type=""character""
                     w:customStyle=""1""
                     w:styleId=""CommentTextChar""
                     xmlns:w=""http://schemas.openxmlformats.org/wordprocessingml/2006/main"">
              <w:name w:val=""Comment Text Char""/>
              <w:basedOn w:val=""DefaultParagraphFont""/>
              <w:link w:val=""CommentText""/>
              <w:semiHidden/>
              <w:rsid w:val=""00A43CEC""/>
              <w:rPr>
                <w:lang w:val=""en-GB""
                        w:eastAsia=""zh-CN""/>
              </w:rPr>
            </w:style>
",
      };
      foreach (string commentStyle in commentStyles)
          AddIfMissing(stylesXDoc, commentStyle);

      mainPart.StyleDefinitionsPart.PutXDocument();
  }
  /// <summary>
  /// Parses <paramref name="commentStyle"/> as a <c>w:style</c> element and appends it to the
  /// root of <paramref name="stylesXDoc"/> unless an equivalent style is already there.
  /// </summary>
  /// <param name="stylesXDoc">The styles document whose root receives the new style.</param>
  /// <param name="commentStyle">XML text of a single style element.</param>
  private static void AddIfMissing(XDocument stylesXDoc, string commentStyle)
  {
      XElement candidate = XElement.Parse(commentStyle);

      // Two styles are considered the same when all three of these attributes agree
      // (an absent attribute only matches an absent attribute), e.g.
      //   w:type="character" w:customStyle="1" w:styleId="CommentTextChar"
      // NOTE(review): a style with the same w:styleId but a different w:type or w:customStyle
      // is treated as missing, so a second style with that id gets added - confirm intended.
      XName[] identityAttributes = { W.type, W.customStyle, W.styleId };

      bool alreadyPresent = stylesXDoc
          .Root
          .Elements(W.style)
          .Any(existing => identityAttributes.All(
              attributeName => (string)candidate.Attribute(attributeName) == (string)existing.Attribute(attributeName)));

      if (!alreadyPresent)
          stylesXDoc.Root.Add(candidate);
  }
  /// <summary>
  /// Calls <c>ListItemRetriever.RetrieveListItem</c> for every <c>w:p</c> element found in the
  /// main document part of <paramref name="wordDoc"/>; the returned values are not used here.
  /// </summary>
  /// <param name="wordDoc">The document whose paragraphs are visited.</param>
  /// <param name="settings">Settings passed through to the list item retriever.</param>
  private static void AssembleListItemInformation(WordprocessingDocument wordDoc, ListItemRetrieverSettings settings)
  {
      IEnumerable<XElement> paragraphs = wordDoc.MainDocumentPart
          .GetXDocument()
          .Descendants(W.p);

      foreach (XElement paragraph in paragraphs)
          ListItemRetriever.RetrieveListItem(wordDoc, paragraph, settings);
  }
  /// <summary>
  /// Plain holder for three default style names (paragraph, character, table).
  /// All fields start out as null; the implicit parameterless constructor is the only constructor.
  /// </summary>
  private class ContentTypeApplierInfo
  {
      // Default paragraph style name.
      public string DefaultParagraphStyleName;

      // Default character style name.
      public string DefaultCharacterStyleName;

      // Default table style name.
      public string DefaultTableStyleName;
  }
  /// <summary>
  /// Exception type thrown by the content type applier, for example when a document
  /// has no style definitions part.
  /// </summary>
  public class ContentApplierException : Exception
  {
      /// <summary>Creates the exception with the default message.</summary>
      public ContentApplierException() { }

      /// <summary>Creates the exception with the given message.</summary>
      /// <param name="message">Description of the failure.</param>
      public ContentApplierException(string message) : base(message) { }

      /// <summary>Creates the exception with the given message, wrapping the exception that caused it.</summary>
      /// <param name="message">Description of the failure.</param>
      /// <param name="innerException">The underlying cause, preserved for diagnostics.</param>
      public ContentApplierException(string message, Exception innerException) : base(message, innerException) { }
  }
  /// <summary>
  /// Runs the global validation rules and the block-level content validation rules from
  /// <paramref name="settings"/> and returns the collected errors, ordered by the integer
  /// value of each error's <c>BlockLevelContentIdentifier</c>.
  /// </summary>
  /// <param name="wmlRawSourceDocument">The original document; opened and handed to the global rules.</param>
  /// <param name="wmlWithContentTypeApplied">The document with content types applied; handed to both kinds of rules.</param>
  /// <param name="contentTypeXml">The produced content type XML, passed through to every rule.</param>
  /// <param name="settings">
  /// Supplies the rule collections and the document type. A null rule collection is treated as empty.
  /// </param>
  /// <returns>
  /// The validation errors. Errors whose identifier is not an integer sort with key 0;
  /// the sort is stable, so ties keep the order in which they were reported.
  /// </returns>
  /// <exception cref="OpenXmlPowerToolsException">
  /// Thrown when a global rule declares document type info but has no entry for <c>settings.DocumentType</c>.
  /// </exception>
  public static List<WmlToXmlValidationError> ValidateContentTypeXml(WmlDocument wmlRawSourceDocument, WmlDocument wmlWithContentTypeApplied, XElement contentTypeXml, WmlToXmlSettings settings)
  {
      List<WmlToXmlValidationError> errorList = new List<WmlToXmlValidationError>();
      using (MemoryStream msContentTypeApplied = new MemoryStream())
      using (MemoryStream msRawSourceDocument = new MemoryStream())
      {
          msContentTypeApplied.Write(wmlWithContentTypeApplied.DocumentByteArray, 0, wmlWithContentTypeApplied.DocumentByteArray.Length);
          msRawSourceDocument.Write(wmlRawSourceDocument.DocumentByteArray, 0, wmlRawSourceDocument.DocumentByteArray.Length);
          using (WordprocessingDocument wDocContentTypeApplied = WordprocessingDocument.Open(msContentTypeApplied, true))
          using (WordprocessingDocument wDocRawSourceDocument = WordprocessingDocument.Open(msRawSourceDocument, true))
          {
              // ---- Global rules: run once per document ----
              if (settings.GlobalValidationRules != null)
              {
                  foreach (var vr in settings.GlobalValidationRules)
                  {
                      if (settings.DocumentType != null &&
                          vr.DocumentTypeInfoCollection != null)
                      {
                          var thisdti = vr.DocumentTypeInfoCollection.FirstOrDefault(dti => dti.DocumentType == settings.DocumentType);
                          if (thisdti == null)
                              throw new OpenXmlPowerToolsException("Incorrect setup of Validation Rules");
                          if (thisdti.ValidationErrorType == ValidationErrorType.NotApplicable)
                              continue;
                      }
                      if (vr.GlobalRuleLambda == null)
                          continue;

                      var valErrors = vr.GlobalRuleLambda(vr, wDocRawSourceDocument, wDocContentTypeApplied, contentTypeXml, settings);
                      if (valErrors != null)
                          errorList.AddRange(valErrors);
                  }
              }

              // ---- Block-level rules: run for every paragraph, table, row and cell ----
              var mXDoc = wDocContentTypeApplied.MainDocumentPart.GetXDocument();

              // A document without a styles part simply has no styles to look up.
              var stylesPart = wDocContentTypeApplied.MainDocumentPart.StyleDefinitionsPart;
              XDocument sXDoc = stylesPart != null ? stylesPart.GetXDocument() : null;
              IEnumerable<XElement> styles = sXDoc != null && sXDoc.Root != null
                  ? sXDoc.Root.Elements(W.style)
                  : Enumerable.Empty<XElement>();

              // Each style type can flag its own default with w:default="1", so the paragraph
              // default must be picked by type rather than by taking the first default found.
              // A style without w:type is accepted as a paragraph style.
              var defaultParagraphStyle = styles.FirstOrDefault(s =>
              {
                  if ((string)s.Attribute(W._default) != "1")
                      return false;
                  string styleType = (string)s.Attribute(W.type);
                  return styleType == null || styleType == "paragraph";
              });
              // NOTE(review): the fallback below is the default style's w:styleId, while the
              // regular lookup yields the w:name value - confirm the two are meant to be interchangeable.
              string defaultParagraphStyleName = null;
              if (defaultParagraphStyle != null)
                  defaultParagraphStyleName = (string)defaultParagraphStyle.Attribute(W.styleId);

              foreach (var blc in mXDoc.Root.Descendants().Where(d => d.Name == W.p || d.Name == W.tbl || d.Name == W.tr || d.Name == W.tc))
              {
                  var styleId = (string)blc
                      .Elements(W.pPr)
                      .Elements(W.pStyle)
                      .Attributes(W.val)
                      .FirstOrDefault();
                  var styleName = (string)styles
                      .Where(s => (string)s.Attribute(W.styleId) == styleId)
                      .Elements(W.name)
                      .Attributes(W.val)
                      .FirstOrDefault();
                  if (styleName == null && blc.Name == W.p)
                      styleName = defaultParagraphStyleName;

                  if (settings.BlockLevelContentValidationRules == null)
                      continue;

                  foreach (var vr in settings.BlockLevelContentValidationRules)
                  {
                      // Skip rules that list document types but not the current one.
                      if (settings.DocumentType != null &&
                          vr.DocumentTypeInfoCollection != null &&
                          !vr.DocumentTypeInfoCollection.Any(dti => dti.DocumentType == settings.DocumentType))
                      {
                          continue;
                      }

                      // A rule without a style regex applies to every block; otherwise the
                      // block's style name must exist and match.
                      bool matchStyle = vr.StyleNameRegex == null ||
                          (styleName != null && vr.StyleNameRegex.Match(styleName).Success);
                      if (!matchStyle || vr.BlockLevelContentRuleLambda == null)
                          continue;

                      var valErrors = vr.BlockLevelContentRuleLambda(blc, vr, wDocContentTypeApplied, contentTypeXml, settings);
                      if (valErrors != null)
                          errorList.AddRange(valErrors);
                  }
              }
          }
      }

      List<WmlToXmlValidationError> sortedErrorList = errorList
          .OrderBy(e =>
          {
              int identifier;
              return int.TryParse(e.BlockLevelContentIdentifier, out identifier) ? identifier : 0;
          })
          .ToList();
      return sortedErrorList;
  }
  1810. }
  /// <summary>
  /// Utilities that stamp paragraphs and table rows of a document body with sequential
  /// <c>PtOpenXml.Unid</c> attributes and mark the <c>pt14</c> prefix as ignorable.
  /// </summary>
  public static class WmlToXmlUtil
  {
      /// <summary>
      /// Copies <paramref name="wmlDoc"/> into memory, assigns Unid attributes in the copy,
      /// and returns the result as a new <c>WmlDocument</c> with the same file name.
      /// </summary>
      /// <param name="wmlDoc">The source document; its byte array is only read.</param>
      /// <returns>A new document containing the Unid attributes.</returns>
      public static WmlDocument AssignUnidToBlc(WmlDocument wmlDoc)
      {
          using (MemoryStream ms = new MemoryStream())
          {
              ms.Write(wmlDoc.DocumentByteArray, 0, wmlDoc.DocumentByteArray.Length);
              // Dispose the package before reading the stream so the bytes include the changes.
              using (WordprocessingDocument wDoc = WordprocessingDocument.Open(ms, true))
              {
                  AssignUnidToBlc(wDoc);
              }
              return new WmlDocument(wmlDoc.FileName, ms.ToArray());
          }
      }

      /// <summary>
      /// Assigns Unid attributes 1, 2, 3, ... to the paragraphs and table rows under the first
      /// <c>w:body</c> of the main document part, overwriting existing values, then writes
      /// the main document part back. A document without a body gets no Unid attributes.
      /// </summary>
      /// <param name="wDoc">The open document to modify in place.</param>
      public static void AssignUnidToBlc(WordprocessingDocument wDoc)
      {
          var xDoc = wDoc.MainDocumentPart.GetXDocument();
          List<XElement> elementsInOrder = new List<XElement>();

          XElement body = xDoc.Root.Descendants(W.body).FirstOrDefault();
          if (body != null)
              DetermineElementOrder(body, elementsInOrder);

          var unid = 1;
          foreach (var b in elementsInOrder)
          {
              // SetAttributeValue updates an existing attribute or adds a new one.
              b.SetAttributeValue(PtOpenXml.Unid, unid.ToString());
              unid++;
          }
          IgnorePt14Namespace(xDoc.Root);
          wDoc.MainDocumentPart.PutXDocument();
      }

      /// <summary>
      /// Appends to <paramref name="elementList"/> the paragraphs and table rows found among the
      /// children of <paramref name="element"/>, descending through tables, cells and content
      /// controls. A row is added after the content of its cells. Other children are ignored.
      /// </summary>
      private static void DetermineElementOrder(XElement element, List<XElement> elementList)
      {
          foreach (var childElement in element.Elements())
          {
              XName name = childElement.Name;
              if (name == W.p)
              {
                  elementList.Add(childElement);
              }
              else if (name == W.tbl || name == W.tc || name == W.sdt || name == W.sdtContent)
              {
                  DetermineElementOrder(childElement, elementList);
              }
              else if (name == W.tr)
              {
                  // Visit the content of every child of the row first, then record the row itself.
                  foreach (var rowChild in childElement.Elements())
                      DetermineElementOrder(rowChild, elementList);
                  elementList.Add(childElement);
              }
          }
      }

      /// <summary>
      /// Declares the <c>pt14</c> namespace prefix on <paramref name="root"/> if it is not declared
      /// yet and makes sure <c>pt14</c> appears in the root's <c>MC.Ignorable</c> attribute.
      /// </summary>
      private static void IgnorePt14Namespace(XElement root)
      {
          if (root.Attribute(XNamespace.Xmlns + "pt14") == null)
          {
              root.Add(new XAttribute(XNamespace.Xmlns + "pt14", PtOpenXml.pt.NamespaceName));
          }

          XAttribute ignorableAttribute = root.Attribute(MC.Ignorable);
          if (ignorableAttribute == null)
          {
              root.Add(new XAttribute(MC.Ignorable, "pt14"));
              return;
          }

          // The attribute is a whitespace-separated list of prefixes; split on any whitespace
          // so tab- or newline-separated lists are recognised too.
          string[] prefixes = ignorableAttribute.Value.Split((char[])null, StringSplitOptions.RemoveEmptyEntries);
          if (prefixes.Contains("pt14"))
              return;

          // Keep the existing text untouched and append; an empty list becomes just "pt14".
          ignorableAttribute.Value = prefixes.Length == 0
              ? "pt14"
              : ignorableAttribute.Value + " pt14";
      }
  }
  1889. }