// FieldRetriever.cs
  1. // Copyright (c) Microsoft. All rights reserved.
  2. // Licensed under the MIT license. See LICENSE file in the project root for full license information.
  3. using System;
  4. using System.Collections.Generic;
  5. using System.Linq;
  6. using System.Text;
  7. using System.Xml.Linq;
  8. using DocumentFormat.OpenXml.Packaging;
  9. namespace OpenXmlPowerTools
  10. {
  11. public class FieldRetriever
  12. {
  /// <summary>
  /// Rebuilds the instruction text of the field with the given id, wrapped in braces.
  /// Fields nested inside the instruction are expanded recursively, each in its own braces.
  /// </summary>
  /// <param name="root">
  /// Root element carrying the <c>Dictionary&lt;int, List&lt;XElement&gt;&gt;</c> annotation that
  /// <see cref="AnnotateWithFieldInfo"/> attaches (field id -> elements lying in that field's instruction part).
  /// </param>
  /// <param name="id">Field id as assigned by <see cref="AnnotateWithFieldInfo"/>.</param>
  /// <returns>
  /// <c>"{" + instruction text + "}"</c>, or the empty string when no element is cached for <paramref name="id"/>.
  /// </returns>
  /// <exception cref="OpenXmlPowerToolsException">
  /// <paramref name="root"/> carries no cache annotation (i.e. it was not annotated beforehand).
  /// </exception>
  public static string InstrText(XElement root, int id)
  {
      XNamespace w = "http://schemas.openxmlformats.org/wordprocessingml/2006/main";
      XName instrTextName = w + "instrText";

      var cachedAnnotationInformation = root.Annotation<Dictionary<int, List<XElement>>>();
      if (cachedAnnotationInformation == null)
          throw new OpenXmlPowerToolsException("Internal error");

      // it is possible that a field code contains no instr text
      List<XElement> relevantElements;
      if (!cachedAnnotationInformation.TryGetValue(id, out relevantElements))
          return "";

      // Enumerating the annotated stack yields the innermost open field first. Entries that come
      // before the entry for 'id' therefore belong to fields nested inside this field's instruction.
      // Key == true  : run of elements that sit inside a nested field.
      // Key == false : run of elements that belong directly to this field.
      var groupedSubFields = relevantElements
          .GroupAdjacent(e =>
          {
              Stack<FieldElementTypeInfo> s = e.Annotation<Stack<FieldElementTypeInfo>>();
              return s.TakeWhile(z => z.Id != id).Any();
          })
          .ToList();

      var instrText = groupedSubFields
          .Select(g =>
          {
              if (!g.Key)
              {
                  // Own elements: concatenate the text of the w:instrText elements only.
                  return g.Select(e =>
                      {
                          Stack<FieldElementTypeInfo> s = e.Annotation<Stack<FieldElementTypeInfo>>();
                          var stackElement = s.FirstOrDefault(z => z.Id == id);
                          if (stackElement.FieldElementType == FieldElementTypeEnum.InstrText &&
                              e.Name == instrTextName)
                              return e.Value;
                          return "";
                      })
                      .StringConcatenate();
              }

              // Nested run: the stack entry immediately before this field's entry is the
              // directly nested field; expand it recursively.
              Stack<FieldElementTypeInfo> nestedStack = g.First().Annotation<Stack<FieldElementTypeInfo>>();
              var subFieldId = nestedStack.TakeWhile(z => z.Id != id).Last().Id;
              return InstrText(root, subFieldId);
          })
          .StringConcatenate();

      return "{" + instrText + "}";
  }
  /// <summary>
  /// Walks every element of the part's XML in document order and annotates it with the stack of
  /// fields that are open at that point, then attaches to the root a lookup from field id to the
  /// elements lying in that field's instruction part (the cache read by <see cref="InstrText"/>).
  /// </summary>
  /// <remarks>
  /// Field ids are assigned by counting <c>w:fldChar</c> "begin" markers, starting at 1.
  /// A <c>w:fldChar</c> without a <c>w:fldCharType</c> attribute, or a "separate"/"end" marker with
  /// no open field, makes this method throw; that behaviour is unchanged from the original code.
  /// </remarks>
  /// <param name="part">Part whose XDocument root is annotated.</param>
  public static void AnnotateWithFieldInfo(OpenXmlPart part)
  {
      XNamespace w = "http://schemas.openxmlformats.org/wordprocessingml/2006/main";
      XName fldCharName = w + "fldChar";
      XName fldCharTypeName = w + "fldCharType";
      XElement root = part.GetXDocument().Root;

      // One FieldElementTypeStack per element, each computed from the state of the previous element.
      var r = root.DescendantsAndSelf()
          .Rollup(
              new FieldElementTypeStack
              {
                  Id = 0,
                  FiStack = null,
              },
              (e, s) =>
              {
                  if (e.Name == fldCharName)
                  {
                      string markerType = e.Attribute(fldCharTypeName).Value;

                      if (markerType == "begin")
                      {
                          // A new field opens: push it on top of whatever is already open.
                          Stack<FieldElementTypeInfo> opened = s.FiStack == null
                              ? new Stack<FieldElementTypeInfo>()
                              : CopyFieldStack(s.FiStack);
                          opened.Push(
                              new FieldElementTypeInfo
                              {
                                  Id = s.Id + 1,
                                  FieldElementType = FieldElementTypeEnum.Begin,
                              });
                          return new FieldElementTypeStack
                          {
                              Id = s.Id + 1,
                              FiStack = opened,
                          };
                      }

                      if (markerType == "separate")
                      {
                          return new FieldElementTypeStack
                          {
                              Id = s.Id,
                              FiStack = WithTopChangedTo(s.FiStack, FieldElementTypeEnum.Separate),
                          };
                      }

                      if (markerType == "end")
                      {
                          // The innermost field closes: it is removed straight away, so no entry
                          // ever carries FieldElementTypeEnum.End.
                          Stack<FieldElementTypeInfo> closed = CopyFieldStack(s.FiStack);
                          closed.Pop();
                          return new FieldElementTypeStack
                          {
                              Id = s.Id,
                              FiStack = closed,
                          };
                      }
                  }

                  if (s.FiStack == null || s.FiStack.Count == 0)
                      return s;

                  // The first element after a "begin" marker flips the innermost field to InstrText,
                  // the first element after a "separate" marker flips it to Result. Every other
                  // element simply inherits the previous state object.
                  FieldElementTypeEnum innermost = s.FiStack.Peek().FieldElementType;
                  if (innermost == FieldElementTypeEnum.Begin)
                  {
                      return new FieldElementTypeStack
                      {
                          Id = s.Id,
                          FiStack = WithTopChangedTo(s.FiStack, FieldElementTypeEnum.InstrText),
                      };
                  }
                  if (innermost == FieldElementTypeEnum.Separate)
                  {
                      return new FieldElementTypeStack
                      {
                          Id = s.Id,
                          FiStack = WithTopChangedTo(s.FiStack, FieldElementTypeEnum.Result),
                      };
                  }
                  return s;
              });

      // Pair each element with its computed state and attach the (non-null) stack as an annotation.
      var elementPlusInfo = root.DescendantsAndSelf().PtZip(r, (t1, t2) =>
          new
          {
              Element = t1,
              WmlFieldInfoStack = t2.FiStack,
          });
      foreach (var item in elementPlusInfo)
      {
          if (item.WmlFieldInfoStack != null)
              item.Element.AddAnnotation(item.WmlFieldInfoStack);
      }

      //This code is useful when you want to take a look at the annotations, making sure that they are made correctly.
      //
      //foreach (var desc in root.DescendantsAndSelf())
      //{
      //    Stack<FieldElementTypeInfo> s = desc.Annotation<Stack<FieldElementTypeInfo>>();
      //    if (s != null)
      //    {
      //        Console.WriteLine(desc.Name.LocalName.PadRight(20));
      //        foreach (var item in s)
      //        {
      //            Console.WriteLine(" {0:0000} {1}", item.Id, item.FieldElementType.ToString());
      //            Console.ReadKey();
      //        }
      //    }
      //}

      // Build the cache: field id -> elements whose stack holds that field in the InstrText state.
      // NOTE(review): DescendantsTrimmed is defined elsewhere; presumably it does not descend into
      // w:rPr / w:pPr - confirm against its implementation.
      var cachedAnnotationInformation = new Dictionary<int, List<XElement>>();
      foreach (var desc in root.DescendantsTrimmed(d => d.Name == W.rPr || d.Name == W.pPr))
      {
          Stack<FieldElementTypeInfo> s = desc.Annotation<Stack<FieldElementTypeInfo>>();
          if (s == null)
              continue;

          foreach (var item in s)
          {
              if (item.FieldElementType != FieldElementTypeEnum.InstrText)
                  continue;

              List<XElement> elementsOfField;
              if (!cachedAnnotationInformation.TryGetValue(item.Id, out elementsOfField))
              {
                  elementsOfField = new List<XElement>();
                  cachedAnnotationInformation.Add(item.Id, elementsOfField);
              }
              elementsOfField.Add(desc);
          }
      }
      root.AddAnnotation(cachedAnnotationInformation);
  }

  // Copies a field stack, keeping the order of its entries.
  private static Stack<FieldElementTypeInfo> CopyFieldStack(Stack<FieldElementTypeInfo> source)
  {
      // Enumerating a Stack yields top-to-bottom while the constructor pushes in enumeration
      // order, hence the Reverse().
      return new Stack<FieldElementTypeInfo>(source.Reverse());
  }

  // Returns a copy of the stack whose top entry keeps its Id but carries the given element type.
  private static Stack<FieldElementTypeInfo> WithTopChangedTo(
      Stack<FieldElementTypeInfo> source,
      FieldElementTypeEnum newType)
  {
      Stack<FieldElementTypeInfo> copy = CopyFieldStack(source);
      FieldElementTypeInfo previousTop = copy.Pop();
      copy.Push(
          new FieldElementTypeInfo
          {
              Id = previousTop.Id,
              FieldElementType = newType,
          });
      return copy;
  }
  // States of the hand-written scanner in GetTokens.
  private enum State
  {
      InToken,        // inside an unquoted token
      InWhiteSpace,   // between tokens (also the start state)
      InQuotedToken,  // inside a quoted token, past its first character
      OnOpeningQuote, // the previous character was an opening quote
      OnClosingQuote, // the previous character was the matching closing quote
      OnBackslash,    // the previous character was a backslash inside a quoted token
  }

  /// <summary>
  /// Splits a field instruction into tokens. Unquoted tokens are delimited by white space.
  /// A token opened by <c>"</c>, <c>'</c> or U+201D runs to the next unescaped occurrence of the
  /// same quote character and is returned without the surrounding quotes.
  /// </summary>
  /// <remarks>
  /// Inside a quoted token a backslash escapes the character that follows it, whatever that
  /// character is (quote, backslash, white space); the backslash itself stays in the token text.
  /// Empty quoted strings and an unterminated quoted token produce no token.
  /// </remarks>
  /// <param name="field">Field instruction text; must not be null.</param>
  /// <returns>The tokens in order of appearance.</returns>
  private static string[] GetTokens(string field)
  {
      State state = State.InWhiteSpace;
      int tokenStart = 0;
      char quoteStart = char.MinValue;
      List<string> tokens = new List<string>();

      for (int c = 0; c < field.Length; c++)
      {
          char ch = field[c];
          bool isWhiteSpace = Char.IsWhiteSpace(ch);
          bool isQuote = ch == '"' || ch == '\'' || ch == '\u201d';

          switch (state)
          {
              case State.InWhiteSpace:
                  if (isWhiteSpace)
                      break;
                  if (isQuote)
                  {
                      quoteStart = ch;
                      state = State.OnOpeningQuote;
                  }
                  else
                  {
                      tokenStart = c;
                      state = State.InToken;
                  }
                  break;

              case State.InToken:
                  // Quotes and backslashes inside an unquoted token are ordinary characters.
                  if (isWhiteSpace)
                  {
                      tokens.Add(field.Substring(tokenStart, c - tokenStart));
                      state = State.InWhiteSpace;
                  }
                  break;

              case State.OnOpeningQuote:
                  if (ch == quoteStart)
                  {
                      // Empty quoted string: nothing is emitted.
                      state = State.OnClosingQuote;
                  }
                  else
                  {
                      tokenStart = c;
                      // A backslash right after the opening quote starts an escape as well,
                      // so "\"x" is one token rather than a token that ends on the escaped quote.
                      state = ch == '\\' ? State.OnBackslash : State.InQuotedToken;
                  }
                  break;

              case State.InQuotedToken:
                  if (ch == '\\')
                  {
                      state = State.OnBackslash;
                  }
                  else if (ch == quoteStart)
                  {
                      tokens.Add(field.Substring(tokenStart, c - tokenStart));
                      state = State.OnClosingQuote;
                  }
                  break;

              case State.OnBackslash:
                  // The escaped character is consumed, even when it is white space; otherwise
                  // the escape would leak onto a later character such as the closing quote.
                  state = State.InQuotedToken;
                  break;

              case State.OnClosingQuote:
                  if (isWhiteSpace)
                  {
                      state = State.InWhiteSpace;
                  }
                  else if (!isQuote)
                  {
                      // Text glued to a closing quote starts a new unquoted token.
                      tokenStart = c;
                      state = State.InToken;
                  }
                  break;
          }
      }

      if (state == State.InToken)
          tokens.Add(field.Substring(tokenStart, field.Length - tokenStart));
      return tokens.ToArray();
  }
  /// <summary>
  /// Parses a field instruction into its type, switches and arguments. Only HYPERLINK, REF, SEQ
  /// and STYLEREF fields are parsed; the type is matched case-insensitively.
  /// </summary>
  /// <param name="field">Field instruction text, e.g. <c>HYPERLINK "http://example.com" \l "top"</c>.</param>
  /// <returns>
  /// A <see cref="FieldInfo"/> whose <c>FieldType</c> is the first token, whose <c>Switches</c> are
  /// the tokens starting with a backslash and whose <c>Arguments</c> are the remaining tokens after
  /// the first. For null or empty input, or any other field type, a <see cref="FieldInfo"/> with an
  /// empty <c>FieldType</c> and empty arrays is returned.
  /// </returns>
  public static FieldInfo ParseField(string field)
  {
      FieldInfo emptyField = new FieldInfo
      {
          FieldType = "",
          Arguments = new string[] { },
          Switches = new string[] { },
      };

      if (string.IsNullOrEmpty(field))
          return emptyField;

      // A null separator splits on every white-space character, which matches what the
      // tokenizer treats as a delimiter (a tab after the field type is as good as a space).
      string fieldType = field.TrimStart().Split((char[])null)[0];

      // Ordinal comparison: culture-sensitive upper-casing breaks on e.g. Turkish 'i'.
      string[] supportedFieldTypes = { "HYPERLINK", "REF", "SEQ", "STYLEREF" };
      if (!supportedFieldTypes.Contains(fieldType, StringComparer.OrdinalIgnoreCase))
          return emptyField;

      string[] tokens = GetTokens(field);
      if (tokens.Length == 0)
          return emptyField;

      // NOTE(review): a quoted argument that itself starts with a backslash (e.g. a UNC path)
      // lands in Switches because tokens no longer carry their quotes - confirm this is intended.
      FieldInfo fieldInfo = new FieldInfo()
      {
          FieldType = tokens[0],
          Switches = tokens.Where(t => t[0] == '\\').ToArray(),
          Arguments = tokens.Skip(1).Where(t => t[0] != '\\').ToArray(),
      };
      return fieldInfo;
  }
  /// <summary>Result of <see cref="ParseField"/>: a field instruction broken into its parts.</summary>
  public class FieldInfo
  {
      // First token of the instruction; "" when the field was not parsed.
      public string FieldType;
      // Tokens that start with a backslash, in order of appearance.
      public string[] Switches;
      // Tokens after the first one that do not start with a backslash, in order of appearance.
      public string[] Arguments;
  }
  /// <summary>
  /// Position of an element relative to a field, as recorded by <see cref="AnnotateWithFieldInfo"/>.
  /// </summary>
  public enum FieldElementTypeEnum
  {
      // Set on the w:fldChar "begin" marker that opens the field.
      Begin,
      // Set from the first element after the "begin" marker onwards (instruction part).
      InstrText,
      // Set on the w:fldChar "separate" marker.
      Separate,
      // Set from the first element after the "separate" marker onwards (result part).
      Result,
      // Not assigned anywhere in this file: an "end" marker removes the field's entry instead.
      End,
  };
  /// <summary>One entry of the per-element field stack: which field, and which part of it.</summary>
  public class FieldElementTypeInfo
  {
      // Id of the field; AnnotateWithFieldInfo numbers fields by counting "begin" markers from 1.
      public int Id;
      // Part of the field the annotated element falls into.
      public FieldElementTypeEnum FieldElementType;
  }
  /// <summary>
  /// State carried from element to element while <see cref="AnnotateWithFieldInfo"/> walks the document.
  /// </summary>
  public class FieldElementTypeStack
  {
      // Highest field id handed out so far (0 before the first "begin" marker).
      public int Id;
      // Fields open at this element, innermost on top; null until the first "begin" marker is seen.
      public Stack<FieldElementTypeInfo> FiStack;
  }
  394. }
  395. }