ListItemRetriever.cs 45 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169
  1. // Copyright (c) Microsoft. All rights reserved.
  2. // Licensed under the MIT license. See LICENSE file in the project root for full license information.
  3. using System;
  4. using System.Collections.Generic;
  5. using System.Linq;
  6. using System.Text;
  7. using System.Xml.Linq;
  8. using DocumentFormat.OpenXml.Packaging;
  9. namespace OpenXmlPowerTools
  10. {
  /// <summary>
  /// Settings for list item retrieval. Holds a table that maps a language tag
  /// (for example "fr-FR") to the list-item-text implementation for that language.
  /// </summary>
  public class ListItemRetrieverSettings
  {
      /// <summary>
      /// The built-in implementations, keyed by language tag.
      /// </summary>
      public static Dictionary<string, Func<string, int, string, string>> DefaultListItemTextImplementations =
          BuildDefaultImplementations();

      /// <summary>
      /// The table used by this settings instance. A freshly constructed instance refers to
      /// the very same dictionary object as <see cref="DefaultListItemTextImplementations"/>
      /// (it is not a copy), so entries added through one are visible through the other.
      /// </summary>
      public Dictionary<string, Func<string, int, string, string>> ListItemTextImplementations;

      /// <summary>
      /// Creates settings that use the default implementation table.
      /// </summary>
      public ListItemRetrieverSettings()
      {
          this.ListItemTextImplementations = DefaultListItemTextImplementations;
      }

      // Populates the default language table.
      private static Dictionary<string, Func<string, int, string, string>> BuildDefaultImplementations()
      {
          var table = new Dictionary<string, Func<string, int, string, string>>();
          table.Add("fr-FR", ListItemTextGetter_fr_FR.GetListItemText);
          table.Add("tr-TR", ListItemTextGetter_tr_TR.GetListItemText);
          table.Add("ru-RU", ListItemTextGetter_ru_RU.GetListItemText);
          table.Add("sv-SE", ListItemTextGetter_sv_SE.GetListItemText);
          table.Add("zh-CN", ListItemTextGetter_zh_CN.GetListItemText);
          return table;
      }
  }
  28. public class ListItemRetriever
  29. {
  /// <summary>
  /// Pairs a num element of the numbering part with the abstractNum element it refers to,
  /// and resolves level definitions and start overrides for a given ilvl.
  /// </summary>
  public class ListItemSourceSet
  {
      public int NumId; // numId from the paragraph or style
      public XElement Num; // num element from the numbering part
      public int AbstractNumId; // abstract numId
      public XElement AbstractNum; // abstractNum element

      /// <summary>
      /// Looks up the num element with the given numId, then the abstractNum it points to.
      /// </summary>
      /// <param name="numXDoc">Numbering part document.</param>
      /// <param name="styleXDoc">Styles document (not used by this constructor).</param>
      /// <param name="numId">The numId to resolve.</param>
      public ListItemSourceSet(XDocument numXDoc, XDocument styleXDoc, int numId)
      {
          NumId = numId;

          Num = numXDoc
              .Root
              .Elements(W.num)
              .Where(candidate => (int)candidate.Attribute(W.numId) == numId)
              .FirstOrDefault();

          // Num is dereferenced without a null check: callers are expected to pass a numId
          // that exists in the numbering part.
          XAttribute abstractNumIdValue = Num
              .Elements(W.abstractNumId)
              .Attributes(W.val)
              .FirstOrDefault();
          AbstractNumId = (int)abstractNumIdValue;

          AbstractNum = numXDoc
              .Root
              .Elements(W.abstractNum)
              .FirstOrDefault(candidate => (int)candidate.Attribute(W.abstractNumId) == AbstractNumId);
      }

      /// <summary>
      /// Returns the startOverride value declared in this num's lvlOverride for the given
      /// level, or null when there is no such lvlOverride or it carries no startOverride.
      /// </summary>
      public int? StartOverride(int ilvl)
      {
          XElement levelOverride = FindLvlOverride(ilvl);
          if (levelOverride == null)
          {
              return null;
          }
          return (int?)levelOverride
              .Elements(W.startOverride)
              .Attributes(W.val)
              .FirstOrDefault();
      }

      /// <summary>
      /// Returns the lvl element nested in this num's lvlOverride for the given level,
      /// or null when there is none.
      /// </summary>
      public XElement OverrideLvl(int ilvl)
      {
          XElement levelOverride = FindLvlOverride(ilvl);
          if (levelOverride == null)
          {
              return null;
          }
          return levelOverride.Element(W.lvl);
      }

      /// <summary>
      /// Returns the lvl element of the abstractNum for the given level, or null.
      /// </summary>
      public XElement AbstractLvl(int ilvl)
      {
          return AbstractNum
              .Elements(W.lvl)
              .Where(level => (int)level.Attribute(W.ilvl) == ilvl)
              .FirstOrDefault();
      }

      /// <summary>
      /// Returns the effective lvl element for the given level: the override lvl when one
      /// exists, otherwise the abstractNum's lvl.
      /// </summary>
      public XElement Lvl(int ilvl)
      {
          return OverrideLvl(ilvl) ?? AbstractLvl(ilvl);
      }

      // First lvlOverride child of Num whose ilvl attribute equals the requested level.
      private XElement FindLvlOverride(int ilvl)
      {
          return Num
              .Elements(W.lvlOverride)
              .Where(candidate => (int)candidate.Attribute(W.ilvl) == ilvl)
              .FirstOrDefault();
      }
  }
  /// <summary>
  /// A source of list numbering for one numId: the num/abstractNum pair itself (Main) and,
  /// when the abstractNum carries a numStyleLink, the pair reached through the linked style.
  /// </summary>
  public class ListItemSource
  {
      public ListItemSourceSet Main;
      public string NumStyleLinkName;
      public ListItemSourceSet NumStyleLink;
      public int Style_ilvl;

      // For list item sources that use numStyleLink there are two abstractNumId values.
      // The one that is used is reached via num->abstractNum->numStyleLink->style->num->abstractNum.
      public ListItemSource(XDocument numXDoc, XDocument stylesXDoc, int numId)
      {
          Main = new ListItemSourceSet(numXDoc, stylesXDoc, numId);

          XAttribute linkName = Main
              .AbstractNum
              .Elements(W.numStyleLink)
              .Attributes(W.val)
              .FirstOrDefault();
          NumStyleLinkName = (string)linkName;
          if (NumStyleLinkName == null)
          {
              return;
          }

          // Follow the link: find the style with that id and read the numId it declares.
          int? linkedNumId = (int?)stylesXDoc
              .Root
              .Elements(W.style)
              .Where(style => (string)style.Attribute(W.styleId) == NumStyleLinkName)
              .Elements(W.pPr)
              .Elements(W.numPr)
              .Elements(W.numId)
              .Attributes(W.val)
              .FirstOrDefault();
          if (linkedNumId.HasValue)
          {
              NumStyleLink = new ListItemSourceSet(numXDoc, stylesXDoc, linkedNumId.Value);
          }
      }

      /// <summary>
      /// Returns the lvl element for the given level, taken from the linked set when there
      /// is one and from Main otherwise. When the requested level is not defined, the
      /// nearest lower level that is defined is returned; null when none is.
      /// </summary>
      public XElement Lvl(int ilvl)
      {
          ListItemSourceSet effectiveSet = NumStyleLink ?? Main;
          XElement found = effectiveSet.Lvl(ilvl);
          for (int lower = ilvl - 1; found == null && lower >= 0; lower--)
          {
              found = effectiveSet.Lvl(lower);
          }
          return found;
      }

      /// <summary>
      /// Returns the start override for the given level, preferring the linked set's value
      /// and falling back to Main's; null when neither declares one.
      /// </summary>
      public int? StartOverride(int ilvl)
      {
          int? linkedOverride = NumStyleLink == null
              ? null
              : NumStyleLink.StartOverride(ilvl);
          return linkedOverride ?? Main.StartOverride(ilvl);
      }

      /// <summary>
      /// Returns the start value declared by the level definition, or 0 when the level
      /// declares none.
      /// </summary>
      public int Start(int ilvl)
      {
          XElement levelDefinition = Lvl(ilvl);
          int? declaredStart = (int?)levelDefinition
              .Elements(W.start)
              .Attributes(W.val)
              .FirstOrDefault();
          return declaredStart ?? 0;
      }

      /// <summary>
      /// The abstractNumId of Main (not the one reached through the numStyleLink).
      /// </summary>
      public int AbstractNumId
      {
          get { return Main.AbstractNumId; }
      }
  }
  /// <summary>
  /// Per-paragraph numbering information. Holds the numbering source that comes from the
  /// paragraph's own numPr (FromParagraph) and/or from its style (FromStyle); wherever both
  /// are present the paragraph's own source takes precedence.
  /// </summary>
  public class ListItemInfo
  {
      public bool IsListItem;
      public bool IsZeroNumId;
      public ListItemSource FromStyle;
      public ListItemSource FromParagraph;

      private int? mAbstractNumId = null;

      /// <summary>
      /// The abstractNumId of the governing source, computed on first use and then cached.
      /// Null while neither source is set.
      /// </summary>
      public int? AbstractNumId
      {
          get
          {
              // note: this property does not get NumStyleLinkAbstractNumId
              // it presumes that we are only interested in AbstractNumId
              // however, it is easy enough to change if necessary
              if (mAbstractNumId != null)
              {
                  return mAbstractNumId;
              }
              if (FromParagraph != null)
              {
                  mAbstractNumId = FromParagraph.AbstractNumId;
              }
              else if (FromStyle != null)
              {
                  mAbstractNumId = FromStyle.AbstractNumId;
              }
              return mAbstractNumId;
          }
      }

      /// <summary>
      /// Returns the lvl element for the given level from the governing source. When that
      /// level is not defined, lower levels are tried in descending order.
      /// </summary>
      /// <returns>The lvl element, or null when no level at or below ilvl is defined.</returns>
      public XElement Lvl(int ilvl)
      {
          // The fallback loop must query the same source as the first lookup. It used to
          // always query FromParagraph, which is null whenever the style branch is taken
          // and therefore threw a NullReferenceException.
          ListItemSource source = FromParagraph != null ? FromParagraph : FromStyle;
          XElement lvl = source.Lvl(ilvl);
          for (int i = ilvl - 1; lvl == null && i >= 0; i--)
          {
              lvl = source.Lvl(i);
          }
          return lvl;
      }

      /// <summary>
      /// Returns the start value of the given level from the governing source.
      /// </summary>
      public int Start(int ilvl)
      {
          if (FromParagraph != null)
          {
              return FromParagraph.Start(ilvl);
          }
          return FromStyle.Start(ilvl);
      }

      /// <summary>
      /// Returns the start value of the given level, optionally honoring a start override.
      /// </summary>
      /// <param name="ilvl">Level to query.</param>
      /// <param name="takeOverride">When true, a start override (if any) wins over the level's own start.</param>
      /// <param name="isOverride">Set to true when the returned value came from a start override.</param>
      /// <returns>The start value; 0 when neither source is set.</returns>
      public int Start(int ilvl, bool takeOverride, out bool isOverride)
      {
          isOverride = false;
          ListItemSource source = FromParagraph ?? FromStyle;
          if (source == null)
          {
              return 0;
          }
          if (takeOverride)
          {
              int? startOverride = source.StartOverride(ilvl);
              if (startOverride != null)
              {
                  isOverride = true;
                  return (int)startOverride;
              }
          }
          return source.Start(ilvl);
      }

      /// <summary>
      /// Returns the start override of the given level from the governing source, or null
      /// when there is none or neither source is set.
      /// </summary>
      public int? StartOverride(int ilvl)
      {
          ListItemSource source = FromParagraph ?? FromStyle;
          if (source == null)
          {
              return null;
          }
          return source.StartOverride(ilvl);
      }

      private int? mNumId;

      /// <summary>
      /// The numId of the governing source, computed on first use and then cached.
      /// Throws InvalidOperationException (null to int conversion) when neither source is set.
      /// </summary>
      public int NumId
      {
          get
          {
              if (mNumId != null)
              {
                  return (int)mNumId;
              }
              if (FromParagraph != null)
              {
                  mNumId = FromParagraph.Main.NumId;
              }
              else if (FromStyle != null)
              {
                  mNumId = FromStyle.Main.NumId;
              }
              return (int)mNumId;
          }
      }

      public ListItemInfo() { }

      public ListItemInfo(bool isListItem, bool isZeroNumId)
      {
          IsListItem = isListItem;
          IsZeroNumId = isZeroNumId;
      }
  }
  /// <summary>
  /// Records the list level of a paragraph by attaching a ParagraphInfo annotation to it.
  /// </summary>
  /// <param name="paragraph">Paragraph element to annotate.</param>
  /// <param name="ilvl">Level to record.</param>
  /// <exception cref="OpenXmlPowerToolsException">The paragraph already has a ParagraphInfo annotation.</exception>
  public static void SetParagraphLevel(XElement paragraph, int ilvl)
  {
      if (paragraph.Annotation<ParagraphInfo>() != null)
      {
          throw new OpenXmlPowerToolsException("Internal error - should never set ilvl more than once.");
      }

      var levelInfo = new ParagraphInfo();
      levelInfo.Ilvl = ilvl;
      paragraph.AddAnnotation(levelInfo);
  }
  /// <summary>
  /// Returns the list level stored in the paragraph's ParagraphInfo annotation.
  /// </summary>
  /// <exception cref="OpenXmlPowerToolsException">No level has been recorded for the paragraph.</exception>
  public static int GetParagraphLevel(XElement paragraph)
  {
      ParagraphInfo levelInfo = paragraph.Annotation<ParagraphInfo>();
      if (levelInfo == null)
      {
          throw new OpenXmlPowerToolsException("Internal error - should never ask for ilvl without it first being set.");
      }
      return levelInfo.Ilvl;
  }
  /// <summary>
  /// Returns the ListItemInfo annotation already attached to the paragraph. The info is
  /// determined only once per paragraph, so this method never computes it.
  /// </summary>
  /// <param name="numXDoc">Not used by this method.</param>
  /// <param name="stylesXDoc">Not used by this method.</param>
  /// <param name="paragraph">Paragraph whose annotation is returned.</param>
  /// <exception cref="OpenXmlPowerToolsException">The paragraph carries no ListItemInfo annotation.</exception>
  public static ListItemInfo GetListItemInfo(XDocument numXDoc, XDocument stylesXDoc, XElement paragraph)
  {
      ListItemInfo annotated = paragraph.Annotation<ListItemInfo>();
      if (annotated == null)
      {
          throw new OpenXmlPowerToolsException("Attempting to retrieve ListItemInfo before initialization");
      }
      return annotated;
  }
  // Shared instance attached to every paragraph that turns out not to be a list item.
  // Constructed with isListItem = false, isZeroNumId = true.
  private static ListItemInfo NotAListItem = new ListItemInfo(false, true);

  // Constructed with isListItem = false, isZeroNumId = false.
  // NOTE(review): the isZeroNumId arguments of these two instances look swapped relative
  // to their names - confirm before relying on IsZeroNumId.
  private static ListItemInfo ZeroNumId = new ListItemInfo(false, false);

  /// <summary>
  /// Determines the numbering information for a paragraph and attaches it as a
  /// ListItemInfo annotation; for list items the paragraph's level is recorded as well
  /// (see SetParagraphLevel). Results are cached per (style name, numId) on the numbering
  /// document so identical combinations are resolved only once.
  /// </summary>
  /// <param name="numXDoc">Numbering part document.</param>
  /// <param name="stylesXDoc">Styles part document.</param>
  /// <param name="paragraph">Paragraph element to annotate.</param>
  public static void InitListItemInfo(XDocument numXDoc, XDocument stylesXDoc, XElement paragraph)
  {
      if (FirstRunIsEmptySectionBreak(paragraph))
      {
          paragraph.AddAnnotation(NotAListItem);
          return;
      }

      XElement numPr = paragraph
          .Elements(W.pPr)
          .Elements(W.numPr)
          .FirstOrDefault();

      int? paragraphNumId = null;
      if (numPr != null)
      {
          paragraphNumId = (int?)numPr
              .Elements(W.numId)
              .Attributes(W.val)
              .FirstOrDefault();

          // if numPr of paragraph does not contain numId, then it is not a list item.
          // if numId of paragraph == 0, then this is not a list item, regardless of the markup in the style.
          if (!paragraphNumId.HasValue || paragraphNumId.Value == 0)
          {
              paragraph.AddAnnotation(NotAListItem);
              return;
          }
      }

      string styleName = GetParagraphStyleName(stylesXDoc, paragraph);

      // Cache hit: reuse the shared info object; only the level is specific to this paragraph.
      ListItemInfo cached = GetListItemInfoFromCache(numXDoc, styleName, paragraphNumId);
      if (cached != null)
      {
          paragraph.AddAnnotation(cached);
          if (cached.FromParagraph != null)
          {
              int levelFromParagraph = ((int?)numPr
                  .Elements(W.ilvl)
                  .Attributes(W.val)
                  .FirstOrDefault()) ?? 0;
              SetParagraphLevel(paragraph, IsSingleLevelList(cached.FromParagraph) ? 0 : levelFromParagraph);
          }
          else if (cached.FromStyle != null)
          {
              int levelFromStyle = cached.FromStyle.Style_ilvl;
              SetParagraphLevel(paragraph, IsSingleLevelList(cached.FromStyle) ? 0 : levelFromStyle);
          }
          return;
      }

      // Cache miss: resolve the style-based and the paragraph-based source.
      var resolved = new ListItemInfo();

      int? styleLevel = null;
      bool? styleZeroNumId = null;
      if (styleName != null)
      {
          resolved.FromStyle = InitializeStyleListItemSource(numXDoc, stylesXDoc, paragraph, styleName,
              out styleLevel, out styleZeroNumId);
      }

      int? paragraphLevel = null;
      bool? paragraphZeroNumId = null;
      if (numPr != null && numPr.Element(W.numId) != null)
      {
          resolved.FromParagraph = InitializeParagraphListItemSource(numXDoc, stylesXDoc, paragraph, numPr,
              out paragraphLevel, out paragraphZeroNumId);
      }

      bool notAListItem =
          paragraphZeroNumId == true ||
          (styleZeroNumId == true && paragraphZeroNumId == null);
      if (notAListItem)
      {
          paragraph.AddAnnotation(NotAListItem);
          AddListItemInfoIntoCache(numXDoc, styleName, paragraphNumId, NotAListItem);
          return;
      }

      // The paragraph's own level wins over the style's level; default is level 0.
      int levelToSet = paragraphLevel ?? styleLevel ?? 0;
      ListItemSource governing = resolved.FromParagraph ?? resolved.FromStyle;
      if (governing != null && IsSingleLevelList(governing))
      {
          levelToSet = 0;
      }
      SetParagraphLevel(paragraph, levelToSet);

      resolved.IsListItem = governing != null;
      paragraph.AddAnnotation(resolved);
      AddListItemInfoIntoCache(numXDoc, styleName, paragraphNumId, resolved);
  }

  // True when the source's main abstractNum declares multiLevelType "singleLevel";
  // such lists are always treated as level 0.
  private static bool IsSingleLevelList(ListItemSource source)
  {
      XElement abstractNum = source.Main.AbstractNum;
      string multiLevelType = (string)abstractNum
          .Elements(W.multiLevelType)
          .Attributes(W.val)
          .FirstOrDefault();
      return multiLevelType == "singleLevel";
  }
  /// <summary>
  /// Returns the style id named by the paragraph's pStyle element, or the document's
  /// default paragraph style name when the paragraph names none.
  /// </summary>
  private static string GetParagraphStyleName(XDocument stylesXDoc, XElement paragraph)
  {
      XAttribute explicitStyle = paragraph
          .Elements(W.pPr)
          .Elements(W.pStyle)
          .Attributes(W.val)
          .FirstOrDefault();
      return (string)explicitStyle ?? GetDefaultParagraphStyleName(stylesXDoc);
  }
  /// <summary>
  /// Returns true when the paragraph carries a sectPr in its pPr and has either no run at
  /// all or no run containing a text (t) element.
  /// </summary>
  private static bool FirstRunIsEmptySectionBreak(XElement paragraph)
  {
      // Runs of this paragraph; presumably DescendantsTrimmed(W.txbxContent) does not
      // descend into text box content - confirm against that helper.
      var runs = paragraph
          .DescendantsTrimmed(W.txbxContent)
          .Where(node => node.Name == W.r);

      bool hasRun = runs.Any();
      bool hasRunText = runs.Elements(W.t).Any();
      if (hasRun && hasRunText)
      {
          return false;
      }

      return paragraph
          .Elements(W.pPr)
          .Elements(W.sectPr)
          .Any();
  }
  /// <summary>
  /// Builds the numbering source described by a paragraph's own numPr element.
  /// </summary>
  /// <param name="numXDoc">Numbering part document.</param>
  /// <param name="stylesXDoc">Styles part document.</param>
  /// <param name="paragraph">The paragraph (not used by this method).</param>
  /// <param name="paragraphNumberingProperties">The paragraph's numPr element.</param>
  /// <param name="ilvl">Receives the ilvl declared in numPr; 0 when none is declared and a source is returned.</param>
  /// <param name="zeroNumId">Receives true when numPr has no numId value or the numId is not defined in the numbering part, otherwise false.</param>
  /// <returns>The list item source, or null when zeroNumId is set to true.</returns>
  private static ListItemSource InitializeParagraphListItemSource(XDocument numXDoc, XDocument stylesXDoc, XElement paragraph, XElement paragraphNumberingProperties, out int? ilvl, out bool? zeroNumId)
  {
      // Paragraph numbering properties must contain a numId.
      int? numId = (int?)paragraphNumberingProperties
          .Elements(W.numId)
          .Attributes(W.val)
          .FirstOrDefault();
      ilvl = (int?)paragraphNumberingProperties
          .Elements(W.ilvl)
          .Attributes(W.val)
          .FirstOrDefault();

      // The numId must also be defined by a num element of the numbering part.
      bool numIsDefined = numId != null && numXDoc
          .Root
          .Elements(W.num)
          .Any(num => (int)num.Attribute(W.numId) == numId);
      if (!numIsDefined)
      {
          zeroNumId = true;
          return null;
      }

      zeroNumId = false;
      if (ilvl == null)
      {
          ilvl = 0;
      }
      return new ListItemSource(numXDoc, stylesXDoc, (int)numId);
  }
  /// <summary>
  /// Builds the numbering source that a paragraph inherits through its style, using the
  /// paragraph properties rolled up by FormattingAssembler.ParagraphStyleRollup.
  /// </summary>
  /// <param name="numXDoc">Numbering part document.</param>
  /// <param name="stylesXDoc">Styles part document.</param>
  /// <param name="paragraph">Paragraph whose style chain is rolled up.</param>
  /// <param name="paragraphStyleName">Style name of the paragraph (not used by this method).</param>
  /// <param name="ilvl">Receives the ilvl from the rolled-up numPr (0 when it declares none); null when there is no numPr with a numId.</param>
  /// <param name="zeroNumId">Receives true when the numId is 0 or not defined in the numbering part, false when a source is returned, null when the style has no numbering.</param>
  /// <returns>The list item source, or null when the style contributes no usable numbering.</returns>
  private static ListItemSource InitializeStyleListItemSource(XDocument numXDoc, XDocument stylesXDoc, XElement paragraph, string paragraphStyleName,
      out int? ilvl, out bool? zeroNumId)
  {
      zeroNumId = null;
      ilvl = null;

      XElement rolledUpPPr = FormattingAssembler.ParagraphStyleRollup(paragraph, stylesXDoc, GetDefaultParagraphStyleName(stylesXDoc));
      if (rolledUpPPr == null)
      {
          return null;
      }

      XElement styleNumPr = rolledUpPPr
          .Elements(W.numPr)
          .FirstOrDefault();
      if (styleNumPr == null || styleNumPr.Element(W.numId) == null)
      {
          return null;
      }

      int numId = (int)styleNumPr
          .Elements(W.numId)
          .Attributes(W.val)
          .FirstOrDefault();
      ilvl = ((int?)styleNumPr
          .Elements(W.ilvl)
          .Attributes(W.val)
          .FirstOrDefault()) ?? 0;

      // make sure that the numId is valid: non-zero and defined in the numbering part
      bool numIsDefined = numId != 0 && numXDoc
          .Root
          .Elements(W.num)
          .Any(num => (int)num.Attribute(W.numId) == numId);
      if (!numIsDefined)
      {
          zeroNumId = true;
          return null;
      }

      var source = new ListItemSource(numXDoc, stylesXDoc, numId)
      {
          Style_ilvl = ilvl.Value,
      };
      zeroNumId = false;
      return source;
  }
  /// <summary>
  /// Returns the styleId of the default paragraph style (type "paragraph" with a true
  /// default attribute), or null when the styles document declares none. The result is
  /// cached on the styles document as a StylesInfo annotation.
  /// </summary>
  private static string GetDefaultParagraphStyleName(XDocument stylesXDoc)
  {
      StylesInfo cachedInfo = stylesXDoc.Annotation<StylesInfo>();
      if (cachedInfo != null)
      {
          return cachedInfo.DefaultParagraphStyleName;
      }

      XElement defaultStyle = stylesXDoc
          .Root
          .Elements(W.style)
          .Where(style => (string)style.Attribute(W.type) == "paragraph")
          .FirstOrDefault(style =>
              style.Attribute(W._default) != null &&
              (bool)style.Attribute(W._default).ToBoolean());

      string defaultStyleName = defaultStyle == null
          ? null
          : (string)defaultStyle.Attribute(W.styleId);

      // Remember the answer (even a null one) so the styles are scanned only once.
      stylesXDoc.AddAnnotation(new StylesInfo()
      {
          DefaultParagraphStyleName = defaultStyleName,
      });
      return defaultStyleName;
  }
  /// <summary>
  /// Looks up a previously resolved ListItemInfo for the given style name / numId pair.
  /// The cache is a dictionary annotation on the root of the numbering document and is
  /// created here when it does not exist yet.
  /// </summary>
  /// <returns>The cached info, or null when the combination has not been cached.</returns>
  private static ListItemInfo GetListItemInfoFromCache(XDocument numXDoc, string styleName, int? numId)
  {
      // Key format: "<styleName>|<numId>", with an empty string standing in for null.
      string cacheKey = (styleName ?? "") + "|" + (numId.HasValue ? numId.Value.ToString() : "");

      XElement cacheOwner = numXDoc.Root;
      var cache = cacheOwner.Annotation<Dictionary<string, ListItemInfo>>();
      if (cache == null)
      {
          cache = new Dictionary<string, ListItemInfo>();
          cacheOwner.AddAnnotation(cache);
      }

      ListItemInfo hit;
      return cache.TryGetValue(cacheKey, out hit) ? hit : null;
  }
  /// <summary>
  /// Stores a resolved ListItemInfo for the given style name / numId pair in the cache
  /// kept as a dictionary annotation on the root of the numbering document. An entry that
  /// already exists for the pair is left untouched.
  /// </summary>
  private static void AddListItemInfoIntoCache(XDocument numXDoc, string styleName, int? numId, ListItemInfo listItemInfo)
  {
      // Key format: "<styleName>|<numId>", with an empty string standing in for null.
      string cacheKey = (styleName ?? "") + "|" + (numId.HasValue ? numId.Value.ToString() : "");

      XElement cacheOwner = numXDoc.Root;
      var cache = cacheOwner.Annotation<Dictionary<string, ListItemInfo>>();
      if (cache == null)
      {
          cache = new Dictionary<string, ListItemInfo>();
          cacheOwner.AddAnnotation(cache);
      }

      if (cache.ContainsKey(cacheKey))
      {
          return;
      }
      cache.Add(cacheKey, listItemInfo);
  }
  /// <summary>
  /// Paragraph annotation holding the paragraph's level numbers as an array;
  /// RetrieveListItem reads it to format the list item text.
  /// </summary>
  public class LevelNumbers
  {
      /// <summary>The level numbers of the annotated paragraph.</summary>
      public int[] LevelNumbersArray;
  }
  /// <summary>
  /// Annotation placed on the styles document by GetDefaultParagraphStyleName to cache
  /// the default paragraph style name.
  /// </summary>
  private class StylesInfo
  {
      /// <summary>styleId of the default paragraph style; null when none was found.</summary>
      public string DefaultParagraphStyleName;
  }
  /// <summary>
  /// Paragraph annotation written by SetParagraphLevel and read by GetParagraphLevel.
  /// </summary>
  private class ParagraphInfo
  {
      /// <summary>The list level recorded for the paragraph.</summary>
      public int Ilvl;
  }
  /// <summary>
  /// Paragraph annotation linking a list item to the list item that precedes it, so that
  /// earlier items can be reached without rescanning the document.
  /// </summary>
  private class ReverseAxis
  {
      /// <summary>The preceding list item paragraph; null for the first one.</summary>
      public XElement PreviousParagraph;
  }
  /// <summary>
  /// Returns the list item text of a paragraph using no explicit settings; forwards to
  /// the overload that takes a ListItemRetrieverSettings with null settings.
  /// </summary>
  public static string RetrieveListItem(WordprocessingDocument wordDoc, XElement paragraph)
  {
      ListItemRetrieverSettings noSettings = null;
      return RetrieveListItem(wordDoc, paragraph, noSettings);
  }
  /// <summary>
  /// Returns the formatted list item text (the number or bullet text) of a paragraph.
  /// On first use for a document, every paragraph of the document's content parts is
  /// annotated with its numbering information.
  /// </summary>
  /// <param name="wordDoc">Document that contains the paragraph.</param>
  /// <param name="paragraph">Paragraph element to get the list item text for.</param>
  /// <param name="settings">Retriever settings; may be null.</param>
  /// <returns>
  /// The list item text, or null when the document has no numbering or styles part, the
  /// paragraph is not a list item, no level definition applies to it, or the level has
  /// no lvlText.
  /// </returns>
  /// <exception cref="OpenXmlPowerToolsException">The paragraph is a list item but carries no LevelNumbers annotation.</exception>
  public static string RetrieveListItem(WordprocessingDocument wordDoc, XElement paragraph, ListItemRetrieverSettings settings)
  {
      var mainPart = wordDoc.MainDocumentPart;
      if (mainPart.NumberingDefinitionsPart == null)
      {
          return null;
      }

      var listItemInfo = paragraph.Annotation<ListItemInfo>();
      if (listItemInfo == null)
      {
          InitializeListItemRetriever(wordDoc, settings);
          listItemInfo = paragraph.Annotation<ListItemInfo>();
      }

      // Initialization annotates nothing when the styles part is missing, and never
      // reaches a paragraph outside the document's content parts. Treat a paragraph that
      // is still unannotated as "not a list item" instead of dereferencing null.
      if (listItemInfo == null || !listItemInfo.IsListItem)
      {
          return null;
      }

      StyleDefinitionsPart styleDefinitionsPart = mainPart.StyleDefinitionsPart;
      if (styleDefinitionsPart == null)
      {
          return null;
      }
      var stylesXDoc = styleDefinitionsPart.GetXDocument();

      int ilvl = GetParagraphLevel(paragraph);
      var lvl = listItemInfo.Lvl(ilvl);
      if (lvl == null)
      {
          // No level definition at or below ilvl: there is nothing to format.
          return null;
      }

      string lvlText = (string)lvl.Elements(W.lvlText).Attributes(W.val).FirstOrDefault();
      if (lvlText == null)
      {
          return null;
      }

      var levelNumbersAnnotation = paragraph.Annotation<LevelNumbers>();
      if (levelNumbersAnnotation == null)
      {
          throw new OpenXmlPowerToolsException("Internal error");
      }
      int[] levelNumbers = levelNumbersAnnotation.LevelNumbersArray;

      string languageIdentifier = GetLanguageIdentifier(paragraph, stylesXDoc);
      return FormatListItem(listItemInfo, levelNumbers, ilvl,
          lvlText, stylesXDoc, languageIdentifier, settings);
  }
  /// <summary>
  /// Determines the language identifier that applies to a paragraph's list item text.
  /// The language type found on the paragraph's runs (PtOpenXml.LanguageType) selects
  /// which attribute of the lang element is read: val for western or unspecified,
  /// eastAsia, or bidi. The value comes from the paragraph's runs first and from the
  /// document defaults of the styles part second.
  /// </summary>
  /// <returns>The language identifier, or "en-US" when none can be determined.</returns>
  private static string GetLanguageIdentifier(XElement paragraph, XDocument stylesXDoc)
  {
      var languageType = (string)paragraph
          .DescendantsTrimmed(W.txbxContent)
          .Where(node => node.Name == W.r)
          .Attributes(PtOpenXml.LanguageType)
          .FirstOrDefault();

      // Pick the lang attribute that matches the language type; an unrecognized
      // language type selects nothing and ends up with the "en-US" default.
      XName langAttributeName = null;
      if (languageType == null || languageType == "western")
      {
          langAttributeName = W.val;
      }
      else if (languageType == "eastAsia")
      {
          langAttributeName = W.eastAsia;
      }
      else if (languageType == "bidi")
      {
          langAttributeName = W.bidi;
      }

      string languageIdentifier = null;
      if (langAttributeName != null)
      {
          languageIdentifier = (string)paragraph
              .Elements(W.r)
              .Elements(W.rPr)
              .Elements(W.lang)
              .Attributes(langAttributeName)
              .FirstOrDefault();
          if (languageIdentifier == null)
          {
              // Nothing on the runs: fall back to the document defaults.
              languageIdentifier = (string)stylesXDoc
                  .Root
                  .Elements(W.docDefaults)
                  .Elements(W.rPrDefault)
                  .Elements(W.rPr)
                  .Elements(W.lang)
                  .Attributes(langAttributeName)
                  .FirstOrDefault();
          }
      }

      return languageIdentifier ?? "en-US";
  }
  /// <summary>
  /// Runs list item initialization over every content part of the document.
  /// </summary>
  private static void InitializeListItemRetriever(WordprocessingDocument wordDoc, ListItemRetrieverSettings settings)
  {
      foreach (var contentPart in wordDoc.ContentParts())
      {
          InitializeListItemRetrieverForPart(wordDoc, contentPart, settings);
      }

      // Disabled debugging aid: reports paragraphs that were left without a ListItemInfo annotation.
#if false
      foreach (var part in wordDoc.ContentParts())
      {
          var xDoc = part.GetXDocument();
          var paras = xDoc
              .Descendants(W.p)
              .Where(p =>
                  p.Annotation<ListItemInfo>() == null);
          if (paras.Any())
              Console.WriteLine("Error");
      }
#endif
  }
  /// <summary>
  /// Initializes list item information for one part: first the part's root story, then
  /// every text box (txbxContent) found in the part. Does nothing when the document has
  /// no numbering part or no styles part.
  /// </summary>
  /// <param name="wordDoc">Document that supplies the numbering and styles parts.</param>
  /// <param name="part">Part whose paragraphs are initialized.</param>
  /// <param name="settings">Not used by this method.</param>
  private static void InitializeListItemRetrieverForPart(WordprocessingDocument wordDoc, OpenXmlPart part, ListItemRetrieverSettings settings)
  {
      var partXDoc = part.GetXDocument();

      var numberingPart = wordDoc.MainDocumentPart.NumberingDefinitionsPart;
      if (numberingPart == null)
      {
          return;
      }
      var numXDoc = numberingPart.GetXDocument();

      var stylesPart = wordDoc.MainDocumentPart.StyleDefinitionsPart;
      if (stylesPart == null)
      {
          return;
      }
      var stylesXDoc = stylesPart.GetXDocument();

      // The root is one story; each text box is a story of its own.
      var stories = new XElement[] { partXDoc.Root }
          .Concat(partXDoc.Root.Descendants(W.txbxContent));
      foreach (XElement story in stories)
      {
          InitializeListItemRetrieverForStory(numXDoc, stylesXDoc, story);
      }
  }
  772. private static void InitializeListItemRetrieverForStory(XDocument numXDoc, XDocument stylesXDoc, XElement rootNode)
  773. {
  774. var paragraphs = rootNode
  775. .DescendantsTrimmed(W.txbxContent)
  776. .Where(p => p.Name == W.p);
  777. foreach (var paragraph in paragraphs)
  778. InitListItemInfo(numXDoc, stylesXDoc, paragraph);
  779. var abstractNumIds = paragraphs
  780. .Select(paragraph =>
  781. {
  782. ListItemInfo listItemInfo = paragraph.Annotation<ListItemInfo>();
  783. if (!listItemInfo.IsListItem)
  784. return (int?)null;
  785. return listItemInfo.AbstractNumId;
  786. })
  787. .Where(a => a != null)
  788. .Distinct()
  789. .ToList();
  790. // when debugging, it is sometimes useful to cause processing of a specific abstractNumId first.
  791. // the following code enables this.
  792. //int? abstractIdToProcessFirst = null;
  793. //if (abstractIdToProcessFirst != null)
  794. //{
  795. // abstractNumIds = (new[] { abstractIdToProcessFirst })
  796. // .Concat(abstractNumIds.Where(ani => ani != abstractIdToProcessFirst))
  797. // .ToList();
  798. //}
  799. foreach (var abstractNumId in abstractNumIds)
  800. {
  801. var listItems = paragraphs
  802. .Where(paragraph =>
  803. {
  804. var listItemInfo = paragraph.Annotation<ListItemInfo>();
  805. if (!listItemInfo.IsListItem)
  806. return false;
  807. return listItemInfo.AbstractNumId == abstractNumId;
  808. })
  809. .ToList();
  810. // annotate paragraphs with previous paragraphs so that we can look backwards with good perf
  811. XElement prevParagraph = null;
  812. foreach (var paragraph in listItems)
  813. {
  814. ReverseAxis reverse = new ReverseAxis()
  815. {
  816. PreviousParagraph = prevParagraph,
  817. };
  818. paragraph.AddAnnotation(reverse);
  819. prevParagraph = paragraph;
  820. }
  821. var startOverrideAlreadyUsed = new List<int>();
  822. List<int> previous = null;
  823. ListItemInfo[] listItemInfoInEffectForStartOverride = new ListItemInfo[] {
  824. null,
  825. null,
  826. null,
  827. null,
  828. null,
  829. null,
  830. null,
  831. null,
  832. null,
  833. null,
  834. };
  835. foreach (var paragraph in listItems)
  836. {
  837. var listItemInfo = paragraph.Annotation<ListItemInfo>();
  838. var ilvl = GetParagraphLevel(paragraph);
  839. listItemInfoInEffectForStartOverride[ilvl] = listItemInfo;
  840. ListItemInfo listItemInfoInEffect = null;
  841. if (ilvl > 0)
  842. listItemInfoInEffect = listItemInfoInEffectForStartOverride[ilvl - 1];
  843. var levelNumbers = new List<int>();
  844. for (int level = 0; level <= ilvl; level++)
  845. {
  846. var numId = listItemInfo.NumId;
  847. var startOverride = listItemInfo.StartOverride(level);
  848. int? inEffectStartOverride = null;
  849. if (listItemInfoInEffect != null)
  850. inEffectStartOverride = listItemInfoInEffect.StartOverride(level);
  851. if (level == ilvl)
  852. {
  853. var lvl = listItemInfo.Lvl(ilvl);
  854. var lvlRestart = (int?)lvl.Elements(W.lvlRestart).Attributes(W.val).FirstOrDefault();
  855. if (lvlRestart != null)
  856. {
  857. var previousPara = PreviousParagraphsForLvlRestart(paragraph, (int)lvlRestart)
  858. .FirstOrDefault(p =>
  859. {
  860. var plvl = GetParagraphLevel(p);
  861. return plvl == ilvl;
  862. });
  863. if (previousPara != null)
  864. previous = previousPara.Annotation<LevelNumbers>().LevelNumbersArray.ToList();
  865. }
  866. }
  867. if (previous == null ||
  868. level >= previous.Count() ||
  869. (level == ilvl && startOverride != null && !startOverrideAlreadyUsed.Contains(numId)))
  870. {
  871. if (previous == null || level >= previous.Count())
  872. {
  873. var start = listItemInfo.Start(level);
  874. // only look at startOverride if the level that we're examining is same as the paragraph's level.
  875. if (level == ilvl)
  876. {
  877. if (startOverride != null && !startOverrideAlreadyUsed.Contains(numId))
  878. {
  879. startOverrideAlreadyUsed.Add(numId);
  880. start = (int)startOverride;
  881. }
  882. else
  883. {
  884. if (startOverride != null)
  885. start = (int)startOverride;
  886. if (inEffectStartOverride != null && inEffectStartOverride > start)
  887. start = (int)inEffectStartOverride;
  888. }
  889. }
  890. levelNumbers.Add(start);
  891. }
  892. else
  893. {
  894. var start = listItemInfo.Start(level);
  895. // only look at startOverride if the level that we're examining is same as the paragraph's level.
  896. if (level == ilvl)
  897. {
  898. if (startOverride != null)
  899. {
  900. if (!startOverrideAlreadyUsed.Contains(numId))
  901. {
  902. startOverrideAlreadyUsed.Add(numId);
  903. start = (int)startOverride;
  904. }
  905. }
  906. }
  907. levelNumbers.Add(start);
  908. }
  909. }
  910. else
  911. {
  912. int? thisNumber = null;
  913. if (level == ilvl)
  914. {
  915. if (startOverride != null)
  916. {
  917. if (!startOverrideAlreadyUsed.Contains(numId))
  918. {
  919. startOverrideAlreadyUsed.Add(numId);
  920. thisNumber = (int)startOverride;
  921. }
  922. thisNumber = previous.ElementAt(level) + 1;
  923. }
  924. else
  925. {
  926. thisNumber = previous.ElementAt(level) + 1;
  927. }
  928. }
  929. else
  930. {
  931. thisNumber = previous.ElementAt(level);
  932. }
  933. levelNumbers.Add((int)thisNumber);
  934. }
  935. }
  936. var levelNumbersAnno = new LevelNumbers()
  937. {
  938. LevelNumbersArray = levelNumbers.ToArray()
  939. };
  940. paragraph.AddAnnotation(levelNumbersAnno);
  941. previous = levelNumbers;
  942. }
  943. }
  944. }
  // Walks backwards from 'paragraph' along the ReverseAxis annotations and lazily yields
  // each earlier paragraph in turn (nearest first).
  //
  // The walk ends when:
  //   - the current paragraph has no ReverseAxis annotation,
  //   - the annotation has no PreviousParagraph, or
  //   - the previous paragraph's level (per GetParagraphLevel) is lower than 'ilvl'.
  // The paragraph that triggers the stop is not yielded.
  private static IEnumerable<XElement> PreviousParagraphsForLvlRestart(XElement paragraph, int ilvl)
  {
      XElement cursor = paragraph;
      ReverseAxis link;

      // Falling out of the loop ends the sequence, exactly like an explicit 'yield break'.
      while ((link = cursor.Annotation<ReverseAxis>()) != null && link.PreviousParagraph != null)
      {
          // A paragraph at a level below the threshold bounds the backwards search.
          if (GetParagraphLevel(link.PreviousParagraph) < ilvl)
              yield break;

          yield return link.PreviousParagraph;
          cursor = link.PreviousParagraph;
      }
  }
  // Produces the text of a list item label by expanding the placeholders in 'lvlText'.
  //
  // 'lvlText' is split into tokens by GetFormatTokens. Tokens that do not start with '%',
  // or whose second character is not an integer, are copied through unchanged. A token
  // "%N" is replaced by the formatted counter for level N, where "%1" maps to
  // levelNumbers[0]. A level beyond the end of 'levelNumbers' is clamped to the last entry.
  //
  // Parameters:
  //   lii                  - source of the w:lvl element for each level (via lii.Lvl).
  //   levelNumbers         - current counter value per level.
  //   ilvl                 - level whose w:lvl element is checked for w:isLgl.
  //   lvlText              - the level text template; must not be null.
  //   styles               - not used by this method.
  //   languageCultureName  - key into settings.ListItemTextImplementations; also forwarded
  //                          to the text getter. May be null.
  //   settings             - optional; may be null, in which case the default getter is used.
  //
  // Returns the concatenation of all expanded tokens.
  private static string FormatListItem(ListItemInfo lii, int[] levelNumbers, int ilvl,
      string lvlText, XDocument styles, string languageCultureName, ListItemRetrieverSettings settings)
  {
      string[] formatTokens = GetFormatTokens(lvlText).ToArray();
      XElement lvl = lii.Lvl(ilvl);
      bool isLgl = lvl.Elements(W.isLgl).Any();
      string listItem = formatTokens.Select(t =>
      {
          if (t.Substring(0, 1) != "%")
              return t;
          int indentationLevel;
          if (!Int32.TryParse(t.Substring(1), out indentationLevel))
              return t;

          // Placeholders are one-based; convert to a zero-based index into levelNumbers.
          indentationLevel -= 1;
          if (indentationLevel >= levelNumbers.Length)
              indentationLevel = levelNumbers.Length - 1;

          // "%0" (or an empty levelNumbers array) leaves a negative index here. There is no
          // counter to substitute, so emit the token literally instead of indexing out of range.
          if (indentationLevel < 0)
              return t;

          int levelNumber = levelNumbers[indentationLevel];
          string levelText = null;
          XElement rlvl = lii.Lvl(indentationLevel);
          string numFmtForLevel = (string)rlvl.Elements(W.numFmt).Attributes(W.val).FirstOrDefault();
          if (numFmtForLevel == null)
          {
              // No direct w:numFmt value: fall back to a custom format declared inside
              // mc:AlternateContent/mc:Choice.
              var numFmtElement = rlvl.Elements(MC.AlternateContent).Elements(MC.Choice).Elements(W.numFmt).FirstOrDefault();
              if (numFmtElement != null && (string)numFmtElement.Attribute(W.val) == "custom")
                  numFmtForLevel = (string)numFmtElement.Attribute(W.format);
          }

          // When w:isLgl is present on the paragraph's level, every format other than
          // "none" and "decimalZero" is rendered as plain decimal.
          if (numFmtForLevel != "none")
          {
              if (isLgl && numFmtForLevel != "decimalZero")
                  numFmtForLevel = "decimal";
          }

          // Prefer a caller-registered implementation for this culture, if there is one.
          if (languageCultureName != null && settings != null)
          {
              if (settings.ListItemTextImplementations.ContainsKey(languageCultureName))
              {
                  var impl = settings.ListItemTextImplementations[languageCultureName];
                  levelText = impl(languageCultureName, levelNumber, numFmtForLevel);
              }
          }

          // The custom implementation may be absent or may return null; use the default getter then.
          if (levelText == null)
              levelText = ListItemTextGetter_Default.GetListItemText(languageCultureName, levelNumber, numFmtForLevel);
          return levelText;
      }).StringConcatenate();
      return listItem;
  }
  // Splits a level text template into an ordered sequence of tokens.
  //
  // A '%' that has at least one character after it forms a two-character placeholder token
  // ('%' plus the following character). Runs of text between placeholders are returned as
  // single literal tokens. A '%' that is the very last character is not a placeholder and
  // stays part of the trailing literal. An empty string produces no tokens; no token is
  // ever empty.
  private static IEnumerable<string> GetFormatTokens(string lvlText)
  {
      // Highest index at which a two-character placeholder can still begin.
      int lastPairStart = lvlText.Length - 2;
      int pos = 0;

      while (pos < lvlText.Length)
      {
          int marker = lvlText.IndexOf('%', pos);

          // No further complete placeholder: the remainder is one literal token.
          if (marker < 0 || marker > lastPairStart)
          {
              yield return lvlText.Substring(pos);
              yield break;
          }

          // Literal text preceding the placeholder, if any.
          if (marker > pos)
              yield return lvlText.Substring(pos, marker - pos);

          yield return lvlText.Substring(marker, 2);
          pos = marker + 2;
      }
  }
  1029. }
  1030. }