ListItemRetriever.cs 45 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172
  1. // Copyright (c) Microsoft. All rights reserved.
  2. // Licensed under the MIT license. See LICENSE file in the project root for full license information.
  3. using System;
  4. using System.Collections.Generic;
  5. using System.Linq;
  6. using System.Text;
  7. using System.Xml.Linq;
  8. using DocumentFormat.OpenXml.Packaging;
  9. namespace OpenXmlPowerTools
  10. {
  // Settings that select the language-specific functions used to render list item text.
  public class ListItemRetrieverSettings
  {
      // Built-in list item text functions, keyed by language identifier (for example "fr-FR").
      public static Dictionary<string, Func<string, int, string, string>> DefaultListItemTextImplementations =
          BuildDefaultImplementations();

      // Functions used by this settings instance.  The constructor assigns the shared
      // DefaultListItemTextImplementations dictionary itself (the same instance, not a copy).
      public Dictionary<string, Func<string, int, string, string>> ListItemTextImplementations;

      public ListItemRetrieverSettings()
      {
          ListItemTextImplementations = DefaultListItemTextImplementations;
      }

      // Creates the dictionary of built-in implementations.
      private static Dictionary<string, Func<string, int, string, string>> BuildDefaultImplementations()
      {
          var implementations = new Dictionary<string, Func<string, int, string, string>>();
          implementations.Add("fr-FR", ListItemTextGetter_fr_FR.GetListItemText);
          implementations.Add("tr-TR", ListItemTextGetter_tr_TR.GetListItemText);
          implementations.Add("ru-RU", ListItemTextGetter_ru_RU.GetListItemText);
          implementations.Add("sv-SE", ListItemTextGetter_sv_SE.GetListItemText);
          implementations.Add("zh-CN", ListItemTextGetter_zh_CN.GetListItemText);
          return implementations;
      }
  }
  28. public class ListItemRetriever
  29. {
  // Pairs a w:num element with the w:abstractNum element it refers to and exposes
  // per-level lookups over both.
  public class ListItemSourceSet
  {
      public int NumId; // numId from the paragraph or style
      public XElement Num; // num element from the numbering part
      public int AbstractNumId; // abstract numId
      public XElement AbstractNum; // abstractNum element

      // Resolves the num element with the given numId and the abstractNum it points at.
      // styleXDoc is accepted but not used here.
      public ListItemSourceSet(XDocument numXDoc, XDocument styleXDoc, int numId)
      {
          NumId = numId;

          XElement numberingRoot = numXDoc.Root;
          Num = numberingRoot
              .Elements(W.num)
              .Where(n => (int)n.Attribute(W.numId) == numId)
              .FirstOrDefault();

          XAttribute abstractNumIdVal = Num
              .Elements(W.abstractNumId)
              .Attributes(W.val)
              .FirstOrDefault();
          AbstractNumId = (int)abstractNumIdVal;

          AbstractNum = numberingRoot
              .Elements(W.abstractNum)
              .FirstOrDefault(e => (int)e.Attribute(W.abstractNumId) == AbstractNumId);
      }

      // Returns the first lvlOverride child of Num whose ilvl attribute equals ilvl, or null.
      private XElement FindLvlOverride(int ilvl)
      {
          foreach (XElement candidate in Num.Elements(W.lvlOverride))
          {
              if ((int)candidate.Attribute(W.ilvl) == ilvl)
              {
                  return candidate;
              }
          }
          return null;
      }

      // Value of startOverride for the level, or null when the level has no override
      // or the override carries no startOverride value.
      public int? StartOverride(int ilvl)
      {
          XElement levelOverride = FindLvlOverride(ilvl);
          if (levelOverride == null)
          {
              return null;
          }
          XAttribute startVal = levelOverride
              .Elements(W.startOverride)
              .Attributes(W.val)
              .FirstOrDefault();
          return (int?)startVal;
      }

      // The lvl element nested inside the level's lvlOverride, or null.
      public XElement OverrideLvl(int ilvl)
      {
          XElement levelOverride = FindLvlOverride(ilvl);
          if (levelOverride == null)
          {
              return null;
          }
          return levelOverride.Element(W.lvl);
      }

      // The lvl element for the level as declared on the abstractNum, or null.
      public XElement AbstractLvl(int ilvl)
      {
          foreach (XElement abstractLevel in AbstractNum.Elements(W.lvl))
          {
              if ((int)abstractLevel.Attribute(W.ilvl) == ilvl)
              {
                  return abstractLevel;
              }
          }
          return null;
      }

      // The effective lvl element: the override's lvl when present, otherwise the abstract one.
      public XElement Lvl(int ilvl)
      {
          return OverrideLvl(ilvl) ?? AbstractLvl(ilvl);
      }
  }
  // Describes where the numbering for a list item comes from: the num/abstractNum pair
  // addressed by a numId (Main) and, when the abstractNum carries a numStyleLink, the
  // num/abstractNum pair reached through the linked style (NumStyleLink).
  public class ListItemSource
  {
      public ListItemSourceSet Main;
      public string NumStyleLinkName;
      public ListItemSourceSet NumStyleLink;
      public int Style_ilvl;

      // for list item sources that use numStyleLink, there are two abstractId values.
      // The abstractId that is used is in num->abstractNum->numStyleLink->style->num->abstractNum
      public ListItemSource(XDocument numXDoc, XDocument stylesXDoc, int numId)
      {
          Main = new ListItemSourceSet(numXDoc, stylesXDoc, numId);
          NumStyleLinkName = (string)Main
              .AbstractNum
              .Elements(W.numStyleLink)
              .Attributes(W.val)
              .FirstOrDefault();
          if (NumStyleLinkName == null)
          {
              return;
          }

          var numStyleLinkNumId = (int?)stylesXDoc
              .Root
              .Elements(W.style)
              .Where(s => (string)s.Attribute(W.styleId) == NumStyleLinkName)
              .Elements(W.pPr)
              .Elements(W.numPr)
              .Elements(W.numId)
              .Attributes(W.val)
              .FirstOrDefault();
          if (numStyleLinkNumId == null)
          {
              return;
          }

          // Only follow the link when the referenced num element really exists; constructing a
          // ListItemSourceSet for a missing num would fail with a NullReferenceException.
          int linkedNumId = (int)numStyleLinkNumId;
          bool linkedNumExists = numXDoc
              .Root
              .Elements(W.num)
              .Any(n => (int)n.Attribute(W.numId) == linkedNumId);
          if (linkedNumExists)
          {
              NumStyleLink = new ListItemSourceSet(numXDoc, stylesXDoc, linkedNumId);
          }
      }

      // Looks up the lvl element for ilvl in the given set; when that level is not defined,
      // walks down through the lower levels (ilvl - 1 .. 0) and returns the first one found.
      private static XElement LvlOrNearestLower(ListItemSourceSet set, int ilvl)
      {
          XElement lvl = set.Lvl(ilvl);
          for (int i = ilvl - 1; lvl == null && i >= 0; i--)
          {
              lvl = set.Lvl(i);
          }
          return lvl;
      }

      // Returns the lvl element to use for ilvl.  Main is consulted first (with fallback to
      // lower levels); NumStyleLink is only consulted when Main yields nothing.  May return null.
      public XElement Lvl(int ilvl)
      {
          XElement fromMain = LvlOrNearestLower(Main, ilvl);
          if (fromMain != null)
          {
              return fromMain;
          }
          if (NumStyleLink != null)
          {
              return LvlOrNearestLower(NumStyleLink, ilvl);
          }
          return null;
      }

      // Returns the startOverride for ilvl; an override on NumStyleLink wins over one on Main.
      public int? StartOverride(int ilvl)
      {
          if (NumStyleLink != null)
          {
              var startOverride = NumStyleLink.StartOverride(ilvl);
              if (startOverride != null)
              {
                  return startOverride;
              }
          }
          return Main.StartOverride(ilvl);
      }

      // Returns the start value declared on the level, or 0 when the level declares no start
      // value or no level definition can be found at all.
      public int Start(int ilvl)
      {
          XElement lvl = Lvl(ilvl);
          if (lvl == null)
          {
              // Lvl() can legitimately return null; previously this caused a NullReferenceException.
              return 0;
          }
          var start = (int?)lvl.Elements(W.start).Attributes(W.val).FirstOrDefault();
          return start ?? 0;
      }

      // The abstractNumId of Main (the numStyleLink target is deliberately not reported here).
      public int AbstractNumId
      {
          get
          {
              return Main.AbstractNumId;
          }
      }
  }
  // Per-paragraph numbering information.  A paragraph can get its numbering from its own
  // numPr (FromParagraph) and/or from its style (FromStyle); the paragraph source takes
  // precedence wherever both are present.
  public class ListItemInfo
  {
      public bool IsListItem;
      public bool IsZeroNumId;
      public ListItemSource FromStyle;
      public ListItemSource FromParagraph;

      private int? mAbstractNumId = null;

      // The abstractNumId of the effective source, or null when there is no source.
      public int? AbstractNumId
      {
          get
          {
              // note: this property does not get NumStyleLinkAbstractNumId
              // it presumes that we are only interested in AbstractNumId
              // however, it is easy enough to change if necessary
              if (mAbstractNumId != null)
                  return mAbstractNumId;
              if (FromParagraph != null)
                  mAbstractNumId = FromParagraph.AbstractNumId;
              else if (FromStyle != null)
                  mAbstractNumId = FromStyle.AbstractNumId;
              return mAbstractNumId;
          }
      }

      // Asks the source for the lvl element at ilvl and, if it has none, for each lower
      // level in turn (ilvl - 1 .. 0).  Returns null when nothing is found.
      private static XElement LvlWithFallback(ListItemSource source, int ilvl)
      {
          XElement lvl = source.Lvl(ilvl);
          for (int i = ilvl - 1; lvl == null && i >= 0; i--)
          {
              lvl = source.Lvl(i);
          }
          return lvl;
      }

      // Returns the lvl element for ilvl from the paragraph source when there is one, otherwise
      // from the style source.  Returns null when no level definition (or no source) exists.
      public XElement Lvl(int ilvl)
      {
          if (FromParagraph != null)
              return LvlWithFallback(FromParagraph, ilvl);

          // Fix: the fallback loop for the style case used to call FromParagraph.Lvl(i), which
          // is always null on this path and therefore threw a NullReferenceException.
          if (FromStyle != null)
              return LvlWithFallback(FromStyle, ilvl);

          return null;
      }

      // Returns the declared start value for ilvl, or 0 when there is no source
      // (the same default the three-argument overload uses).
      public int Start(int ilvl)
      {
          if (FromParagraph != null)
              return FromParagraph.Start(ilvl);
          if (FromStyle != null)
              return FromStyle.Start(ilvl);
          return 0;
      }

      // Returns the start value for ilvl.  When takeOverride is true and the effective source
      // has a startOverride for the level, that value is returned and isOverride is set to true;
      // otherwise the declared start value is returned (0 when there is no source).
      public int Start(int ilvl, bool takeOverride, out bool isOverride)
      {
          ListItemSource source = FromParagraph ?? FromStyle;
          isOverride = false;
          if (source == null)
              return 0;

          if (takeOverride)
          {
              var startOverride = source.StartOverride(ilvl);
              if (startOverride != null)
              {
                  isOverride = true;
                  return (int)startOverride;
              }
          }
          return source.Start(ilvl);
      }

      // Returns the startOverride of the effective source for ilvl, or null when there is none.
      public int? StartOverride(int ilvl)
      {
          ListItemSource source = FromParagraph ?? FromStyle;
          if (source == null)
              return null;
          return source.StartOverride(ilvl);
      }

      private int? mNumId;

      // The numId of the effective source.  As before, reading this when neither source is set
      // fails with an InvalidOperationException from the nullable-to-int conversion.
      public int NumId
      {
          get
          {
              if (mNumId != null)
                  return (int)mNumId;
              if (FromParagraph != null)
                  mNumId = FromParagraph.Main.NumId;
              else if (FromStyle != null)
                  mNumId = FromStyle.Main.NumId;
              return (int)mNumId;
          }
      }

      public ListItemInfo() { }

      public ListItemInfo(bool isListItem, bool isZeroNumId)
      {
          IsListItem = isListItem;
          IsZeroNumId = isZeroNumId;
      }
  }
  // Records the list level of a paragraph as a ParagraphInfo annotation.
  // Throws OpenXmlPowerToolsException when the paragraph already carries one.
  public static void SetParagraphLevel(XElement paragraph, int ilvl)
  {
      if (paragraph.Annotation<ParagraphInfo>() != null)
      {
          throw new OpenXmlPowerToolsException("Internal error - should never set ilvl more than once.");
      }

      ParagraphInfo levelInfo = new ParagraphInfo();
      levelInfo.Ilvl = ilvl;
      paragraph.AddAnnotation(levelInfo);
  }
  // Reads back the list level stored by SetParagraphLevel.
  // Throws OpenXmlPowerToolsException when no level has been stored on the paragraph.
  public static int GetParagraphLevel(XElement paragraph)
  {
      ParagraphInfo levelInfo = paragraph.Annotation<ParagraphInfo>();
      if (levelInfo == null)
      {
          throw new OpenXmlPowerToolsException("Internal error - should never ask for ilvl without it first being set.");
      }
      return levelInfo.Ilvl;
  }
  // Returns the ListItemInfo annotation already attached to the paragraph; the information is
  // never computed here.  numXDoc and stylesXDoc are not used by this method.
  // Throws OpenXmlPowerToolsException when the paragraph has no ListItemInfo annotation.
  public static ListItemInfo GetListItemInfo(XDocument numXDoc, XDocument stylesXDoc, XElement paragraph)
  {
      ListItemInfo annotated = paragraph.Annotation<ListItemInfo>();
      if (annotated == null)
      {
          throw new OpenXmlPowerToolsException("Attempting to retrieve ListItemInfo before initialization");
      }
      return annotated;
  }
  // Shared instance attached to paragraphs that are not list items.
  // NOTE(review): IsZeroNumId is true on this instance and false on ZeroNumId, which reads
  // as if the two were swapped - confirm against callers before changing.
  private static ListItemInfo NotAListItem = new ListItemInfo() { IsListItem = false, IsZeroNumId = true };

  // Shared instance with both flags false.
  private static ListItemInfo ZeroNumId = new ListItemInfo() { IsListItem = false, IsZeroNumId = false };
  // Determines the numbering information for one paragraph and attaches it as a ListItemInfo
  // annotation, plus a paragraph-level annotation (via SetParagraphLevel) when a numbering
  // source is found.  Results are cached per (style name, paragraph numId) on the numbering
  // document so that equal combinations are resolved only once.
  public static void InitListItemInfo(XDocument numXDoc, XDocument stylesXDoc, XElement paragraph)
  {
      if (FirstRunIsEmptySectionBreak(paragraph))
      {
          paragraph.AddAnnotation(NotAListItem);
          return;
      }

      int? numIdOnParagraph = null;
      XElement numPr = paragraph
          .Elements(W.pPr)
          .Elements(W.numPr)
          .FirstOrDefault();
      if (numPr != null)
      {
          numIdOnParagraph = (int?)numPr
              .Elements(W.numId)
              .Attributes(W.val)
              .FirstOrDefault();

          // A numPr without a numId is not a list item, and numId == 0 switches numbering off
          // regardless of what the style says.
          bool missingOrZero = !numIdOnParagraph.HasValue || numIdOnParagraph.Value == 0;
          if (missingOrZero)
          {
              paragraph.AddAnnotation(NotAListItem);
              return;
          }
      }

      string styleName = GetParagraphStyleName(stylesXDoc, paragraph);

      ListItemInfo cached = GetListItemInfoFromCache(numXDoc, styleName, numIdOnParagraph);
      if (cached != null)
      {
          paragraph.AddAnnotation(cached);

          // The level still has to be worked out per paragraph: from the paragraph's own ilvl
          // when the numbering comes from the paragraph, otherwise from the style's ilvl.
          ListItemSource cachedSource = cached.FromParagraph;
          int cachedLevel;
          if (cachedSource != null)
          {
              int? declaredLevel = (int?)numPr
                  .Elements(W.ilvl)
                  .Attributes(W.val)
                  .FirstOrDefault();
              cachedLevel = declaredLevel ?? 0;
          }
          else
          {
              cachedSource = cached.FromStyle;
              if (cachedSource == null)
              {
                  return;
              }
              cachedLevel = cachedSource.Style_ilvl;
          }

          string cachedMultiLevelType = (string)cachedSource
              .Main
              .AbstractNum
              .Elements(W.multiLevelType)
              .Attributes(W.val)
              .FirstOrDefault();
          if (cachedMultiLevelType == "singleLevel")
          {
              cachedLevel = 0;
          }
          SetParagraphLevel(paragraph, cachedLevel);
          return;
      }

      ListItemInfo info = new ListItemInfo();

      int? levelFromStyle = null;
      bool? styleHasNoNumbering = null;
      if (styleName != null)
      {
          info.FromStyle = InitializeStyleListItemSource(numXDoc, stylesXDoc, paragraph, styleName,
              out levelFromStyle, out styleHasNoNumbering);
      }

      int? levelFromParagraph = null;
      bool? paragraphHasNoNumbering = null;
      if (numPr != null && numPr.Element(W.numId) != null)
      {
          info.FromParagraph = InitializeParagraphListItemSource(numXDoc, stylesXDoc, paragraph, numPr,
              out levelFromParagraph, out paragraphHasNoNumbering);
      }

      // The paragraph's verdict wins; the style's verdict only counts when the paragraph has none.
      bool notNumbered =
          paragraphHasNoNumbering == true ||
          (styleHasNoNumbering == true && paragraphHasNoNumbering == null);
      if (notNumbered)
      {
          paragraph.AddAnnotation(NotAListItem);
          AddListItemInfoIntoCache(numXDoc, styleName, numIdOnParagraph, NotAListItem);
          return;
      }

      int level = levelFromParagraph ?? levelFromStyle ?? 0;

      ListItemSource effectiveSource = info.FromParagraph ?? info.FromStyle;
      if (effectiveSource != null)
      {
          string multiLevelType = (string)effectiveSource
              .Main
              .AbstractNum
              .Elements(W.multiLevelType)
              .Attributes(W.val)
              .FirstOrDefault();
          // Lists declared as singleLevel are always placed on level 0.
          if (multiLevelType == "singleLevel")
          {
              level = 0;
          }
      }

      SetParagraphLevel(paragraph, level);
      info.IsListItem = effectiveSource != null;
      paragraph.AddAnnotation(info);
      AddListItemInfoIntoCache(numXDoc, styleName, numIdOnParagraph, info);
  }
  // Returns the style id named by the paragraph's pStyle element, or the document's default
  // paragraph style name when the paragraph does not name a style.
  private static string GetParagraphStyleName(XDocument stylesXDoc, XElement paragraph)
  {
      XAttribute styleVal = paragraph
          .Elements(W.pPr)
          .Elements(W.pStyle)
          .Attributes(W.val)
          .FirstOrDefault();
      string explicitStyleName = (string)styleVal;
      return explicitStyleName ?? GetDefaultParagraphStyleName(stylesXDoc);
  }
  // True when the paragraph carries a section break (pPr/sectPr) and has either no runs at all
  // or no run with a text element.  Content inside txbxContent is not considered.
  private static bool FirstRunIsEmptySectionBreak(XElement paragraph)
  {
      var runs = paragraph
          .DescendantsTrimmed(W.txbxContent)
          .Where(d => d.Name == W.r);

      bool hasRunWithText = runs.Any() && runs.Elements(W.t).Any();
      if (hasRunWithText)
      {
          return false;
      }

      return paragraph
          .Elements(W.pPr)
          .Elements(W.sectPr)
          .Any();
  }
  // Builds the ListItemSource for the numId found in the paragraph's own numPr.
  // ilvl receives the paragraph's ilvl value (0 when absent and a source is returned).
  // zeroNumId is set to true, and null is returned, when the numPr has no numId value or the
  // numbering part has no num element with that id; otherwise zeroNumId is false.
  private static ListItemSource InitializeParagraphListItemSource(XDocument numXDoc, XDocument stylesXDoc, XElement paragraph, XElement paragraphNumberingProperties, out int? ilvl, out bool? zeroNumId)
  {
      zeroNumId = null;

      XAttribute numIdVal = paragraphNumberingProperties
          .Elements(W.numId)
          .Attributes(W.val)
          .FirstOrDefault();
      int? numId = (int?)numIdVal;

      XAttribute ilvlVal = paragraphNumberingProperties
          .Elements(W.ilvl)
          .Attributes(W.val)
          .FirstOrDefault();
      ilvl = (int?)ilvlVal;

      bool numIsDefined =
          numId != null &&
          numXDoc
              .Root
              .Elements(W.num)
              .Any(n => (int)n.Attribute(W.numId) == numId.Value);
      if (!numIsDefined)
      {
          zeroNumId = true;
          return null;
      }

      zeroNumId = false;
      ilvl = ilvl ?? 0;
      return new ListItemSource(numXDoc, stylesXDoc, numId.Value);
  }
  // Builds the ListItemSource for the numbering reached through the paragraph's rolled-up
  // style properties.  Returns null with ilvl and zeroNumId both null when the rolled-up
  // properties have no numPr/numId.  Returns null with zeroNumId = true when the numId is 0
  // or does not match any num element.  Otherwise returns the source with zeroNumId = false
  // and ilvl set to the style's level (0 when not specified).
  // paragraphStyleName is not used by this method.
  private static ListItemSource InitializeStyleListItemSource(XDocument numXDoc, XDocument stylesXDoc, XElement paragraph, string paragraphStyleName,
      out int? ilvl, out bool? zeroNumId)
  {
      ilvl = null;
      zeroNumId = null;

      XElement rolledUpPPr = FormattingAssembler.ParagraphStyleRollup(paragraph, stylesXDoc, GetDefaultParagraphStyleName(stylesXDoc));
      if (rolledUpPPr == null)
      {
          return null;
      }

      XElement numPr = rolledUpPPr
          .Elements(W.numPr)
          .FirstOrDefault();
      if (numPr == null || numPr.Element(W.numId) == null)
      {
          return null;
      }

      XAttribute numIdVal = numPr
          .Elements(W.numId)
          .Attributes(W.val)
          .FirstOrDefault();
      int numId = (int)numIdVal;

      XAttribute ilvlVal = numPr
          .Elements(W.ilvl)
          .Attributes(W.val)
          .FirstOrDefault();
      int level = ((int?)ilvlVal) ?? 0;
      ilvl = level;

      // numId 0 means "no numbering"; any other id must exist in the numbering part.
      bool numIsDefined =
          numId != 0 &&
          numXDoc
              .Root
              .Elements(W.num)
              .Any(e => (int)e.Attribute(W.numId) == numId);
      if (!numIsDefined)
      {
          zeroNumId = true;
          return null;
      }

      ListItemSource source = new ListItemSource(numXDoc, stylesXDoc, numId);
      source.Style_ilvl = level;
      zeroNumId = false;
      return source;
  }
  // Returns the styleId of the first paragraph style flagged as default, or null when there is
  // none.  The answer is stored as a StylesInfo annotation on stylesXDoc and reused afterwards.
  private static string GetDefaultParagraphStyleName(XDocument stylesXDoc)
  {
      StylesInfo cachedInfo = stylesXDoc.Annotation<StylesInfo>();
      if (cachedInfo != null)
      {
          return cachedInfo.DefaultParagraphStyleName;
      }

      string defaultStyleId = null;
      foreach (XElement style in stylesXDoc.Root.Elements(W.style))
      {
          if ((string)style.Attribute(W.type) != "paragraph")
          {
              continue;
          }
          if (style.Attribute(W._default) != null &&
              (bool)style.Attribute(W._default).ToBoolean())
          {
              defaultStyleId = (string)style.Attribute(W.styleId);
              break;
          }
      }

      StylesInfo newInfo = new StylesInfo();
      newInfo.DefaultParagraphStyleName = defaultStyleId;
      stylesXDoc.AddAnnotation(newInfo);
      return defaultStyleId;
  }
  // Looks up a previously stored ListItemInfo for the (style name, numId) combination.
  // The cache is a dictionary kept as an annotation on the root element of numXDoc; it is
  // created on first use.  Returns null when the combination has not been stored yet.
  private static ListItemInfo GetListItemInfoFromCache(XDocument numXDoc, string styleName, int? numId)
  {
      string stylePart = styleName ?? "";
      string numIdPart = numId.HasValue ? numId.ToString() : "";
      string key = stylePart + "|" + numIdPart;

      XElement cacheOwner = numXDoc.Root;
      var cache = cacheOwner.Annotation<Dictionary<string, ListItemInfo>>();
      if (cache == null)
      {
          cache = new Dictionary<string, ListItemInfo>();
          cacheOwner.AddAnnotation(cache);
      }

      ListItemInfo found;
      return cache.TryGetValue(key, out found) ? found : null;
  }
  // Stores a ListItemInfo for the (style name, numId) combination in the dictionary kept as an
  // annotation on the root element of numXDoc, creating the dictionary on first use.
  // An entry that already exists for the key is left unchanged.
  private static void AddListItemInfoIntoCache(XDocument numXDoc, string styleName, int? numId, ListItemInfo listItemInfo)
  {
      string stylePart = styleName ?? "";
      string numIdPart = numId.HasValue ? numId.ToString() : "";
      string key = stylePart + "|" + numIdPart;

      XElement cacheOwner = numXDoc.Root;
      var cache = cacheOwner.Annotation<Dictionary<string, ListItemInfo>>();
      if (cache == null)
      {
          cache = new Dictionary<string, ListItemInfo>();
          cacheOwner.AddAnnotation(cache);
      }

      ListItemInfo alreadyStored;
      if (cache.TryGetValue(key, out alreadyStored))
      {
          return;
      }
      cache.Add(key, listItemInfo);
  }
  // Annotation type read from list item paragraphs; RetrieveListItem passes its
  // LevelNumbersArray to FormatListItem as the paragraph's level numbers.
  617. public class LevelNumbers
  618. {
  // The level numbers for the annotated paragraph.
  // NOTE(review): presumably one entry per level from 0 up to the paragraph's level - confirm.
  619. public int[] LevelNumbersArray;
  620. }
  // Annotation type attached to the styles document by GetDefaultParagraphStyleName so the
  // default paragraph style only has to be searched for once.
  621. private class StylesInfo
  622. {
  // styleId of the default paragraph style, or null when the document declares none.
  623. public string DefaultParagraphStyleName;
  624. }
  // Annotation type attached to a paragraph by SetParagraphLevel and read by GetParagraphLevel.
  625. private class ParagraphInfo
  626. {
  // The list level (ilvl) recorded for the paragraph.
  627. public int Ilvl;
  628. }
  // Annotation type attached to list item paragraphs so that earlier list items can be
  // reached without re-scanning the document.
  629. private class ReverseAxis
  630. {
  // The preceding paragraph in the same list item sequence, or null for the first one.
  631. public XElement PreviousParagraph;
  632. }
  // Convenience overload: retrieves the list item text for the paragraph without custom settings.
  public static string RetrieveListItem(WordprocessingDocument wordDoc, XElement paragraph)
  {
      ListItemRetrieverSettings noSettings = null;
      return RetrieveListItem(wordDoc, paragraph, noSettings);
  }
  // Returns the formatted list item text (for example the number or bullet) for a paragraph,
  // or null when the paragraph is not a list item, the document has no numbering or styles
  // part, or the paragraph's level has no definition or no lvlText.
  // The list item annotations for the whole document are initialized on first use.
  // Throws OpenXmlPowerToolsException when the paragraph cannot be initialized (it is not
  // reached from the document's content parts) or when its LevelNumbers annotation is missing.
  public static string RetrieveListItem(WordprocessingDocument wordDoc, XElement paragraph, ListItemRetrieverSettings settings)
  {
      if (wordDoc.MainDocumentPart.NumberingDefinitionsPart == null)
          return null;

      // Checked before initialization: without a styles part the per-part initialization
      // returns without annotating anything, which used to end in a NullReferenceException
      // below instead of the intended null result.
      StyleDefinitionsPart styleDefinitionsPart = wordDoc
          .MainDocumentPart
          .StyleDefinitionsPart;
      if (styleDefinitionsPart == null)
          return null;

      var listItemInfo = paragraph.Annotation<ListItemInfo>();
      if (listItemInfo == null)
      {
          InitializeListItemRetriever(wordDoc, settings);
          listItemInfo = paragraph.Annotation<ListItemInfo>();
          if (listItemInfo == null)
              throw new OpenXmlPowerToolsException("Internal error - paragraph was not initialized with ListItemInfo; it is not part of the content of the supplied document.");
      }
      if (!listItemInfo.IsListItem)
          return null;

      var stylesXDoc = styleDefinitionsPart.GetXDocument();
      int paragraphLevel = GetParagraphLevel(paragraph);

      XElement lvl = listItemInfo.Lvl(paragraphLevel);
      if (lvl == null)
          return null; // no level definition available, so there is nothing to format

      string lvlText = (string)lvl.Elements(W.lvlText).Attributes(W.val).FirstOrDefault();
      if (lvlText == null)
          return null;

      var levelNumbersAnnotation = paragraph.Annotation<LevelNumbers>();
      if (levelNumbersAnnotation == null)
          throw new OpenXmlPowerToolsException("Internal error");
      int[] levelNumbers = levelNumbersAnnotation.LevelNumbersArray;

      string languageIdentifier = GetLanguageIdentifier(paragraph, stylesXDoc);
      return FormatListItem(listItemInfo, levelNumbers, paragraphLevel,
          lvlText, stylesXDoc, languageIdentifier, settings);
  }
  // Determines the language identifier used to format a paragraph's list item.
  // The LanguageType attribute of the first run (ignoring text box content) selects which
  // attribute of the lang element is read: val for "western" or no language type, eastAsia
  // for "eastAsia", bidi for "bidi".  The value is taken from the paragraph's runs first and
  // from the document defaults second; "en-US" is returned when nothing is found or the
  // language type is not one of the recognized values.
  private static string GetLanguageIdentifier(XElement paragraph, XDocument stylesXDoc)
  {
      string languageType = (string)paragraph
          .DescendantsTrimmed(W.txbxContent)
          .Where(d => d.Name == W.r)
          .Attributes(PtOpenXml.LanguageType)
          .FirstOrDefault();

      XName langAttributeName = null;
      if (languageType == null || languageType == "western")
      {
          langAttributeName = W.val;
      }
      else if (languageType == "eastAsia")
      {
          langAttributeName = W.eastAsia;
      }
      else if (languageType == "bidi")
      {
          langAttributeName = W.bidi;
      }

      string languageIdentifier = null;
      if (langAttributeName != null)
      {
          XAttribute fromRuns = paragraph
              .Elements(W.r)
              .Elements(W.rPr)
              .Elements(W.lang)
              .Attributes(langAttributeName)
              .FirstOrDefault();
          languageIdentifier = (string)fromRuns;

          if (languageIdentifier == null)
          {
              XAttribute fromDefaults = stylesXDoc
                  .Root
                  .Elements(W.docDefaults)
                  .Elements(W.rPrDefault)
                  .Elements(W.rPr)
                  .Elements(W.lang)
                  .Attributes(langAttributeName)
                  .FirstOrDefault();
              languageIdentifier = (string)fromDefaults;
          }
      }

      return languageIdentifier ?? "en-US";
  }
  // Initializes list item information for every content part of the document.
  private static void InitializeListItemRetriever(WordprocessingDocument wordDoc, ListItemRetrieverSettings settings)
  {
      foreach (var contentPart in wordDoc.ContentParts())
      {
          InitializeListItemRetrieverForPart(wordDoc, contentPart, settings);
      }

      // Disabled diagnostic: reports paragraphs that were left without a ListItemInfo annotation.
#if false
      foreach (var part in wordDoc.ContentParts())
      {
          var xDoc = part.GetXDocument();
          var paras = xDoc
              .Descendants(W.p)
              .Where(p =>
                  p.Annotation<ListItemInfo>() == null);
          if (paras.Any())
              Console.WriteLine("Error");
      }
#endif
  }
  // Initializes list item information for one part: first for the part's root element, then
  // for each text box (txbxContent) found inside it, each treated as a story of its own.
  // Does nothing when the document has no numbering part or no styles part.
  // settings is not used by this method.
  private static void InitializeListItemRetrieverForPart(WordprocessingDocument wordDoc, OpenXmlPart part, ListItemRetrieverSettings settings)
  {
      var partXDoc = part.GetXDocument();

      var numberingPart = wordDoc.MainDocumentPart.NumberingDefinitionsPart;
      if (numberingPart == null)
      {
          return;
      }
      var numXDoc = numberingPart.GetXDocument();

      var stylesPart = wordDoc.MainDocumentPart.StyleDefinitionsPart;
      if (stylesPart == null)
      {
          return;
      }
      var stylesXDoc = stylesPart.GetXDocument();

      XElement partRoot = partXDoc.Root;
      IEnumerable<XElement> stories = new[] { partRoot }
          .Concat(partRoot.Descendants(W.txbxContent));
      foreach (XElement story in stories)
      {
          InitializeListItemRetrieverForStory(numXDoc, stylesXDoc, story);
      }
  }
  775. private static void InitializeListItemRetrieverForStory(XDocument numXDoc, XDocument stylesXDoc, XElement rootNode)
  776. {
  777. var paragraphs = rootNode
  778. .DescendantsTrimmed(W.txbxContent)
  779. .Where(p => p.Name == W.p);
  780. foreach (var paragraph in paragraphs)
  781. InitListItemInfo(numXDoc, stylesXDoc, paragraph);
  782. var abstractNumIds = paragraphs
  783. .Select(paragraph =>
  784. {
  785. ListItemInfo listItemInfo = paragraph.Annotation<ListItemInfo>();
  786. if (!listItemInfo.IsListItem)
  787. return (int?)null;
  788. return listItemInfo.AbstractNumId;
  789. })
  790. .Where(a => a != null)
  791. .Distinct()
  792. .ToList();
  793. // when debugging, it is sometimes useful to cause processing of a specific abstractNumId first.
  794. // the following code enables this.
  795. //int? abstractIdToProcessFirst = null;
  796. //if (abstractIdToProcessFirst != null)
  797. //{
  798. // abstractNumIds = (new[] { abstractIdToProcessFirst })
  799. // .Concat(abstractNumIds.Where(ani => ani != abstractIdToProcessFirst))
  800. // .ToList();
  801. //}
  802. foreach (var abstractNumId in abstractNumIds)
  803. {
  804. var listItems = paragraphs
  805. .Where(paragraph =>
  806. {
  807. var listItemInfo = paragraph.Annotation<ListItemInfo>();
  808. if (!listItemInfo.IsListItem)
  809. return false;
  810. return listItemInfo.AbstractNumId == abstractNumId;
  811. })
  812. .ToList();
  813. // annotate paragraphs with previous paragraphs so that we can look backwards with good perf
  814. XElement prevParagraph = null;
  815. foreach (var paragraph in listItems)
  816. {
  817. ReverseAxis reverse = new ReverseAxis()
  818. {
  819. PreviousParagraph = prevParagraph,
  820. };
  821. paragraph.AddAnnotation(reverse);
  822. prevParagraph = paragraph;
  823. }
  824. var startOverrideAlreadyUsed = new List<int>();
  825. List<int> previous = null;
  826. ListItemInfo[] listItemInfoInEffectForStartOverride = new ListItemInfo[] {
  827. null,
  828. null,
  829. null,
  830. null,
  831. null,
  832. null,
  833. null,
  834. null,
  835. null,
  836. null,
  837. };
  838. foreach (var paragraph in listItems)
  839. {
  840. var listItemInfo = paragraph.Annotation<ListItemInfo>();
  841. var ilvl = GetParagraphLevel(paragraph);
  842. listItemInfoInEffectForStartOverride[ilvl] = listItemInfo;
  843. ListItemInfo listItemInfoInEffect = null;
  844. if (ilvl > 0)
  845. listItemInfoInEffect = listItemInfoInEffectForStartOverride[ilvl - 1];
  846. var levelNumbers = new List<int>();
  847. for (int level = 0; level <= ilvl; level++)
  848. {
  849. var numId = listItemInfo.NumId;
  850. var startOverride = listItemInfo.StartOverride(level);
  851. int? inEffectStartOverride = null;
  852. if (listItemInfoInEffect != null)
  853. inEffectStartOverride = listItemInfoInEffect.StartOverride(level);
  854. if (level == ilvl)
  855. {
  856. var lvl = listItemInfo.Lvl(ilvl);
  857. var lvlRestart = (int?)lvl.Elements(W.lvlRestart).Attributes(W.val).FirstOrDefault();
  858. if (lvlRestart != null)
  859. {
  860. var previousPara = PreviousParagraphsForLvlRestart(paragraph, (int)lvlRestart)
  861. .FirstOrDefault(p =>
  862. {
  863. var plvl = GetParagraphLevel(p);
  864. return plvl == ilvl;
  865. });
  866. if (previousPara != null)
  867. previous = previousPara.Annotation<LevelNumbers>().LevelNumbersArray.ToList();
  868. }
  869. }
  870. if (previous == null ||
  871. level >= previous.Count() ||
  872. (level == ilvl && startOverride != null && !startOverrideAlreadyUsed.Contains(numId)))
  873. {
  874. if (previous == null || level >= previous.Count())
  875. {
  876. var start = listItemInfo.Start(level);
  877. // only look at startOverride if the level that we're examining is same as the paragraph's level.
  878. if (level == ilvl)
  879. {
  880. if (startOverride != null && !startOverrideAlreadyUsed.Contains(numId))
  881. {
  882. startOverrideAlreadyUsed.Add(numId);
  883. start = (int)startOverride;
  884. }
  885. else
  886. {
  887. if (startOverride != null)
  888. start = (int)startOverride;
  889. if (inEffectStartOverride != null && inEffectStartOverride > start)
  890. start = (int)inEffectStartOverride;
  891. }
  892. }
  893. levelNumbers.Add(start);
  894. }
  895. else
  896. {
  897. var start = listItemInfo.Start(level);
  898. // only look at startOverride if the level that we're examining is same as the paragraph's level.
  899. if (level == ilvl)
  900. {
  901. if (startOverride != null)
  902. {
  903. if (!startOverrideAlreadyUsed.Contains(numId))
  904. {
  905. startOverrideAlreadyUsed.Add(numId);
  906. start = (int)startOverride;
  907. }
  908. }
  909. }
  910. levelNumbers.Add(start);
  911. }
  912. }
  913. else
  914. {
  915. int? thisNumber = null;
  916. if (level == ilvl)
  917. {
  918. if (startOverride != null)
  919. {
  920. if (!startOverrideAlreadyUsed.Contains(numId))
  921. {
  922. startOverrideAlreadyUsed.Add(numId);
  923. thisNumber = (int)startOverride;
  924. }
  925. thisNumber = previous.ElementAt(level) + 1;
  926. }
  927. else
  928. {
  929. thisNumber = previous.ElementAt(level) + 1;
  930. }
  931. }
  932. else
  933. {
  934. thisNumber = previous.ElementAt(level);
  935. }
  936. levelNumbers.Add((int)thisNumber);
  937. }
  938. }
  939. var levelNumbersAnno = new LevelNumbers()
  940. {
  941. LevelNumbersArray = levelNumbers.ToArray()
  942. };
  943. paragraph.AddAnnotation(levelNumbersAnno);
  944. previous = levelNumbers;
  945. }
  946. }
  947. }
  /// <summary>
  /// Lazily walks backwards from <paramref name="paragraph"/> along the chain of
  /// <c>ReverseAxis</c> annotations, yielding each earlier paragraph in turn.
  /// </summary>
  /// <param name="paragraph">Paragraph to start from; it is not itself yielded.</param>
  /// <param name="ilvl">
  /// Level threshold: enumeration stops (without yielding) at the first earlier paragraph
  /// whose level, as reported by <c>GetParagraphLevel</c>, is lower than this value.
  /// </param>
  /// <returns>
  /// The earlier paragraphs, nearest first, until the chain ends (no annotation or no
  /// previous paragraph) or the level threshold is crossed.
  /// </returns>
  private static IEnumerable<XElement> PreviousParagraphsForLvlRestart(XElement paragraph, int ilvl)
  {
      XElement cursor = paragraph;
      for (;;)
      {
          // A missing annotation and a missing previous paragraph both mean "end of chain".
          ReverseAxis reverseAxis = cursor.Annotation<ReverseAxis>();
          XElement earlier = reverseAxis != null ? reverseAxis.PreviousParagraph : null;
          if (earlier == null)
              yield break;

          // Stop as soon as an earlier paragraph sits at a lower level than the threshold.
          if (GetParagraphLevel(earlier) < ilvl)
              yield break;

          yield return earlier;
          cursor = earlier;
      }
  }
  /// <summary>
  /// Builds the visible text of a list item's number by expanding the <c>%N</c> placeholders
  /// in <paramref name="lvlText"/> with the formatted counter of the referenced level.
  /// </summary>
  /// <param name="lii">List item info used to look up the w:lvl element for each level.</param>
  /// <param name="levelNumbers">Counter values, indexed by zero-based level.</param>
  /// <param name="ilvl">Level whose w:lvl element is inspected for w:isLgl.</param>
  /// <param name="lvlText">Level text containing literal text and <c>%N</c> placeholders.</param>
  /// <param name="styles">Not used by this method.</param>
  /// <param name="languageCultureName">
  /// Key used to pick an implementation from <c>settings.ListItemTextImplementations</c>; may be null.
  /// </param>
  /// <param name="settings">Optional settings holding per-culture implementations; may be null.</param>
  /// <returns>The literal tokens and formatted level numbers concatenated in order.</returns>
  private static string FormatListItem(ListItemInfo lii, int[] levelNumbers, int ilvl,
      string lvlText, XDocument styles, string languageCultureName, ListItemRetrieverSettings settings)
  {
      string[] formatTokens = GetFormatTokens(lvlText).ToArray();
      XElement lvl = lii.Lvl(ilvl);
      bool isLgl = lvl.Elements(W.isLgl).Any();
      string listItem = formatTokens.Select(t =>
      {
          // Anything that is not a "%<digits>" placeholder is literal text.
          if (t.Length < 2 || t[0] != '%')
              return t;
          int indentationLevel;
          if (!Int32.TryParse(t.Substring(1), out indentationLevel))
              return t;

          // Placeholders are one-based; levelNumbers is zero-based.
          indentationLevel -= 1;
          // A placeholder that points past the available counters uses the deepest one.
          if (indentationLevel >= levelNumbers.Length)
              indentationLevel = levelNumbers.Length - 1;
          // "%0" (or an empty levelNumbers array) has no counter to refer to; emit the token
          // verbatim instead of indexing levelNumbers with -1.
          if (indentationLevel < 0)
              return t;

          int levelNumber = levelNumbers[indentationLevel];
          string levelText = null;
          XElement rlvl = lii.Lvl(indentationLevel);
          string numFmtForLevel = (string)rlvl.Elements(W.numFmt).Attributes(W.val).FirstOrDefault();
          if (numFmtForLevel == null)
          {
              // Fall back to a custom format wrapped in mc:AlternateContent/mc:Choice.
              var numFmtElement = rlvl.Elements(MC.AlternateContent).Elements(MC.Choice).Elements(W.numFmt).FirstOrDefault();
              if (numFmtElement != null && (string)numFmtElement.Attribute(W.val) == "custom")
                  numFmtForLevel = (string)numFmtElement.Attribute(W.format);
          }

          // With w:isLgl present, every format except "none" and "decimalZero" becomes "decimal".
          if (isLgl && numFmtForLevel != "none" && numFmtForLevel != "decimalZero")
              numFmtForLevel = "decimal";

          // Prefer a caller-supplied implementation registered for this culture, if any.
          if (languageCultureName != null
              && settings != null
              && settings.ListItemTextImplementations.ContainsKey(languageCultureName))
          {
              var impl = settings.ListItemTextImplementations[languageCultureName];
              levelText = impl(languageCultureName, levelNumber, numFmtForLevel);
          }

          // No implementation, or it returned null: use the default text getter.
          if (levelText == null)
              levelText = ListItemTextGetter_Default.GetListItemText(languageCultureName, levelNumber, numFmtForLevel);
          return levelText;
      }).StringConcatenate();
      return listItem;
  }
  /// <summary>
  /// Splits a level-text string into tokens: each two-character sequence starting with
  /// <c>%</c> becomes its own token, and each run of text between such sequences becomes a
  /// literal token.
  /// </summary>
  /// <param name="lvlText">The level text to split, e.g. <c>%1.%2</c>.</param>
  /// <returns>
  /// The tokens in source order. A <c>%</c> that is the final character is never split off:
  /// it is returned as part of the trailing literal token. An empty string yields no tokens.
  /// </returns>
  private static IEnumerable<string> GetFormatTokens(string lvlText)
  {
      int length = lvlText.Length;
      int pos = 0;
      while (pos < length)
      {
          // Locate the next '%' that still has a character after it.
          int marker = lvlText.IndexOf('%', pos);
          bool hasPlaceholder = marker != -1 && marker <= length - 2;
          if (!hasPlaceholder)
          {
              // No further placeholder: the remainder is a single literal token.
              yield return lvlText.Substring(pos);
              yield break;
          }

          // Literal text preceding the placeholder, if any.
          if (marker > pos)
              yield return lvlText.Substring(pos, marker - pos);

          // The placeholder itself: '%' plus the character that follows it.
          yield return lvlText.Substring(marker, 2);
          pos = marker + 2;
      }
  }
  1032. }
  1033. }