TextReplacer.cs 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428
  1. // Copyright (c) Microsoft. All rights reserved.
  2. // Licensed under the MIT license. See LICENSE file in the project root for full license information.
  3. using System;
  4. using System.Collections.Generic;
  5. using System.IO;
  6. using System.Linq;
  7. using System.Text;
  8. using System.Xml;
  9. using System.Xml.Linq;
  10. using DocumentFormat.OpenXml.Packaging;
  11. namespace OpenXmlPowerTools
  12. {
  /// <summary>
  /// Search-and-replace entry point exposed directly on <see cref="WmlDocument"/>.
  /// </summary>
  public partial class WmlDocument : OpenXmlPowerToolsDocument
  {
      /// <summary>
      /// Forwards to <c>TextReplacer.SearchAndReplace</c>, passing this document as the input.
      /// </summary>
      /// <param name="search">Text to look for.</param>
      /// <param name="replace">Text to put in place of each match.</param>
      /// <param name="matchCase">Passed through unchanged to the replacer.</param>
      /// <returns>The document returned by <c>TextReplacer.SearchAndReplace</c>.</returns>
      public WmlDocument SearchAndReplace(string search, string replace, bool matchCase)
      {
          WmlDocument replaced = TextReplacer.SearchAndReplace(this, search, replace, matchCase);
          return replaced;
      }
  }
  /// <summary>
  /// Search-and-replace entry point exposed directly on <see cref="PmlDocument"/>.
  /// </summary>
  public partial class PmlDocument : OpenXmlPowerToolsDocument
  {
      /// <summary>
      /// Forwards to <c>TextReplacer.SearchAndReplace</c>, passing this presentation as the input.
      /// </summary>
      /// <param name="search">Text to look for.</param>
      /// <param name="replace">Text to put in place of each match.</param>
      /// <param name="matchCase">Passed through unchanged to the replacer.</param>
      /// <returns>The presentation returned by <c>TextReplacer.SearchAndReplace</c>.</returns>
      public PmlDocument SearchAndReplace(string search, string replace, bool matchCase)
      {
          PmlDocument replaced = TextReplacer.SearchAndReplace(this, search, replace, matchCase);
          return replaced;
      }
  }
  27. public class TextReplacer
  28. {
  /// <summary>
  /// Annotation object attached to run elements that belong to a match; runs that
  /// belong to the same match carry the same <see cref="MatchId"/>.
  /// </summary>
  private class MatchSemaphore
  {
      /// <summary>Identifier of the match the annotated element belongs to.</summary>
      public int MatchId;

      /// <summary>Creates a marker carrying the given match identifier.</summary>
      /// <param name="matchId">Identifier stored in <see cref="MatchId"/>.</param>
      public MatchSemaphore(int matchId)
      {
          this.MatchId = matchId;
      }
  }
  /// <summary>
  /// Produces a deep copy of an element tree in which every copied element carries the
  /// same <see cref="MatchSemaphore"/> annotation object as its source element.
  /// </summary>
  /// <param name="node">Node to copy.</param>
  /// <returns>
  /// A newly built <see cref="XElement"/> when <paramref name="node"/> is an element;
  /// otherwise <paramref name="node"/> itself.
  /// </returns>
  private static XObject CloneWithAnnotation(XNode node)
  {
      XElement element = node as XElement;
      if (element == null)
          return node;

      XElement newElement = new XElement(element.Name,
          element.Attributes(),
          element.Nodes().Select(n => CloneWithAnnotation(n)));
      MatchSemaphore semaphore = element.Annotation<MatchSemaphore>();
      if (semaphore != null)
          newElement.AddAnnotation(semaphore);

      // Return the copy. The previous code built the copy and then returned the
      // original node, so callers never actually received a clone.
      return newElement;
  }
  /// <summary>
  /// Recursively transforms a WordprocessingML node, replacing occurrences of
  /// <paramref name="search"/> in paragraph text with <paramref name="replace"/>.
  /// </summary>
  /// <remarks>
  /// A paragraph whose concatenated <c>w:t</c> text contains the search string is rebuilt:
  /// its text runs are split into one run per character, matching windows of runs directly
  /// under the paragraph are replaced by a single run holding the replacement text, and
  /// adjacent text-only runs with identical run properties are merged again.
  /// </remarks>
  /// <param name="node">Node to transform; non-element nodes are returned unchanged.</param>
  /// <param name="search">Text to look for. An empty string matches nothing.</param>
  /// <param name="replace">Text written in place of each match.</param>
  /// <param name="matchCase">When false, both sides are upper-cased before comparing.</param>
  /// <returns>
  /// For a run that contains <c>w:t</c>, a sequence of split runs; for a paragraph with no
  /// match, the paragraph itself; otherwise a rebuilt element, or <paramref name="node"/>
  /// when it is not an element.
  /// </returns>
  private static object WmlSearchAndReplaceTransform(XNode node,
      string search, string replace, bool matchCase)
  {
      XElement element = node as XElement;
      if (element != null)
      {
          if (element.Name == W.p)
          {
              string contents = element.Descendants(W.t).Select(t => (string)t).StringConcatenate();
              bool paragraphContainsSearch = contents.Contains(search) ||
                  (!matchCase && contents.ToUpper().Contains(search.ToUpper()));

              // Every string "contains" the empty string. Without the length check an empty
              // search would record zero-length matches and the replacement loop below would
              // call First() on an empty list.
              if (paragraphContainsSearch && search.Length > 0)
              {
                  // Recursing into the children turns every text run into single-character runs.
                  XElement paragraphWithSplitRuns = new XElement(W.p,
                      element.Attributes(),
                      element.Nodes().Select(n => WmlSearchAndReplaceTransform(n, search,
                          replace, matchCase)));

                  // Runs whose first non-rPr child is sub-run-level content take part in matching.
                  XElement[] subRunArray = paragraphWithSplitRuns
                      .Elements(W.r)
                      .Where(e =>
                      {
                          XElement subRunElement = e.Elements().FirstOrDefault(el => el.Name != W.rPr);
                          if (subRunElement == null)
                              return false;
                          return W.SubRunLevelContent.Contains(subRunElement.Name);
                      })
                      .ToArray();
                  int paragraphChildrenCount = subRunArray.Length;
                  int matchId = 1;

                  // Slide a window of search.Length runs over the array. Runs of a window that
                  // matches are tagged with a MatchSemaphore so later windows cannot reuse them.
                  foreach (var pc in subRunArray
                      .Take(paragraphChildrenCount - (search.Length - 1))
                      .Select((c, i) => new { Child = c, Index = i, }))
                  {
                      var subSequence = subRunArray.SequenceAt(pc.Index).Take(search.Length);
                      var zipped = subSequence.PtZip(search, (pcp, c) => new
                      {
                          ParagraphChildProjection = pcp,
                          CharacterToCompare = c,
                      });
                      bool dontMatch = zipped.Any(z =>
                      {
                          if (z.ParagraphChildProjection.Annotation<MatchSemaphore>() != null)
                              return true;
                          bool b;
                          if (matchCase)
                              b = z.ParagraphChildProjection.Value != z.CharacterToCompare.ToString();
                          else
                              b = z.ParagraphChildProjection.Value.ToUpper() != z.CharacterToCompare.ToString().ToUpper();
                          return b;
                      });
                      bool match = !dontMatch;
                      if (match)
                      {
                          foreach (var item in subSequence)
                              item.AddAnnotation(new MatchSemaphore(matchId));
                          ++matchId;
                      }
                  }

                  // The following code is locally impure, as this is the most expressive way to write it.
                  XElement paragraphWithReplacedRuns = (XElement)CloneWithAnnotation(paragraphWithSplitRuns);
                  for (int id = 1; id < matchId; ++id)
                  {
                      List<XElement> elementsToReplace = paragraphWithReplacedRuns
                          .Elements()
                          .Where(e =>
                          {
                              var sem = e.Annotation<MatchSemaphore>();
                              if (sem == null)
                                  return false;
                              return sem.MatchId == id;
                          })
                          .ToList();
                      // The replacement run takes the run properties of the first matched run.
                      elementsToReplace.First().AddBeforeSelf(
                          new XElement(W.r,
                              elementsToReplace.First().Elements(W.rPr),
                              new XElement(W.t, replace)));
                      elementsToReplace.Remove();
                  }

                  // Group adjacent runs that hold exactly one w:t (besides rPr) by their
                  // serialized run properties; everything else is passed through as-is.
                  var groupedAdjacentRunsWithIdenticalFormatting =
                      paragraphWithReplacedRuns
                      .Elements()
                      .GroupAdjacent(ce =>
                      {
                          if (ce.Name != W.r)
                              return "DontConsolidate";
                          if (ce.Elements().Where(e => e.Name != W.rPr).Count() != 1 ||
                              ce.Element(W.t) == null)
                              return "DontConsolidate";
                          if (ce.Element(W.rPr) == null)
                              return "";
                          return ce.Element(W.rPr).ToString(SaveOptions.None);
                      });

                  // Carry the paragraph's attributes over to the result; they were copied onto
                  // the split paragraph above and must not be lost in the final rebuild.
                  XElement paragraphWithConsolidatedRuns = new XElement(W.p,
                      paragraphWithReplacedRuns.Attributes(),
                      groupedAdjacentRunsWithIdenticalFormatting.Select(g =>
                      {
                          if (g.Key == "DontConsolidate")
                              return (object)g;
                          string textValue = g.Select(r => r.Element(W.t).Value).StringConcatenate();
                          XAttribute xs = null;
                          // textValue is empty when the replacement text is empty and the
                          // replaced run is alone in its group, so check the length before indexing.
                          if (textValue.Length > 0 &&
                              (textValue[0] == ' ' || textValue[textValue.Length - 1] == ' '))
                              xs = new XAttribute(XNamespace.Xml + "space", "preserve");
                          return new XElement(W.r,
                              g.First().Elements(W.rPr),
                              new XElement(W.t, xs, textValue));
                      }));
                  return paragraphWithConsolidatedRuns;
              }
              return element;
          }
          if (element.Name == W.r && element.Elements(W.t).Any())
          {
              // Split the run: one run per character of each w:t, and one run per other
              // non-rPr child, each carrying a copy of the original run properties.
              var collectionOfRuns = element.Elements()
                  .Where(e => e.Name != W.rPr)
                  .Select(e =>
                  {
                      if (e.Name == W.t)
                      {
                          string s = (string)e;
                          IEnumerable<XElement> collectionOfSubRuns = s.Select(c =>
                          {
                              XElement newRun = new XElement(W.r,
                                  element.Elements(W.rPr),
                                  new XElement(W.t,
                                      c == ' ' ?
                                      new XAttribute(XNamespace.Xml + "space", "preserve") :
                                      null, c));
                              return newRun;
                          });
                          return (object)collectionOfSubRuns;
                      }
                      else
                      {
                          XElement newRun = new XElement(W.r,
                              element.Elements(W.rPr),
                              e);
                          return newRun;
                      }
                  });
              return collectionOfRuns;
          }
          return new XElement(element.Name,
              element.Attributes(),
              element.Nodes().Select(n => WmlSearchAndReplaceTransform(n,
                  search, replace, matchCase)));
      }
      return node;
  }
  /// <summary>
  /// Runs the WordprocessingML transform over the document's root element and swaps the
  /// transformed root into <paramref name="xDocument"/> in place.
  /// </summary>
  /// <param name="xDocument">Document whose first element is replaced.</param>
  /// <param name="search">Text to look for.</param>
  /// <param name="replace">Text written in place of each match.</param>
  /// <param name="matchCase">Passed through unchanged to the transform.</param>
  private static void WmlSearchAndReplaceInXDocument(XDocument xDocument, string search,
      string replace, bool matchCase)
  {
      object transformed = WmlSearchAndReplaceTransform(xDocument.Root, search, replace, matchCase);
      XElement replacementRoot = (XElement)transformed;
      XElement currentRoot = xDocument.Elements().First();
      currentRoot.ReplaceWith(replacementRoot);
  }
  /// <summary>
  /// Applies search-and-replace to a <see cref="WmlDocument"/> by opening it through an
  /// <c>OpenXmlMemoryStreamDocument</c> and returning the modified document read back from it.
  /// </summary>
  /// <param name="doc">Source document.</param>
  /// <param name="search">Text to look for.</param>
  /// <param name="replace">Text written in place of each match.</param>
  /// <param name="matchCase">Passed through unchanged to the package-level overload.</param>
  /// <returns>The result of <c>GetModifiedWmlDocument()</c> on the memory-stream document.</returns>
  public static WmlDocument SearchAndReplace(WmlDocument doc, string search, string replace, bool matchCase)
  {
      using (OpenXmlMemoryStreamDocument memoryStreamDocument = new OpenXmlMemoryStreamDocument(doc))
      {
          using (WordprocessingDocument package = memoryStreamDocument.GetWordprocessingDocument())
          {
              SearchAndReplace(package, search, replace, matchCase);
          }

          // The package has been disposed by this point; only then is the result read back.
          WmlDocument modified = memoryStreamDocument.GetModifiedWmlDocument();
          return modified;
      }
  }
  /// <summary>
  /// Replaces <paramref name="search"/> with <paramref name="replace"/> in the main document
  /// part, every header part, every footer part, and the endnotes and footnotes parts when
  /// they exist.
  /// </summary>
  /// <param name="wordDoc">Open package to modify in place.</param>
  /// <param name="search">Text to look for.</param>
  /// <param name="replace">Text written in place of each match.</param>
  /// <param name="matchCase">Passed through unchanged to the transform.</param>
  /// <exception cref="InvalidDataException">
  /// Thrown when <c>RevisionAccepter.HasTrackedRevisions</c> reports tracked revisions, or when
  /// the settings part contains a <c>w:trackRevisions</c> element.
  /// </exception>
  public static void SearchAndReplace(WordprocessingDocument wordDoc, string search,
      string replace, bool matchCase)
  {
      if (RevisionAccepter.HasTrackedRevisions(wordDoc))
          throw new InvalidDataException(
              "Search and replace will not work with documents " +
              "that contain revision tracking.");
      XDocument xDoc;

      // The settings part is optional, just like the endnotes and footnotes parts below.
      // A package without one cannot have revision tracking switched on in its settings.
      DocumentSettingsPart settingsPart = wordDoc.MainDocumentPart.DocumentSettingsPart;
      if (settingsPart != null)
      {
          xDoc = settingsPart.GetXDocument();
          if (xDoc.Descendants(W.trackRevisions).Any())
              throw new InvalidDataException("Revision tracking is turned on for document.");
      }

      xDoc = wordDoc.MainDocumentPart.GetXDocument();
      WmlSearchAndReplaceInXDocument(xDoc, search, replace, matchCase);
      wordDoc.MainDocumentPart.PutXDocument();
      foreach (var part in wordDoc.MainDocumentPart.HeaderParts)
      {
          xDoc = part.GetXDocument();
          WmlSearchAndReplaceInXDocument(xDoc, search, replace, matchCase);
          part.PutXDocument();
      }
      foreach (var part in wordDoc.MainDocumentPart.FooterParts)
      {
          xDoc = part.GetXDocument();
          WmlSearchAndReplaceInXDocument(xDoc, search, replace, matchCase);
          part.PutXDocument();
      }
      if (wordDoc.MainDocumentPart.EndnotesPart != null)
      {
          xDoc = wordDoc.MainDocumentPart.EndnotesPart.GetXDocument();
          WmlSearchAndReplaceInXDocument(xDoc, search, replace, matchCase);
          wordDoc.MainDocumentPart.EndnotesPart.PutXDocument();
      }
      if (wordDoc.MainDocumentPart.FootnotesPart != null)
      {
          xDoc = wordDoc.MainDocumentPart.FootnotesPart.GetXDocument();
          WmlSearchAndReplaceInXDocument(xDoc, search, replace, matchCase);
          wordDoc.MainDocumentPart.FootnotesPart.PutXDocument();
      }
  }
  /// <summary>
  /// Recursively transforms a DrawingML node, replacing occurrences of
  /// <paramref name="search"/> in paragraph text with <paramref name="replace"/>.
  /// </summary>
  /// <remarks>
  /// A paragraph whose concatenated <c>a:t</c> text contains the search string is rebuilt:
  /// its text runs are split into one run per character, matching windows of runs directly
  /// under the paragraph are replaced by a single run holding the replacement text, and
  /// adjacent text-only runs with identical run properties are merged again.
  /// </remarks>
  /// <param name="node">Node to transform; non-element nodes are returned unchanged.</param>
  /// <param name="search">Text to look for. An empty string matches nothing.</param>
  /// <param name="replace">Text written in place of each match.</param>
  /// <param name="matchCase">When false, both sides are upper-cased before comparing.</param>
  /// <returns>
  /// For a run that contains <c>a:t</c>, a sequence of split runs; for a paragraph with no
  /// match, the paragraph itself; otherwise a rebuilt element, or <paramref name="node"/>
  /// when it is not an element.
  /// </returns>
  private static object PmlReplaceTextTransform(XNode node, string search, string replace,
      bool matchCase)
  {
      XElement element = node as XElement;
      if (element != null)
      {
          if (element.Name == A.p)
          {
              string contents = element.Descendants(A.t).Select(t => (string)t).StringConcatenate();
              bool paragraphContainsSearch = contents.Contains(search) ||
                  (!matchCase && contents.ToUpper().Contains(search.ToUpper()));

              // Every string "contains" the empty string. Without the length check an empty
              // search would record zero-length matches and the replacement loop below would
              // call First() on an empty list.
              if (paragraphContainsSearch && search.Length > 0)
              {
                  // Recursing into the children turns every text run into single-character runs.
                  XElement paragraphWithSplitRuns = new XElement(A.p,
                      element.Attributes(),
                      element.Nodes().Select(n => PmlReplaceTextTransform(n, search,
                          replace, matchCase)));

                  // Only runs whose first non-rPr child is a:t take part in matching.
                  XElement[] subRunArray = paragraphWithSplitRuns
                      .Elements(A.r)
                      .Where(e =>
                      {
                          XElement subRunElement = e.Elements().FirstOrDefault(el => el.Name != A.rPr);
                          if (subRunElement == null)
                              return false;
                          return subRunElement.Name == A.t;
                      })
                      .ToArray();
                  int paragraphChildrenCount = subRunArray.Length;
                  int matchId = 1;

                  // Slide a window of search.Length runs over the array. Runs of a window that
                  // matches are tagged with a MatchSemaphore so later windows cannot reuse them.
                  foreach (var pc in subRunArray
                      .Take(paragraphChildrenCount - (search.Length - 1))
                      .Select((c, i) => new { Child = c, Index = i, }))
                  {
                      var subSequence = subRunArray.SequenceAt(pc.Index).Take(search.Length);
                      var zipped = subSequence.PtZip(search, (pcp, c) => new
                      {
                          ParagraphChildProjection = pcp,
                          CharacterToCompare = c,
                      });
                      bool dontMatch = zipped.Any(z =>
                      {
                          if (z.ParagraphChildProjection.Annotation<MatchSemaphore>() != null)
                              return true;
                          bool b;
                          if (matchCase)
                              b = z.ParagraphChildProjection.Value != z.CharacterToCompare.ToString();
                          else
                              b = z.ParagraphChildProjection.Value.ToUpper() != z.CharacterToCompare.ToString().ToUpper();
                          return b;
                      });
                      bool match = !dontMatch;
                      if (match)
                      {
                          foreach (var item in subSequence)
                              item.AddAnnotation(new MatchSemaphore(matchId));
                          ++matchId;
                      }
                  }

                  // The following code is locally impure, as this is the most expressive way to write it.
                  XElement paragraphWithReplacedRuns = (XElement)CloneWithAnnotation(paragraphWithSplitRuns);
                  for (int id = 1; id < matchId; ++id)
                  {
                      List<XElement> elementsToReplace = paragraphWithReplacedRuns
                          .Elements()
                          .Where(e =>
                          {
                              var sem = e.Annotation<MatchSemaphore>();
                              if (sem == null)
                                  return false;
                              return sem.MatchId == id;
                          })
                          .ToList();
                      // The replacement run takes the run properties of the first matched run.
                      elementsToReplace.First().AddBeforeSelf(
                          new XElement(A.r,
                              elementsToReplace.First().Elements(A.rPr),
                              new XElement(A.t, replace)));
                      elementsToReplace.Remove();
                  }

                  // Group adjacent runs that hold exactly one a:t (besides rPr) by their
                  // serialized run properties; everything else is passed through as-is.
                  var groupedAdjacentRunsWithIdenticalFormatting =
                      paragraphWithReplacedRuns
                      .Elements()
                      .GroupAdjacent(ce =>
                      {
                          if (ce.Name != A.r)
                              return "DontConsolidate";
                          if (ce.Elements().Where(e => e.Name != A.rPr).Count() != 1 ||
                              ce.Element(A.t) == null)
                              return "DontConsolidate";
                          if (ce.Element(A.rPr) == null)
                              return "";
                          return ce.Element(A.rPr).ToString(SaveOptions.None);
                      });
                  XElement paragraphWithConsolidatedRuns = new XElement(A.p,
                      groupedAdjacentRunsWithIdenticalFormatting.Select(g =>
                      {
                          if (g.Key == "DontConsolidate")
                              return (object)g;
                          string textValue = g.Select(r => r.Element(A.t).Value).StringConcatenate();
                          return new XElement(A.r,
                              g.First().Elements(A.rPr),
                              new XElement(A.t, textValue));
                      }));
                  return paragraphWithConsolidatedRuns;
              }

              // No match in this paragraph: hand it back untouched. Falling through to the
              // generic rebuild below would recurse into its runs and leave every text run
              // split into single-character runs that are never merged again.
              return element;
          }
          if (element.Name == A.r && element.Elements(A.t).Any())
          {
              // Split the run: one run per character of each a:t, and one run per other
              // non-rPr child, each carrying a copy of the original run properties.
              var collectionOfRuns = element.Elements()
                  .Where(e => e.Name != A.rPr)
                  .Select(e =>
                  {
                      if (e.Name == A.t)
                      {
                          string s = (string)e;
                          IEnumerable<XElement> collectionOfSubRuns = s.Select(c =>
                          {
                              XElement newRun = new XElement(A.r,
                                  element.Elements(A.rPr),
                                  new XElement(A.t, c));
                              return newRun;
                          });
                          return (object)collectionOfSubRuns;
                      }
                      else
                      {
                          XElement newRun = new XElement(A.r,
                              element.Elements(A.rPr),
                              e);
                          return newRun;
                      }
                  });
              return collectionOfRuns;
          }
          return new XElement(element.Name,
              element.Attributes(),
              element.Nodes().Select(n => PmlReplaceTextTransform(n, search, replace, matchCase)));
      }
      return node;
  }
  /// <summary>
  /// Applies search-and-replace to a <see cref="PmlDocument"/> by opening it through an
  /// <c>OpenXmlMemoryStreamDocument</c> and returning the modified presentation read back from it.
  /// </summary>
  /// <param name="doc">Source presentation.</param>
  /// <param name="search">Text to look for.</param>
  /// <param name="replace">Text written in place of each match.</param>
  /// <param name="matchCase">Passed through unchanged to the package-level overload.</param>
  /// <returns>The result of <c>GetModifiedPmlDocument()</c> on the memory-stream document.</returns>
  public static PmlDocument SearchAndReplace(PmlDocument doc, string search, string replace, bool matchCase)
  {
      using (OpenXmlMemoryStreamDocument memoryStreamDocument = new OpenXmlMemoryStreamDocument(doc))
      {
          using (PresentationDocument package = memoryStreamDocument.GetPresentationDocument())
          {
              SearchAndReplace(package, search, replace, matchCase);
          }

          // The package has been disposed by this point; only then is the result read back.
          PmlDocument modified = memoryStreamDocument.GetModifiedPmlDocument();
          return modified;
      }
  }
  /// <summary>
  /// Replaces <paramref name="search"/> with <paramref name="replace"/> in every slide part
  /// of the presentation, writing each transformed slide back as a new XDocument.
  /// </summary>
  /// <param name="pDoc">Open presentation package to modify in place.</param>
  /// <param name="search">Text to look for.</param>
  /// <param name="replace">Text written in place of each match.</param>
  /// <param name="matchCase">Passed through unchanged to the transform.</param>
  public static void SearchAndReplace(PresentationDocument pDoc, string search,
      string replace, bool matchCase)
  {
      foreach (var slide in pDoc.PresentationPart.SlideParts)
      {
          XElement originalRoot = slide.GetXDocument().Root;
          object transformed = PmlReplaceTextTransform(originalRoot, search, replace, matchCase);
          XDocument rebuiltSlide = new XDocument((XElement)transformed);
          slide.PutXDocument(rebuiltSlide);
      }
  }
  411. }
  412. }