// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE file in the project root for full license information.

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Xml.Linq;
using DocumentFormat.OpenXml.Packaging;

namespace OpenXmlPowerTools
{
    /// <summary>
    /// Annotates the XML of a WordprocessingML part with information about complex fields
    /// (w:fldChar begin / separate / end) and retrieves / parses field instruction text.
    /// </summary>
    public class FieldRetriever
    {
        // Field types that ParseField understands; anything else yields an empty FieldInfo.
        private static readonly string[] SupportedFieldTypes = { "HYPERLINK", "REF", "SEQ", "STYLEREF" };

        /// <summary>
        /// Returns the instruction text of the field with the given id, wrapped in braces.
        /// Instruction text that belongs to a field nested inside this one is produced by a
        /// recursive call, so nested fields appear as nested brace groups.
        /// </summary>
        /// <param name="root">Root element previously processed by <see cref="AnnotateWithFieldInfo"/>.</param>
        /// <param name="id">Id of the field, as assigned by <see cref="AnnotateWithFieldInfo"/>.</param>
        /// <returns>
        /// "{" + instruction text + "}", or an empty string when no element carries
        /// instruction text for <paramref name="id"/>.
        /// </returns>
        /// <exception cref="OpenXmlPowerToolsException">
        /// The root element has no cached annotation dictionary (i.e. it was not annotated).
        /// </exception>
        public static string InstrText(XElement root, int id)
        {
            XNamespace w = "http://schemas.openxmlformats.org/wordprocessingml/2006/main";

#if false
            // This is the old code.  Both versions work - the caching version is significantly faster.
            var relevantElements = root.Descendants()
                .Where(e =>
                {
                    Stack<FieldElementTypeInfo> s = e.Annotation<Stack<FieldElementTypeInfo>>();
                    if (s != null)
                        return s.Any(z => z.Id == id &&
                            z.FieldElementType == FieldElementTypeEnum.InstrText);
                    return false;
                })
                .ToList();
#else
            var cachedAnnotationInformation = root.Annotation<Dictionary<int, List<XElement>>>();
            if (cachedAnnotationInformation == null)
                throw new OpenXmlPowerToolsException("Internal error");

            // it is possible that a field code contains no instr text
            List<XElement> relevantElements;
            if (!cachedAnnotationInformation.TryGetValue(id, out relevantElements))
                return "";
#endif

            // Group adjacent elements by whether their stack has entries in front of the
            // entry for this id.  Enumerating a Stack starts at the top, so "entries in
            // front" are fields that were opened after (i.e. nested inside) this one.
            var groupedSubFields = relevantElements
                .GroupAdjacent(e =>
                {
                    Stack<FieldElementTypeInfo> s = e.Annotation<Stack<FieldElementTypeInfo>>();
                    var stackElement = s.FirstOrDefault(z => z.Id == id);
                    var elementsBefore = s.TakeWhile(z => z != stackElement);
                    return elementsBefore.Any();
                })
                .ToList();

            var instrText = groupedSubFields
                .Select(g =>
                {
                    if (g.Key == false)
                    {
                        // Elements that belong directly to this field: concatenate the
                        // values of the w:instrText elements, ignore everything else.
                        return g.Select(e =>
                            {
                                Stack<FieldElementTypeInfo> s = e.Annotation<Stack<FieldElementTypeInfo>>();
                                var stackElement = s.FirstOrDefault(z => z.Id == id);
                                if (stackElement.FieldElementType == FieldElementTypeEnum.InstrText &&
                                    e.Name == w + "instrText")
                                    return e.Value;
                                return "";
                            })
                            .StringConcatenate();
                    }
                    else
                    {
                        // Elements inside a nested field: the entry immediately in front of
                        // ours identifies the directly nested field; recurse into it.
                        Stack<FieldElementTypeInfo> s = g.First().Annotation<Stack<FieldElementTypeInfo>>();
                        var stackElement = s.FirstOrDefault(z => z.Id == id);
                        var elementBefore = s.TakeWhile(z => z != stackElement).Last();
                        var subFieldId = elementBefore.Id;
                        return InstrText(root, subFieldId);
                    }
                })
                .StringConcatenate();

            return "{" + instrText + "}";
        }

        /// <summary>
        /// Walks every element of the part's XML document and attaches, as an annotation, a
        /// stack describing the fields the element is inside of and which portion of each
        /// field it is in.  Also attaches to the root element a dictionary that maps a field
        /// id to the elements lying in that field's instruction-text portion; this is the
        /// cache read by <see cref="InstrText"/>.
        /// </summary>
        /// <param name="part">The part whose XDocument is annotated in place.</param>
        public static void AnnotateWithFieldInfo(OpenXmlPart part)
        {
            XNamespace w = "http://schemas.openxmlformats.org/wordprocessingml/2006/main";

            XElement root = part.GetXDocument().Root;
            var r = root.DescendantsAndSelf()
                .Rollup(
                    new FieldElementTypeStack
                    {
                        Id = 0,
                        FiStack = null,
                    },
                    (e, s) =>
                    {
                        if (e.Name == w + "fldChar")
                        {
                            // The cast yields null when the attribute is absent, so a
                            // malformed w:fldChar falls through to the generic handling
                            // below instead of throwing a NullReferenceException.
                            string fldCharType = (string)e.Attribute(w + "fldCharType");

                            if (fldCharType == "begin")
                            {
                                // A new field starts: allocate the next id and push it.
                                Stack<FieldElementTypeInfo> fis = s.FiStack == null
                                    ? new Stack<FieldElementTypeInfo>()
                                    : CopyStack(s.FiStack);
                                fis.Push(
                                    new FieldElementTypeInfo
                                    {
                                        Id = s.Id + 1,
                                        FieldElementType = FieldElementTypeEnum.Begin,
                                    });
                                return new FieldElementTypeStack
                                {
                                    Id = s.Id + 1,
                                    FiStack = fis,
                                };
                            }

                            if (fldCharType == "separate")
                                return ReplaceTop(s, FieldElementTypeEnum.Separate);

                            if (fldCharType == "end")
                            {
                                // The innermost field is finished: drop it from the stack.
                                Stack<FieldElementTypeInfo> fis = CopyStack(s.FiStack);
                                fis.Pop();
                                return new FieldElementTypeStack
                                {
                                    Id = s.Id,
                                    FiStack = fis,
                                };
                            }
                        }

                        // Not inside any field: state is unchanged.
                        if (s.FiStack == null || s.FiStack.Count == 0)
                            return s;

                        FieldElementTypeInfo top = s.FiStack.Peek();

                        // First element after the "begin" marker: now in instruction text.
                        if (top.FieldElementType == FieldElementTypeEnum.Begin)
                            return ReplaceTop(s, FieldElementTypeEnum.InstrText);

                        // First element after the "separate" marker: now in the field result.
                        if (top.FieldElementType == FieldElementTypeEnum.Separate)
                            return ReplaceTop(s, FieldElementTypeEnum.Result);

                        // NOTE(review): nothing in this method pushes an End entry, so this
                        // branch does not appear to be reachable from here; kept as-is.
                        if (top.FieldElementType == FieldElementTypeEnum.End)
                        {
                            Stack<FieldElementTypeInfo> fis = CopyStack(s.FiStack);
                            fis.Pop();
                            if (!fis.Any())
                                fis = null;
                            return new FieldElementTypeStack
                            {
                                Id = s.Id,
                                FiStack = fis,
                            };
                        }

                        return s;
                    });

            // Pair each element with the state computed for it.
            var elementPlusInfo = root.DescendantsAndSelf().PtZip(r, (t1, t2) =>
            {
                return new
                {
                    Element = t1,
                    Id = t2.Id,
                    WmlFieldInfoStack = t2.FiStack,
                };
            });

            foreach (var item in elementPlusInfo)
            {
                if (item.WmlFieldInfoStack != null)
                    item.Element.AddAnnotation(item.WmlFieldInfoStack);
            }

            //This code is useful when you want to take a look at the annotations, making sure that they are made correctly.
            //
            //foreach (var desc in root.DescendantsAndSelf())
            //{
            //    Stack<FieldElementTypeInfo> s = desc.Annotation<Stack<FieldElementTypeInfo>>();
            //    if (s != null)
            //    {
            //        Console.WriteLine(desc.Name.LocalName.PadRight(20));
            //        foreach (var item in s)
            //        {
            //            Console.WriteLine("    {0:0000} {1}", item.Id, item.FieldElementType.ToString());
            //            Console.ReadKey();
            //        }
            //    }
            //}

            // Build the per-field cache of elements that lie in instruction text.
            var cachedAnnotationInformation = new Dictionary<int, List<XElement>>();
            foreach (var desc in root.DescendantsTrimmed(d => d.Name == W.rPr || d.Name == W.pPr))
            {
                Stack<FieldElementTypeInfo> s = desc.Annotation<Stack<FieldElementTypeInfo>>();
                if (s == null)
                    continue;

                foreach (var item in s)
                {
                    if (item.FieldElementType != FieldElementTypeEnum.InstrText)
                        continue;

                    List<XElement> elementsForField;
                    if (!cachedAnnotationInformation.TryGetValue(item.Id, out elementsForField))
                    {
                        elementsForField = new List<XElement>();
                        cachedAnnotationInformation.Add(item.Id, elementsForField);
                    }
                    elementsForField.Add(desc);
                }
            }
            root.AddAnnotation(cachedAnnotationInformation);
        }

        // Returns a new stack with the same entries, in the same order, as the given one.
        // Enumerating a Stack yields top-to-bottom, hence the Reverse() before re-pushing.
        private static Stack<FieldElementTypeInfo> CopyStack(Stack<FieldElementTypeInfo> source)
        {
            return new Stack<FieldElementTypeInfo>(source.Reverse());
        }

        // Returns a new state whose stack is a copy of the given state's stack with the top
        // entry replaced by one that has the same field id and the given element type.
        private static FieldElementTypeStack ReplaceTop(FieldElementTypeStack state, FieldElementTypeEnum newType)
        {
            Stack<FieldElementTypeInfo> fis = CopyStack(state.FiStack);
            FieldElementTypeInfo previousTop = fis.Pop();
            fis.Push(
                new FieldElementTypeInfo
                {
                    Id = previousTop.Id,
                    FieldElementType = newType,
                });
            return new FieldElementTypeStack
            {
                Id = state.Id,
                FiStack = fis,
            };
        }

        // States of the tokenizer in GetTokens.
        private enum State
        {
            InToken,
            InWhiteSpace,
            InQuotedToken,
            OnOpeningQuote,
            OnClosingQuote,
            OnBackslash,
        }

        /// <summary>
        /// Splits field instruction text into tokens.  Tokens are separated by white space;
        /// text enclosed in matching quote characters (", ' or U+201D) forms a single token
        /// without the enclosing quotes.  Inside a quoted token a backslash causes the next
        /// non-white-space character to be skipped by the state machine (so an escaped quote
        /// does not end the token); both characters remain part of the token text.
        /// A quoted token that is never closed is not emitted.
        /// </summary>
        private static string[] GetTokens(string field)
        {
            State state = State.InWhiteSpace;
            int tokenStart = 0;
            char quoteStart = char.MinValue;
            List<string> tokens = new List<string>();
            for (int c = 0; c < field.Length; c++)
            {
                if (Char.IsWhiteSpace(field[c]))
                {
                    if (state == State.InToken)
                    {
                        // White space terminates an unquoted token.
                        tokens.Add(field.Substring(tokenStart, c - tokenStart));
                        state = State.InWhiteSpace;
                        continue;
                    }
                    if (state == State.OnOpeningQuote)
                    {
                        // White space directly after the opening quote is part of the token.
                        tokenStart = c;
                        state = State.InQuotedToken;
                    }
                    if (state == State.OnClosingQuote)
                        state = State.InWhiteSpace;
                    continue;
                }
                if (field[c] == '\\')
                {
                    if (state == State.InQuotedToken)
                    {
                        state = State.OnBackslash;
                        continue;
                    }
                }
                if (state == State.OnBackslash)
                {
                    // Character following a backslash: consumed without interpretation.
                    state = State.InQuotedToken;
                    continue;
                }
                if (field[c] == '"' || field[c] == '\'' || field[c] == 0x201d)
                {
                    if (state == State.InWhiteSpace)
                    {
                        quoteStart = field[c];
                        state = State.OnOpeningQuote;
                        continue;
                    }
                    if (state == State.InQuotedToken)
                    {
                        // Only the same quote character that opened the token closes it.
                        if (field[c] == quoteStart)
                        {
                            tokens.Add(field.Substring(tokenStart, c - tokenStart));
                            state = State.OnClosingQuote;
                            continue;
                        }
                        continue;
                    }
                    if (state == State.OnOpeningQuote)
                    {
                        if (field[c] == quoteStart)
                        {
                            // Empty quoted string: no token is emitted.
                            state = State.OnClosingQuote;
                            continue;
                        }
                        else
                        {
                            tokenStart = c;
                            state = State.InQuotedToken;
                            continue;
                        }
                    }
                    continue;
                }
                if (state == State.InWhiteSpace)
                {
                    tokenStart = c;
                    state = State.InToken;
                    continue;
                }
                if (state == State.OnOpeningQuote)
                {
                    tokenStart = c;
                    state = State.InQuotedToken;
                    continue;
                }
                if (state == State.OnClosingQuote)
                {
                    tokenStart = c;
                    state = State.InToken;
                    continue;
                }
            }
            if (state == State.InToken)
                tokens.Add(field.Substring(tokenStart, field.Length - tokenStart));
            return tokens.ToArray();
        }

        /// <summary>
        /// Parses field instruction text into its type, switches and arguments.
        /// Only HYPERLINK, REF, SEQ and STYLEREF fields are parsed; the field type is matched
        /// without regard to case, using an ordinal (culture-independent) comparison.
        /// </summary>
        /// <param name="field">The field instruction text; may be null or empty.</param>
        /// <returns>
        /// The parsed field.  For null/empty input, unsupported field types, or input that
        /// yields no tokens, a FieldInfo with an empty FieldType and empty arrays is returned.
        /// Switches are the tokens starting with a backslash; Arguments are the remaining
        /// tokens after the first one.
        /// </returns>
        public static FieldInfo ParseField(string field)
        {
            FieldInfo emptyField = new FieldInfo
            {
                FieldType = "",
                Arguments = new string[] { },
                Switches = new string[] { },
            };

            if (string.IsNullOrEmpty(field))
                return emptyField;

            string fieldType = field.TrimStart().Split(' ').FirstOrDefault();

            // Ordinal comparison: culture-sensitive upper-casing would fail to recognise
            // e.g. "hyperlink" under cultures with special casing rules for 'i'.
            if (fieldType == null ||
                !SupportedFieldTypes.Contains(fieldType, StringComparer.OrdinalIgnoreCase))
                return emptyField;

            string[] tokens = GetTokens(field);
            if (tokens.Length == 0)
                return emptyField;

            FieldInfo fieldInfo = new FieldInfo()
            {
                FieldType = tokens[0],
                Switches = tokens.Where(t => t[0] == '\\').ToArray(),
                Arguments = tokens.Skip(1).Where(t => t[0] != '\\').ToArray(),
            };
            return fieldInfo;
        }

        /// <summary>Result of <see cref="ParseField"/>.</summary>
        public class FieldInfo
        {
            // First token of the field instruction text ("" for an empty result).
            public string FieldType;

            // Tokens that start with a backslash.
            public string[] Switches;

            // Tokens after the first one that do not start with a backslash.
            public string[] Arguments;
        }

        /// <summary>Which portion of a field an element is in.</summary>
        public enum FieldElementTypeEnum
        {
            Begin,
            InstrText,
            Separate,
            Result,
            End,
        }

        /// <summary>One stack entry: a field id and the portion of that field an element is in.</summary>
        public class FieldElementTypeInfo
        {
            public int Id;
            public FieldElementTypeEnum FieldElementType;
        }

        /// <summary>
        /// State threaded through the element walk in <see cref="AnnotateWithFieldInfo"/>.
        /// </summary>
        public class FieldElementTypeStack
        {
            // Highest field id allocated so far.
            public int Id;

            // Fields currently open, innermost on top; null when none has been opened yet.
            public Stack<FieldElementTypeInfo> FiStack;
        }
    }
}