SmlToHtmlConverter.cs 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308
  1. // Copyright (c) Microsoft. All rights reserved.
  2. // Licensed under the MIT license. See LICENSE file in the project root for full license information.
  3. using System;
  4. using System.Collections.Generic;
  5. using System.Diagnostics.CodeAnalysis;
  6. using System.Drawing;
  7. using System.Globalization;
  8. using System.Linq;
  9. using System.Text;
  10. using System.Xml.Linq;
  11. using DocumentFormat.OpenXml.Packaging;
  12. using System.IO;
  13. namespace OpenXmlPowerTools
  14. {
  /// <summary>
  /// HTML conversion entry points on <see cref="SmlDocument"/>. Both members forward to
  /// <see cref="SmlToHtmlConverter"/>, passing this document as the source.
  /// </summary>
  public partial class SmlDocument
  {
      /// <summary>
      /// Converts the named table of this document to HTML using the supplied settings.
      /// </summary>
      /// <param name="htmlConverterSettings">Conversion settings handed through to the converter.</param>
      /// <param name="tableName">Name of the table to convert.</param>
      /// <returns>The element produced by <see cref="SmlToHtmlConverter"/>.</returns>
      [SuppressMessage("ReSharper", "UnusedMember.Global")]
      public XElement ConvertToHtml(SmlToHtmlConverterSettings htmlConverterSettings, string tableName)
      {
          XElement html = SmlToHtmlConverter.ConvertTableToHtml(this, htmlConverterSettings, tableName);
          return html;
      }

      /// <summary>
      /// Converts the named table of this document to HTML using default-constructed settings.
      /// </summary>
      /// <param name="tableName">Name of the table to convert.</param>
      /// <returns>The element produced by <see cref="SmlToHtmlConverter"/>.</returns>
      [SuppressMessage("ReSharper", "UnusedMember.Global")]
      public XElement ConvertTableToHtml(string tableName)
      {
          return SmlToHtmlConverter.ConvertTableToHtml(this, new SmlToHtmlConverterSettings(), tableName);
      }
  }
  /// <summary>
  /// Options that control how a spreadsheet table is rendered to HTML and how its CSS is emitted.
  /// </summary>
  [SuppressMessage("ReSharper", "FieldCanBeMadeReadOnly.Global")]
  public class SmlToHtmlConverterSettings
  {
      /// <summary>Page title; defaults to the empty string.</summary>
      public string PageTitle;

      /// <summary>Prefix prepended to every generated CSS class name; defaults to "pt-".</summary>
      public string CssClassPrefix;

      /// <summary>
      /// When true (the default) styles are emitted as generated CSS classes; when false they are
      /// written as inline style attributes.
      /// </summary>
      public bool FabricateCssClasses;

      /// <summary>CSS placed at the start of the generated style sheet.</summary>
      public string GeneralCss;

      /// <summary>CSS placed at the end of the generated style sheet; defaults to the empty string.</summary>
      public string AdditionalCss;

      /// <summary>
      /// Creates settings populated with the default values.
      /// </summary>
      public SmlToHtmlConverterSettings()
      {
          PageTitle = "";
          CssClassPrefix = "pt-";
          FabricateCssClasses = true;
          GeneralCss = "span { white-space: pre-wrap; }";
          AdditionalCss = "";
      }

      /// <summary>
      /// Creates a field-by-field copy of another settings instance.
      /// </summary>
      /// <param name="htmlConverterSettings">The settings to copy.</param>
      /// <exception cref="ArgumentNullException">
      /// <paramref name="htmlConverterSettings"/> is null.
      /// </exception>
      public SmlToHtmlConverterSettings(SmlToHtmlConverterSettings htmlConverterSettings)
      {
          // Fail with a descriptive argument exception instead of a bare NullReferenceException.
          if (htmlConverterSettings == null)
          {
              throw new ArgumentNullException("htmlConverterSettings");
          }

          PageTitle = htmlConverterSettings.PageTitle;
          CssClassPrefix = htmlConverterSettings.CssClassPrefix;
          FabricateCssClasses = htmlConverterSettings.FabricateCssClasses;
          GeneralCss = htmlConverterSettings.GeneralCss;
          AdditionalCss = htmlConverterSettings.AdditionalCss;
      }
  }
  54. public static class SmlToHtmlConverter
  55. {
  56. // ***********************************************************************************************************************************
  57. #region PublicApis
  /// <summary>
  /// Converts the named table of an in-memory spreadsheet document to HTML.
  /// </summary>
  /// <param name="smlDoc">Document whose <c>DocumentByteArray</c> holds the spreadsheet package.</param>
  /// <param name="settings">Conversion settings.</param>
  /// <param name="tableName">Name of the table to retrieve and convert.</param>
  /// <returns>The converted element.</returns>
  public static XElement ConvertTableToHtml(SmlDocument smlDoc, SmlToHtmlConverterSettings settings, string tableName)
  {
      using (var buffer = new MemoryStream())
      {
          // The package bytes are copied into a fresh stream, which is then opened with the
          // editable flag set to false.
          var packageBytes = smlDoc.DocumentByteArray;
          buffer.Write(packageBytes, 0, packageBytes.Length);

          using (var spreadsheet = SpreadsheetDocument.Open(buffer, false))
          {
              var tableXml = SmlDataRetriever.RetrieveTable(spreadsheet, tableName);
              return ConvertToHtmlInternal(spreadsheet, settings, tableXml);
          }
      }
  }
  /// <summary>
  /// Converts the named table of an already opened spreadsheet document to HTML.
  /// </summary>
  /// <param name="sDoc">The open spreadsheet document.</param>
  /// <param name="settings">Conversion settings.</param>
  /// <param name="tableName">Name of the table to retrieve and convert.</param>
  /// <returns>The converted element.</returns>
  public static XElement ConvertTableToHtml(SpreadsheetDocument sDoc, SmlToHtmlConverterSettings settings, string tableName)
  {
      var tableXml = SmlDataRetriever.RetrieveTable(sDoc, tableName);
      return ConvertToHtmlInternal(sDoc, settings, tableXml);
  }
  77. #endregion
  78. // ***********************************************************************************************************************************
  /// <summary>
  /// Runs the transform over <paramref name="rangeXml"/> and then materializes the CSS
  /// (classes or inline styles) on the result.
  /// </summary>
  /// <param name="sDoc">The open spreadsheet document.</param>
  /// <param name="htmlConverterSettings">Conversion settings.</param>
  /// <param name="rangeXml">XML of the retrieved table.</param>
  /// <returns>The transformed element with styles applied.</returns>
  private static XElement ConvertToHtmlInternal(SpreadsheetDocument sDoc, SmlToHtmlConverterSettings htmlConverterSettings, XElement rangeXml)
  {
      var transformed = ConvertToHtmlTransform(sDoc, htmlConverterSettings, rangeXml);
      var xhtml = (XElement)transformed;
      ReifyStylesAndClasses(htmlConverterSettings, xhtml);

      // Note: the xhtml returned by ConvertToHtmlTransform contains objects of type
      // XEntity. PtOpenXmlUtil.cs defines the XEntity class. See
      // http://blogs.msdn.com/ericwhite/archive/2010/01/21/writing-entity-references-using-linq-to-xml.aspx
      // for a detailed explanation.
      //
      // If you further transform the XML tree returned by ConvertToHtmlTransform, you
      // must do it correctly, or entities will not be serialized properly.
      return xhtml;
  }
  /// <summary>
  /// Recursively rebuilds an element tree: each element is recreated with the same name,
  /// its attributes, and its transformed child nodes. Non-element nodes are returned as is.
  /// </summary>
  /// <param name="sDoc">The open spreadsheet document; only passed along to recursive calls.</param>
  /// <param name="htmlConverterSettings">Conversion settings; only passed along to recursive calls.</param>
  /// <param name="node">The node to transform.</param>
  /// <returns>A new element for element input, otherwise <paramref name="node"/> itself.</returns>
  private static XNode ConvertToHtmlTransform(SpreadsheetDocument sDoc, SmlToHtmlConverterSettings htmlConverterSettings, XNode node)
  {
      var source = node as XElement;
      if (source == null)
      {
          return node;
      }

      var transformedChildren = source
          .Nodes()
          .Select(child => ConvertToHtmlTransform(sDoc, htmlConverterSettings, child));
      return new XElement(source.Name, source.Attributes(), transformedChildren);
  }
  /// <summary>
  /// Turns the style dictionaries attached to elements as annotations of type
  /// <c>Dictionary&lt;string, string&gt;</c> into CSS. Depending on
  /// <c>FabricateCssClasses</c>, styles become generated classes plus a style sheet, or
  /// inline <c>style</c> attributes.
  /// </summary>
  /// <param name="htmlConverterSettings">Conversion settings (class prefix, general and additional CSS, mode).</param>
  /// <param name="xhtml">Root of the tree to update in place.</param>
  private static void ReifyStylesAndClasses(SmlToHtmlConverterSettings htmlConverterSettings, XElement xhtml)
  {
      if (htmlConverterSettings.FabricateCssClasses)
      {
          ApplyFabricatedCssClasses(htmlConverterSettings, xhtml);
      }
      else
      {
          ApplyInlineStyles(htmlConverterSettings, xhtml);
      }
  }

  /// <summary>
  /// Groups annotated elements by element name and style set, emits one CSS rule per group,
  /// and tags every element of the group with the generated class name.
  /// </summary>
  private static void ApplyFabricatedCssClasses(SmlToHtmlConverterSettings htmlConverterSettings, XElement xhtml)
  {
      var usedCssClassNames = new HashSet<string>();

      // Elements sharing the same local name and the same (key-ordered) style set share one class.
      var styleGroups = xhtml
          .DescendantsAndSelf()
          .Select(d => new
          {
              Element = d,
              Styles = d.Annotation<Dictionary<string, string>>(),
          })
          .Where(z => z.Styles != null)
          .Select(p => new
          {
              p.Element,
              p.Styles,
              StylesString = p.Element.Name.LocalName + "|" + p.Styles.OrderBy(k => k.Key).Select(s => string.Format("{0}: {1};", s.Key, s.Value)).StringConcatenate(),
          })
          .GroupBy(p => p.StylesString)
          .ToList();

      // Starting at 1000000 and dropping the first digit yields a zero-padded six-digit suffix.
      int classCounter = 1000000;
      var sb = new StringBuilder();
      sb.Append(Environment.NewLine);

      foreach (var grp in styleGroups)
      {
          var firstOne = grp.First();
          var styles = firstOne.Styles;

          string classNameToUse;
          string styleName;
          if (styles.TryGetValue("PtStyleName", out styleName))
          {
              classNameToUse = htmlConverterSettings.CssClassPrefix + styleName;
              if (usedCssClassNames.Contains(classNameToUse))
              {
                  // Same style name but different properties: disambiguate with the counter.
                  classNameToUse = htmlConverterSettings.CssClassPrefix +
                      styleName + "-" +
                      classCounter.ToString(CultureInfo.InvariantCulture).Substring(1);
                  classCounter++;
              }
          }
          else
          {
              classNameToUse = htmlConverterSettings.CssClassPrefix +
                  classCounter.ToString(CultureInfo.InvariantCulture).Substring(1);
              classCounter++;
          }
          usedCssClassNames.Add(classNameToUse);

          sb.Append(firstOne.Element.Name.LocalName)
              .Append(".")
              .Append(classNameToUse)
              .Append(" {")
              .Append(Environment.NewLine);
          foreach (var st in styles.Where(s => s.Key != "PtStyleName"))
          {
              sb.Append(" ")
                  .Append(st.Key)
                  .Append(": ")
                  .Append(st.Value)
                  .Append(";")
                  .Append(Environment.NewLine);
          }
          sb.Append("}").Append(Environment.NewLine);

          foreach (var gc in grp)
          {
              // Adding a second "class" attribute would throw, so merge into an existing one,
              // mirroring how inline styles are appended to an existing "style" attribute.
              var existingClass = gc.Element.Attribute("class");
              if (existingClass == null)
              {
                  gc.Element.Add(new XAttribute("class", classNameToUse));
              }
              else
              {
                  existingClass.Value = existingClass.Value + " " + classNameToUse;
              }
          }
      }

      var styleValue = htmlConverterSettings.GeneralCss + sb + htmlConverterSettings.AdditionalCss;
      SetStyleElementValue(xhtml, styleValue);
  }

  /// <summary>
  /// Writes the general and additional CSS to the style element and renders each annotated
  /// element's styles as an inline <c>style</c> attribute.
  /// </summary>
  private static void ApplyInlineStyles(SmlToHtmlConverterSettings htmlConverterSettings, XElement xhtml)
  {
      // Previously, the h:style element was not added at this point. However,
      // at least the General CSS will contain important settings.
      SetStyleElementValue(xhtml, htmlConverterSettings.GeneralCss + htmlConverterSettings.AdditionalCss);

      foreach (var d in xhtml.DescendantsAndSelf())
      {
          var style = d.Annotation<Dictionary<string, string>>();
          if (style == null)
          {
              continue;
          }

          var styleValue =
              style
                  .Where(p => p.Key != "PtStyleName")
                  .OrderBy(p => p.Key)
                  .Select(e => string.Format("{0}: {1};", e.Key, e.Value))
                  .StringConcatenate();

          var existingStyle = d.Attribute("style");
          if (existingStyle != null)
          {
              existingStyle.Value += styleValue;
          }
          else
          {
              d.Add(new XAttribute("style", styleValue));
          }
      }
  }
  /// <summary>
  /// Stores <paramref name="styleValue"/> in the first style element found below
  /// <paramref name="xhtml"/>. When there is none, a new style element is appended to the
  /// head element that is a direct child of <paramref name="xhtml"/>; if no such head
  /// exists, nothing is written.
  /// </summary>
  /// <param name="xhtml">Root of the tree to update.</param>
  /// <param name="styleValue">CSS text to store.</param>
  private static void SetStyleElementValue(XElement xhtml, string styleValue)
  {
      var existingStyle = xhtml.Descendants(Xhtml.style).FirstOrDefault();
      if (existingStyle != null)
      {
          existingStyle.Value = styleValue;
          return;
      }

      var head = xhtml.Element(Xhtml.head);
      if (head == null)
      {
          return;
      }

      head.Add(new XElement(Xhtml.style, styleValue));
  }
  /// <summary>
  /// Overload taking a word-processing document: it produces no output for any node.
  /// </summary>
  /// <param name="wordDoc">Unused.</param>
  /// <param name="settings">Unused.</param>
  /// <param name="node">Unused; every node is ignored.</param>
  /// <returns>Always null.</returns>
  private static object ConvertToHtmlTransform(WordprocessingDocument wordDoc, WmlToHtmlConverterSettings settings, XNode node)
  {
      // The node is ignored, whatever it is.
      return null;
  }
  // Set of font names; no member shown in this class reads or writes it.
  private static readonly HashSet<string> UnknownFonts = new HashSet<string>();

  // Backing field for KnownFamilies; stays null until the first access.
  private static HashSet<string> _knownFamilies;

  /// <summary>
  /// Names of all font families reported by <see cref="FontFamily.Families"/>, collected on
  /// first access and cached afterwards.
  /// </summary>
  private static HashSet<string> KnownFamilies
  {
      get
      {
          if (_knownFamilies == null)
          {
              // Fill a local set and publish it only once it is complete, so that a
              // concurrent reader can never observe a partially populated cache.
              var names = new HashSet<string>();
              foreach (var fam in FontFamily.Families)
              {
                  names.Add(fam.Name);
              }
              _knownFamilies = names;
          }
          return _knownFamilies;
      }
  }
  // Format strings for the font-family value; {0} receives the font name.
  private const string SansSerifFallback = "'{0}', 'sans-serif'";
  private const string SerifFallback = "'{0}', 'serif'";
  private const string NoFallback = "'{0}'";

  /// <summary>
  /// Maps a font name to the format string used to build its CSS font-family value.
  /// </summary>
  private static readonly Dictionary<string, string> FontFallback = new Dictionary<string, string>()
  {
      { "Arial", SansSerifFallback },
      { "Arial Narrow", SansSerifFallback },
      { "Arial Rounded MT Bold", SansSerifFallback },
      { "Arial Unicode MS", SansSerifFallback },
      { "Baskerville Old Face", SerifFallback },
      { "Berlin Sans FB", SansSerifFallback },
      { "Berlin Sans FB Demi", SansSerifFallback },
      { "Calibri Light", SansSerifFallback },
      { "Gill Sans MT", SansSerifFallback },
      { "Gill Sans MT Condensed", SansSerifFallback },
      { "Lucida Sans", SansSerifFallback },
      { "Lucida Sans Unicode", SansSerifFallback },
      { "Segoe UI", SansSerifFallback },
      { "Segoe UI Light", SansSerifFallback },
      { "Segoe UI Semibold", SansSerifFallback },
      { "Tahoma", SansSerifFallback },
      { "Trebuchet MS", SansSerifFallback },
      { "Verdana", SansSerifFallback },
      { "Book Antiqua", SerifFallback },
      { "Bookman Old Style", SerifFallback },
      { "Californian FB", SerifFallback },
      { "Cambria", SerifFallback },
      { "Constantia", SerifFallback },
      { "Garamond", SerifFallback },
      { "Lucida Bright", SerifFallback },
      { "Lucida Fax", SerifFallback },
      { "Palatino Linotype", SerifFallback },
      { "Times New Roman", SerifFallback },
      { "Wide Latin", SerifFallback },
      { "Courier New", NoFallback },
      { "Lucida Console", NoFallback },
  };
  /// <summary>
  /// Adds a "font-family" entry for <paramref name="font"/> to <paramref name="style"/> via
  /// AddIfMissing. Fonts listed in FontFallback use their format string; any other font
  /// name is used unchanged.
  /// </summary>
  /// <param name="font">The font name.</param>
  /// <param name="style">The style dictionary to update.</param>
  private static void CreateFontCssProperty(string font, Dictionary<string, string> style)
  {
      string template;
      string fontFamily = FontFallback.TryGetValue(font, out template)
          ? string.Format(template, font)
          : font;
      style.AddIfMissing("font-family", fontFamily);
  }
  /// <summary>
  /// Reads an on/off property from <paramref name="runProps"/>.
  /// </summary>
  /// <param name="runProps">The properties element to inspect.</param>
  /// <param name="xName">Name of the child element that represents the property.</param>
  /// <returns>
  /// False when the child element is absent; true when it is present without a val
  /// attribute; otherwise true only when the attribute is "1", "true" or "on"
  /// (compared case-insensitively). Any other value yields false.
  /// </returns>
  private static bool GetBoolProp(XElement runProps, XName xName)
  {
      var p = runProps.Element(xName);
      if (p == null)
      {
          return false;
      }

      var v = p.Attribute(W.val);
      if (v == null)
      {
          // The bare element, without a value, switches the property on.
          return true;
      }

      // Ordinal comparison keeps the result independent of the current culture.
      // "on" is accepted alongside "1" and "true" as an on/off value.
      var s = v.Value;
      return s == "1"
          || string.Equals(s, "true", StringComparison.OrdinalIgnoreCase)
          || string.Equals(s, "on", StringComparison.OrdinalIgnoreCase);
  }
  284. }
  285. }