CrazyIter 4 年 前
コミット
db44cc3c21

+ 43 - 0
HTEXLib/COMM/Helpers/CollectionHelper.cs

@@ -0,0 +1,43 @@
+using System;
+using System.Collections;
+using System.Collections.Generic;
+using System.Text;
+
+namespace HTEXLib.COMM.Helpers
+{
+    /// <summary>
+    /// Extension methods that report whether a non-generic <see cref="ICollection"/> contains any elements.
+    /// </summary>
+    public static class CollectionHelper
+    {
+        /// <summary>
+        /// Determines whether the collection is empty.
+        /// </summary>
+        /// <param name="collection">The collection to inspect; may be null.</param>
+        /// <returns><c>true</c> when the collection is null or holds no elements; otherwise <c>false</c>.</returns>
+        public static bool IsEmpty(this ICollection collection)
+        {
+            return collection == null || collection.Count <= 0;
+        }
+
+        /// <summary>
+        /// Determines whether the collection is not empty.
+        /// </summary>
+        /// <param name="collection">The collection to inspect; may be null.</param>
+        /// <returns><c>true</c> when the collection is non-null and holds at least one element; otherwise <c>false</c>.</returns>
+        public static bool IsNotEmpty(this ICollection collection)
+        {
+            return !(collection == null || collection.Count <= 0);
+        }
+    }
+}

+ 395 - 0
HTEXLib/COMM/Helpers/ContentTypeDict.cs

@@ -0,0 +1,395 @@
+using System;
+using System.Collections.Generic;
+using System.Text;
+
+namespace HTEXLib.COMM.Helpers
+{
+    /// <summary>
+    /// Static lookup tables between MIME content types and file extensions.
+    /// </summary>
+    /// <remarks>
+    /// Both tables compare keys with <see cref="StringComparer.OrdinalIgnoreCase"/>, so lookups such as
+    /// <c>".JPG"</c> or <c>"Image/PNG"</c> succeed. Every key is written in lower case, so the
+    /// case-insensitive comparer cannot introduce duplicate-key collisions.
+    /// </remarks>
+    public static class ContentTypeDict
+    {
+        /// <summary>
+        /// Maps a MIME content type (for example <c>"image/png"</c>) to one file extension,
+        /// including the leading dot (for example <c>".png"</c>).
+        /// </summary>
+        public static readonly Dictionary<string, string> extdict = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase) {
+            //{ "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",".xlsx"},
+            {"audio/x-ms-wma",".wma"},
+            {"video/3gpp",".3g2"},
+            {"audio/mp4",".aac"},
+            {"audio/ac3",".ac3"},
+            {"text/x-adasrc",".adb"},
+            {"image/x-applix-graphics",".ag"},
+            {"audio/amr",".amr"},
+            {"audio/x-ape",".ape"},
+            {"image/x-sony-arw",".arw"},
+            {"text/plain",".asc"},
+            {"video/x-ms-asf",".asf"},
+            {"text/x-ssa",".ass"},
+            {"video/x-msvideo",".avi"},
+            {"text/x-bibtex",".bib"},
+            {"image/bmp",".bmp"},
+            {"text/x-csrc",".c"},
+            {"text/x-c++src",".c++"},
+            {"image/cgm",".cgm"},
+            {"text/x-tex",".cls"},
+            {"text/x-cmake",".cmake"},
+            {"image/x-canon-cr2",".cr2"},
+            {"image/x-canon-crw",".crw"},
+            {"text/x-csharp",".cs"},
+            {"text/css",".css"},
+            {"text/csv",".csv"},
+            {"image/x-win-bitmap",".cur"},
+            {"text/x-dsrc",".d"},
+            {"text/x-dcl",".dcl"},
+            {"image/x-kodak-dcr",".dcr"},
+            {"image/x-dds",".dds"},
+            {"text/x-patch",".diff"},
+            {"image/vnd.djvu",".djv"},
+            {"image/x-adobe-dng",".dng"},
+            {"text/vnd.graphviz",".dot"},
+            {"text/x-dsl",".dsl"},
+            {"image/vnd.dwg",".dwg"},
+            {"image/vnd.dxf",".dxf"},
+            {"text/x-eiffel",".e"},
+            {"text/x-emacs-lisp",".el"},
+            {"image/x-emf",".emf"},
+            {"image/x-eps",".eps"},
+            {"image/x-bzeps",".eps.bz2"},
+            {"image/x-gzeps",".eps.gz"},
+            {"text/x-erlang",".erl"},
+            {"text/x-setext",".etx"},
+            {"image/x-exr",".exr"},
+            {"text/x-fortran",".f"},
+            {"image/x-xfig",".fig"},
+            {"image/fits",".fits"},
+            {"video/x-flv",".flv"},
+            {"text/x-xslfo",".fo"},
+            {"image/fax-g3",".g3"},
+            {"text/directory",".gcrd"},
+            {"image/gif",".gif"},
+            {"text/x-google-video-pointer",".gvp"},
+            {"text/x-chdr",".h"},
+            {"text/x-c++hdr",".h++"},
+            {"text/x-haskell",".hs"},
+            {"text/html",".htm"},
+            {"image/x-tga",".icb"},
+            {"image/x-icns",".icns"},
+            {"image/x-icon",".ico"},
+            {"text/calendar",".ics"},
+            {"text/x-idl",".idl"},
+            {"image/ief",".ief"},
+            {"image/x-iff",".iff"},
+            {"image/x-ilbm",".ilbm"},
+            {"text/x-imelody",".ime"},
+            {"text/x-iptables",".iptables"},
+            {"image/jp2",".j2k"},
+            {"text/vnd.sun.j2me.app-descriptor",".jad"},
+            {"text/x-java",".java"},
+            {"image/x-jng",".jng"},
+            {"image/jpeg",".jpeg"},
+            {"application/javascript",".js"},
+            {"application/json",".json"},
+            {"application/jsonp",".jsonp"},
+            {"image/x-kodak-k25",".k25"},
+            {"image/x-kodak-kdc",".kdc"},
+            {"text/x-ldif",".ldif"},
+            {"text/x-literate-haskell",".lhs"},
+            {"text/x-log",".log"},
+            {"text/x-lua",".lua"},
+            {"image/x-lwo",".lwo"},
+            {"image/x-lws",".lws"},
+            {"text/x-lilypond",".ly"},
+            {"text/x-matlab",".m"},
+            {"video/mpeg",".m2t"},
+            {"audio/x-mpegurl",".m3u"},
+            {"application/vnd.apple.mpegurl",".m3u8"},
+            {"text/x-troff-me",".me"},
+            {"video/x-matroska",".mkv"},
+            {"text/x-ocaml",".ml"},
+            {"text/x-troff-mm",".mm"},
+            {"text/mathml",".mml"},
+            {"text/x-moc",".moc"},
+            {"text/x-mof",".mof"},
+            {"audio/mpeg",".mp3"},
+            {"video/mp4",".mp4"},
+            {"text/x-mrml",".mrl"},
+            {"image/x-minolta-mrw",".mrw"},
+            {"text/x-troff-ms",".ms"},
+            {"image/x-msod",".msod"},
+            {"text/x-mup",".mup"},
+            {"image/x-nikon-nef",".nef"},
+            {"text/x-nfo",".nfo"},
+            {"text/x-ocl",".ocl"},
+            {"text/x-opml+xml",".opml"},
+            {"image/openraster",".ora"},
+            {"image/x-olympus-orf",".orf"},
+            {"text/x-pascal",".p"},
+            {"image/x-portable-bitmap",".pbm"},
+            {"image/x-photo-cd",".pcd"},
+            {"image/x-pcx",".pcx"},
+            {"application/pdf",".pdf"},
+            {"image/x-pentax-pef",".pef"},
+            {"image/x-portable-graymap",".pgm"},
+            {"image/x-pict",".pict"},
+            {"image/png",".png"},
+            {"image/x-portable-anymap",".pnm"},
+            {"image/x-macpaint",".pntg"},
+            {"text/x-gettext-translation",".po"},
+            {"text/x-gettext-translation-template",".pot"},
+            {"text/x-python",".py"},
+            {"image/x-quicktime",".qif"},
+            {"image/x-fuji-raf",".raf"},
+            {"image/x-cmu-raster",".ras"},
+            {"image/x-panasonic-raw",".raw"},
+            {"text/x-ms-regedit",".reg"},
+            {"image/x-rgb",".rgb"},
+            {"image/rle",".rle"},
+            {"text/troff",".roff"},
+            {"image/vnd.rn-realpix",".rp"},
+            {"text/vnd.rn-realtext",".rt"},
+            {"text/richtext",".rtx"},
+            {"text/x-scheme",".scm"},
+            {"image/x-sgi",".sgi"},
+            {"text/sgml",".sgm"},
+            {"image/x-skencil",".sk"},
+            {"text/spreadsheet",".slk"},
+            {"text/x-rpm-spec",".spec"},
+            {"text/x-sql",".sql"},
+            {"image/x-sony-sr2",".sr2"},
+            {"image/x-sony-srf",".srf"},
+            {"text/x-subviewer",".sub"},
+            {"image/x-sun-raster",".sun"},
+            {"image/svg+xml",".svg"},
+            {"image/svg+xml-compressed",".svgz"},
+            {"text/x-txt2tags",".t2t"},
+            {"text/x-tcl",".tcl"},
+            {"text/x-texinfo",".texi"},
+            {"image/tiff",".tif"},
+            {"audio/x-voc",".voc"},
+            {"image/x-wmf",".wmf"},
+            {"text/vnd.wap.wml",".wml"},
+            {"text/vnd.wap.wmlscript",".wmls"},
+            {"video/x-ms-wmv",".wmv"},
+            {"application/xhtml+xml",".xhtml"},
+            {"application/xml",".xml"}
+        };
+
+        /// <summary>
+        /// Maps a file extension, including the leading dot (for example <c>".jpg"</c>),
+        /// to its MIME content type (for example <c>"image/jpeg"</c>).
+        /// Several extensions may share one content type.
+        /// </summary>
+        public static readonly Dictionary<string, string> dict = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase) {
+            {".3g2", "video/3gpp"},
+            {".3ga", "video/3gpp"},
+            {".3gp", "video/3gpp"},
+            {".aac", "audio/mp4"},
+            {".ac3", "audio/ac3"},
+            {".adb", "text/x-adasrc"},
+            {".ads", "text/x-adasrc"},
+            {".ag", "image/x-applix-graphics"},
+            {".amr", "audio/amr"},
+            {".ape", "audio/x-ape"},
+            {".arw", "image/x-sony-arw"},
+            {".asc", "text/plain"},
+            {".asf", "video/x-ms-asf"},
+            {".ass", "text/x-ssa"},
+            {".avi", "video/x-msvideo"},
+            {".bib", "text/x-bibtex"},
+            {".bmp", "image/bmp"},
+            {".c", "text/x-csrc"},
+            {".c++", "text/x-c++src"},
+            {".cc", "text/x-c++src"},
+            {".cgm", "image/cgm"},
+            {".cls", "text/x-tex"},
+            {".cmake", "text/x-cmake"},
+            {".cpp", "text/x-c++src"},
+            {".cr2", "image/x-canon-cr2"},
+            {".crw", "image/x-canon-crw"},
+            {".cs", "text/x-csharp"},
+            {".css", "text/css"},
+            {".cssl", "text/css"},
+            {".csv", "text/csv"},
+            {".cur", "image/x-win-bitmap"},
+            {".cxx", "text/x-c++src"},
+            {".d", "text/x-dsrc"},
+            {".dcl", "text/x-dcl"},
+            {".dcr", "image/x-kodak-dcr"},
+            {".dds", "image/x-dds"},
+            {".diff", "text/x-patch"},
+            {".djv", "image/vnd.djvu"},
+            {".djvu", "image/vnd.djvu"},
+            {".dng", "image/x-adobe-dng"},
+            {".dot", "text/vnd.graphviz"},
+            {".dsl", "text/x-dsl"},
+            {".dtx", "text/x-tex"},
+            {".dwg", "image/vnd.dwg"},
+            {".dxf", "image/vnd.dxf"},
+            {".e", "text/x-eiffel"},
+            {".eif", "text/x-eiffel"},
+            {".el", "text/x-emacs-lisp"},
+            {".emf", "image/x-emf"},
+            {".eps", "image/x-eps"},
+            {".eps.bz2", "image/x-bzeps"},
+            {".eps.gz", "image/x-gzeps"},
+            {".epsf", "image/x-eps"},
+            {".epsf.bz2", "image/x-bzeps"},
+            {".epsf.gz", "image/x-gzeps"},
+            {".epsi", "image/x-eps"},
+            {".epsi.bz2", "image/x-bzeps"},
+            {".epsi.gz", "image/x-gzeps"},
+            {".erl", "text/x-erlang"},
+            {".etx", "text/x-setext"},
+            {".exr", "image/x-exr"},
+            {".f", "text/x-fortran"},
+            {".f90", "text/x-fortran"},
+            {".f95", "text/x-fortran"},
+            {".fig", "image/x-xfig"},
+            {".fits", "image/fits"},
+            {".flv", "video/x-flv"},
+            {".fo", "text/x-xslfo"},
+            {".for", "text/x-fortran"},
+            {".g3", "image/fax-g3"},
+            {".gcrd", "text/directory"},
+            {".gif", "image/gif"},
+            {".gv", "text/vnd.graphviz"},
+            {".gvp", "text/x-google-video-pointer"},
+            {".h", "text/x-chdr"},
+            {".h++", "text/x-c++hdr"},
+            {".hh", "text/x-c++hdr"},
+            {".hp", "text/x-c++hdr"},
+            {".hpp", "text/x-c++hdr"},
+            {".hs", "text/x-haskell"},
+            {".htm", "text/html"},
+            {".html", "text/html"},
+            {".hxx", "text/x-c++hdr"},
+            {".icb", "image/x-tga"},
+            {".icns", "image/x-icns"},
+            {".ico", "image/x-icon"},
+            {".ics", "text/calendar"},
+            {".idl", "text/x-idl"},
+            {".ief", "image/ief"},
+            {".iff", "image/x-iff"},
+            {".ilbm", "image/x-ilbm"},
+            {".ime", "text/x-imelody"},
+            {".imy", "text/x-imelody"},
+            {".ins", "text/x-tex"},
+            {".iptables", "text/x-iptables"},
+            {".j2k", "image/jp2"},
+            {".jad", "text/vnd.sun.j2me.app-descriptor"},
+            {".java", "text/x-java"},
+            {".jng", "image/x-jng"},
+            {".jp2", "image/jp2"},
+            {".jpc", "image/jp2"},
+            {".jpe", "image/jpeg"},
+            {".jpeg", "image/jpeg"},
+            {".jpf", "image/jp2"},
+            {".jpg", "image/jpeg"},
+            {".jpx", "image/jp2"},
+            {".js", "application/javascript"},
+            {".json", "application/json"},
+            {".jsonp", "application/jsonp"},
+            {".k25", "image/x-kodak-k25"},
+            {".kdc", "image/x-kodak-kdc"},
+            {".latex", "text/x-tex"},
+            {".ldif", "text/x-ldif"},
+            {".lhs", "text/x-literate-haskell"},
+            {".log", "text/x-log"},
+            {".ltx", "text/x-tex"},
+            {".lua", "text/x-lua"},
+            {".lwo", "image/x-lwo"},
+            {".lwob", "image/x-lwo"},
+            {".lws", "image/x-lws"},
+            {".ly", "text/x-lilypond"},
+            {".m", "text/x-matlab"},
+            {".m2t", "video/mpeg"},
+            {".m3u", "audio/x-mpegurl"},
+            {".m3u8", "application/vnd.apple.mpegurl"},
+            {".me", "text/x-troff-me"},
+            {".mkv", "video/x-matroska"},
+            {".ml", "text/x-ocaml"},
+            {".mli", "text/x-ocaml"},
+            {".mm", "text/x-troff-mm"},
+            {".mml", "text/mathml"},
+            {".moc", "text/x-moc"},
+            {".mof", "text/x-mof"},
+            {".mp2", "video/mpeg"},
+            {".mp3", "audio/mpeg"},
+            {".mp4", "video/mp4"},
+            {".mpe", "video/mpeg"},
+            {".mpeg", "video/mpeg"},
+            {".mpg", "video/mpeg"},
+            {".mrl", "text/x-mrml"},
+            {".mrml", "text/x-mrml"},
+            {".mrw", "image/x-minolta-mrw"},
+            {".ms", "text/x-troff-ms"},
+            {".msod", "image/x-msod"},
+            {".mup", "text/x-mup"},
+            {".nef", "image/x-nikon-nef"},
+            {".nfo", "text/x-nfo"},
+            {".not", "text/x-mup"},
+            {".ocl", "text/x-ocl"},
+            {".opml", "text/x-opml+xml"},
+            {".ora", "image/openraster"},
+            {".orf", "image/x-olympus-orf"},
+            {".p", "text/x-pascal"},
+            {".pas", "text/x-pascal"},
+            {".patch", "text/x-patch"},
+            {".pbm", "image/x-portable-bitmap"},
+            {".pcd", "image/x-photo-cd"},
+            {".pcx", "image/x-pcx"},
+            {".pdf", "application/pdf"},
+            {".pef", "image/x-pentax-pef"},
+            {".pgm", "image/x-portable-graymap"},
+            {".pict", "image/x-pict"},
+            {".pict1", "image/x-pict"},
+            {".pict2", "image/x-pict"},
+            {".png", "image/png"},
+            {".pnm", "image/x-portable-anymap"},
+            {".pntg", "image/x-macpaint"},
+            {".po", "text/x-gettext-translation"},
+            {".pot", "text/x-gettext-translation-template"},
+            {".py", "text/x-python"},
+            {".qif", "image/x-quicktime"},
+            {".qtif", "image/x-quicktime"},
+            {".raf", "image/x-fuji-raf"},
+            {".ras", "image/x-cmu-raster"},
+            {".raw", "image/x-panasonic-raw"},
+            {".reg", "text/x-ms-regedit"},
+            {".rgb", "image/x-rgb"},
+            {".rle", "image/rle"},
+            {".roff", "text/troff"},
+            {".rp", "image/vnd.rn-realpix"},
+            {".rt", "text/vnd.rn-realtext"},
+            {".rtx", "text/richtext"},
+            {".scm", "text/x-scheme"},
+            {".sgi", "image/x-sgi"},
+            {".sgm", "text/sgml"},
+            {".sgml", "text/sgml"},
+            {".sk", "image/x-skencil"},
+            {".sk1", "image/x-skencil"},
+            {".slk", "text/spreadsheet"},
+            {".spec", "text/x-rpm-spec"},
+            {".sql", "text/x-sql"},
+            {".sr2", "image/x-sony-sr2"},
+            {".srf", "image/x-sony-srf"},
+            {".ssa", "text/x-ssa"},
+            {".sty", "text/x-tex"},
+            {".sub", "text/x-subviewer"},
+            {".sun", "image/x-sun-raster"},
+            {".svg", "image/svg+xml"},
+            {".svgz", "image/svg+xml-compressed"},
+            {".sylk", "text/spreadsheet"},
+            {".t2t", "text/x-txt2tags"},
+            {".tcl", "text/x-tcl"},
+            {".tex", "text/x-tex"},
+            {".texi", "text/x-texinfo"},
+            {".texinfo", "text/x-texinfo"},
+            {".tga", "image/x-tga"},
+            {".tif", "image/tiff"},
+            {".tiff", "image/tiff"},
+            {".tk", "text/x-tcl"},
+            {".tpic", "image/x-tga"},
+            {".tr", "text/troff"},
+            {".txt", "text/plain"},
+            {".vob", "video/mpeg"},
+            {".voc", "audio/x-voc"},
+            {".wmf", "image/x-wmf"},
+            {".wml", "text/vnd.wap.wml"},
+            {".wmls", "text/vnd.wap.wmlscript"},
+            {".wmv", "video/x-ms-wmv"},
+            {".xhtml", "application/xhtml+xml"},
+            {".xml", "application/xml"}
+        };
+    }
+}

+ 85 - 0
HTEXLib/COMM/Helpers/HtmlHelper.cs

@@ -0,0 +1,85 @@
+using HtmlAgilityPack;
+using System;
+using System.Collections.Generic;
+using System.Text;
+using System.Text.RegularExpressions;
+
+namespace HTEXLib.COMM.Helpers
+{
+    /// <summary>
+    /// Helpers for tidying HTML fragments and for extracting their text and image URLs.
+    /// </summary>
+    public class HtmlHelper
+    {
+        // Spellings of an opening paragraph tag, in the order they are stripped from the END of a fragment.
+        private static readonly string[] TrailingOpenTags = { "<p>", "<p >", "<p  >" };
+
+        // The same spellings, in the order they are tested at the START of a fragment.
+        private static readonly string[] LeadingOpenTags = { "<p >", "<p  >", "<p>" };
+
+        // Matches a paragraph element whose content is only whitespace, e.g. "<p class='x'>  </p>".
+        private static readonly Regex EmptyParagraphRegex = new Regex("<p([^>]{0,})>\\s*</p>");
+
+        // Matches an <img> tag and captures its src value in the "imgUrl" group. Built once and reused.
+        private static readonly Regex ImgTagRegex = new Regex(@"<img\b[^<>]*?\bsrc[\s\t\r\n]*=[\s\t\r\n]*[""']?[\s\t\r\n]*(?<imgUrl>[^\s\t\r\n""'<>]*)[^<>]*?/?[\s\t\r\n]*>", RegexOptions.IgnoreCase);
+
+        /// <summary>
+        /// Removes unbalanced or empty paragraph tags from an HTML fragment.
+        /// </summary>
+        /// <remarks>
+        /// In order: drops a leading <c>&lt;/p&gt;</c>; drops a trailing opening tag
+        /// (<c>&lt;p&gt;</c>, <c>&lt;p &gt;</c>, <c>&lt;p  &gt;</c>); if the fragment starts with an opening tag
+        /// but contains no <c>&lt;/p&gt;</c>, removes every occurrence of that opening tag; if it ends with
+        /// <c>&lt;/p&gt;</c> but contains no opening tag, removes every <c>&lt;/p&gt;</c>; finally removes
+        /// paragraphs that contain only whitespace.
+        /// Prefix/suffix tests are ordinal so that the fixed-length cuts always remove exactly the matched tag.
+        /// </remarks>
+        /// <param name="str">The HTML fragment; null or empty input is returned unchanged.</param>
+        /// <returns>The cleaned fragment.</returns>
+        public static string DoUselessTag(string str)
+        {
+            if (string.IsNullOrEmpty(str))
+            {
+                return str;
+            }
+
+            const string closeTag = "</p>";
+
+            if (str.StartsWith(closeTag, StringComparison.Ordinal))
+            {
+                str = str.Substring(closeTag.Length);
+            }
+
+            foreach (string openTag in TrailingOpenTags)
+            {
+                if (str.EndsWith(openTag, StringComparison.Ordinal))
+                {
+                    str = str.Substring(0, str.Length - openTag.Length);
+                }
+            }
+
+            foreach (string openTag in LeadingOpenTags)
+            {
+                if (str.StartsWith(openTag, StringComparison.Ordinal) && !str.Contains(closeTag))
+                {
+                    str = str.Replace(openTag, "");
+                }
+            }
+
+            if (str.EndsWith(closeTag, StringComparison.Ordinal) && !str.Contains("<p>") && !str.Contains("<p >") && !str.Contains("<p  >"))
+            {
+                str = str.Replace(closeTag, "");
+            }
+
+            return EmptyParagraphRegex.Replace(str, "");
+        }
+
+        /// <summary>
+        /// Extracts the text and the img URLs from HTML, removing the interference of other tags and of
+        /// <c>&amp;nbsp;</c> entities, to obtain a more accurate SHA1 checksum input.
+        /// </summary>
+        /// <param name="html">The HTML to reduce.</param>
+        /// <returns>The document's inner text with <c>&amp;nbsp;</c> removed, followed by every image URL in document order.</returns>
+        public static string DoTextImg(string html)
+        {
+            HtmlDocument doc = new HtmlDocument();
+            doc.LoadHtml(html);
+            StringBuilder builder = new StringBuilder(doc.DocumentNode.InnerText.Replace("&nbsp;", ""));
+            // GetHtmlImageUrlList never returns null, and iterating an empty list is a no-op.
+            foreach (string url in GetHtmlImageUrlList(html))
+            {
+                builder.Append(url);
+            }
+            return builder.ToString();
+        }
+
+        /// <summary>
+        /// Collects the <c>src</c> value of every <c>&lt;img&gt;</c> tag found in the given HTML text.
+        /// </summary>
+        /// <param name="sHtmlText">The HTML text to scan.</param>
+        /// <returns>The captured URLs in order of appearance; empty when there are no img tags.</returns>
+        public static List<string> GetHtmlImageUrlList(string sHtmlText)
+        {
+            List<string> urls = new List<string>();
+            foreach (Match match in ImgTagRegex.Matches(sHtmlText))
+            {
+                urls.Add(match.Groups["imgUrl"].Value);
+            }
+            return urls;
+        }
+    }
+}

+ 230 - 0
HTEXLib/COMM/Helpers/StringHelper.cs

@@ -0,0 +1,230 @@
+using System;
+using System.Collections.Generic;
+using System.Text;
+using System.Text.Json;
+
+namespace HTEXLib.COMM.Helpers
+{
+    /// <summary>
+    /// Miscellaneous string, list and JSON helpers.
+    /// </summary>
+    public class StringHelper
+    {
+        #region Substring extraction
+        /// <summary>
+        /// Truncates <paramref name="pSrcString"/> from its beginning; see the four-argument overload.
+        /// </summary>
+        /// <param name="pSrcString">The source string.</param>
+        /// <param name="pLength">Maximum length to keep.</param>
+        /// <param name="pTailString">Suffix appended when the byte-based path actually truncates.</param>
+        /// <returns>The (possibly truncated) string.</returns>
+        public static string GetSubString(string pSrcString, int pLength, string pTailString)
+        {
+            return GetSubString(pSrcString, 0, pLength, pTailString);
+        }
+
+        /// <summary>
+        /// Extracts part of a string, measuring length in encoded bytes for most input.
+        /// </summary>
+        /// <remarks>
+        /// If the string contains any character strictly between U+0800 and U+4E00, or strictly between
+        /// U+AC00 and U+D7A3, a plain character-based <c>Substring</c> is returned and no tail is appended.
+        /// Otherwise the string is encoded with <see cref="Encoding.Default"/> and
+        /// <paramref name="pStartIndex"/> / <paramref name="pLength"/> are treated as byte offsets; when the
+        /// cut would fall after the first byte of a two-byte character, one extra byte is included.
+        /// NOTE(review): the lead/trail byte tracking assumes a double-byte encoding; where
+        /// <see cref="Encoding.Default"/> is UTF-8, multi-byte characters can still be split. Confirm the target runtime.
+        /// </remarks>
+        /// <param name="pSrcString">The source string.</param>
+        /// <param name="pStartIndex">Start offset (characters on the first path, bytes on the second).</param>
+        /// <param name="pLength">Length to keep; a negative value returns the source unchanged on the byte path.</param>
+        /// <param name="pTailString">Suffix appended only when the byte path truncates the input.</param>
+        /// <returns>The extracted text, or the unchanged source when the start offset is past the encoded end.</returns>
+        /// <exception cref="ArgumentNullException"><paramref name="pSrcString"/> is null.</exception>
+        public static string GetSubString(string pSrcString, int pStartIndex, int pLength, string pTailString)
+        {
+            if (pSrcString == null)
+            {
+                throw new ArgumentNullException(nameof(pSrcString));
+            }
+
+            foreach (char ch in pSrcString)
+            {
+                bool inFirstRange = (ch > '\u0800') && (ch < '\u4e00');
+                bool inSecondRange = (ch > 0xac00) && (ch < 0xd7a3);
+                if (inFirstRange || inSecondRange)
+                {
+                    if (pStartIndex >= pSrcString.Length)
+                    {
+                        return "";
+                    }
+                    int charCount = ((pLength + pStartIndex) > pSrcString.Length) ? (pSrcString.Length - pStartIndex) : pLength;
+                    return pSrcString.Substring(pStartIndex, charCount);
+                }
+            }
+
+            if (pLength < 0)
+            {
+                return pSrcString;
+            }
+
+            byte[] sourceBytes = Encoding.Default.GetBytes(pSrcString);
+            if (sourceBytes.Length <= pStartIndex)
+            {
+                return pSrcString;
+            }
+
+            int end;
+            if (sourceBytes.Length > (pStartIndex + pLength))
+            {
+                end = pStartIndex + pLength;
+            }
+            else
+            {
+                // Nothing is cut off, so no tail marker is needed.
+                end = sourceBytes.Length;
+                pLength = sourceBytes.Length - pStartIndex;
+                pTailString = "";
+            }
+
+            if (pLength == 0)
+            {
+                // Empty window: avoid reading sourceBytes[end - 1] / states[-1] below.
+                return string.Empty + pTailString;
+            }
+
+            // states[k] describes byte (pStartIndex + k): 0 = single-byte, 1 = first byte of a pair, 2 = second byte.
+            int[] states = new int[pLength];
+            int state = 0;
+            for (int i = pStartIndex; i < end; i++)
+            {
+                if (sourceBytes[i] > 0x7f)
+                {
+                    state++;
+                    if (state == 3)
+                    {
+                        state = 1;
+                    }
+                }
+                else
+                {
+                    state = 0;
+                }
+                // Index relative to the window; the absolute index overflowed whenever pStartIndex > 0.
+                states[i - pStartIndex] = state;
+            }
+
+            int copyLength = pLength;
+            if ((sourceBytes[end - 1] > 0x7f) && (states[pLength - 1] == 1))
+            {
+                // Include the partner byte, but never read past the end of the encoded data.
+                copyLength = Math.Min(pLength + 1, sourceBytes.Length - pStartIndex);
+            }
+
+            byte[] window = new byte[copyLength];
+            Array.Copy(sourceBytes, pStartIndex, window, 0, copyLength);
+            return Encoding.Default.GetString(window) + pTailString;
+        }
+        #endregion
+
+        /// <summary>
+        /// Splits a comma-separated string into a list of strings.
+        /// </summary>
+        /// <param name="str">For example <c>1,2,3,4,5</c>.</param>
+        /// <returns>
+        /// The comma-separated parts with empty entries removed. A string without any comma is returned as a
+        /// single-element list (even when it is empty); null yields an empty list.
+        /// </returns>
+        public static List<string> StrToListString(string str)
+        {
+            if (str == null)
+            {
+                return new List<string>();
+            }
+            if (!str.Contains(","))
+            {
+                return new List<string> { str };
+            }
+            return new List<string>(str.Split(new[] { ',' }, StringSplitOptions.RemoveEmptyEntries));
+        }
+
+        /// <summary>
+        /// Returns the text located between two marker strings.
+        /// </summary>
+        /// <param name="sourse">The text to search.</param>
+        /// <param name="startstr">Marker preceding the wanted text (first occurrence is used).</param>
+        /// <param name="endstr">Marker following the wanted text (first occurrence after <paramref name="startstr"/>).</param>
+        /// <returns>The text between the markers, or an empty string when either marker is missing.</returns>
+        public static string SubMidString(string sourse, string startstr, string endstr)
+        {
+            int startIndex = sourse.IndexOf(startstr, StringComparison.Ordinal);
+            if (startIndex == -1)
+            {
+                return string.Empty;
+            }
+            string remainder = sourse.Substring(startIndex + startstr.Length);
+            int endIndex = remainder.IndexOf(endstr, StringComparison.Ordinal);
+            if (endIndex == -1)
+            {
+                return string.Empty;
+            }
+            return remainder.Substring(0, endIndex);
+        }
+
+        /// <summary>
+        /// Copies a list of doubles into a new array.
+        /// </summary>
+        /// <param name="list">The values to copy.</param>
+        /// <returns>A new array holding the list's elements in order.</returns>
+        public static double[] ListTodouble(List<double> list)
+        {
+            return list.ToArray();
+        }
+
+        /// <summary>
+        /// Converts a list of rows into a rectangular two-dimensional array.
+        /// </summary>
+        /// <remarks>
+        /// Empty rows are removed from <paramref name="list"/> itself (the caller's list is modified).
+        /// The column count is taken from the first non-empty row; a later row with fewer elements
+        /// causes an <see cref="ArgumentOutOfRangeException"/>.
+        /// </remarks>
+        /// <param name="list">The rows to convert.</param>
+        /// <returns>An array with one row per non-empty input row.</returns>
+        public static double[,] ListToDouble(List<List<double>> list)
+        {
+            // RemoveAll drops every empty row; removing inside an index loop skipped consecutive empties.
+            list.RemoveAll(row => row.Count == 0);
+
+            int itemCount = list.Count > 0 ? list[0].Count : 0;
+            double[,] doubles = new double[list.Count, itemCount];
+            for (int i = 0; i < list.Count; i++)
+            {
+                for (int j = 0; j < itemCount; j++)
+                {
+                    doubles[i, j] = list[i][j];
+                }
+            }
+            return doubles;
+        }
+
+        /// <summary>
+        /// Counts the properties of a JSON object.
+        /// </summary>
+        /// <param name="element">A JSON element; <c>EnumerateObject</c> is called on it.</param>
+        /// <returns>The number of properties enumerated.</returns>
+        public static int getKeyCount(JsonElement element)
+        {
+            int keys = 0;
+            foreach (JsonProperty property in element.EnumerateObject())
+            {
+                keys++;
+            }
+            return keys;
+        }
+
+        /// <summary>
+        /// Maps a key from "0" to "5" to its Chinese label.
+        /// </summary>
+        /// <param name="key">The key to translate.</param>
+        /// <returns>The matching label, or an empty string for any other key.</returns>
+        public static string getEduName(string key)
+        {
+            switch (key)
+            {
+                case "0": return "知识";
+                case "1": return "理解";
+                case "2": return "应用";
+                case "3": return "分析";
+                case "4": return "综合";
+                case "5": return "评鉴";
+                default: return "";
+            }
+        }
+
+        /// <summary>
+        /// Maps a key from "0" to "3" to a question type name.
+        /// </summary>
+        /// <param name="key">The key to translate.</param>
+        /// <returns>"Single", "Multiple", "Judge" or "Complete"; an empty string for any other key.</returns>
+        public static string getTypeName(string key)
+        {
+            switch (key)
+            {
+                case "0": return "Single";
+                case "1": return "Multiple";
+                case "2": return "Judge";
+                case "3": return "Complete";
+                default: return "";
+            }
+        }
+    }
+}

+ 137 - 0
HTEXLib/DOCX/Convert/DocxConverter.cs

@@ -0,0 +1,137 @@
+using DocumentFormat.OpenXml.Packaging;
+using HTEXLib.COMM.Helpers;
+using HTEXLib.Helpers.ShapeHelpers;
+using OpenXmlPowerTools;
+using System;
+using System.Collections.Generic;
+using System.Drawing.Imaging;
+using System.IO;
+using System.Linq;
+using System.Text;
+using System.Xml.Linq;
+
+namespace HTEXLib.DOCX.Convert
+{
+    /// <summary>
+    /// Converts Word (.docx) documents to an HTML string with images embedded as base64 data URIs.
+    /// </summary>
+    public static class DocxConverter
+    {
+        /// <summary>
+        /// Converts a .docx stream to HTML.
+        /// </summary>
+        /// <param name="fileStream">Readable stream positioned at the start of the .docx content.</param>
+        /// <param name="folder">Folder name appended to today's date (<c>yyyyMMdd</c>) to build the returned blob path.</param>
+        /// <returns>
+        /// An anonymous object with <c>htmlString</c> (the generated markup followed by a MathJax script tag)
+        /// and <c>blobPath</c> (<c>yyyyMMdd/{folder}/index.html</c>). Nothing is written to storage here.
+        /// </returns>
+        public static dynamic ConvertDocxToHtml(Stream fileStream, string folder)
+        {
+            string FolderName = DateTime.Now.ToString("yyyyMMdd") + "/" + folder;
+            using (MemoryStream memoryStream = new MemoryStream())
+            {
+                // CopyTo reads until end of stream; a single Read call may legally return fewer bytes than requested.
+                fileStream.CopyTo(memoryStream);
+                using (WordprocessingDocument doc = WordprocessingDocument.Open(memoryStream, true))
+                {
+                    WmlToHtmlConverterSettings settings = new WmlToHtmlConverterSettings()
+                    {
+                        PageTitle = "",
+                        AdditionalCss = "body { margin: 1cm auto; max-width: 20cm; padding: 0; }",
+
+                        FabricateCssClasses = true,
+                        CssClassPrefix = "pt-",
+                        RestrictToSupportedLanguages = false,
+                        RestrictToSupportedNumberingFormats = false,
+                        ImageHandler = imageInfo =>
+                        {
+                            string[] typeParts = imageInfo.ContentType.Split('/');
+                            if (typeParts.Length < 2) return null;
+
+                            // Invariant casing: content types are ASCII tokens and must not depend on the current culture.
+                            string subtype = typeParts[1].ToLowerInvariant();
+                            ImageFormat imageFormat = ResolveOutputFormat(subtype);
+                            if (imageFormat == null) return null;
+
+                            string base64;
+                            string mimeType;
+                            try
+                            {
+                                if (subtype == "x-wmf")
+                                {
+                                    // NOTE(review): Mathxml is emitted as-is and labelled image/svg+xml; presumably the
+                                    // converter fills it with SVG markup for WMF formulas - confirm against its producer.
+                                    var buffer = Encoding.Default.GetBytes(imageInfo.Mathxml);
+                                    base64 = System.Convert.ToBase64String(buffer);
+                                    mimeType = "image/svg+xml";
+                                }
+                                else
+                                {
+                                    // Look up the MIME type of the format actually written below (TIFF is re-encoded
+                                    // as GIF), not the bitmap's original raw format.
+                                    ImageCodecInfo codec = ImageCodecInfo.GetImageDecoders()
+                                                                .FirstOrDefault(c => c.FormatID == imageFormat.Guid);
+                                    if (codec == null) return null;
+                                    mimeType = codec.MimeType;
+                                    using (MemoryStream ms = new MemoryStream())
+                                    {
+                                        imageInfo.Bitmap.Save(ms, imageFormat);
+                                        base64 = System.Convert.ToBase64String(ms.ToArray());
+                                    }
+                                }
+                            }
+                            catch (System.Runtime.InteropServices.ExternalException)
+                            { return null; }
+
+                            // Images are inlined as data URIs; no blob upload is performed in this method.
+                            string url = string.Format("data:{0};base64,{1}", mimeType, base64);
+
+                            XElement img = new XElement(Xhtml.img,
+                                 new XAttribute(NoNamespace.src, url),
+                                    imageInfo.ImgStyleAttribute,
+                                    imageInfo.AltText != null ?
+                                        new XAttribute(NoNamespace.alt, imageInfo.AltText) : null);
+                            return img;
+                        }
+                    };
+                    XElement htmlElement = WmlToHtmlConverter.ConvertToHtml(doc, settings);
+                    var htmls = new XDocument(new XDocumentType("html", null, null, null), htmlElement);
+                    var htmlString = htmls.ToString(SaveOptions.DisableFormatting);
+                    // Pull in the MathJax plugin. The tag is appended after the serialized document.
+                    // NOTE(review): cdn.mathjax.org was retired in 2017 - verify this URL still resolves.
+                    htmlString = htmlString + "<script type=\"text/javascript\" src=\"http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML\"></script>";
+                    return new { htmlString, blobPath = FolderName + "/" + "index.html" };
+                }
+            }
+        }
+
+        /// <summary>
+        /// Chooses the image format used to handle an image with the given (lower-case) MIME subtype.
+        /// </summary>
+        /// <param name="subtype">The part of the content type after the slash, e.g. <c>png</c>.</param>
+        /// <returns>The format to use (TIFF maps to GIF), or null when the subtype is not supported.</returns>
+        private static ImageFormat ResolveOutputFormat(string subtype)
+        {
+            switch (subtype)
+            {
+                case "png": return ImageFormat.Png;
+                case "gif": return ImageFormat.Gif;
+                case "bmp": return ImageFormat.Bmp;
+                case "jpeg": return ImageFormat.Jpeg;
+                case "tiff": return ImageFormat.Gif;
+                case "x-wmf": return ImageFormat.Wmf;
+                default: return null;
+            }
+        }
+    }
+}

ファイルの差分が大きいため隠しています
+ 560 - 0
HTEXLib/DOCX/Convert/HtmlToItemTranslator.cs


+ 87 - 0
HTEXLib/DOCX/Models/ItemInfo.cs

@@ -0,0 +1,87 @@
+using System;
+using System.Collections.Generic;
+using System.ComponentModel.DataAnnotations;
+using System.Text;
+
+namespace HTEXLib.DOCX.Models
+{
+    /// <summary>
+    /// Data model describing one question item. An item can hold nested
+    /// child items (see <see cref="children"/>). All list-typed properties
+    /// start out as empty lists rather than null.
+    /// </summary>
+    public class ItemInfo
+    {
+        public string code { get; set; }
+        //[PartitionKey(name = "ExamItem")]
+        public string pk { get; set; }
+        public int? ttl { get; set; }
+        public string shaCode { get; set; }
+
+        /// <summary>
+        /// Question stem. Marked as required for data-annotation validation.
+        /// </summary>
+        [Required(ErrorMessage = "{0} 必须填写")]
+        public string question { get; set; }
+
+        /// <summary>
+        /// Options, used by single-choice, multiple-choice and true/false items.
+        /// </summary>
+        public List<CodeValue> option { get; set; } = new List<CodeValue>();
+
+        public List<string> answer { get; set; } = new List<string>();
+
+        /// <summary>
+        /// Explanation (analysis) of the answer.
+        /// </summary>
+        public string explain { get; set; }
+
+        /// <summary>
+        /// Question type: Single (single choice), Multiple (multiple choice),
+        /// Judge (true/false), Complete (fill in the blank),
+        /// Subjective (open question), Compose (compound question).
+        /// </summary>
+        public string type { get; set; }
+
+        /// <summary>
+        /// shaCode of the parent item.
+        /// </summary>
+        public string pShaCode { get; set; }
+
+        /// <summary>
+        /// Knowledge points attached to the item.
+        /// </summary>
+        public List<string> points { get; set; } = new List<string>();
+
+        /// <summary>
+        /// Cognitive level (application, synthesis, comprehension, evaluation, knowledge).
+        /// NOTE(review): the integer-to-level mapping is not visible here — confirm with the caller.
+        /// </summary>
+        public int? field { get; set; }
+
+        public List<ItemInfo> children { get; set; } = new List<ItemInfo>();
+
+        /// <summary>
+        /// Points allotted to the item.
+        /// </summary>
+        public double score { get; set; }
+
+        /// <summary>
+        /// Question number.
+        /// </summary>
+        public int order { get; set; }
+
+        // Remediation
+        //public string repair { get; set; }
+
+        /// <summary>
+        /// Remediation resources.
+        /// </summary>
+        public List<Repair> repairResource { get; set; } = new List<Repair>();
+
+        public string subjectId { get; set; }
+        public string periodId { get; set; }
+        public List<string> gradeIds { get; set; } = new List<string>();
+
+        /// <summary>
+        /// Difficulty.
+        /// </summary>
+        public int level { get; set; }
+
+        public string id { get; set; }
+
+        /// <summary>
+        /// Whether this item is a sub-question of a compound question; false unless set.
+        /// </summary>
+        public bool lite { get; set; }
+
+        /// <summary>
+        /// Creation time.
+        /// NOTE(review): unit/epoch of this long value is not shown here — confirm with the producer.
+        /// </summary>
+        public long createTime { get; set; }
+
+        /// <summary>
+        /// Creator.
+        /// </summary>
+        public string creator { get; set; }
+
+        /// <summary>
+        /// Number of times the item has been used.
+        /// </summary>
+        public int useCount { get; set; }
+
+        public string examCode { get; set; }
+        public string blob { get; set; }
+        public string scope { get; set; }
+        public string pId { get; set; }
+    }
+}

+ 24 - 0
HTEXLib/DOCX/Models/LangConfig.cs

@@ -0,0 +1,24 @@
+using System;
+using System.Collections.Generic;
+using System.Text;
+
+namespace HTEXLib.DOCX.Models
+{
+    /// <summary>
+    /// Configuration entry that groups three string settings (Lang, Name, Alias)
+    /// with an <see cref="ItemConfig"/>.
+    /// NOTE(review): presumably one entry per supported document language — confirm
+    /// against the code that loads this configuration.
+    /// </summary>
+    public class LangConfig
+    {
+        // NOTE(review): looks like a language identifier; exact format not shown here — confirm.
+        public string Lang { get; set; }
+        public string Name { get; set; }
+        public string Alias { get; set; }
+        // Item-related settings that belong to this entry.
+        public ItemConfig Item { get; set; }
+    }
+    /// <summary>
+    /// Item-related settings: a string-to-string <see cref="Type"/> map plus several
+    /// string values (AnswerTag, AnalysisTag, EndedTag, Options, Start, End).
+    /// NOTE(review): presumably these are the marker texts used to split a converted
+    /// document into question items — confirm against the consumer of this class.
+    /// None of the properties is initialized here, so each is null until assigned.
+    /// </summary>
+    public class ItemConfig
+    {
+        // NOTE(review): presumably relates to the item types (Single, Multiple, ...); key/value direction not shown — confirm.
+        public Dictionary<string, string> Type { get; set; }
+        public string AnswerTag { get; set; }
+        public string AnalysisTag { get; set; }
+        public string EndedTag { get; set; }
+        public string Options { get; set; }
+        public string Start { get; set; }
+        public string End { get; set; }
+    }
+}

+ 10 - 5
HTEXLib/HTEXLib.csproj

@@ -1,15 +1,16 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
     <TargetFramework>netstandard2.0</TargetFramework>
-    <Description>PPT解析</Description>
-    <PackageReleaseNotes>PPT解析</PackageReleaseNotes>
+    <Description>PPTX&amp;DOCX解析</Description>
+    <PackageReleaseNotes>PPTX&amp;DOCX解析</PackageReleaseNotes>
     <GeneratePackageOnBuild>true</GeneratePackageOnBuild>
-    <Version>1.1.1</Version>
-    <AssemblyVersion>1.1.1.1</AssemblyVersion>
-    <FileVersion>1.1.1.1</FileVersion>
+    <Version>2.0.0</Version>
+    <AssemblyVersion>2.0.0.0</AssemblyVersion>
+    <FileVersion>2.0.0.0</FileVersion>
   </PropertyGroup>
   <ItemGroup>
     <PackageReference Include="DocumentFormat.OpenXml" Version="2.11.3" />
+    <PackageReference Include="HtmlAgilityPack" Version="1.11.28" />
     <PackageReference Include="Microsoft.CSharp" Version="4.7.0" />
     <PackageReference Include="Microsoft.Extensions.DependencyInjection.Abstractions" Version="3.1.8" />
     <PackageReference Include="Newtonsoft.Json" Version="12.0.3" />
@@ -17,4 +18,8 @@
     <PackageReference Include="System.Drawing.Common" Version="4.7.0" />
     <PackageReference Include="System.Text.Json" Version="4.7.2" />
   </ItemGroup>
+  <ItemGroup>
+    <Folder Include="PDF\" />
+    <Folder Include="HTEX\" />
+  </ItemGroup>
 </Project>