// NOTE(review): this file appears to have been flattened and passed through a markup stripper:
//  - statements that were on separate lines now share eight long lines, so every "//" comment
//    below also comments out the rest of its physical line;
//  - generic type arguments are missing (e.g. "Dictionary dict", "Deserialize>(text)");
//  - some string literals look emptied or truncated (see Translate and BlankTag).
// Restore line breaks, type arguments and literals from the original file before compiling.
using HTEXLib.COMM.Helpers; using HTEXLib.DOCX.Models; using HTEXLib.Helpers.ShapeHelpers; using HtmlAgilityPack; using System; using System.Collections.Generic; using System.IO; using System.Linq; using System.Text; using System.Text.Json; using System.Text.RegularExpressions; using System.Threading.Tasks; namespace HTEXLib.Translator { /* Parses tagged HTML into ItemInfo objects; Translate is the entry point. */ public class HTML2ITEMV2Translator { /* Canonical tag kinds: Translate maps the localized tag names from langConfig onto these values in dict. */ public const string Answer = "Answer"; public const string Analysis = "Analysis"; public const string Ended = "Ended"; public const string Point = "Point"; public const string Score = "Score"; public const string Summary = "Summary"; public const string Filed = "Filed"; public const string Level = "Level"; public LangConfig langConfig { get; set; } public HtmlDocument doc { get; set; } = new HtmlDocument(); public string[] optionsKeys { get; set; } public Dictionary dict { get; set; } public string[] Fileds { get; set; } public List? _langConfigs { get; set; } = new List(); /* Constructor: reads configPath + "/LangConfig.json" as UTF-8, appends every line to one string with no separator, and deserializes that text into _langConfigs. NOTE(review): sr.Close() is not inside a using/finally, so the stream stays open if ReadLine throws. */ public HTML2ITEMV2Translator(string configPath) { FileStream fs = new FileStream(configPath+ "/LangConfig.json", System.IO.FileMode.Open, FileAccess.Read, FileShare.ReadWrite); StreamReader sr = new StreamReader(fs, System.Text.Encoding.UTF8); string line; StringBuilder builder = new StringBuilder(); while ((line = sr.ReadLine()) != null) { builder.Append(line.ToString()); } sr.Close(); string text = builder.ToString(); _langConfigs = JsonSerializer.Deserialize>(text); } /// Normalizes whitespace inside tags, including question-type tags. /// For the answer, analysis, ended, point, score, level and field tags, builds Start + name + End, joins its characters with \s* into a regex, and replaces every match with the compact tag. /// For each question-type value, matches Start, digits, the type characters and End with optional whitespace between them, and removes all whitespace from each match. /// html: the HTML text to normalize; returns the rewritten HTML. /// NOTE(review): tag characters are joined into the regex without Regex.Escape - confirm Start/End and tag names contain no regex metacharacters. public string BlankProcess(string html) { string ans = langConfig.Item.Start + langConfig.Item.Answer + langConfig.Item.End; string als = langConfig.Item.Start + langConfig.Item.Analysis + langConfig.Item.End; string end = langConfig.Item.Start + langConfig.Item.Ended + langConfig.Item.End; string pot = langConfig.Item.Start + langConfig.Item.Point + langConfig.Item.End; string scr = langConfig.Item.Start + langConfig.Item.Score + langConfig.Item.End; string lvl = langConfig.Item.Start + langConfig.Item.Level + 
langConfig.Item.End; foreach (var filed in Fileds) { var fld= langConfig.Item.Start + filed + langConfig.Item.End; string[] fldarry = fld.Select(s => s.ToString()).ToArray(); string fldReg = string.Join("\\s*", fldarry); html = Regex.Replace(html, fldReg, fld); } string[] ansarry = ans.Select(s => s.ToString()).ToArray(); string[] alsarry = als.Select(s => s.ToString()).ToArray(); string[] endarry = end.Select(s => s.ToString()).ToArray(); string[] potarry = pot.Select(s => s.ToString()).ToArray(); string[] scrarry = scr.Select(s => s.ToString()).ToArray(); string[] lvlarry = lvl.Select(s => s.ToString()).ToArray(); string ansReg = string.Join("\\s*", ansarry); string alsReg = string.Join("\\s*", alsarry); string endReg = string.Join("\\s*", endarry); string potReg = string.Join("\\s*", potarry); string scrReg = string.Join("\\s*", scrarry); string lvlReg = string.Join("\\s*", lvlarry); html = Regex.Replace(html, ansReg, ans); html = Regex.Replace(html, alsReg, als); html = Regex.Replace(html, endReg, end); html = Regex.Replace(html, potReg, pot); html = Regex.Replace(html, scrReg, scr); html = Regex.Replace(html, lvlReg, lvl); string blankReg = "\\s*"; foreach (string value in langConfig.Item.Type.Values) { //string tag = langConfig.Item.Start + "\\s*" + "\\d+\\s*" + string.Join("\\s*", value.Select(s => s.ToString()).ToArray()) + "\\s*" + langConfig.Item.End; string tag = $"{langConfig.Item.Start}\\s*\\d+\\s*{string.Join("\\s*", value.Select(s => s.ToString()).ToArray())}\\s*{langConfig.Item.End}"; var m = Regex.Match(html, tag); while (m.Success) { string blankStr = Regex.Replace(m.Value, blankReg, ""); html = html.Replace(m.Value, blankStr); m = m.NextMatch(); } } return html; } /* Translate: cleans up the HTML and loads it into doc; selects langConfig (the first {xxx} marker in the document text whose value equals a configured Lang, ignoring case; otherwise the lang argument when it is a JSON string); builds Fileds, dict and optionsKeys; normalizes tags with BlankProcess; removes the {Lang} marker; splits the HTML on {...} markers; gathers the segments between each "compose" tag and its end tag and converts them with ConvertTest(..., null); finally converts the whole array with ConvertTest and returns that list. Throws a bare Exception when no language configuration is found. NOTE(review): several literals below look emptied or truncated (mathjax = "", pattern = "]{0,})>", Replace("", "")); string.Replace rejects an empty search string and the pattern has an unmatched ")" - confirm against the original file. */ public List Translate(string html, JsonElement lang ) { string mathjax = ""; html = html.Replace(mathjax, ""); //Strip class attributes and span tags. string classpattern = "class=\"([^\"]*)\""; html = Regex.Replace(html, classpattern, ""); string pattern = "]{0,})>"; html = Regex.Replace(html, pattern, 
""); html = html.Replace(" close=\"\" separators=\" | \">", ""); html = html.Replace("\t", " ").Replace("", "").Replace("", "").Replace("dir=\"ltr\"", ""); doc.LoadHtml(html); //Initialize the language configuration. langConfig = null; var configArray = Regex.Split(doc.DocumentNode.InnerText, "{([\\S]*?)}"); for (int index = 1; index < configArray.Length; index++) { if (index % 2 == 1) { langConfig = _langConfigs.Where(x => x.Lang.Equals(configArray[index],StringComparison.CurrentCultureIgnoreCase) ).FirstOrDefault(); if (langConfig != null) { break; } } } if (langConfig == null && lang.ValueKind== JsonValueKind.String) { langConfig = _langConfigs.Where(x => x.Lang == lang.GetString()).FirstOrDefault(); } if (langConfig == null) { throw new Exception(); } //Initialize the tag configuration. Fileds = langConfig.Item.Filed.Split('|'); dict = new Dictionary { { langConfig.Item.Answer, new string[] { Answer } }, { langConfig.Item.Analysis, new string[] { Analysis } }, { langConfig.Item.Ended, new string[] { Ended } }, { langConfig.Item.Point, new string[] { Point } }, { langConfig.Item.Score, new string[] { Score } }, { langConfig.Item.Level, new string[] { Level } } }; foreach (string key in langConfig.Item.Type.Keys) { dict.Add(langConfig.Item.Type[key], new string[] { Summary, key }); } foreach (var filed in Fileds) { dict.Add(filed, new string[] { Filed, $"{Array.IndexOf(Fileds, filed)}" }); } optionsKeys = langConfig.Item.Options.Select(s => s.ToString()).ToArray(); //Normalize the whitespace characters contained in tags. html = BlankProcess(html); html = Regex.Replace(html, "{" + langConfig.Lang + "}","", RegexOptions.IgnoreCase); var array = Regex.Split(html, "{([\\S]*?)}"); List>> composeKeys = new List>>(); //List //Handle compose (composite) questions. for (int index = 1; index < array.Length; index++) { var tagValue = BlankTag(array[index]); tagValue = Regex.Replace(tagValue, @"\d", ""); if (dict.TryGetValue(tagValue, out string[] keyInfo)) { if (keyInfo[0] == Summary && keyInfo[1].Equals("compose")) { var curr = index; List comsArray = new List() ; for (int composeIndex = index + 1; 
composeIndex < array.Length; composeIndex++) { var conIndex = BlankTag(array[composeIndex]); tagValue = Regex.Replace(tagValue, @"\d", ""); if (conIndex.Equals(langConfig.Item.Ended) || conIndex.Equals(tagValue + langConfig.Item.Ended)) { comsArray.AddRange(array.ToList().GetRange(index+1, composeIndex - index-1)); index = composeIndex + 1; break; } } //int[0]: position of the compose question's start tag, int[1]: position of its end tag, int[2]: position of the start tag of its first sub-question. KeyValuePair> coms = new KeyValuePair>(new int[] { curr, index }, comsArray); composeKeys.Add(coms); } } } List>> composeList = new List>>(); foreach(var conskey in composeKeys) { List consInner = ConvertTest(conskey.Value.ToArray(), null); int stIndex = conskey.Key[0]+1; if (consInner.IsNotEmpty()) { stIndex = consInner[0].order<=0? conskey.Key[0]+consInner[0].order:stIndex; } KeyValuePair> innerComposeItem = new KeyValuePair>(new int[] { conskey.Key[0], conskey.Key[1], stIndex }, consInner); composeList.Add(innerComposeItem); } List tests= ConvertTest(array, composeList); return tests; } /* BlankTag: loads tagHtml into the shared doc property (replacing whatever was loaded before), takes the inner text, and removes braces and whitespace from it. NOTE(review): the last three Replace literals render as plain spaces here; they may originally have been other space characters or entities - confirm. */ private string BlankTag(string tagHtml) { //Strip the HTML from the tag. doc.LoadHtml(tagHtml); var tagValue = doc.DocumentNode.InnerText.Replace("{", "").Replace("}", "") .Replace("\n", "").Replace(" ", "").Replace("\t", "").Replace("\r", "") .Replace(" ", "").Replace(" ", "").Replace(" ", ""); // tagValue = Regex.Replace(tagValue, @"\d", ""); tagValue = Regex.Replace(tagValue, @"\s", ""); return tagValue; } /* ConvertTest: walks array from index 1. Odd entries are candidate tags (looked up in dict after BlankTag and digit removal); even entries and unrecognized odd entries are appended to content. A Summary tag flushes the open tag through DoOpenTag, stores the current item, and starts a new one; a "compose" item takes its children and question text from composeList and skips ahead to the compose end position. The other tags flush the open tag and then become the open tag; Ended does nothing. After the loop the last item is flushed and added. NOTE(review): when composeList has no entry with Key[0] == index, FirstOrDefault yields a default pair and childItem.Value would be null - confirm this cannot happen. */ public List ConvertTest(string[] array, List>> composeList) { List tests = new List(); //Temporary content buffer; replaced with a new StringBuilder() after each flush. StringBuilder content = new StringBuilder(); //Signals that a new tag was encountered and the content must be re-initialized. DOCX.Models.ItemInfo test=null; string openTag = ""; string openTagVal = ""; bool openFlag = false; for (int index = 1; index < array.Length; index++) { if (index % 2 == 1) { //Strip {}, spaces, newlines, tabs, HTML spaces, digits, etc. from the tag. var tagValue = BlankTag(array[index]); tagValue = Regex.Replace(tagValue, @"\d", ""); if (dict.TryGetValue(tagValue, out string[] keyInfo)) { switch ( keyInfo[0] ) { case Summary: if 
(!string.IsNullOrEmpty(openTag) && openFlag && test != null) { DoOpenTag(openTag, openTagVal, openFlag, content, test); content = new StringBuilder(); } if (test != null) { tests.Add(test); } //The order of the following statements must not be changed. if (keyInfo[1].Equals("compose")) { var id = System.Guid.NewGuid().ToString(); var compose = new ItemInfo { type = keyInfo[1],objective=false,order=index,id=id }; if (composeList.IsNotEmpty()) { var childItem = composeList.Where(x => x.Key[0] == index).FirstOrDefault(); childItem.Value.ForEach(x => x.pid = id); compose.children = childItem.Value; var ques= array.ToList().GetRange(childItem.Key[0]+1, childItem.Key[2]- childItem.Key[0]); compose.question= string.Join("", ques); index = childItem.Key[1] - 1; } tests.Add(compose); openTag = ""; openTagVal = ""; openFlag = false; test = null; } else { var id = System.Guid.NewGuid().ToString(); test = new ItemInfo() { type = keyInfo[1], order = index, id = id }; if (keyInfo[1].Equals("single") || keyInfo[1].Equals("multiple") || keyInfo[1].Equals("judge")) { test.objective = true; } else { test.objective = false; } openTag = Summary; openTagVal = tagValue; openFlag = true; } break; case Answer: //The order of the following statements must not be changed. if (!string.IsNullOrEmpty(openTag) && openFlag) { DoOpenTag(openTag, openTagVal, openFlag, content, test); content = new StringBuilder(); } openTag = Answer; openTagVal = tagValue; openFlag = true; break; case Analysis: //The order of the following statements must not be changed. if (!string.IsNullOrEmpty(openTag) && openFlag) { DoOpenTag(openTag, openTagVal, openFlag, content, test); content = new StringBuilder(); } openTag = Analysis; openTagVal = tagValue; openFlag = true; break; case Ended: break; case Point: //The order of the following statements must not be changed. if (!string.IsNullOrEmpty(openTag) && openFlag) { DoOpenTag(openTag, openTagVal, openFlag, content, test); content = new StringBuilder(); } openTag = Point; openTagVal = tagValue; openFlag = true; break; case Score: //The order of the following statements must not be changed. if (!string.IsNullOrEmpty(openTag) && openFlag) { DoOpenTag(openTag, openTagVal, openFlag, content, test); content 
= new StringBuilder(); } openTag = Score; openTagVal = tagValue; openFlag = true; break; case Level: //The order of the following statements must not be changed. if (!string.IsNullOrEmpty(openTag) && openFlag) { DoOpenTag(openTag, openTagVal, openFlag, content, test); content = new StringBuilder(); } openTag = Level; openTagVal = tagValue; openFlag = true; break; case Filed: //The order of the following statements must not be changed. if (!string.IsNullOrEmpty(openTag) && openFlag) { DoOpenTag(openTag, openTagVal, openFlag, content, test); content = new StringBuilder(); } openTag = Filed; openTagVal = tagValue; openFlag = true; break; } } //Not a recognized tag: append it to the content. else { content.Append(array[index]); } } else { //Even positions hold content. content.Append(array[index]); } } if (test != null) { DoOpenTag(openTag, openTagVal, openFlag, content, test); tests.Add(test); } return tests; } /* OptionProcess: pulls the option list out of a question. optsHtml is the first match that starts at optionsKeys[0] followed by a separator. Each option is the text between one key and the next key (or up to the end of the line when the next key is not found), with the next key's marker removed, the first two characters dropped by Substring(2, ...), HtmlHelper.DoUselessTag applied, and the result trimmed. Returns the options plus the question with optsHtml removed, or (null, question) when optsHtml is blank. The loop stops at Length - 1, so the last key only terminates the previous option. NOTE(review): Substring(2, ...) presumes key plus separator are exactly two characters - confirm. */ public (List options,string question) OptionProcess(string question) { List options = new List(); string optsRgex = optionsKeys[0] + "\\s*(\\.|\\.|\\、|\\:|\\:)([\\s\\S]*?).*"; ; string optsHtml = Regex.Match(question, optsRgex).Value; //StringBuilder textImg = new StringBuilder(); for (int i = 0; i < optionsKeys.Length - 1; i++) { string optRgex = optionsKeys[i] + "\\s*(\\.|\\.|\\、|\\:|\\:)([\\s\\S]*?)" + optionsKeys[i + 1] + "\\s*(\\.|\\.|\\、|\\:|\\:)"; string optHtml = Regex.Match(optsHtml, optRgex).Value; if (string.IsNullOrWhiteSpace(optHtml)) { optRgex = optionsKeys[i] + "\\s*(\\.|\\.|\\、|\\:|\\:).*"; optHtml = Regex.Match(optsHtml, optRgex).Value; } if (!string.IsNullOrEmpty(optHtml)) { optHtml = Regex.Replace(optHtml, optionsKeys[i + 1] + "\\s*(\\.|\\.|\\、|\\:|\\:)", ""); optHtml = optHtml.Substring(2, optHtml.Length - 2); optHtml = HtmlHelper.DoUselessTag(optHtml); optHtml = optHtml.TrimStart().TrimEnd(); //textImg.Append(HtmlHelper.DoTextImg(optHtml)); options.Add(new CodeValue { code = optionsKeys[i], value = optHtml }); } } if (!string.IsNullOrWhiteSpace(optsHtml)) { return (options, question.Replace(optsHtml, "")); } else { return (null, question); } } /* DoOpenTag: stores the accumulated content on test according to openTag and does nothing when test is null. Summary -> question (plus option via OptionProcess for single/multiple/judge); Answer -> answer (for single/multiple/judge each distinct character of the stripped content is one answer, and for judge the first answer is mapped through langConfig.Item.Judge to code A or B); Analysis -> explain; Point -> knowledge; Score -> score; Level -> level; Filed -> field = Array.IndexOf(Fileds, openTagVal) + 1; Ended does nothing. openFlag is not read here. NOTE(review): answers are stored one character at a time, so the judge comparison can only match single-character labels - confirm. */ public void DoOpenTag (string 
openTag,string openTagVal, bool openFlag, StringBuilder content , DOCX.Models.ItemInfo test) { if (test != null) { switch (openTag) { case Summary: if (test.type.Equals("single") || test.type.Equals("multiple")|| test.type.Equals("judge")) { (List options, string question) = OptionProcess(content.ToString()); test.option = options; test.question = HtmlHelper.DoUselessTag(question) ; } else { test.question = HtmlHelper.DoUselessTag(content.ToString()); } break; case Answer: if (test.type.Equals("single") || test.type.Equals("multiple")|| test.type.Equals("judge")) { HashSet ans = new HashSet(); var anstr = BlankTag(content.ToString()); anstr.Select(s => s.ToString()).ToList().ForEach(x => { ans.Add(x); }); test.answer = ans.ToList(); if (test.type.Equals("judge")) { string[] Judge = langConfig.Item.Judge.Split('|'); List option = new List() { new CodeValue { code = "A", value = Judge[0] }, new CodeValue { code = "B", value = Judge[1] } }; if (test.answer != null && test.answer.Count > 0) { int index = 0; foreach (var j in Judge) { if (String.Equals(test.answer[0], j, StringComparison.CurrentCultureIgnoreCase)) { test.answer[0] = option[index].code; test.option = option; break; } index += 1; } } } } else { test.answer = new List() { HtmlHelper.DoUselessTag(content.ToString()) }; } break; case Analysis: test.explain = HtmlHelper.DoUselessTag(content.ToString()); break; case Ended: break; case Point: string Points =BlankTag(content.ToString()); if (!string.IsNullOrWhiteSpace(Points)) { string[] ps = Regex.Split(Points, "\\.|\\.|\\、|\\:|\\:|\\,|\\,|\\;|\\;"); if (ps != null && ps.Length > 0) { test.knowledge = ps.Distinct().ToList(); } } break; case Score: //Single or multiple choice, judge answer: strip the HTML tags. string Scores = BlankTag(content.ToString()); //Regex-match a number: integer with optional decimal part. var reg = "^[0-9]+(\\.?[0-9]+)?"; Match m1t = Regex.Match(Scores, reg); double.TryParse(m1t.Value, out double sc); test.score = sc; break; case Level: //Single or multiple choice, judge answer: strip the HTML tags. string Levels = BlankTag(content.ToString()); //Regex-match a number: integer with optional decimal part.
var lelreg = "^[0-9]+(\\.?[0-9]+)?"; Match lelm1t = Regex.Match(Levels, lelreg); int.TryParse(lelm1t.Value, out int lvl); test.level = lvl; break; case Filed: test.field = Array.IndexOf(Fileds, openTagVal) + 1; break; } } } } }