123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479 |
- using HTEXLib.COMM.Helpers;
- using HTEXLib.DOCX.Models;
- using HTEXLib.Helpers.ShapeHelpers;
- using HtmlAgilityPack;
- using System;
- using System.Collections.Generic;
- using System.IO;
- using System.Linq;
- using System.Text;
- using System.Text.Json;
- using System.Text.RegularExpressions;
- using System.Threading.Tasks;
- namespace HTEXLib.Translator
- {
- public class HTML2ITEMV2Translator
- {
- public const string Answer = "Answer"; // Kind name stored in dict for the configured answer tag.
- public const string Analysis = "Analysis"; // Kind name for the configured analysis (explanation) tag.
- public const string Ended = "Ended"; // Kind name for the configured end tag; Translate uses that tag to close a "compose" block.
- public const string Point = "Point"; // Kind name for the configured knowledge-point tag.
- public const string Score = "Score"; // Kind name for the configured score tag.
- public const string Summary = "Summary"; // Kind name for every question-type tag (an entry of langConfig.Item.Type); it starts a new item.
- public const string Filed = "Filed"; // Kind name for the field tags listed in langConfig.Item.Filed.
- public const string Level = "Level"; // Kind name for the configured level tag.
- public LangConfig langConfig { get; set; } // Language configuration selected by Translate for the current document.
- public HtmlDocument doc { get; set; } = new HtmlDocument(); // Shared parser instance; reloaded by Translate and by every BlankTag call.
- public string[] optionsKeys { get; set; } // One entry per character of langConfig.Item.Options; set in Translate.
- public Dictionary<string, string[]> dict { get; set; } // Maps configured tag text to { kind } or { kind, detail }; rebuilt in Translate.
- public string[] Fileds { get; set; } // langConfig.Item.Filed split on '|'; set in Translate.
- public List<LangConfig>? _langConfigs { get; set; } = new List<LangConfig>(); // All configurations deserialized from LangConfig.json by the constructor.
/// <summary>
/// Creates a translator and loads every language configuration from the
/// <c>LangConfig.json</c> file found under <paramref name="configPath"/>.
/// </summary>
/// <param name="configPath">Directory that contains <c>LangConfig.json</c>.</param>
public HTML2ITEMV2Translator(string configPath)
{
    string text;
    // using-statements guarantee the file handle is released even when reading throws;
    // the previous code only closed the reader on the success path.
    using (FileStream fs = new FileStream(configPath + "/LangConfig.json", FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
    using (StreamReader sr = new StreamReader(fs, Encoding.UTF8))
    {
        text = sr.ReadToEnd();
    }

    // A JSON payload of "null" deserializes to null; fall back to an empty list so
    // later lookups fail with "no matching language" instead of a NullReferenceException.
    _langConfigs = JsonSerializer.Deserialize<List<LangConfig>>(text) ?? new List<LangConfig>();
}
/// <summary>
/// Removes whitespace that has crept into tags and question-type tags.
/// A tag such as <c>{ A n s w e r }</c> is rewritten to its compact configured form,
/// and a question-type tag such as <c>{ 1 Single }</c> has all of its whitespace removed.
/// </summary>
/// <param name="html">HTML text that may contain tags with embedded whitespace.</param>
/// <returns>The HTML with every recognized tag written without whitespace.</returns>
/// <exception cref="InvalidOperationException"><see cref="langConfig"/> has not been set.</exception>
public string BlankProcess(string html)
{
    if (langConfig == null)
    {
        throw new InvalidOperationException("langConfig must be set before calling BlankProcess.");
    }

    string start = langConfig.Item.Start ?? string.Empty;
    string end = langConfig.Item.End ?? string.Empty;

    // Builds a pattern that tolerates whitespace between every character of the text.
    // Each character is escaped so configured text containing regex metacharacters
    // is matched literally.
    string Spaced(string text)
    {
        return string.Join("\\s*", text.Select(c => Regex.Escape(c.ToString())));
    }

    // Rewrites every whitespace-separated spelling of the tag to the compact tag.
    // The evaluator overload keeps '$' in the tag from being read as a substitution.
    string Compact(string input, string tag)
    {
        return Regex.Replace(input, Spaced(tag), _ => tag);
    }

    // Field tags first, then the fixed tags, in the same order as before.
    foreach (string filed in Fileds ?? Array.Empty<string>())
    {
        html = Compact(html, start + filed + end);
    }

    string[] fixedNames =
    {
        langConfig.Item.Answer,
        langConfig.Item.Analysis,
        langConfig.Item.Ended,
        langConfig.Item.Point,
        langConfig.Item.Score,
        langConfig.Item.Level
    };
    foreach (string name in fixedNames)
    {
        html = Compact(html, start + name + end);
    }

    // Question-type tags carry a number in front of the type name: {12 Single}.
    foreach (string value in langConfig.Item.Type.Values)
    {
        string tag = $"{Regex.Escape(start)}\\s*\\d+\\s*{Spaced(value)}\\s*{Regex.Escape(end)}";
        html = Regex.Replace(html, tag, m => Regex.Replace(m.Value, "\\s+", ""));
    }

    return html;
}
/// <summary>
/// Converts tagged HTML into a list of items.
/// The language is taken from the first <c>{lang}</c> marker in the document that matches a
/// loaded configuration; when none matches, <paramref name="lang"/> is used as a fallback.
/// </summary>
/// <param name="html">HTML whose text contains tags of the form <c>{...}</c>.</param>
/// <param name="lang">Fallback language; only consulted when it is a JSON string.</param>
/// <returns>The parsed items; "compose" items carry their sub-items in <c>children</c>.</returns>
/// <exception cref="InvalidOperationException">No loaded language configuration matches.</exception>
public List<DOCX.Models.ItemInfo> Translate(string html, JsonElement lang)
{
    // Strip the MathJax loader, class attributes, span tags and a few known leftovers.
    string mathjax = "<script type=\"text/javascript\" src=\"http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML\"></script>";
    html = html.Replace(mathjax, "");
    html = Regex.Replace(html, "class=\"([^\"]*)\"", "");
    html = Regex.Replace(html, "<span([^>]{0,})>", "");
    html = html.Replace(" close=\"\" separators=\" | \">", "");
    html = html.Replace("\t", " ").Replace("<span>", "").Replace("</span>", "").Replace("dir=\"ltr\"", "");
    doc.LoadHtml(html);

    // Pick the language configuration: the first {xxx} marker that names a known language wins.
    List<LangConfig> configs = _langConfigs ?? new List<LangConfig>();
    langConfig = null;
    string[] markers = Regex.Split(doc.DocumentNode.InnerText, "{([\\S]*?)}");
    for (int i = 1; i < markers.Length && langConfig == null; i += 2)
    {
        string candidate = markers[i];
        langConfig = configs.FirstOrDefault(x => string.Equals(x.Lang, candidate, StringComparison.CurrentCultureIgnoreCase));
    }

    if (langConfig == null && lang.ValueKind == JsonValueKind.String)
    {
        string requested = lang.GetString();
        // Exact match first (previous behavior), then the same case-insensitive rule
        // that is applied to in-document markers.
        langConfig = configs.FirstOrDefault(x => x.Lang == requested)
            ?? configs.FirstOrDefault(x => string.Equals(x.Lang, requested, StringComparison.CurrentCultureIgnoreCase));
    }

    if (langConfig == null)
    {
        throw new InvalidOperationException("No language configuration matches the document markers or the supplied lang value.");
    }

    // Build the tag dictionary: configured tag text -> { kind } or { kind, detail }.
    Fileds = langConfig.Item.Filed.Split('|');
    dict = new Dictionary<string, string[]>
    {
        { langConfig.Item.Answer, new string[] { Answer } },
        { langConfig.Item.Analysis, new string[] { Analysis } },
        { langConfig.Item.Ended, new string[] { Ended } },
        { langConfig.Item.Point, new string[] { Point } },
        { langConfig.Item.Score, new string[] { Score } },
        { langConfig.Item.Level, new string[] { Level } }
    };
    foreach (string key in langConfig.Item.Type.Keys)
    {
        dict.Add(langConfig.Item.Type[key], new string[] { Summary, key });
    }
    for (int i = 0; i < Fileds.Length; i++)
    {
        dict.Add(Fileds[i], new string[] { Filed, $"{i}" });
    }
    optionsKeys = langConfig.Item.Options.Select(s => s.ToString()).ToArray();

    // Normalize whitespace inside tags, then drop the language marker itself.
    html = BlankProcess(html);
    html = Regex.Replace(html, Regex.Escape("{" + langConfig.Lang + "}"), "", RegexOptions.IgnoreCase);

    // Splitting on one capture group yields content at even indices and tag text at odd indices.
    string[] array = Regex.Split(html, "{([\\S]*?)}");

    // Locate compose blocks. Key = { start tag index, index just past the end tag };
    // Value = the segments between the two tags.
    List<KeyValuePair<int[], List<string>>> composeKeys = new List<KeyValuePair<int[], List<string>>>();
    for (int index = 1; index < array.Length; index += 2)
    {
        string tagValue = Regex.Replace(BlankTag(array[index]), @"\d", "");
        if (!dict.TryGetValue(tagValue, out string[] keyInfo) || keyInfo[0] != Summary || !keyInfo[1].Equals("compose"))
        {
            continue;
        }

        int startTag = index;
        // When no end tag follows, the block runs to the end of the document. Recording
        // end == start (the previous behavior) made ConvertTest jump backwards forever.
        int endTag = array.Length;
        for (int probe = startTag + 2; probe < array.Length; probe += 2)
        {
            string probeValue = BlankTag(array[probe]);
            if (probeValue.Equals(langConfig.Item.Ended) || probeValue.Equals(tagValue + langConfig.Item.Ended))
            {
                endTag = probe;
                break;
            }
        }

        List<string> inner = array.Skip(startTag + 1).Take(endTag - startTag - 1).ToList();
        composeKeys.Add(new KeyValuePair<int[], List<string>>(new int[] { startTag, endTag + 1 }, inner));
        // Resume at the tag after the end tag (the loop adds 2).
        index = endTag;
    }

    // Parse the sub-items of every compose block.
    // Key = { start tag index, index just past the end tag, index of the first sub-item tag }.
    List<KeyValuePair<int[], List<ItemInfo>>> composeList = new List<KeyValuePair<int[], List<ItemInfo>>>();
    foreach (KeyValuePair<int[], List<string>> entry in composeKeys)
    {
        List<DOCX.Models.ItemInfo> children = ConvertTest(entry.Value.ToArray(), null);
        int stIndex = entry.Key[0] + 1;
        if (children.IsNotEmpty())
        {
            stIndex = children[0].order <= 0 ? entry.Key[0] + children[0].order : stIndex;
        }
        composeList.Add(new KeyValuePair<int[], List<ItemInfo>>(new int[] { entry.Key[0], entry.Key[1], stIndex }, children));
    }

    return ConvertTest(array, composeList);
}
- private string BlankTag(string tagHtml) { // Returns the inner text of tagHtml with braces and whitespace removed; note that it reloads the shared `doc`.
- // Strip the HTML markup inside the tag
- doc.LoadHtml(tagHtml);
- var tagValue = doc.DocumentNode.InnerText.Replace("{", "").Replace("}", "")
- .Replace("\n", "").Replace(" ", "").Replace("\t", "").Replace("\r", "")
- .Replace(" ", "").Replace(" ", "").Replace(" ", ""); // NOTE(review): these three literals look identical in this copy; they may originally have been distinct space characters or entities - confirm against the repository file.
- // tagValue = Regex.Replace(tagValue, @"\d", "");
- tagValue = Regex.Replace(tagValue, @"\s", ""); // Remove any whitespace characters that are still present.
- return tagValue;
- }
/// <summary>
/// Walks the segments produced by splitting the document on <c>{...}</c> tags and builds items.
/// Odd indices are read as tag text and even indices as content; index 0 is skipped.
/// </summary>
/// <param name="array">Alternating content / tag segments.</param>
/// <param name="composeList">
/// Pre-parsed compose blocks keyed by { start tag index, index just past the end tag,
/// index of the first sub-item tag }; may be null.
/// </param>
/// <returns>The items in document order.</returns>
public List<DOCX.Models.ItemInfo> ConvertTest(string[] array, List<KeyValuePair<int[], List<ItemInfo>>> composeList)
{
    List<DOCX.Models.ItemInfo> tests = new List<DOCX.Models.ItemInfo>();
    if (array == null)
    {
        return tests;
    }

    // Content collected since the last recognized tag; replaced by a new instance on flush.
    StringBuilder content = new StringBuilder();
    DOCX.Models.ItemInfo test = null;
    string openTag = "";
    string openTagVal = "";
    bool openFlag = false;

    // Hands the collected content to the currently open tag and starts a new buffer.
    void Flush()
    {
        if (!string.IsNullOrEmpty(openTag) && openFlag)
        {
            DoOpenTag(openTag, openTagVal, openFlag, content, test);
            content = new StringBuilder();
        }
    }

    for (int index = 1; index < array.Length; index++)
    {
        if (index % 2 == 0)
        {
            // Even segments are plain content.
            content.Append(array[index]);
            continue;
        }

        // Reduce the tag to bare text: no braces, whitespace, markup or digits.
        string tagValue = Regex.Replace(BlankTag(array[index]), @"\d", "");
        if (!dict.TryGetValue(tagValue, out string[] keyInfo))
        {
            // Not a known tag: treat the brace content as ordinary text.
            content.Append(array[index]);
            continue;
        }

        string kind = keyInfo[0];
        if (kind == Ended)
        {
            continue;
        }

        if (kind != Summary)
        {
            // Answer / Analysis / Point / Score / Level / Filed all behave the same way:
            // close the previous section, then open this one.
            Flush();
            openTag = kind;
            openTagVal = tagValue;
            openFlag = true;
            continue;
        }

        // A question-type tag closes the previous item and starts a new one.
        Flush();
        // Always start the new item with an empty buffer so content that belongs to no item
        // (for example text after a compose end tag) does not leak into its question.
        content = new StringBuilder();
        if (test != null)
        {
            tests.Add(test);
        }

        string id = System.Guid.NewGuid().ToString();
        if (keyInfo[1].Equals("compose"))
        {
            ItemInfo compose = new ItemInfo { type = keyInfo[1], objective = false, order = index, id = id };
            if (composeList.IsNotEmpty())
            {
                int tagIndex = index;
                KeyValuePair<int[], List<ItemInfo>> childItem = composeList.FirstOrDefault(x => x.Key[0] == tagIndex);
                // KeyValuePair is a struct: "not found" is a default value whose Value is null.
                if (childItem.Value != null)
                {
                    childItem.Value.ForEach(x => x.pid = id);
                    compose.children = childItem.Value;
                    compose.question = string.Join("", array.Skip(childItem.Key[0] + 1).Take(childItem.Key[2] - childItem.Key[0]));
                    // Skip the block body, but never move backwards: an end index at or before
                    // the start tag would otherwise repeat this iteration forever.
                    index = Math.Max(index, childItem.Key[1] - 1);
                }
            }
            tests.Add(compose);
            openTag = "";
            openTagVal = "";
            openFlag = false;
            test = null;
        }
        else
        {
            bool objective = keyInfo[1].Equals("single") || keyInfo[1].Equals("multiple") || keyInfo[1].Equals("judge");
            test = new ItemInfo() { type = keyInfo[1], order = index, id = id, objective = objective };
            openTag = Summary;
            openTagVal = tagValue;
            openFlag = true;
        }
    }

    // Close the last item.
    if (test != null)
    {
        DoOpenTag(openTag, openTagVal, openFlag, content, test);
        tests.Add(test);
    }
    return tests;
}
/// <summary>
/// Extracts the answer options (A. / B. / ...) from a question's HTML.
/// </summary>
/// <param name="question">Question HTML that may contain an option list.</param>
/// <returns>
/// The parsed options and the question with the option text removed, or
/// <c>(null, question)</c> when no option list is found.
/// </returns>
public (List<CodeValue> options, string question) OptionProcess(string question)
{
    if (optionsKeys == null || optionsKeys.Length == 0)
    {
        return (null, question);
    }

    // Optional whitespace followed by one delimiter character after an option key.
    const string Delim = "\\s*(\\.|\\.|\\、|\\:|\\:)";

    List<CodeValue> options = new List<CodeValue>();
    // Everything from the first option key to the end of that line.
    string optsHtml = Regex.Match(question, Regex.Escape(optionsKeys[0]) + Delim + "([\\s\\S]*?).*").Value;

    // The loop includes the last configured key; it has no successor, so only the
    // "rest of the line" pattern applies to it. It was previously never parsed.
    for (int i = 0; i < optionsKeys.Length; i++)
    {
        string key = Regex.Escape(optionsKeys[i]);
        Match hit = Match.Empty;
        if (i + 1 < optionsKeys.Length)
        {
            // Text between this key and the next key.
            hit = Regex.Match(optsHtml, key + Delim + "([\\s\\S]*?)" + Regex.Escape(optionsKeys[i + 1]) + Delim);
        }
        if (!hit.Success)
        {
            // No following key: take the rest of the line.
            hit = Regex.Match(optsHtml, key + Delim + "(.*)");
        }
        if (!hit.Success)
        {
            continue;
        }

        // Group 2 is the option body. Taking the group instead of Substring(2) stays
        // correct when whitespace separates the key from its delimiter ("A .text").
        string optHtml = HtmlHelper.DoUselessTag(hit.Groups[2].Value).Trim();
        options.Add(new CodeValue { code = optionsKeys[i], value = optHtml });
    }

    if (!string.IsNullOrWhiteSpace(optsHtml))
    {
        return (options, question.Replace(optsHtml, ""));
    }
    return (null, question);
}
/// <summary>
/// Stores the content collected for the currently open tag on <paramref name="test"/>.
/// Does nothing when <paramref name="test"/> is null.
/// </summary>
/// <param name="openTag">Kind of the open tag (one of the class constants).</param>
/// <param name="openTagVal">Tag text as it appeared in the document, digits and whitespace removed.</param>
/// <param name="openFlag">Not used by this method; kept for existing callers.</param>
/// <param name="content">Content collected since the tag was opened.</param>
/// <param name="test">Item that receives the value.</param>
public void DoOpenTag(string openTag, string openTagVal, bool openFlag, StringBuilder content, DOCX.Models.ItemInfo test)
{
    if (test == null)
    {
        return;
    }

    // Evaluated lazily so tags that do not look at the item type are unaffected by it.
    bool IsObjective()
    {
        return test.type.Equals("single") || test.type.Equals("multiple") || test.type.Equals("judge");
    }

    // Parses with the invariant culture so "2.5" means 2.5 on every machine.
    System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

    switch (openTag)
    {
        case Summary:
            if (IsObjective())
            {
                (List<CodeValue> options, string question) = OptionProcess(content.ToString());
                test.option = options;
                test.question = HtmlHelper.DoUselessTag(question);
            }
            else
            {
                test.question = HtmlHelper.DoUselessTag(content.ToString());
            }
            break;

        case Answer:
            if (!IsObjective())
            {
                test.answer = new List<string>() { HtmlHelper.DoUselessTag(content.ToString()) };
                break;
            }

            // Objective answers: every distinct character is one selected option.
            string anstr = BlankTag(content.ToString());
            test.answer = anstr.Select(c => c.ToString()).Distinct().ToList();

            if (test.type.Equals("judge"))
            {
                string[] judge = langConfig.Item.Judge.Split('|');
                if (judge.Length >= 2 && test.answer.Count > 0)
                {
                    List<CodeValue> option = new List<CodeValue>()
                    {
                        new CodeValue { code = "A", value = judge[0] },
                        new CodeValue { code = "B", value = judge[1] }
                    };

                    // Compare the whole answer first so multi-character judge words match;
                    // comparing only the first character could never equal such a word.
                    int hit = Array.FindIndex(judge, j => string.Equals(anstr, j, StringComparison.CurrentCultureIgnoreCase));
                    if (hit >= 0 && hit < option.Count)
                    {
                        test.answer = new List<string>() { option[hit].code };
                        test.option = option;
                    }
                    else
                    {
                        // Previous rule: match on the first character only.
                        hit = Array.FindIndex(judge, j => string.Equals(test.answer[0], j, StringComparison.CurrentCultureIgnoreCase));
                        if (hit >= 0 && hit < option.Count)
                        {
                            test.answer[0] = option[hit].code;
                            test.option = option;
                        }
                    }
                }
            }
            break;

        case Analysis:
            test.explain = HtmlHelper.DoUselessTag(content.ToString());
            break;

        case Ended:
            break;

        case Point:
            string points = BlankTag(content.ToString());
            if (!string.IsNullOrWhiteSpace(points))
            {
                // Drop the empty entries produced by leading, trailing or doubled delimiters.
                List<string> ps = Regex.Split(points, "\\.|\\.|\\、|\\:|\\:|\\,|\\,|\\;|\\;")
                    .Where(p => p.Length > 0)
                    .Distinct()
                    .ToList();
                if (ps.Count > 0)
                {
                    test.knowledge = ps;
                }
            }
            break;

        case Score:
            // Leading integer or decimal number of the tag-free text; 0 when there is none.
            Match scoreMatch = Regex.Match(BlankTag(content.ToString()), "^[0-9]+(\\.?[0-9]+)?");
            double.TryParse(scoreMatch.Value, System.Globalization.NumberStyles.Float, invariant, out double sc);
            test.score = sc;
            break;

        case Level:
            // Same number pattern as Score; a value that is not a whole number yields 0.
            Match levelMatch = Regex.Match(BlankTag(content.ToString()), "^[0-9]+(\\.?[0-9]+)?");
            int.TryParse(levelMatch.Value, System.Globalization.NumberStyles.Integer, invariant, out int lvl);
            test.level = lvl;
            break;

        case Filed:
            // 1-based position of the tag in Fileds; 0 when the tag is not listed.
            test.field = Array.IndexOf(Fileds, openTagVal) + 1;
            break;
    }
}
- }
- }
|