12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786 |
- using HTEXLib.COMM.Helpers;
- using HTEXLib.DOCX.Models;
- using HTEXLib.Helpers.ShapeHelpers;
- using HtmlAgilityPack;
- using System;
- using System.Collections.Generic;
- using System.Linq;
- using System.Text;
- using System.Text.RegularExpressions;
- using System.Threading.Tasks;
- namespace HTEXLib.Translator
- {
- public class HTML2ITEMTranslator
- {
/// <summary>Language configuration currently in use; assigned by <c>Translate</c>.</summary>
public LangConfig langConfig { get; set; }

/// <summary>Reusable HTML document into which fragments are loaded to read their inner text.</summary>
public HtmlDocument doc { get; set; } = new HtmlDocument();

/// <summary>Option letters, one string per character of <c>langConfig.Item.Options</c>; assigned by <c>Translate</c>.</summary>
public string[] optionsKeys { get; set; }

/// <summary>Maps a section name ("Answer", "Score", "Point", "Analysis", "Filed{n}", "Summary") to its tag pattern.</summary>
public Dictionary<string, string> KeyReg { get; set; }

/// <summary>Field names obtained by splitting <c>langConfig.Item.Filed</c> on '|'; assigned by <c>Translate</c>.</summary>
public string[] Fileds { get; set; }

/// <summary>Creates a translator; all state is populated later by <c>Translate</c>.</summary>
public HTML2ITEMTranslator() { }
/// <summary>
/// Removes whitespace that appears inside the answer, analysis and end tags, and inside the
/// numbered question-type tags built from <c>langConfig.Item.Type</c>.
/// </summary>
/// <param name="html">HTML text to normalize.</param>
/// <returns>The HTML with the recognized tags rewritten without inner whitespace.</returns>
public string BlankProcess(string html)
{
    // The three fixed tags, rewritten in the same order as before: answer, analysis, end.
    string[] fixedTags =
    {
        langConfig.Item.Start + langConfig.Item.Answer + langConfig.Item.End,
        langConfig.Item.Start + langConfig.Item.Analysis + langConfig.Item.End,
        langConfig.Item.Start + langConfig.Item.Ended + langConfig.Item.End
    };
    foreach (string fixedTag in fixedTags)
    {
        // Allow optional whitespace between every character of the tag.
        string loosePattern = string.Join("\\s*", fixedTag.Select(ch => ch.ToString()).ToArray());
        html = Regex.Replace(html, loosePattern, fixedTag);
    }

    foreach (string typeName in langConfig.Item.Type.Values)
    {
        string spacedName = string.Join("\\s*", typeName.Select(ch => ch.ToString()).ToArray());
        string tagPattern = langConfig.Item.Start + "\\s*" + "\\d+\\s*" + spacedName + "\\s*" + langConfig.Item.End;

        // Matches are enumerated over the text as it was when the search started;
        // each matched tag is then replaced everywhere by its whitespace-free form.
        for (Match hit = Regex.Match(html, tagPattern); hit.Success; hit = hit.NextMatch())
        {
            string compact = Regex.Replace(hit.Value, "\\s*", "");
            html = html.Replace(hit.Value, compact);
        }
    }
    return html;
}
/// <summary>
/// Parses tagged question HTML into a list of items ordered by question number.
/// </summary>
/// <remarks>
/// Stores <paramref name="_langConfig"/> in <c>langConfig</c> and rebuilds <c>KeyReg</c>,
/// <c>Fileds</c> and <c>optionsKeys</c> from it before parsing.
/// </remarks>
/// <param name="html">HTML text containing the tagged questions.</param>
/// <param name="_langConfig">Configuration supplying the tag delimiters and tag names.</param>
/// <returns>All converted items, sorted ascending by <c>order</c>.</returns>
public List<DOCX.Models.ItemInfo> Translate(string html , LangConfig _langConfig)
{
    langConfig = _langConfig;
    string start = langConfig.Item.Start;
    string end = langConfig.Item.End;

    // "Summary" is added per question type inside doItem.
    KeyReg = new Dictionary<string, string>
    {
        { "Answer", start + langConfig.Item.Answer + end },
        { "Score", start + langConfig.Item.Score + end },
        { "Point", start + langConfig.Item.Point + end },
        { "Analysis", start + langConfig.Item.Analysis + end }
    };
    Fileds = langConfig.Item.Filed.Split('|');
    for (int i = 0; i < Fileds.Length; i++)
    {
        KeyReg.Add($"Filed{i + 1}", start + Fileds[i] + end);
    }
    optionsKeys = langConfig.Item.Options.Select(s => s.ToString()).ToArray();

    // Drop the MathJax loader script.
    string mathjax = "<script type=\"text/javascript\" src=\"http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML\"></script>";
    html = html.Replace(mathjax, "");

    // Remove class attributes and span tags.
    string classpattern = "class=\"([^\"]*)\"";
    html = Regex.Replace(html, classpattern, "");
    string pattern = "<span([^>]{0,})>";
    html = Regex.Replace(html, pattern, "");
    html = html.Replace(" close=\"\" separators=\" | \">", "");
    html = html.Replace("\t", " ").Replace("<span>", "").Replace("</span>", "").Replace("dir=\"ltr\"", "");

    // Remove whitespace embedded in the answer / analysis / end / type tags.
    html = BlankProcess(html);

    Dictionary<string, List<string>> TestInType = ConvertTest(html);
    List<DOCX.Models.ItemInfo> tests = new List<DOCX.Models.ItemInfo>();
    foreach (KeyValuePair<string, List<string>> group in TestInType)
    {
        List<DOCX.Models.ItemInfo> converted;
        switch (group.Key)
        {
            case "single":
                converted = SingleConvert(group.Key, group.Value);
                break;
            case "multiple":
                converted = MultipleConvert(group.Key, group.Value);
                break;
            case "judge":
                converted = JudgeConvert(group.Key, group.Value);
                break;
            case "compose":
                converted = ComposeConvert(group.Key, group.Value);
                break;
            default:
                // Every other type is handled as a subjective question.
                converted = SubjectiveConvert(group.Key, group.Value);
                break;
        }
        tests.AddRange(converted);
    }
    return tests.OrderBy(x => x.order).ToList();
}
/// <summary>Converts single-choice question fragments by delegating to <c>OptionProcess</c>.</summary>
/// <param name="TypeKey">Question type key passed through to <c>OptionProcess</c>.</param>
/// <param name="list">HTML fragments, one per question.</param>
/// <returns>The items produced by <c>OptionProcess</c>.</returns>
private List<DOCX.Models.ItemInfo> SingleConvert(string TypeKey, List<string> list)
    => OptionProcess(TypeKey, list);
/// <summary>Converts multiple-choice question fragments by delegating to <c>OptionProcess</c>.</summary>
/// <param name="TypeKey">Question type key passed through to <c>OptionProcess</c>.</param>
/// <param name="list">HTML fragments, one per question.</param>
/// <returns>The items produced by <c>OptionProcess</c>.</returns>
private List<DOCX.Models.ItemInfo> MultipleConvert(string TypeKey, List<string> list)
    => OptionProcess(TypeKey, list);
/// <summary>Converts true/false question fragments by delegating to <c>OptionProcess</c>.</summary>
/// <param name="TypeKey">Question type key passed through to <c>OptionProcess</c>.</param>
/// <param name="list">HTML fragments, one per question.</param>
/// <returns>The items produced by <c>OptionProcess</c>.</returns>
private List<DOCX.Models.ItemInfo> JudgeConvert(string TypeKey, List<string> list)
    => OptionProcess(TypeKey, list);
/// <summary>Converts fill-in-the-blank question fragments by delegating to <c>CompleteProcess</c>.</summary>
/// <param name="TypeKey">Question type key passed through to <c>CompleteProcess</c>.</param>
/// <param name="list">HTML fragments, one per question.</param>
/// <returns>The items produced by <c>CompleteProcess</c>.</returns>
private List<DOCX.Models.ItemInfo> CompleteConvert(string TypeKey, List<string> list)
    => CompleteProcess(TypeKey, list);
/// <summary>
/// Builds fill-in-the-blank items: every <c>Start…End</c> span left in the stem is treated as a
/// blank, recorded as an answer, and replaced in the stem by an underlined placeholder.
/// </summary>
/// <remarks>
/// The item hash is computed from the stem with the blanks removed (no underline markup).
/// Previously the hash text was overwritten on every iteration from the already-substituted
/// stem, so the blanks were never stripped and the placeholder markup leaked into the hash.
/// </remarks>
/// <param name="TypeKey">Question type key passed to <c>ConvertTestInfo</c>.</param>
/// <param name="tests">HTML fragments, one per question.</param>
/// <returns>The processed items.</returns>
private List<DOCX.Models.ItemInfo> CompleteProcess(string TypeKey, List<string> tests)
{
    List<DOCX.Models.ItemInfo> testInfos = ConvertTestInfo(tests, TypeKey);
    foreach (DOCX.Models.ItemInfo testInfo in testInfos)
    {
        string answerTag = langConfig.Item.Start + langConfig.Item.Answer + langConfig.Item.End;
        string analysisTag = langConfig.Item.Start + langConfig.Item.Analysis + langConfig.Item.End;
        testInfo.question = testInfo.question.Replace(analysisTag, "").Replace(answerTag, "");

        // Any remaining Start…End span in the stem is a blank holding its answer.
        string regRex = langConfig.Item.Start + "([\\s\\S]*?)" + langConfig.Item.End;
        List<string> ans = new List<string>();
        List<ReplaceDto> replaces = new List<ReplaceDto>();
        int index = 1;
        for (Match m = Regex.Match(testInfo.question, regRex); m.Success; m = m.NextMatch(), index++)
        {
            string an = m.Groups[1].ToString();
            doc.LoadHtml(an);
            string anstr = doc.DocumentNode.InnerText;

            // Placeholder width is three filler characters per encoded byte of the answer text.
            // NOTE(review): Encoding.Default differs between runtimes, so the width may too - confirm.
            // NOTE(review): the filler literal is reproduced as it appears in the source; the
            // variable name suggests a non-breaking space was intended - confirm.
            int length = System.Text.Encoding.Default.GetBytes(anstr).Length;
            string nbsp = string.Concat(Enumerable.Repeat(" ", length * 3));

            replaces.Add(new ReplaceDto
            {
                oldstr = langConfig.Item.Start + an + langConfig.Item.End,
                newstr = "<underline style='word-break: break-word' data=\"" + index + "\"><u>" + nbsp + "</u></underline>"
            });
            ans.Add(an);
        }

        // Hash text: the stem with the blanks removed and no underline markup.
        string textImg = testInfo.question;
        foreach (ReplaceDto replace in replaces)
        {
            testInfo.question = testInfo.question.Replace(replace.oldstr, replace.newstr);
            testInfo.question = HtmlHelper.DoUselessTag(testInfo.question);
            textImg = textImg.Replace(replace.oldstr, "");
        }
        testInfo.shaCode = ShaHashHelper.GetSHA1(textImg);

        // Strip the analysis tag from the explanation.
        testInfo.explain = testInfo.explain.Replace(analysisTag, "");
        testInfo.explain = HtmlHelper.DoUselessTag((string)testInfo.explain);
        testInfo.answer.AddRange(ans);
    }
    return testInfos;
}
/// <summary>
/// Builds objective items (single choice, multiple choice, true/false): splits the option list
/// out of the stem, normalizes the answer into individual option letters, and computes the hash
/// from the option texts followed by the stem.
/// </summary>
/// <remarks>
/// Changes relative to the earlier implementation:
/// option text is taken from the regex capture group instead of fixed character offsets, so
/// whitespace between an option letter and its separator no longer corrupts the text;
/// the last configured option letter is now considered (it previously could never be extracted);
/// and an empty option region is handled with an explicit check instead of a swallowed exception,
/// so the answer tag is always stripped from the stem.
/// </remarks>
/// <param name="typeKey">Question type key; "judge" enables the true/false fallback options.</param>
/// <param name="list">HTML fragments, one per question.</param>
/// <returns>The processed items, each flagged as objective.</returns>
private List<DOCX.Models.ItemInfo> OptionProcess(string typeKey, List<string> list)
{
    // Optional whitespace plus one separator between an option letter and its text.
    const string separator = "\\s*(\\.|\\.|\\、|\\:|\\:)";
    const string lazyBody = "([\\s\\S]*?)";

    List<DOCX.Models.ItemInfo> testInfos = ConvertTestInfo(list, typeKey);
    foreach (DOCX.Models.ItemInfo testInfo in testInfos)
    {
        string answerTag = langConfig.Item.Start + langConfig.Item.Answer + langConfig.Item.End;
        string analysisTag = langConfig.Item.Start + langConfig.Item.Analysis + langConfig.Item.End;

        // Option region: from the first option letter up to the answer tag, else the analysis
        // tag, else the end of the line.
        string optsRgex;
        if (testInfo.question.Contains(answerTag))
        {
            optsRgex = optionsKeys[0] + separator + lazyBody + answerTag;
        }
        else if (testInfo.question.Contains(analysisTag))
        {
            optsRgex = optionsKeys[0] + separator + lazyBody + analysisTag;
        }
        else
        {
            optsRgex = optionsKeys[0] + separator + lazyBody + ".*";
        }
        string optsHtml = Regex.Match(testInfo.question, optsRgex).Value;

        // Extract each option.
        StringBuilder textImg = new StringBuilder();
        for (int i = 0; i < optionsKeys.Length; i++)
        {
            string optHtml = null;

            // Preferred form: text bounded by the next option letter (not possible for the last letter).
            if (i + 1 < optionsKeys.Length)
            {
                string optRgex = optionsKeys[i] + separator + lazyBody + optionsKeys[i + 1] + separator;
                Match bounded = Regex.Match(optsHtml, optRgex);
                if (bounded.Success)
                {
                    optHtml = bounded.Groups[2].Value;
                }
            }

            // Fallback: this is the last option present, so it runs to a tag or to the end of the line.
            if (optHtml == null)
            {
                string tailRgex;
                if (optsHtml.Contains(answerTag))
                {
                    tailRgex = optionsKeys[i] + separator + lazyBody + answerTag;
                }
                else if (optsHtml.Contains(analysisTag))
                {
                    tailRgex = optionsKeys[i] + separator + lazyBody + analysisTag;
                }
                else
                {
                    tailRgex = optionsKeys[i] + separator + "(.*)";
                }
                Match tail = Regex.Match(optsHtml, tailRgex);
                if (!tail.Success)
                {
                    continue;
                }
                optHtml = tail.Groups[2].Value.Replace(answerTag, "").Replace(analysisTag, "");
            }

            optHtml = HtmlHelper.DoUselessTag(optHtml);
            textImg.Append(optHtml);
            testInfo.option.Add(new CodeValue { code = optionsKeys[i], value = optHtml });
        }

        // Stem: remove the option region and the answer tag. string.Replace rejects an empty
        // search string, so both removals are guarded explicitly.
        if (!string.IsNullOrEmpty(optsHtml))
        {
            testInfo.question = testInfo.question.Replace(optsHtml, "");
        }
        if (answerTag.Length > 0)
        {
            testInfo.question = testInfo.question.Replace(answerTag, "");
        }
        testInfo.question = HtmlHelper.DoUselessTag(testInfo.question);
        textImg.Append(testInfo.question);
        testInfo.shaCode = ShaHashHelper.GetSHA1(textImg.ToString());

        // Answer: strip tags, then keep each distinct character as one selected option.
        HashSet<string> ans = new HashSet<string>();
        foreach (string raw in testInfo.answer)
        {
            string cleaned = raw.Replace(answerTag, "").Replace(analysisTag, "").TrimStart().TrimEnd();
            foreach (char letter in cleaned)
            {
                ans.Add(letter.ToString());
            }
        }
        testInfo.answer = ans.ToList();

        // True/false questions without explicit options: synthesize A/B from the configured
        // labels and map the answer label onto the matching option code.
        if (typeKey == "judge" && testInfo.option.IsEmpty())
        {
            string[] Judge = langConfig.Item.Judge.Split('|');
            List<CodeValue> option = new List<CodeValue>()
            {
                new CodeValue { code = "A", value = Judge[0] },
                new CodeValue { code = "B", value = Judge[1] }
            };
            if (testInfo.answer != null && testInfo.answer.Count > 0)
            {
                int index = 0;
                foreach (var j in Judge)
                {
                    if (String.Equals(testInfo.answer[0], j, StringComparison.CurrentCultureIgnoreCase))
                    {
                        testInfo.answer[0] = option[index].code;
                        testInfo.option = option;
                        break;
                    }
                    index += 1;
                }
            }
        }

        // Explanation: strip the analysis tag.
        testInfo.explain = testInfo.explain.Replace(analysisTag, "");
        testInfo.explain = HtmlHelper.DoUselessTag((string)testInfo.explain);
        testInfo.objective = true;
    }
    return testInfos;
}
/// <summary>
/// Builds subjective items: strips the answer and analysis tags from the stem, answers and
/// explanation, cleans each with <c>HtmlHelper.DoUselessTag</c>, and hashes the cleaned stem.
/// </summary>
/// <param name="TypeKey">Question type key passed to <c>ConvertTestInfo</c>.</param>
/// <param name="tests">HTML fragments, one per question.</param>
/// <returns>The processed items, each flagged as not objective.</returns>
private List<DOCX.Models.ItemInfo> SubjectiveConvert(string TypeKey, List<string> tests)
{
    List<DOCX.Models.ItemInfo> items = ConvertTestInfo(tests, TypeKey);
    foreach (DOCX.Models.ItemInfo item in items)
    {
        string analysisTag = langConfig.Item.Start + langConfig.Item.Analysis + langConfig.Item.End;
        string answerTag = langConfig.Item.Start + langConfig.Item.Answer + langConfig.Item.End;

        // Stem, and the hash derived from it.
        item.question = item.question.Replace(analysisTag, "").Replace(answerTag, "");
        item.question = HtmlHelper.DoUselessTag(item.question);
        StringBuilder hashText = new StringBuilder(item.question);
        item.shaCode = ShaHashHelper.GetSHA1(hashText.ToString());

        // Answers.
        int position = 0;
        while (position < item.answer.Count)
        {
            item.answer[position] = item.answer[position].Replace(answerTag, "").Replace(analysisTag, "");
            item.answer[position] = HtmlHelper.DoUselessTag((string)item.answer[position]);
            position++;
        }

        // Explanation; a missing explanation becomes an empty string.
        if (item.explain != null)
        {
            item.explain = item.explain.Replace(analysisTag, "");
        }
        else
        {
            item.explain = "";
        }
        item.explain = HtmlHelper.DoUselessTag((string)item.explain);
        item.objective = false;
    }
    return items;
}
/// <summary>
/// Builds composite items: the leading material becomes the parent stem and the rest of the
/// fragment is parsed recursively through <c>Translate</c> into child items.
/// </summary>
/// <param name="TypeKey">Question type key of the composite question.</param>
/// <param name="list">HTML fragments, one per composite question.</param>
/// <returns>One parent item per fragment, with its children sorted by <c>order</c>.</returns>
private List<DOCX.Models.ItemInfo> ComposeConvert(string TypeKey, List<string> list)
{
    List<DOCX.Models.ItemInfo> results = new List<DOCX.Models.ItemInfo>();
    foreach (string html in list)
    {
        DOCX.Models.ItemInfo exercise = new DOCX.Models.ItemInfo() { type = TypeKey, objective = false };

        // Candidate materials: the composite header up to the next header of each known type
        // (or the end of the text). The shortest candidate is the material itself.
        List<string> candidates = new List<string>();
        foreach (string k in langConfig.Item.Type.Keys)
        {
            string numbered = langConfig.Item.Start + "\\d+[^" + langConfig.Item.Start + langConfig.Item.End + "]*";
            string Rex = numbered + langConfig.Item.Type[TypeKey] + langConfig.Item.End
                + "[\\s\\S]+?(?=\\s*" + numbered + langConfig.Item.Type[k] + langConfig.Item.End + "|$)";
            for (Match mm = Regex.Match(html, Rex); mm.Success; mm = mm.NextMatch())
            {
                candidates.Add(mm.Value);
            }
        }
        string ls = candidates.OrderBy(x => x.Length).First();

        // Question number: the digits inside the header tag (the last header found wins).
        string digtRex = langConfig.Item.Start + "\\d+" + langConfig.Item.Type[TypeKey] + langConfig.Item.End;
        foreach (Match header in Regex.Matches(ls, digtRex))
        {
            string ord = header.Value
                .Replace(langConfig.Item.Start, "")
                .Replace(langConfig.Item.End, "")
                .Replace(langConfig.Item.Type[TypeKey], "");
            int.TryParse(ord, out int order);
            exercise.order = order;
        }

        // Everything except the material and the end tag holds the sub-questions.
        string endedTag = langConfig.Item.Start + langConfig.Item.Ended + langConfig.Item.End;
        string testinfo = html.Replace(ls, "").Replace(endedTag, "");

        ls = Regex.Replace(ls, langConfig.Item.Start + "\\d+" + langConfig.Item.Type[TypeKey] + langConfig.Item.End, "");
        exercise.question = HtmlHelper.DoUselessTag(ls);

        // Parent hash = material text followed by every child's hash.
        string testQs = exercise.question;
        List<DOCX.Models.ItemInfo> dtos = Translate(testinfo, langConfig);
        if (dtos.IsNotEmpty())
        {
            foreach (DOCX.Models.ItemInfo child in dtos)
            {
                testQs = testQs + child.shaCode;
            }
            exercise.shaCode = ShaHashHelper.GetSHA1(testQs);
            exercise.children.AddRange(dtos);
        }
        if (exercise.children.IsNotEmpty())
        {
            exercise.children = exercise.children.OrderBy(x => x.order).ToList();
        }
        results.Add(exercise);
    }
    return results;
}
/// <summary>One regex match of a tagged section, together with the patterns that produced it.</summary>
public class RegInfo
{
    /// <summary>The matched text.</summary>
    public string val { get; set; }

    /// <summary>Pattern of the tag that opens the section.</summary>
    public string st { get; set; }

    /// <summary>Pattern of the tag that closes the section; empty when the section ran to the end of the line.</summary>
    public string end { get; set; }

    /// <summary>The complete pattern used for the match.</summary>
    public string reg { get; set; }
}
/// <summary>
/// Converts each HTML fragment into an item by calling <c>doItem</c>, preserving input order.
/// </summary>
/// <remarks>
/// Fragments are processed one after another on the calling thread. The earlier version wrapped
/// each call in <c>Task.Run</c> and immediately blocked on <c>Result</c> while lazily enumerating,
/// which was sequential anyway and wrapped failures in <c>AggregateException</c>; <c>doItem</c>
/// also writes to the shared <c>KeyReg</c> and <c>doc</c>, so it must not run concurrently.
/// Exceptions thrown by <c>doItem</c> now propagate unwrapped.
/// </remarks>
/// <param name="tests">HTML fragments, one per question.</param>
/// <param name="TypeKey">Question type key passed to <c>doItem</c>.</param>
/// <returns>One item per fragment.</returns>
/// <exception cref="ArgumentNullException"><paramref name="tests"/> is null.</exception>
public List<DOCX.Models.ItemInfo> ConvertTestInfo(List<string> tests, string TypeKey)
{
    if (tests == null)
    {
        throw new ArgumentNullException(nameof(tests));
    }

    List<DOCX.Models.ItemInfo> testInfos = new List<DOCX.Models.ItemInfo>();
    foreach (string html in tests)
    {
        testInfos.Add(doItem(html, TypeKey));
    }
    return testInfos;
}
/// <summary>
/// Parses one question fragment into an item. The fragment is a sequence of tagged sections:
/// a numbered type header followed by the stem, then optional answer, analysis, score,
/// knowledge-point and field sections in any order.
/// </summary>
/// <remarks>
/// For every tag in <c>KeyReg</c> the section is the shortest text running from that tag to any
/// other tag, or to the end of the line when no other tag follows.
/// This method writes <c>KeyReg["Summary"]</c> and reuses <c>doc</c>, both shared instance state.
/// Field tags are handled for every <c>Filed{n}</c> key registered in <c>KeyReg</c> (previously only
/// 1 to 6), and the score is parsed with the invariant culture so "3.5" does not depend on the
/// machine's regional settings.
/// </remarks>
/// <param name="html">HTML of a single question.</param>
/// <param name="TypeKey">Question type key; must exist in <c>langConfig.Item.Type</c>.</param>
/// <returns>The parsed item.</returns>
public DOCX.Models.ItemInfo doItem(string html, string TypeKey)
{
    string summaryTag = langConfig.Item.Start + "\\d+" + langConfig.Item.Type[TypeKey] + langConfig.Item.End;
    KeyReg["Summary"] = summaryTag;
    DOCX.Models.ItemInfo test = new DOCX.Models.ItemInfo();
    test.type = TypeKey;

    // Locate every section: pair each tag with every other tag and keep the shortest match.
    Dictionary<string, RegInfo> dict = new Dictionary<string, RegInfo>();
    foreach (var mkey in KeyReg.Keys)
    {
        List<RegInfo> val = new List<RegInfo>();
        foreach (var skey in KeyReg.Keys)
        {
            if (mkey == skey)
            {
                continue;
            }
            string RegexStr = KeyReg[mkey] + "([\\s\\S]*?)" + KeyReg[skey];
            Match mt = Regex.Match(html, RegexStr);
            if (!string.IsNullOrWhiteSpace(mt.Value))
            {
                val.Add(new RegInfo { reg = RegexStr, val = mt.Value, st = KeyReg[mkey], end = KeyReg[skey] });
            }
        }
        if (val.Count == 0)
        {
            // No closing tag follows: take the rest of the line (empty when the tag is absent).
            string RegexStrd = KeyReg[mkey] + ".*";
            Match mtd = Regex.Match(html, RegexStrd);
            val.Add(new RegInfo { reg = RegexStrd, val = mtd.Value, st = KeyReg[mkey], end = "" });
        }
        dict.Add(mkey, val.OrderBy(s => s.val.Length).First());
    }

    foreach (KeyValuePair<string, RegInfo> entry in dict)
    {
        string key = entry.Key;
        RegInfo info = entry.Value;
        switch (key)
        {
            case "Summary":
            {
                // Question number: the digits inside the header tag.
                Match m = Regex.Match(info.val, summaryTag);
                while (m.Success)
                {
                    string ord = m.Value
                        .Replace(langConfig.Item.Start, "")
                        .Replace(langConfig.Item.End, "")
                        .Replace(langConfig.Item.Type[TypeKey], "");
                    int.TryParse(ord, out int order);
                    test.order = order;
                    m = m.NextMatch();
                }
                string qu = Regex.Replace(info.val, summaryTag, "");
                test.question = Regex.Replace(qu, info.end, "");
                break;
            }
            case "Answer":
            {
                // Objective answers are reduced to plain text; others keep their markup.
                bool isObjective = TypeKey.Equals("single") || TypeKey.Equals("multiple") || TypeKey.Equals("judge");
                string answer = isObjective ? ExtractInnerText(info.val) : info.val;
                test.answer = new List<string>() { StripBoundaryTags(answer, info) };
                break;
            }
            case "Analysis":
                test.explain = StripBoundaryTags(info.val, info);
                break;
            case "Score":
            {
                string score = StripBoundaryTags(ExtractInnerText(info.val), info);
                score = Regex.Replace(score, @"\s", "");
                // Leading integer or decimal number.
                Match number = Regex.Match(score, "^[0-9]+(\\.?[0-9]+)?");
                double.TryParse(
                    number.Value,
                    System.Globalization.NumberStyles.Float,
                    System.Globalization.CultureInfo.InvariantCulture,
                    out double sc);
                test.score = sc;
                break;
            }
            case "Point":
            {
                string point = StripBoundaryTags(ExtractInnerText(info.val), info);
                point = Regex.Replace(point, @"\s", "");
                if (!string.IsNullOrWhiteSpace(point))
                {
                    // Knowledge points may be separated by any of these punctuation marks.
                    string[] ps = Regex.Split(point, "\\.|\\.|\\、|\\:|\\:|\\,|\\,|\\;|\\;");
                    if (ps != null && ps.Length > 0)
                    {
                        test.knowledge = ps.Distinct().ToList();
                    }
                }
                break;
            }
            default:
            {
                // Field tags ("Filed1", "Filed2", ...): the first one that resolves to a
                // configured field name wins; the stored value is its 1-based position.
                if (!key.StartsWith("Filed", StringComparison.Ordinal))
                {
                    break;
                }
                if (test.@field > 0) { break; }
                string fieldName = ExtractInnerText(info.val);
                fieldName = Regex.Replace(fieldName, info.end, "")
                    .Replace(langConfig.Item.Start, "")
                    .Replace(langConfig.Item.End, "");
                test.@field = Array.IndexOf(Fileds, fieldName) + 1;
                break;
            }
        }
    }
    return test;
}

/// <summary>Loads <paramref name="fragment"/> into the shared <c>doc</c> and returns its inner text.</summary>
private string ExtractInnerText(string fragment)
{
    doc.LoadHtml(fragment);
    return doc.DocumentNode.InnerText;
}

/// <summary>Removes the opening and closing tag patterns of <paramref name="info"/> from <paramref name="text"/>.</summary>
private static string StripBoundaryTags(string text, RegInfo info)
{
    text = Regex.Replace(text, info.st, "");
    return Regex.Replace(text, info.end, "");
}
/// <summary>
/// Earlier fixed-order parser: reads the stem, then the answer, then the analysis from each
/// fragment, assuming the sections appear in that order.
/// </summary>
/// <remarks>
/// The stem runs from the numbered header to the answer tag, else to the analysis tag, else to
/// the end of the line; the closing tag stays in the stem text. The answer runs to the analysis
/// tag when one exists, else to the end of the line. The analysis runs to the end of the line.
/// Tag patterns are used directly; they are no longer joined with '|' and split again, which
/// broke any tag that itself contained '|'.
/// </remarks>
/// <param name="tests">HTML fragments, one per question.</param>
/// <param name="TypeKey">Question type key; must exist in <c>langConfig.Item.Type</c>.</param>
/// <returns>One item per fragment.</returns>
public List<DOCX.Models.ItemInfo> ConvertTestInfo1(List<string> tests, string TypeKey)
{
    List<DOCX.Models.ItemInfo> testInfos = new List<DOCX.Models.ItemInfo>();
    foreach (string html in tests)
    {
        string summaryTag = langConfig.Item.Start + "\\d+" + langConfig.Item.Type[TypeKey] + langConfig.Item.End;
        string answerTag = langConfig.Item.Start + langConfig.Item.Answer + langConfig.Item.End;
        string analysisTag = langConfig.Item.Start + langConfig.Item.Analysis + langConfig.Item.End;
        bool hasAnswer = html.Contains(answerTag);
        bool hasAnalysis = html.Contains(analysisTag);

        DOCX.Models.ItemInfo test = new DOCX.Models.ItemInfo();
        test.type = TypeKey;

        // Stem.
        string summaryPattern;
        if (hasAnswer)
        {
            summaryPattern = summaryTag + "([\\s\\S]*?)" + answerTag;
        }
        else if (hasAnalysis)
        {
            summaryPattern = summaryTag + "([\\s\\S]*?)" + analysisTag;
        }
        else
        {
            summaryPattern = summaryTag + ".*";
        }
        string summary = Regex.Match(html, summaryPattern).Value;

        // Question number: the digits inside the header tag.
        for (Match m = Regex.Match(summary, summaryTag); m.Success; m = m.NextMatch())
        {
            string ord = m.Value
                .Replace(langConfig.Item.Start, "")
                .Replace(langConfig.Item.End, "")
                .Replace(langConfig.Item.Type[TypeKey], "");
            int.TryParse(ord, out int order);
            test.order = order;
        }
        test.question = Regex.Replace(summary, summaryTag, "");

        // Answer; objective answers are reduced to plain text.
        string answerPattern = hasAnalysis
            ? answerTag + "([\\s\\S]*?)" + analysisTag
            : answerTag + ".*";
        string Answer = Regex.Match(html, answerPattern).Value;
        if (TypeKey.Equals("single") || TypeKey.Equals("multiple") || TypeKey.Equals("judge"))
        {
            doc.LoadHtml(Answer);
            Answer = doc.DocumentNode.InnerText;
        }
        test.answer = new List<string>() { Answer };

        // Analysis.
        test.explain = Regex.Match(html, analysisTag + ".*").Value;

        testInfos.Add(test);
    }
    return testInfos;
}
-
/// <summary>
/// Splits the document into per-question HTML fragments, grouped by question type.
/// </summary>
/// <param name="testHtml">The full, pre-cleaned HTML document.</param>
/// <returns>
/// A dictionary with one entry per key of <c>langConfig.Item.Type</c>; each value lists the
/// fragments found for that type (possibly none).
/// </returns>
public Dictionary<string, List<string>> ConvertTest(string testHtml)
{
    Dictionary<string, List<string>> TestInType = new Dictionary<string, List<string>>();
    foreach (string key in langConfig.Item.Type.Keys)
    {
        List<string> tests = new List<string>();
        if (key == "compose")
        {
            // A composite question runs from its numbered header to the end tag. Each one is
            // cut out of the working text so later type keys do not see its sub-questions.
            string regRex = langConfig.Item.Start + "\\d+" + langConfig.Item.Type[key] + langConfig.Item.End
                + "([\\s\\S]*?)" + langConfig.Item.Start + langConfig.Item.Ended + langConfig.Item.End;
            for (Match m = Regex.Match(testHtml, regRex); m.Success; m = m.NextMatch())
            {
                string testInfo = m.Value;
                testHtml = testHtml.Replace(testInfo, "");
                tests.Add(testInfo);
            }
        }
        else
        {
            // Header prefix: opening delimiter, digits, then any characters other than the delimiters.
            string numbered = langConfig.Item.Start + "\\d+[^" + langConfig.Item.Start + langConfig.Item.End + "]*";
            // A header of this type followed lazily by text, up to a lookahead supplied by the caller.
            string head = numbered + langConfig.Item.Type[key] + langConfig.Item.End + "[\\s\\S]+?(?=\\s*";

            // First pass: cut at the next header of the same type, or at the end of the text.
            string regRex = head + numbered + langConfig.Item.Type[key] + langConfig.Item.End + "|$)";
            for (Match m = Regex.Match(testHtml, regRex); m.Success; m = m.NextMatch())
            {
                tests.Add(m.Value);
            }

            // Second pass: a fragment may still contain questions of other types, so re-match
            // it against every type's header and keep the shortest result.
            if (tests.IsNotEmpty())
            {
                for (int i = 0; i < tests.Count; i++)
                {
                    List<string> candidates = new List<string>();
                    foreach (string k in langConfig.Item.Type.Keys)
                    {
                        string Rex = head + numbered + langConfig.Item.Type[k] + langConfig.Item.End + "|$)";
                        for (Match mm = Regex.Match(tests[i], Rex); mm.Success; mm = mm.NextMatch())
                        {
                            candidates.Add(mm.Value);
                        }
                    }
                    tests[i] = candidates.OrderBy(x => x.Length).First();
                }
            }
        }
        TestInType.Add(key, tests);
    }
    return TestInType;
}
- }
-
- }
|