HTML2ITEMTranslator.cs 42 KB


  1. using HTEXLib.COMM.Helpers;
  2. using HTEXLib.DOCX.Models;
  3. using HTEXLib.Helpers.ShapeHelpers;
  4. using HtmlAgilityPack;
  5. using System;
  6. using System.Collections.Generic;
  7. using System.Linq;
  8. using System.Text;
  9. using System.Text.RegularExpressions;
  10. using System.Threading.Tasks;
  11. namespace HTEXLib.Translator
  12. {
  13. public class HTML2ITEMTranslator
  14. {
  /// <summary>Language-specific tag configuration. Assigned by <c>Translate</c> and read by every parsing step.</summary>
  public LangConfig langConfig { get; set; }
  /// <summary>Shared HtmlAgilityPack document that the parsing methods reload (<c>LoadHtml</c>) in order to read the <c>InnerText</c> of a fragment.</summary>
  public HtmlDocument doc { get; set; } = new HtmlDocument();
  /// <summary>Option letters: one string per character of <c>langConfig.Item.Options</c> (filled in by <c>Translate</c>).</summary>
  public string[] optionsKeys { get; set; }
  /// <summary>Maps a section name ("Answer", "Score", "Point", "Analysis", "Filed1"..., plus "Summary" once <c>doItem</c> has run) to the regex pattern of its tag.</summary>
  public Dictionary<string, string> KeyReg { get; set; }
  /// <summary><c>langConfig.Item.Filed</c> split on '|' (filled in by <c>Translate</c>).</summary>
  public string[] Fileds { get; set; }
  /// <summary>Creates a translator; all configuration-dependent state is set up later by <c>Translate</c>.</summary>
  public HTML2ITEMTranslator()
  {
  }
  /// <summary>
  /// Normalises whitespace that ended up inside tags: the answer, analysis and
  /// end tags are rewritten to their canonical spelling, and numbered
  /// question-type tags have every whitespace character removed.
  /// </summary>
  /// <param name="html">The HTML to normalise.</param>
  /// <returns>The HTML with whitespace-free tags.</returns>
  public string BlankProcess(string html)
  {
      var item = langConfig.Item;

      // Canonical spellings of the three fixed tags, processed in this order.
      string[] canonicalTags =
      {
          item.Start + item.Answer + item.End,
          item.Start + item.Analysis + item.End,
          item.Start + item.Ended + item.End
      };

      // Allow optional whitespace between any two characters of a tag and
      // collapse whatever was matched back to the canonical spelling.
      string[] loosePatterns = canonicalTags
          .Select(tag => string.Join("\\s*", tag.Select(ch => ch.ToString()).ToArray()))
          .ToArray();
      for (int i = 0; i < canonicalTags.Length; i++)
      {
          html = Regex.Replace(html, loosePatterns[i], canonicalTags[i]);
      }

      // Numbered question-type tags: strip all whitespace from each occurrence.
      foreach (string typeName in item.Type.Values)
      {
          string spacedName = string.Join("\\s*", typeName.Select(ch => ch.ToString()).ToArray());
          string tagPattern = item.Start + "\\s*" + "\\d+\\s*" + spacedName + "\\s*" + item.End;
          for (Match hit = Regex.Match(html, tagPattern); hit.Success; hit = hit.NextMatch())
          {
              string compact = Regex.Replace(hit.Value, "\\s*", "");
              html = html.Replace(hit.Value, compact);
          }
      }

      return html;
  }
  /// <summary>
  /// Parses tagged question HTML into a list of items.
  /// Stores the configuration, rebuilds the tag lookup tables, strips markup
  /// noise from the HTML, splits it per question type and converts every
  /// group with the converter for that type.
  /// </summary>
  /// <param name="html">HTML containing the tagged questions.</param>
  /// <param name="_langConfig">Tag configuration used for parsing; stored in <c>langConfig</c>.</param>
  /// <returns>All parsed items, ordered by their <c>order</c> value.</returns>
  public List<DOCX.Models.ItemInfo> Translate(string html , LangConfig _langConfig)
  {
      langConfig = _langConfig;
      string tagStart = langConfig.Item.Start;
      string tagEnd = langConfig.Item.End;

      // Section name -> tag pattern. doItem adds the "Summary" entry per question type.
      KeyReg = new Dictionary<string, string>
      {
          { "Answer", tagStart + langConfig.Item.Answer + tagEnd },
          { "Score", tagStart + langConfig.Item.Score + tagEnd },
          { "Point", tagStart + langConfig.Item.Point + tagEnd },
          { "Analysis", tagStart + langConfig.Item.Analysis + tagEnd }
      };
      Fileds = langConfig.Item.Filed.Split('|');
      for (int i = 0; i < Fileds.Length; i++)
      {
          KeyReg.Add($"Filed{i + 1}", tagStart + Fileds[i] + tagEnd);
      }
      optionsKeys = langConfig.Item.Options.Select(s => s.ToString()).ToArray();

      // Drop the MathJax loader script.
      string mathjax = "<script type=\"text/javascript\" src=\"http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML\"></script>";
      html = html.Replace(mathjax, "");
      // Remove class attributes and span tags.
      string classpattern = "class=\"([^\"]*)\"";
      html = Regex.Replace(html, classpattern, "");
      string pattern = "<span([^>]{0,})>";
      html = Regex.Replace(html, pattern, "");
      html = html.Replace(" close=\"\" separators=\" | \">", "");
      html = html.Replace("\t", " ").Replace("<span>", "").Replace("</span>", "").Replace("dir=\"ltr\"", "");
      // Remove whitespace that ended up inside the answer / analysis / end / type tags.
      html = BlankProcess(html);

      // (The original also ran an unused Regex.Split over the whole document here;
      // its result was never read, so the pass has been removed.)
      Dictionary<string, List<string>> TestInType = ConvertTest(html);
      List<DOCX.Models.ItemInfo> tests = new List<DOCX.Models.ItemInfo>();
      foreach (string key in TestInType.Keys)
      {
          List<string> rawItems = TestInType[key];
          List<DOCX.Models.ItemInfo> converted;
          switch (key)
          {
              case "single":
                  converted = SingleConvert(key, rawItems);
                  break;
              case "multiple":
                  converted = MultipleConvert(key, rawItems);
                  break;
              case "judge":
                  converted = JudgeConvert(key, rawItems);
                  break;
              case "compose":
                  converted = ComposeConvert(key, rawItems);
                  break;
              default:
                  // Every other type is handled as a subjective question.
                  converted = SubjectiveConvert(key, rawItems);
                  break;
          }
          tests.AddRange(converted);
      }
      return tests.OrderBy(x => x.order).ToList();
  }
  /// <summary>Converts single-choice questions by delegating to <c>OptionProcess</c>.</summary>
  private List<DOCX.Models.ItemInfo> SingleConvert(string TypeKey, List<string> list)
      => OptionProcess(TypeKey, list);
  /// <summary>Converts multiple-choice questions by delegating to <c>OptionProcess</c>.</summary>
  private List<DOCX.Models.ItemInfo> MultipleConvert(string TypeKey, List<string> list)
      => OptionProcess(TypeKey, list);
  /// <summary>Converts true/false (judge) questions by delegating to <c>OptionProcess</c>.</summary>
  private List<DOCX.Models.ItemInfo> JudgeConvert(string TypeKey, List<string> list)
      => OptionProcess(TypeKey, list);
  /// <summary>Converts fill-in-the-blank questions by delegating to <c>CompleteProcess</c>.</summary>
  private List<DOCX.Models.ItemInfo> CompleteConvert(string TypeKey, List<string> list)
      => CompleteProcess(TypeKey, list);
  /// <summary>
  /// Post-processes fill-in-the-blank items: every start-tag ... end-tag span
  /// left in the question is treated as an inline answer, recorded in the
  /// answer list and replaced by an underlined blank.
  /// </summary>
  /// <param name="TypeKey">Question type key passed on to <c>ConvertTestInfo</c>.</param>
  /// <param name="tests">Raw HTML of each question.</param>
  /// <returns>The processed items.</returns>
  private List<DOCX.Models.ItemInfo> CompleteProcess(string TypeKey, List<string> tests)
  {
      List<DOCX.Models.ItemInfo> testInfos = ConvertTestInfo(tests, TypeKey);
      foreach (DOCX.Models.ItemInfo testInfo in testInfos)
      {
          string answerTag = langConfig.Item.Start + langConfig.Item.Answer + langConfig.Item.End;
          string analysisTag = langConfig.Item.Start + langConfig.Item.Analysis + langConfig.Item.End;

          List<string> ans = new List<string>();
          testInfo.question = testInfo.question.Replace(analysisTag, "").Replace(answerTag, "");

          // Anything still wrapped in start/end tags is an inline answer.
          string regRex = langConfig.Item.Start + "([\\s\\S]*?)" + langConfig.Item.End;
          List<ReplaceDto> replaces = new List<ReplaceDto>();
          int index = 1;
          for (Match m = Regex.Match(testInfo.question, regRex); m.Success; m = m.NextMatch(), index++)
          {
              string an = m.Groups[1].ToString();
              doc.LoadHtml(an);
              string anstr = doc.DocumentNode.InnerText;

              // Blank width: three &nbsp; per byte of the answer text in the
              // platform default encoding. Built in one call instead of by
              // repeated string concatenation.
              int length = System.Text.Encoding.Default.GetBytes(anstr).Length;
              string nbsp = string.Concat(Enumerable.Repeat("&nbsp;", length * 3));

              replaces.Add(new ReplaceDto
              {
                  oldstr = langConfig.Item.Start + an + langConfig.Item.End,
                  newstr = "<underline style='word-break: break-word' data=\"" + index + "\"><u>" + nbsp + "</u></underline>"
              });
              ans.Add(an);
          }

          string textImg = testInfo.question;
          // Replace each inline answer with its blank.
          foreach (ReplaceDto replace in replaces)
          {
              testInfo.question = testInfo.question.Replace(replace.oldstr, replace.newstr);
              testInfo.question = HtmlHelper.DoUselessTag(testInfo.question);
              // NOTE(review): the question already had oldstr replaced above, so this
              // Replace finds nothing and the hash input still contains the underline
              // markup. Kept unchanged so existing shaCode values stay stable.
              textImg = testInfo.question.Replace(replace.oldstr, "");
          }
          testInfo.shaCode = ShaHashHelper.GetSHA1(textImg);

          // Analysis text: drop the tag itself. A missing explanation becomes "",
          // mirroring SubjectiveConvert.
          testInfo.explain = testInfo.explain != null ? testInfo.explain.Replace(analysisTag, "") : "";
          testInfo.explain = HtmlHelper.DoUselessTag((string)testInfo.explain);
          testInfo.answer.AddRange(ans);
      }
      return testInfos;
  }
  /// <summary>
  /// Post-processes objective items (single choice, multiple choice, judge):
  /// extracts the options from the question text, removes them from the stem,
  /// computes the content hash, splits the answer into individual option
  /// letters and, for judge items without explicit options, maps the answer
  /// word onto generated A/B options.
  /// </summary>
  /// <param name="typeKey">Question type key ("single", "multiple" or "judge").</param>
  /// <param name="list">Raw HTML of each question.</param>
  /// <returns>The processed items, each marked as objective.</returns>
  private List<DOCX.Models.ItemInfo> OptionProcess(string typeKey, List<string> list)
  {
      // Optional whitespace plus the delimiter that follows an option letter
      // (capture group 1), and the lazily matched option body (capture group 2).
      const string separator = "\\s*(\\.|\\.|\\、|\\:|\\:)";
      const string lazyBody = "([\\s\\S]*?)";

      List<DOCX.Models.ItemInfo> testInfos = ConvertTestInfo(list, typeKey);
      foreach (DOCX.Models.ItemInfo testInfo in testInfos)
      {
          string answerTag = langConfig.Item.Start + langConfig.Item.Answer + langConfig.Item.End;
          string analysisTag = langConfig.Item.Start + langConfig.Item.Analysis + langConfig.Item.End;

          // The option area starts at the first option letter and runs to the
          // answer tag, else the analysis tag, else the end of the line.
          string optsRgex;
          if (testInfo.question.Contains(answerTag))
          {
              optsRgex = optionsKeys[0] + separator + lazyBody + answerTag;
          }
          else if (testInfo.question.Contains(analysisTag))
          {
              optsRgex = optionsKeys[0] + separator + lazyBody + analysisTag;
          }
          else
          {
              optsRgex = optionsKeys[0] + separator + lazyBody + ".*";
          }
          string optsHtml = Regex.Match(testInfo.question, optsRgex).Value;

          // Extract the options.
          StringBuilder textImg = new StringBuilder();
          for (int i = 0; i < optionsKeys.Length - 1; i++)
          {
              string optHtml = null;

              // Preferred form: the option body ends where the next option letter begins.
              string optRgex = optionsKeys[i] + separator + lazyBody + optionsKeys[i + 1] + separator;
              Match optMatch = Regex.Match(optsHtml, optRgex);
              if (optMatch.Success)
              {
                  // Take the captured body rather than slicing fixed offsets: the
                  // pattern allows whitespace between letter and delimiter, which
                  // made Substring(2, Length - 4) cut in the wrong place.
                  optHtml = optMatch.Groups[2].Value;
              }
              else
              {
                  // Last option present: it ends at the answer tag, the analysis
                  // tag, or the end of the line.
                  if (optsHtml.Contains(answerTag))
                  {
                      optRgex = optionsKeys[i] + separator + lazyBody + answerTag;
                  }
                  else if (optsHtml.Contains(analysisTag))
                  {
                      optRgex = optionsKeys[i] + separator + lazyBody + analysisTag;
                  }
                  else
                  {
                      optRgex = optionsKeys[i] + separator + ".*";
                  }
                  optMatch = Regex.Match(optsHtml, optRgex);
                  if (optMatch.Success)
                  {
                      // Skip "letter, optional whitespace, delimiter" using the
                      // real end position of the delimiter group.
                      Group delimiter = optMatch.Groups[1];
                      int prefixLength = delimiter.Index + delimiter.Length - optMatch.Index;
                      optHtml = optMatch.Value.Substring(prefixLength).Replace(answerTag, "").Replace(analysisTag, "");
                  }
              }

              if (optHtml == null)
              {
                  continue;
              }
              optHtml = HtmlHelper.DoUselessTag(optHtml);
              textImg.Append(optHtml);
              testInfo.option.Add(new CodeValue { code = optionsKeys[i], value = optHtml });
          }

          // Question stem: remove the option area and the answer tag.
          // String.Replace throws on an empty search string, which the original
          // hid behind an empty catch (and thereby also skipped the answer-tag
          // removal). Check explicitly instead.
          if (!string.IsNullOrEmpty(optsHtml))
          {
              testInfo.question = testInfo.question.Replace(optsHtml, "");
          }
          testInfo.question = testInfo.question.Replace(answerTag, "");
          testInfo.question = HtmlHelper.DoUselessTag(testInfo.question);
          textImg.Append(testInfo.question);
          testInfo.shaCode = ShaHashHelper.GetSHA1(textImg.ToString());

          // Answers: strip the tags, then keep each distinct character as one answer.
          List<string> answers = testInfo.answer;
          HashSet<string> ans = new HashSet<string>();
          for (int i = 0; i < answers.Count; i++)
          {
              string cleaned = answers[i].Replace(answerTag, "").Replace(analysisTag, "").Trim();
              foreach (char letter in cleaned)
              {
                  ans.Add(letter.ToString());
              }
          }
          testInfo.answer = ans.ToList();

          // Judge items without explicit options get two generated options from
          // the configured judge words; the answer word is mapped to A or B.
          // NOTE(review): the answer was split into single characters above, so a
          // multi-character judge word can never match here - confirm the config
          // only uses single-character words.
          if (typeKey == "judge" && testInfo.option.IsEmpty())
          {
              string[] Judge = langConfig.Item.Judge.Split('|');
              List<CodeValue> option = new List<CodeValue>()
              {
                  new CodeValue { code = "A", value = Judge[0] },
                  new CodeValue { code = "B", value = Judge[1] }
              };
              if (testInfo.answer != null && testInfo.answer.Count > 0)
              {
                  // Only the two generated options are candidates; the original
                  // looped over every configured word and could index past the list.
                  for (int index = 0; index < option.Count; index++)
                  {
                      if (String.Equals(testInfo.answer[0], Judge[index], StringComparison.CurrentCultureIgnoreCase))
                      {
                          testInfo.answer[0] = option[index].code;
                          testInfo.option = option;
                          break;
                      }
                  }
              }
          }

          // Analysis text: drop the tag itself.
          testInfo.explain = testInfo.explain.Replace(analysisTag, "");
          testInfo.explain = HtmlHelper.DoUselessTag((string)testInfo.explain);
          testInfo.objective = true;
      }
      return testInfos;
  }
  /// <summary>
  /// Converts subjective questions: strips the answer and analysis tags from
  /// question, answers and explanation, cleans the markup, hashes the question
  /// text and marks every item as non-objective.
  /// </summary>
  /// <param name="TypeKey">Question type key passed on to <c>ConvertTestInfo</c>.</param>
  /// <param name="tests">Raw HTML of each question.</param>
  /// <returns>The converted items.</returns>
  private List<DOCX.Models.ItemInfo> SubjectiveConvert(string TypeKey, List<string> tests)
  {
      List<DOCX.Models.ItemInfo> items = ConvertTestInfo(tests, TypeKey);
      foreach (DOCX.Models.ItemInfo item in items)
      {
          string answerTag = langConfig.Item.Start + langConfig.Item.Answer + langConfig.Item.End;
          string analysisTag = langConfig.Item.Start + langConfig.Item.Analysis + langConfig.Item.End;

          // Question stem, which is also the hash input.
          string stem = item.question.Replace(analysisTag, "").Replace(answerTag, "");
          item.question = HtmlHelper.DoUselessTag(stem);
          string hashInput = new StringBuilder(item.question).ToString();
          item.shaCode = ShaHashHelper.GetSHA1(hashInput);

          // Answers are cleaned in place.
          int answerCount = item.answer.Count;
          for (int n = 0; n < answerCount; n++)
          {
              string untagged = item.answer[n].Replace(answerTag, "").Replace(analysisTag, "");
              item.answer[n] = HtmlHelper.DoUselessTag((string)untagged);
          }

          // A missing explanation becomes an empty string before cleaning.
          string explanation = "";
          if (item.explain != null)
          {
              explanation = item.explain.Replace(analysisTag, "");
          }
          item.explain = HtmlHelper.DoUselessTag((string)explanation);
          item.objective = false;
      }
      return items;
  }
  /// <summary>
  /// Converts composite questions. For each block the shared material (the text
  /// from the composite tag up to the first question tag of any type) becomes
  /// the parent question, and the remainder is parsed recursively with
  /// <c>Translate</c> to produce the children.
  /// </summary>
  /// <param name="TypeKey">Question type key of the composite question.</param>
  /// <param name="list">Raw HTML of each composite block.</param>
  /// <returns>One parent item per block, with its children attached.</returns>
  private List<DOCX.Models.ItemInfo> ComposeConvert(string TypeKey, List<string> list)
  {
      var parents = new List<DOCX.Models.ItemInfo>();
      foreach (string html in list)
      {
          var parent = new DOCX.Models.ItemInfo() { type = TypeKey , objective = false};

          // Candidate material segments: from the composite tag to the next tag of each type.
          var candidates = new List<string>();
          foreach (string k in langConfig.Item.Type.Keys)
          {
              string Rex = langConfig.Item.Start + "\\d+[^" + langConfig.Item.Start + langConfig.Item.End + "]*" + langConfig.Item.Type[TypeKey] + langConfig.Item.End + "[\\s\\S]+?(?=\\s*" + langConfig.Item.Start + "\\d+[^" + langConfig.Item.Start + langConfig.Item.End + "]*" + langConfig.Item.Type[k] + langConfig.Item.End + "|$)";
              for (Match hit = Regex.Match(html, Rex); hit.Success; hit = hit.NextMatch())
              {
                  candidates.Add(hit.Value);
              }
          }
          // The shortest candidate stops at the nearest following tag.
          string material = candidates.OrderBy(x => x.Length).First();

          // Question number: taken from the numbered composite tag (last occurrence wins).
          string numberedTag = langConfig.Item.Start + "\\d+" + langConfig.Item.Type[TypeKey] + langConfig.Item.End;
          for (Match tag = Regex.Match(material, numberedTag); tag.Success; tag = tag.NextMatch())
          {
              string digits = tag.Value
                  .Replace(langConfig.Item.Start, "")
                  .Replace(langConfig.Item.End, "")
                  .Replace(langConfig.Item.Type[TypeKey], "");
              int.TryParse(digits, out int parsedOrder);
              parent.order = parsedOrder;
          }

          // Everything after the material, minus the end tag, holds the sub-questions.
          string endTag = langConfig.Item.Start + langConfig.Item.Ended + langConfig.Item.End;
          string childrenHtml = html.Replace(material, "").Replace(endTag, "");

          string materialText = Regex.Replace(material, langConfig.Item.Start + "\\d+" + langConfig.Item.Type[TypeKey] + langConfig.Item.End, "");
          parent.question = HtmlHelper.DoUselessTag(materialText);

          // Parent hash = material text followed by the hash of every child.
          string hashInput = parent.question;
          List<DOCX.Models.ItemInfo> children = Translate(childrenHtml, langConfig);
          if (children.IsNotEmpty())
          {
              foreach (DOCX.Models.ItemInfo child in children)
              {
                  hashInput = hashInput + child.shaCode;
              }
              parent.shaCode = ShaHashHelper.GetSHA1(hashInput);
              parent.children.AddRange(children);
          }
          if (parent.children.IsNotEmpty())
          {
              parent.children = parent.children.OrderBy(x => x.order).ToList();
          }
          parents.Add(parent);
      }
      return parents;
  }
  /// <summary>One candidate section found by <c>doItem</c>: the matched text plus the patterns that delimited it.</summary>
  public class RegInfo{
  /// <summary>The text matched by <c>reg</c> (empty when nothing matched).</summary>
  public string val { get; set; }
  /// <summary>Pattern of the tag that opens the section.</summary>
  public string st { get; set; }
  /// <summary>Pattern of the tag that closes the section, or "" when the section runs to the end of the line.</summary>
  public string end { get; set; }
  /// <summary>The complete regex that produced <c>val</c>.</summary>
  public string reg { get; set; }
  }
  /// <summary>
  /// Parses every raw question of one type into an item by calling
  /// <c>doItem</c> on each entry, in input order.
  /// </summary>
  /// <param name="tests">Raw HTML of each question.</param>
  /// <param name="TypeKey">Question type key shared by all entries.</param>
  /// <returns>One parsed item per input entry, in the same order.</returns>
  /// <exception cref="ArgumentNullException"><paramref name="tests"/> is null.</exception>
  public List<DOCX.Models.ItemInfo> ConvertTestInfo(List<string> tests, string TypeKey)
  {
      if (tests == null)
      {
          throw new ArgumentNullException(nameof(tests));
      }

      // The original wrapped each call in Task.Run and immediately blocked on
      // .Result inside a lazy LINQ chain, so items were processed one at a time
      // anyway, at the cost of a blocked pool thread per item and failures
      // surfacing as AggregateException. doItem also writes the shared KeyReg
      // and doc members, so it must not run concurrently. A plain loop gives
      // the same results directly.
      List<DOCX.Models.ItemInfo> testInfos = new List<DOCX.Models.ItemInfo>();
      foreach (string html in tests)
      {
          testInfos.Add(doItem(html, TypeKey));
      }
      return testInfos;
  }
  /// <summary>
  /// Parses the raw HTML of one question into an item.
  /// The input is a sequence of tagged sections in any order, for example
  /// <c>{1Essay}stem{Answer}...{Analysis}...{Score}10{Point}a,b{FieldName}</c>
  /// (the actual tag text comes from <c>langConfig</c>). For every known tag
  /// the shortest span from that tag to any other tag is taken as the section
  /// body, then each section fills the corresponding item property.
  /// </summary>
  /// <param name="html">Raw HTML of a single question.</param>
  /// <param name="TypeKey">Question type key; selects the numbered type tag.</param>
  /// <returns>The parsed item.</returns>
  public DOCX.Models.ItemInfo doItem(string html, string TypeKey) {
      // The numbered type tag opens the question stem ("Summary" section).
      string summaryTag = langConfig.Item.Start + "\\d+" + langConfig.Item.Type[TypeKey] + langConfig.Item.End;
      KeyReg["Summary"] = summaryTag;

      DOCX.Models.ItemInfo test = new DOCX.Models.ItemInfo();
      test.type = TypeKey;

      // Phase 1: locate the body of every section.
      Dictionary<string, RegInfo> dict = new Dictionary<string, RegInfo>();
      foreach (var mkey in KeyReg.Keys)
      {
          dict.Add(mkey, FindShortestSection(html, mkey));
      }

      // Phase 2: turn each section into item data.
      foreach (string key in dict.Keys)
      {
          RegInfo section = dict[key];
          switch (key)
          {
              case "Summary":
                  // Question number (the last numbered tag in the section wins).
                  var m = Regex.Match(section.val, summaryTag);
                  while (m.Success)
                  {
                      string ord = m.Value.Replace(langConfig.Item.Start, "").Replace(langConfig.Item.End, "").Replace(langConfig.Item.Type[TypeKey], "");
                      int.TryParse(ord, out int order);
                      test.order = order;
                      m = m.NextMatch();
                  }
                  var qu = Regex.Replace(section.val, summaryTag, "");
                  qu = Regex.Replace(qu, section.end, "");
                  test.question = qu;
                  break;
              case "Answer":
                  string Answer = section.val;
                  // Objective answers are reduced to plain text.
                  if (TypeKey.Equals("single") || TypeKey.Equals("multiple") || TypeKey.Equals("judge"))
                  {
                      Answer = PlainTextOf(section.val);
                  }
                  test.answer = new List<string>() { StripSectionTags(Answer, section) };
                  break;
              case "Analysis":
                  test.explain = StripSectionTags(section.val, section);
                  break;
              case "Score":
                  string Score = StripSectionTags(PlainTextOf(section.val), section);
                  Score = Regex.Replace(Score, @"\s", "");
                  // Leading decimal number. Parsed with the invariant culture so
                  // that "3.5" is read as 3.5 regardless of the machine's culture.
                  Match scoreMatch = Regex.Match(Score, "^[0-9]+(\\.?[0-9]+)?");
                  double sc = 0;
                  double.TryParse(scoreMatch.Value, System.Globalization.NumberStyles.Float, System.Globalization.CultureInfo.InvariantCulture, out sc);
                  test.score = sc;
                  break;
              case "Point":
                  string Point = StripSectionTags(PlainTextOf(section.val), section);
                  Point = Regex.Replace(Point, @"\s", "");
                  if (!string.IsNullOrWhiteSpace(Point))
                  {
                      // Knowledge points are separated by punctuation.
                      string[] ps = Regex.Split(Point, "\\.|\\.|\\、|\\:|\\:|\\,|\\,|\\;|\\;");
                      if (ps != null && ps.Length > 0)
                      {
                          test.knowledge = ps.Distinct().ToList();
                      }
                  }
                  break;
              default:
                  // "Filed1", "Filed2", ...: the first field tag that resolves to a
                  // configured field name sets the field index (1-based). Handles
                  // any number of configured fields, not just the first six.
                  if (!key.StartsWith("Filed", StringComparison.Ordinal)) { break; }
                  if (test.@field > 0) { break; }
                  string fieldName = PlainTextOf(section.val);
                  fieldName = Regex.Replace(fieldName, section.end, "").Replace(langConfig.Item.Start, "").Replace(langConfig.Item.End, "");
                  test.@field = Fileds.ToList().IndexOf(fieldName) + 1;
                  break;
          }
      }
      return test;
  }

  /// <summary>
  /// Finds the body of the section opened by the tag registered under
  /// <paramref name="mkey"/>: the shortest match running from that tag to any
  /// other registered tag, or to the end of the line when no other tag follows.
  /// </summary>
  private RegInfo FindShortestSection(string html, string mkey)
  {
      List<RegInfo> candidates = new List<RegInfo>();
      foreach (var skey in KeyReg.Keys)
      {
          if (mkey == skey)
          {
              continue;
          }
          string RegexStr = KeyReg[mkey] + "([\\s\\S]*?)" + KeyReg[skey];
          Match mt = Regex.Match(html, RegexStr);
          if (!string.IsNullOrWhiteSpace(mt.Value))
          {
              candidates.Add(new RegInfo { reg = RegexStr, val = mt.Value, st = KeyReg[mkey], end = KeyReg[skey] });
          }
      }
      if (candidates.Count == 0)
      {
          // No closing tag found: take everything up to the end of the line
          // (an empty value when the opening tag is absent too).
          string RegexStrd = KeyReg[mkey] + ".*";
          Match mtd = Regex.Match(html, RegexStrd);
          candidates.Add(new RegInfo { reg = RegexStrd, val = mtd.Value, st = KeyReg[mkey], end = "" });
      }
      return candidates.OrderBy(s => s.val.Length).First();
  }

  /// <summary>Loads a fragment into the shared <c>doc</c> and returns its inner text.</summary>
  private string PlainTextOf(string fragment)
  {
      doc.LoadHtml(fragment);
      return doc.DocumentNode.InnerText;
  }

  /// <summary>Removes the opening and closing tag patterns of a section from its text.</summary>
  private static string StripSectionTags(string text, RegInfo section)
  {
      text = Regex.Replace(text, section.st, "");
      return Regex.Replace(text, section.end, "");
  }
  /// <summary>
  /// Earlier parser that understands only three sections per question, in the
  /// fixed order stem, answer, analysis. Produces one item per input entry
  /// with order, question, answer and explanation filled in.
  /// </summary>
  /// <param name="tests">Raw HTML of each question.</param>
  /// <param name="TypeKey">Question type key shared by all entries.</param>
  /// <returns>One parsed item per input entry.</returns>
  public List<DOCX.Models.ItemInfo> ConvertTestInfo1(List<string> tests, string TypeKey)
  {
      const string lazyBody = "([\\s\\S]*?)";
      var parsedItems = new List<DOCX.Models.ItemInfo>();
      foreach (string html in tests)
      {
          string numberedTag = langConfig.Item.Start + "\\d+" + langConfig.Item.Type[TypeKey] + langConfig.Item.End;
          string answerTag = langConfig.Item.Start + langConfig.Item.Answer + langConfig.Item.End;
          string analysisTag = langConfig.Item.Start + langConfig.Item.Analysis + langConfig.Item.End;

          // Section name -> "openingTag|closingTag" (split again on '|' below).
          var sectionTags = new Dictionary<string, string>
          {
              { "Summary", numberedTag + "|" + answerTag },
              { "Answer", answerTag + "|" + analysisTag },
              { "Analysis", analysisTag }
          };

          var item = new DOCX.Models.ItemInfo();
          item.type = TypeKey;

          foreach (string section in sectionTags.Keys.ToList())
          {
              string[] tags = sectionTags[section].Split('|');

              // Pick the pattern: up to the next tag that is present in the
              // input, otherwise up to the end of the line.
              string sectionPattern = "";
              switch (section)
              {
                  case "Summary":
                      if (html.Contains(answerTag))
                      {
                          sectionPattern = tags[0] + lazyBody + tags[1];
                      }
                      else if (html.Contains(analysisTag))
                      {
                          sectionPattern = tags[0] + lazyBody + analysisTag;
                      }
                      else
                      {
                          sectionPattern = tags[0] + ".*";
                      }
                      break;
                  case "Answer":
                      sectionPattern = html.Contains(analysisTag)
                          ? tags[0] + lazyBody + tags[1]
                          : tags[0] + ".*";
                      break;
                  case "Analysis":
                      sectionPattern = tags[0] + ".*";
                      break;
              }

              string sectionText = Regex.Match(html, sectionPattern).Value;
              if (section == "Summary")
              {
                  // Question number (the last numbered tag wins), then the stem
                  // without its numbered tag.
                  for (Match tag = Regex.Match(sectionText, numberedTag); tag.Success; tag = tag.NextMatch())
                  {
                      string digits = tag.Value
                          .Replace(langConfig.Item.Start, "")
                          .Replace(langConfig.Item.End, "")
                          .Replace(langConfig.Item.Type[TypeKey], "");
                      int.TryParse(digits, out int parsedOrder);
                      item.order = parsedOrder;
                  }
                  item.question = Regex.Replace(sectionText, langConfig.Item.Start + "\\d+" + langConfig.Item.Type[TypeKey] + langConfig.Item.End, "");
              }
              else if (section == "Answer")
              {
                  string answerText = sectionText;
                  // Objective answers are reduced to plain text.
                  bool isObjective = TypeKey.Equals("single") || TypeKey.Equals("multiple") || TypeKey.Equals("judge");
                  if (isObjective)
                  {
                      doc.LoadHtml(sectionText);
                      answerText = doc.DocumentNode.InnerText;
                  }
                  item.answer = new List<string>() { answerText };
              }
              else if (section == "Analysis")
              {
                  item.explain = sectionText;
              }
          }
          parsedItems.Add(item);
      }
      return parsedItems;
  }
  /// <summary>
  /// Splits the document into raw question blocks, grouped by question type.
  /// Composite ("compose") blocks run from their numbered tag to the end tag
  /// and are removed from the working HTML before any other type is matched,
  /// so their sub-questions are not picked up as top-level questions.
  /// </summary>
  /// <param name="testHtml">The cleaned document HTML.</param>
  /// <returns>For every configured type key, the raw HTML of each question of that type.</returns>
  public Dictionary<string, List<string>> ConvertTest(string testHtml)
  {
      string start = langConfig.Item.Start;
      string end = langConfig.Item.End;
      // Opening of any numbered tag: start marker, digits, then text that
      // contains neither marker. The type name and end marker follow it.
      string tagHead = start + "\\d+[^" + start + end + "]*";

      // Extract composite blocks first, wherever "compose" sits in the type
      // table. The original removed them only when the loop reached that key,
      // so types enumerated earlier still saw the nested sub-questions.
      List<string> composeBlocks = new List<string>();
      foreach (string key in langConfig.Item.Type.Keys)
      {
          if (key != "compose")
          {
              continue;
          }
          string composeRex = start + "\\d+" + langConfig.Item.Type[key] + end + "([\\s\\S]*?)" + start + langConfig.Item.Ended + end;
          string remaining = testHtml;
          // Matches are enumerated over the unmodified input while the blocks
          // are removed from the working copy.
          for (Match m = Regex.Match(testHtml, composeRex); m.Success; m = m.NextMatch())
          {
              remaining = remaining.Replace(m.Value, "");
              composeBlocks.Add(m.Value);
          }
          testHtml = remaining;
      }

      Dictionary<string, List<string>> TestInType = new Dictionary<string, List<string>>();
      foreach (string key in langConfig.Item.Type.Keys)
      {
          if (key == "compose")
          {
              TestInType.Add(key, composeBlocks);
              continue;
          }

          // First pass: each block runs from its own tag to the next tag of the
          // SAME type (or the end of the input).
          List<string> tests = new List<string>();
          string regRex = tagHead + langConfig.Item.Type[key] + end + "[\\s\\S]+?(?=\\s*" + tagHead + langConfig.Item.Type[key] + end + "|$)";
          for (Match m = Regex.Match(testHtml, regRex); m.Success; m = m.NextMatch())
          {
              tests.Add(m.Value);
          }

          // Second pass: a block may still contain questions of other types, so
          // re-match it against every type as terminator and keep the shortest.
          for (int i = 0; i < tests.Count; i++)
          {
              List<string> candidates = new List<string>();
              foreach (string k in langConfig.Item.Type.Keys)
              {
                  string Rex = tagHead + langConfig.Item.Type[key] + end + "[\\s\\S]+?(?=\\s*" + tagHead + langConfig.Item.Type[k] + end + "|$)";
                  for (Match mm = Regex.Match(tests[i], Rex); mm.Success; mm = mm.NextMatch())
                  {
                      candidates.Add(mm.Value);
                  }
              }
              tests[i] = candidates.OrderBy(x => x.Length).First();
          }

          TestInType.Add(key, tests);
      }
      return TestInType;
  }
  747. }
  748. }