ImportExerciseService.cs 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322
  1. using HtmlAgilityPack;
  2. using System;
  3. using System.Collections.Generic;
  4. using System.Linq;
  5. using System.Text;
  6. using System.Text.RegularExpressions;
  7. using System.Threading.Tasks;
  8. using TEAMModelOS.Model.Core.Dtos;
  9. using TEAMModelOS.Model.Evaluation.Dtos;
  10. using TEAMModelOS.SDK.Context.Configuration;
  11. using TEAMModelOS.SDK.Helper.Common.JsonHelper;
  12. using TEAMModelOS.SDK.Helper.Network.HttpHelper;
  13. using TEAMModelOS.Service.Core.Implements;
  14. using TEAMModelOS.Service.Evaluation.Interfaces;
  15. namespace TEAMModelOS.Service.Evaluation.Implements
  16. {
  17. public class ImportExerciseService : BaseService, IImportExerciseService
  18. {
  // Markers of the Word template. They are never reassigned, so they are
  // read-only to keep the static parsers below free of shared mutable state.

  // Opens a question (start of the question stem).
  private static readonly string SummaryTag = "【题文】";
  // Opens the answer part of a question.
  private static readonly string AnswerTag = "【答案】";
  // Opens the explanation part of a question.
  private static readonly string AnalysisTag = "【解析】";
  // Closes a question.
  private static readonly string EndedTag = "【结束】";
  // Option labels, one character per option.
  private static readonly string Options = "ABCDEFGHIJ";
  // Delimiters around an inline answer (a blank) in fill-in questions.
  private static readonly string CompleteStart = "【";
  private static readonly string CompleteEnd = "】";
  /// <summary>
  /// Downloads the HTML produced by the word-to-html converter, cuts it into the
  /// five question-type sections and parses each section into exercises.
  /// </summary>
  /// <param name="dict">Request parameters; not read by the current implementation.</param>
  /// <returns>
  /// A completed task holding the parsed exercises. The list is empty when the
  /// converter returned no content. The task itself is never <c>null</c>.
  /// </returns>
  public Task<List<ExerciseDto>> AnalyzeWord(Dictionary<string, object> dict)
  {
      // NOTE(review): neither of the next two values reaches the request below, which
      // sends an empty docUrl to a hard-coded local endpoint. Confirm the intended wiring.
      string Word2html = BaseConfigModel.Configuration["HaBookAuth:Word2html"];
      String url = "https://teammodelstorage.blob.core.chinacloudapi.cn/teammodelcontest/20190517/%E6%A8%A1%E6%9D%BF%E6%A0%B7%E4%BE%8B.doc";

      // Each entry is "section heading|heading that ends the section".
      Dictionary<string, string> TestType = new Dictionary<string, string> {
          { "Single", "单选题|多选题" }, { "Multiple", "多选题|判断题" },
          { "Judge", "判断题|填空题" }, { "Complete", "填空题|主观题" },
          { "Subjective", "主观题|【完结】" } };

      List<ExerciseDto> tests = new List<ExerciseDto>();

      string html = HttpHelper.HttpGet("http://127.0.0.1:8761/word2html/api/convert?docUrl=");
      if (string.IsNullOrEmpty(html))
      {
          // Nothing to parse; hand back an empty result instead of failing on null.
          return Task.FromResult(tests);
      }

      html = html.Replace("\t", " ").Replace("<span>", "").Replace("</span>", "");
      // Strip class attributes.
      string classpattern = "class=\"([^\"]*)\"";
      html = Regex.Replace(html, classpattern, "");
      // Strip opening span tags that carry attributes.
      string pattern = "<span([^>]{0,})>";
      html = Regex.Replace(html, pattern, "");

      // Cut the document into one HTML fragment per question type.
      Dictionary<string, string> TestInType = new Dictionary<string, string>();
      foreach (string key in TestType.Keys)
      {
          string[] tags = TestType[key].Split('|');
          string RegexStr = tags[0] + "([\\s\\S]*?)" + tags[1];
          Match mt = Regex.Match(html, RegexStr);
          TestInType.Add(key, mt.Value);
      }

      // Parse the questions of every type.
      foreach (string key in TestInType.Keys)
      {
          switch (key)
          {
              case "Single":
                  tests.AddRange(SingleConvert(key, TestInType[key])); break;
              case "Multiple":
                  tests.AddRange(MultipleConvert(key, TestInType[key])); break;
              case "Judge":
                  tests.AddRange(JudgeConvert(key, TestInType[key])); break;
              case "Complete":
                  tests.AddRange(CompleteConvert(key, TestInType[key])); break;
              case "Subjective":
                  tests.AddRange(SubjectiveConvert(key, TestInType[key])); break;
              default: break;
          }
      }

      // Original author's note (translated): serialize with MessagePack; the built-in
      // serializer adds random ids, which hinders SHA-1 based duplicate detection.
      // The method used to return null here and threw the parsed exercises away.
      return Task.FromResult(tests);
  }
  /// <summary>
  /// Parses a section of single-choice questions by delegating to <c>OptionProcess</c>.
  /// </summary>
  /// <param name="TypeKey">Question type key passed through to the parser.</param>
  /// <param name="testHtml">HTML fragment of the section.</param>
  /// <returns>The exercises produced by <c>OptionProcess</c>.</returns>
  public static List<ExerciseDto> SingleConvert(string TypeKey, string testHtml)
  {
      return OptionProcess(TypeKey, testHtml);
  }
  /// <summary>
  /// Parses a section of multiple-choice questions by delegating to <c>OptionProcess</c>.
  /// </summary>
  /// <param name="TypeKey">Question type key passed through to the parser.</param>
  /// <param name="testHtml">HTML fragment of the section.</param>
  /// <returns>The exercises produced by <c>OptionProcess</c>.</returns>
  public static List<ExerciseDto> MultipleConvert(string TypeKey, string testHtml)
  {
      return OptionProcess(TypeKey, testHtml);
  }
  /// <summary>
  /// Parses a section of true/false questions by delegating to <c>OptionProcess</c>.
  /// </summary>
  /// <param name="TypeKey">Question type key passed through to the parser.</param>
  /// <param name="testHtml">HTML fragment of the section.</param>
  /// <returns>The exercises produced by <c>OptionProcess</c>.</returns>
  public static List<ExerciseDto> JudgeConvert(string TypeKey, string testHtml)
  {
      return OptionProcess(TypeKey, testHtml);
  }
  /// <summary>
  /// Parses a section of fill-in-the-blank questions by delegating to <c>CompleteProcess</c>.
  /// </summary>
  /// <param name="TypeKey">Question type key passed through to the parser.</param>
  /// <param name="testHtml">HTML fragment of the section.</param>
  /// <returns>The exercises produced by <c>CompleteProcess</c>.</returns>
  public static List<ExerciseDto> CompleteConvert(string TypeKey, string testHtml)
      => CompleteProcess(TypeKey, testHtml);
  /// <summary>
  /// Parses a section of subjective (free-answer) questions: splits the fragment into
  /// questions, then removes the template markers and dangling paragraph tags from
  /// the stem, every answer and the explanation.
  /// </summary>
  /// <param name="TypeKey">Question type key stored on every produced exercise.</param>
  /// <param name="testHtml">HTML fragment of the section.</param>
  /// <returns>One exercise per question found in the fragment.</returns>
  public static List<ExerciseDto> SubjectiveConvert(string TypeKey, string testHtml)
  {
      List<ExerciseDto> exercises = ConvertTestInfo(ConvertTest(testHtml), TypeKey);

      foreach (ExerciseDto exercise in exercises)
      {
          // Stem: drop all three markers, then tidy the paragraph tags.
          string stem = exercise.Question
              .Replace(AnalysisTag, "")
              .Replace(SummaryTag, "")
              .Replace(AnswerTag, "");
          exercise.Question = DoUselessTag(stem);

          // Answers: cleaned in place, position by position.
          int position = 0;
          while (position < exercise.Answer.Count)
          {
              string answer = exercise.Answer[position].Replace(AnswerTag, "").Replace(AnalysisTag, "");
              exercise.Answer[position] = DoUselessTag(answer);
              position++;
          }

          // Explanation: drop its own marker and the end marker.
          string explanation = exercise.Explain.Replace(AnalysisTag, "").Replace(EndedTag, "");
          exercise.Explain = DoUselessTag(explanation);
      }

      return exercises;
  }
  /// <summary>
  /// Removes paragraph tags left dangling at the edges of an HTML fragment after it
  /// was cut out of a larger document: a leading <c>&lt;/p&gt;</c>, a trailing
  /// <c>&lt;p&gt;</c> or <c>&lt;p &gt;</c> (together with the <c>&lt;/p&gt;</c> right before
  /// it, if any), and unmatched leading opening tags.
  /// </summary>
  /// <param name="str">The fragment to clean; may be null or empty.</param>
  /// <returns>The cleaned fragment; null or empty input is returned unchanged.</returns>
  public static string DoUselessTag(string str)
  {
      if (string.IsNullOrEmpty(str))
      {
          return str;
      }

      const string closeTag = "</p>";

      if (str.StartsWith(closeTag, StringComparison.Ordinal))
      {
          str = str.Substring(closeTag.Length);
      }

      // "<p >" is what remains of "<p class=...>" once class attributes are stripped.
      foreach (string openTag in new[] { "<p>", "<p >" })
      {
          if (!str.EndsWith(openTag, StringComparison.Ordinal))
          {
              continue;
          }

          // The old code always cut 7 / 8 characters, i.e. it assumed a "</p>" right
          // before the tag. Cut that much only when the "</p>" is really there;
          // otherwise remove just the tag, so no content is lost and short strings
          // no longer throw ArgumentOutOfRangeException.
          string closeThenOpen = closeTag + openTag;
          int cut = str.EndsWith(closeThenOpen, StringComparison.Ordinal)
              ? closeThenOpen.Length
              : openTag.Length;
          str = str.Substring(0, str.Length - cut);
      }

      // An opening tag at the start with no closing tag anywhere is unmatched.
      if (str.StartsWith("<p >", StringComparison.Ordinal) && !str.Contains(closeTag))
      {
          str = str.Replace("<p >", "");
      }
      if (str.StartsWith("<p>", StringComparison.Ordinal) && !str.Contains(closeTag))
      {
          str = str.Replace("<p>", "");
      }
      return str;
  }
  /// <summary>
  /// Parses fill-in-the-blank questions. Every inline answer written between the
  /// blank delimiters in the stem is collected as an answer and replaced by a
  /// numbered, underlined blank.
  /// </summary>
  /// <param name="TypeKey">Question type key stored on every produced exercise.</param>
  /// <param name="testHtml">HTML fragment of the section.</param>
  /// <returns>One exercise per question, with blanked stem, answers and explanation.</returns>
  public static List<ExerciseDto> CompleteProcess(string TypeKey, string testHtml)
  {
      List<string> tests = ConvertTest(testHtml);
      List<ExerciseDto> testInfos = ConvertTestInfo(tests, TypeKey);
      string blankPattern = CompleteStart + "([\\s\\S]*?)" + CompleteEnd;

      foreach (ExerciseDto testInfo in testInfos)
      {
          List<string> ans = new List<string>();
          string stem = testInfo.Question.Replace(AnalysisTag, "").Replace(SummaryTag, "").Replace(AnswerTag, "");

          // Replace by match position, not by text: two blanks holding the same
          // answer previously both received the index of the first one.
          testInfo.Question = Regex.Replace(stem, blankPattern, match =>
          {
              string an = match.Groups[1].Value;
              ans.Add(an);

              // Blank width: two non-breaking spaces per answer character.
              StringBuilder nbsp = new StringBuilder();
              for (int i = 0; i < an.Length * 2; i++)
              {
                  nbsp.Append("&nbsp;");
              }
              // ans.Count is the 1-based number of the blank just collected.
              return "<underline data=\"" + ans.Count + "\"><u>" + nbsp + "</u></underline>";
          });

          // Explanation: drop its marker and the end marker, then tidy paragraph tags.
          testInfo.Explain = testInfo.Explain.Replace(AnalysisTag, "").Replace(EndedTag, "");
          testInfo.Explain = DoUselessTag(testInfo.Explain);

          // ConvertTestInfo extracts no answer section for this question type, so
          // the list may not have been created yet.
          if (testInfo.Answer == null)
          {
              testInfo.Answer = new List<string>();
          }
          testInfo.Answer.AddRange(ans);
      }
      return testInfos;
  }
  /// <summary>
  /// Choice-question processing: splits the option block out of the stem, records
  /// every option, and turns the answer text into a list of single-character answers.
  /// </summary>
  /// <param name="TypeKey">Question type key stored on every produced exercise.</param>
  /// <param name="testHtml">HTML fragment of the section.</param>
  /// <returns>One exercise per question, with stem, options, answers and explanation.</returns>
  public static List<ExerciseDto> OptionProcess(string TypeKey, string testHtml)
  {
      List<string> tests = ConvertTest(testHtml);
      string[] optionsKeys = Options.Select(s => s.ToString()).ToArray();
      List<ExerciseDto> testInfos = ConvertTestInfo(tests, TypeKey);

      // Loop-invariant pattern pieces: the separator after an option letter and
      // the lazily matched option text.
      const string separator = "(\\.|\\.|\\、|\\:|\\:)";
      const string content = "([\\s\\S]*?)";

      foreach (ExerciseDto testInfo in testInfos)
      {
          // Everything from the first option letter up to the answer marker.
          string optsRgex = optionsKeys[0] + separator + content + AnswerTag;
          string optsHtml = Regex.Match(testInfo.Question, optsRgex).Value;

          // Options.
          for (int i = 0; i < optionsKeys.Length - 1; i++)
          {
              // An option normally runs up to the next letter + separator (2 characters).
              string optRgex = optionsKeys[i] + separator + content + optionsKeys[i + 1] + separator;
              string optHtml = Regex.Match(optsHtml, optRgex).Value;
              int trailing = 2;

              if (string.IsNullOrEmpty(optHtml))
              {
                  // The last option runs up to the answer marker instead.
                  optRgex = optionsKeys[i] + separator + content + AnswerTag;
                  optHtml = Regex.Match(optsHtml, optRgex).Value;
                  trailing = AnswerTag.Length;
              }
              if (string.IsNullOrEmpty(optHtml))
              {
                  continue;
              }

              // Drop the leading "letter + separator" and the trailing delimiter.
              optHtml = optHtml.Substring(2, optHtml.Length - 2 - trailing);
              optHtml = DoUselessTag(optHtml);
              testInfo.Option.Add(new CodeValue { Code = optionsKeys[i], Value = optHtml });
          }

          // Stem. string.Replace throws ArgumentException for an empty search
          // string, so skip that step when no option block was found.
          string stem = testInfo.Question;
          if (!string.IsNullOrEmpty(optsHtml))
          {
              stem = stem.Replace(optsHtml, "");
          }
          stem = stem.Replace(SummaryTag, "").Replace(AnswerTag, "");
          testInfo.Question = DoUselessTag(stem);

          // Answers: every character of the cleaned answer text is one answer;
          // the set removes duplicates.
          HashSet<string> ans = new HashSet<string>();
          foreach (string raw in testInfo.Answer)
          {
              string cleaned = raw.Replace(AnswerTag, "").Replace(AnalysisTag, "").Trim();
              foreach (char letter in cleaned)
              {
                  ans.Add(letter.ToString());
              }
          }
          testInfo.Answer = ans.ToList();

          // Explanation.
          testInfo.Explain = testInfo.Explain.Replace(AnalysisTag, "").Replace(EndedTag, "");
          testInfo.Explain = DoUselessTag(testInfo.Explain);
      }
      return testInfos;
  }
  /// <summary>
  /// Turns raw question fragments into exercises by extracting the stem, answer and
  /// explanation sections between the template markers. The markers stay in the
  /// extracted text; the callers remove them.
  /// </summary>
  /// <param name="tests">Raw HTML of each question; null is treated as empty.</param>
  /// <param name="TypeKey">
  /// Question type key stored on every exercise. "Complete" questions have no separate
  /// answer section; for "Single", "Multiple" and "Judge" the answer is reduced to its
  /// inner text.
  /// </param>
  /// <returns>One exercise per input fragment, in input order.</returns>
  public static List<ExerciseDto> ConvertTestInfo(List<string> tests, string TypeKey)
  {
      List<ExerciseDto> testInfos = new List<ExerciseDto>();
      if (tests == null)
      {
          return testInfos;
      }

      // The section patterns depend only on the question type, so they are built
      // once instead of once per question.
      const string between = "([\\s\\S]*?)";
      Dictionary<string, string> sections;
      if (TypeKey == "Complete")
      {
          // Fill-in questions carry their answers inline, so the stem runs
          // straight up to the explanation marker.
          sections = new Dictionary<string, string>
          {
              { "Summary", SummaryTag + between + AnalysisTag },
              { "Analysis", AnalysisTag + between + EndedTag }
          };
      }
      else
      {
          sections = new Dictionary<string, string>
          {
              { "Summary", SummaryTag + between + AnswerTag },
              { "Answer", AnswerTag + between + AnalysisTag },
              { "Analysis", AnalysisTag + between + EndedTag }
          };
      }
      bool plainTextAnswer = TypeKey == "Single" || TypeKey == "Multiple" || TypeKey == "Judge";

      foreach (string html in tests)
      {
          ExerciseDto test = new ExerciseDto();
          test.Type = TypeKey;

          foreach (KeyValuePair<string, string> section in sections)
          {
              Match mt = Regex.Match(html, section.Value);
              switch (section.Key)
              {
                  case "Summary":
                      test.Question = mt.Value; break;
                  case "Answer":
                      string Answer = mt.Value;
                      // Single, multiple and true/false answers: strip the HTML tags.
                      if (plainTextAnswer)
                      {
                          HtmlDocument doc = new HtmlDocument();
                          doc.LoadHtml(mt.Value);
                          Answer = doc.DocumentNode.InnerText;
                      }
                      test.Answer = new List<string>() { Answer }; break;
                  case "Analysis":
                      test.Explain = mt.Value; break;
                  default: break;
              }
          }
          testInfos.Add(test);
      }
      return testInfos;
  }
  /// <summary>
  /// Splits a section into its questions. Each question runs from a question-start
  /// marker through the next end marker, both markers included.
  /// </summary>
  /// <param name="testHtml">HTML fragment of one section; may be null or empty.</param>
  /// <returns>
  /// The raw question fragments in document order. Scanning stops at the first start
  /// marker that has no end marker after it.
  /// </returns>
  public static List<string> ConvertTest(string testHtml)
  {
      List<string> tests = new List<string>();
      if (string.IsNullOrEmpty(testHtml))
      {
          return tests;
      }

      string start = SummaryTag;
      string end = EndedTag;
      int cursor = 0;

      while (true)
      {
          // A marker at index 0 is a valid hit. The former "> 0" test stopped the
          // scan whenever a question began right after the previous end marker.
          int indexStart = testHtml.IndexOf(start, cursor, StringComparison.Ordinal);
          if (indexStart < 0)
          {
              break;
          }

          // Search the end marker only after the start marker, so a stray or missing
          // end marker can no longer produce a negative Substring length.
          int indexEnd = testHtml.IndexOf(end, indexStart + start.Length, StringComparison.Ordinal);
          if (indexEnd < 0)
          {
              break;
          }

          tests.Add(testHtml.Substring(indexStart, indexEnd - indexStart + end.Length));
          cursor = indexEnd + end.Length;
      }
      return tests;
  }
  299. }
  /// <summary>
  /// A pair of strings describing one textual substitution.
  /// </summary>
  internal class ReplaceDto
  {
      /// <summary>Text to search for.</summary>
      public string oldstr { get; set; }

      /// <summary>Text that takes its place.</summary>
      public string newstr { get; set; }
  }
  305. }