123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322 |
- using HtmlAgilityPack;
- using System;
- using System.Collections.Generic;
- using System.Linq;
- using System.Text;
- using System.Text.RegularExpressions;
- using System.Threading.Tasks;
- using TEAMModelOS.Model.Core.Dtos;
- using TEAMModelOS.Model.Evaluation.Dtos;
- using TEAMModelOS.SDK.Context.Configuration;
- using TEAMModelOS.SDK.Helper.Common.JsonHelper;
- using TEAMModelOS.SDK.Helper.Network.HttpHelper;
- using TEAMModelOS.Service.Core.Implements;
- using TEAMModelOS.Service.Evaluation.Interfaces;
- namespace TEAMModelOS.Service.Evaluation.Implements
- {
- public class ImportExerciseService : BaseService, IImportExerciseService
- {
// Markers that delimit the parts of one question in the converted document.
// They are fixed literals, so they are declared as constants rather than mutable statics.

/// <summary>Marker that opens a question (start of the question text).</summary>
private const string SummaryTag = "【题文】";

/// <summary>Marker that opens the answer part of a question.</summary>
private const string AnswerTag = "【答案】";

/// <summary>Marker that opens the explanation part of a question.</summary>
private const string AnalysisTag = "【解析】";

/// <summary>Marker that closes a question.</summary>
private const string EndedTag = "【结束】";

/// <summary>Letters used, in order, as option labels of choice questions.</summary>
private const string Options = "ABCDEFGHIJ";

/// <summary>Opening bracket of an inline answer in a fill-in-the-blank question.</summary>
private const string CompleteStart = "【";

/// <summary>Closing bracket of an inline answer in a fill-in-the-blank question.</summary>
private const string CompleteEnd = "】";
/// <summary>
/// Downloads the HTML rendering of a Word document from the word2html converter, cuts it into one
/// section per question type and parses every section into exercises.
/// </summary>
/// <param name="dict">Request parameters. The current implementation does not read them.</param>
/// <returns>
/// A completed task holding the exercises parsed from all sections, in the order
/// single choice, multiple choice, true/false, fill-in-the-blank, subjective.
/// </returns>
public Task<List<ExerciseDto>> AnalyzeWord(Dictionary<string, object> dict)
{
    // Each question type occupies the text between its own heading and the next heading;
    // the last section is closed by the 【完结】 marker.
    Dictionary<string, string> sectionBounds = new Dictionary<string, string>
    {
        { "Single", "单选题|多选题" },
        { "Multiple", "多选题|判断题" },
        { "Judge", "判断题|填空题" },
        { "Complete", "填空题|主观题" },
        { "Subjective", "主观题|【完结】" }
    };

    // NOTE(review): the request is sent with an empty docUrl, and neither `dict` nor the
    // "HaBookAuth:Word2html" configuration entry is used to build it — confirm how the
    // document URL is meant to be supplied before relying on this endpoint.
    string html = HttpHelper.HttpGet("http://127.0.0.1:8761/word2html/api/convert?docUrl=");

    // Normalise the markup: tabs become spaces, <span> wrappers and class attributes are dropped.
    html = html.Replace("\t", " ").Replace("<span>", "").Replace("</span>", "");
    html = Regex.Replace(html, "class=\"([^\"]*)\"", "");
    html = Regex.Replace(html, "<span([^>]{0,})>", "");

    List<ExerciseDto> tests = new List<ExerciseDto>();
    foreach (KeyValuePair<string, string> section in sectionBounds)
    {
        string[] tags = section.Value.Split("|");
        // A missing section yields an empty string, which the converters turn into an empty list.
        string sectionHtml = Regex.Match(html, tags[0] + "([\\s\\S]*?)" + tags[1]).Value;
        switch (section.Key)
        {
            case "Single":
                tests.AddRange(SingleConvert(section.Key, sectionHtml));
                break;
            case "Multiple":
                tests.AddRange(MultipleConvert(section.Key, sectionHtml));
                break;
            case "Judge":
                tests.AddRange(JudgeConvert(section.Key, sectionHtml));
                break;
            case "Complete":
                tests.AddRange(CompleteConvert(section.Key, sectionHtml));
                break;
            case "Subjective":
                tests.AddRange(SubjectiveConvert(section.Key, sectionHtml));
                break;
            default:
                break;
        }
    }

    // Returning null here would make every awaiting caller fail with a NullReferenceException.
    return Task.FromResult(tests);
}
/// <summary>
/// Parses the single-choice section by delegating to <see cref="OptionProcess"/>.
/// </summary>
/// <param name="TypeKey">Question type key stored on every resulting exercise.</param>
/// <param name="testHtml">HTML of the section to parse.</param>
/// <returns>The exercises produced by <see cref="OptionProcess"/>.</returns>
public static List<ExerciseDto> SingleConvert(string TypeKey, string testHtml)
    => OptionProcess(TypeKey, testHtml);
/// <summary>
/// Parses the multiple-choice section by delegating to <see cref="OptionProcess"/>.
/// </summary>
/// <param name="TypeKey">Question type key stored on every resulting exercise.</param>
/// <param name="testHtml">HTML of the section to parse.</param>
/// <returns>The exercises produced by <see cref="OptionProcess"/>.</returns>
public static List<ExerciseDto> MultipleConvert(string TypeKey, string testHtml)
    => OptionProcess(TypeKey, testHtml);
/// <summary>
/// Parses the true/false section by delegating to <see cref="OptionProcess"/>.
/// </summary>
/// <param name="TypeKey">Question type key stored on every resulting exercise.</param>
/// <param name="testHtml">HTML of the section to parse.</param>
/// <returns>The exercises produced by <see cref="OptionProcess"/>.</returns>
public static List<ExerciseDto> JudgeConvert(string TypeKey, string testHtml)
    => OptionProcess(TypeKey, testHtml);
/// <summary>
/// Parses the fill-in-the-blank section by delegating to <see cref="CompleteProcess"/>.
/// </summary>
/// <param name="TypeKey">Question type key stored on every resulting exercise.</param>
/// <param name="testHtml">HTML of the section to parse.</param>
/// <returns>The exercises produced by <see cref="CompleteProcess"/>.</returns>
public static List<ExerciseDto> CompleteConvert(string TypeKey, string testHtml)
    => CompleteProcess(TypeKey, testHtml);
/// <summary>
/// Parses the subjective-question section: every question is split into question text, answers and
/// explanation, and the section markers and dangling paragraph tags are removed from each part.
/// </summary>
/// <param name="TypeKey">Question type key stored on every resulting exercise.</param>
/// <param name="testHtml">HTML of the section to parse.</param>
/// <returns>One exercise per question found in the section.</returns>
public static List<ExerciseDto> SubjectiveConvert(string TypeKey, string testHtml)
{
    List<ExerciseDto> exercises = ConvertTestInfo(ConvertTest(testHtml), TypeKey);
    exercises.ForEach(exercise =>
    {
        // Question text: drop the markers, then tidy the paragraph tags at the edges.
        string questionText = exercise.Question
            .Replace(AnalysisTag, "")
            .Replace(SummaryTag, "")
            .Replace(AnswerTag, "");
        exercise.Question = DoUselessTag(questionText);

        // Answers are cleaned in place so the list instance on the exercise is kept.
        List<string> answerList = exercise.Answer;
        int position = 0;
        while (position < answerList.Count)
        {
            string answerText = answerList[position].Replace(AnswerTag, "").Replace(AnalysisTag, "");
            answerList[position] = DoUselessTag(answerText);
            position++;
        }

        // Explanation: same treatment with its own pair of markers.
        string explanation = exercise.Explain.Replace(AnalysisTag, "").Replace(EndedTag, "");
        exercise.Explain = DoUselessTag(explanation);
    });
    return exercises;
}
/// <summary>
/// Removes paragraph-tag fragments left dangling at the edges of an HTML fragment that was cut out
/// of a larger document: a leading <c>&lt;/p&gt;</c>, a trailing <c>&lt;p&gt;</c> / <c>&lt;p &gt;</c>
/// (together with the <c>&lt;/p&gt;</c> directly before it, when present), and opening paragraph
/// tags of a fragment that contains no closing tag at all.
/// </summary>
/// <param name="str">The fragment to clean. Null or empty input is returned unchanged.</param>
/// <returns>The cleaned fragment.</returns>
public static string DoUselessTag(string str)
{
    if (string.IsNullOrEmpty(str))
    {
        return str;
    }

    if (str.StartsWith("</p>", StringComparison.Ordinal))
    {
        str = str.Substring(4);
    }

    // "<p >" is what remains of "<p class=...>" once the class attribute has been stripped.
    str = StripTrailingParagraphOpener(str, "<p>");
    str = StripTrailingParagraphOpener(str, "<p >");

    if (str.StartsWith("<p >", StringComparison.Ordinal) && !str.Contains("</p>"))
    {
        str = str.Replace("<p >", "");
    }
    if (str.StartsWith("<p>", StringComparison.Ordinal) && !str.Contains("</p>"))
    {
        str = str.Replace("<p>", "");
    }
    return str;
}

/// <summary>
/// Cuts a trailing paragraph opener off <paramref name="text"/>. When the opener directly follows a
/// closing <c>&lt;/p&gt;</c>, that closing tag is cut as well; otherwise only the opener is removed,
/// so no other content is lost and short inputs cannot produce a negative length.
/// </summary>
private static string StripTrailingParagraphOpener(string text, string opener)
{
    if (!text.EndsWith(opener, StringComparison.Ordinal))
    {
        return text;
    }
    string closeThenOpen = "</p>" + opener;
    int cut = text.EndsWith(closeThenOpen, StringComparison.Ordinal) ? closeThenOpen.Length : opener.Length;
    return text.Substring(0, text.Length - cut);
}
/// <summary>
/// Parses the fill-in-the-blank section. Every bracketed answer inside the question text is
/// collected as an answer and replaced by an underlined blank that carries the 1-based position of
/// the answer in its <c>data</c> attribute.
/// </summary>
/// <param name="TypeKey">Question type key stored on every resulting exercise.</param>
/// <param name="testHtml">HTML of the section to parse.</param>
/// <returns>One exercise per question found in the section.</returns>
public static List<ExerciseDto> CompleteProcess(string TypeKey, string testHtml)
{
    List<string> tests = ConvertTest(testHtml);
    List<ExerciseDto> testInfos = ConvertTestInfo(tests, TypeKey);
    string blankPattern = CompleteStart + "([\\s\\S]*?)" + CompleteEnd;
    foreach (ExerciseDto testInfo in testInfos)
    {
        List<string> ans = new List<string>();

        // The section markers use the same brackets as the answers, so they are removed first.
        string question = testInfo.Question.Replace(AnalysisTag, "").Replace(SummaryTag, "").Replace(AnswerTag, "");

        // Replace each blank where it was matched. Replacing by text instead would give two blanks
        // with the same answer the index of the first one.
        testInfo.Question = Regex.Replace(question, blankPattern, match =>
        {
            string an = match.Groups[1].ToString();
            ans.Add(an);
            // The blank is padded to twice the length of the answer it hides.
            string nbsp = string.Concat(Enumerable.Repeat(" ", an.Length * 2));
            return "<underline data=\"" + ans.Count + "\"><u>" + nbsp + "</u></underline>";
        });

        // Explanation: drop the markers and tidy the paragraph tags at the edges.
        testInfo.Explain = testInfo.Explain.Replace(AnalysisTag, "").Replace(EndedTag, "");
        testInfo.Explain = DoUselessTag(testInfo.Explain);

        // NOTE(review): relies on ExerciseDto initialising Answer to a non-null list — confirm.
        testInfo.Answer.AddRange(ans);
    }
    return testInfos;
}
/// <summary>
/// Parses a choice-type section (single choice, multiple choice, true/false): extracts the lettered
/// options from the question text, removes them from the question, and reduces the answers to the
/// distinct characters they contain.
/// </summary>
/// <param name="TypeKey">Question type key stored on every resulting exercise.</param>
/// <param name="testHtml">HTML of the section to parse.</param>
/// <returns>One exercise per question found in the section.</returns>
public static List<ExerciseDto> OptionProcess(string TypeKey, string testHtml)
{
    // Punctuation accepted between an option letter and its text.
    const string delimiter = "(\\.|\\.|\\、|\\:|\\:)";
    const string lazyBody = "([\\s\\S]*?)";
    // An option label is one letter plus one delimiter character.
    const int labelLength = 2;

    List<string> tests = ConvertTest(testHtml);
    string[] optionsKeys = Options.Select(s => s.ToString()).ToArray();
    List<ExerciseDto> testInfos = ConvertTestInfo(tests, TypeKey);
    foreach (ExerciseDto testInfo in testInfos)
    {
        // Everything from the first option label up to and including the answer marker.
        string optsHtml = Regex.Match(testInfo.Question, optionsKeys[0] + delimiter + lazyBody + AnswerTag).Value;

        // Extract the options one by one.
        for (int i = 0; i < optionsKeys.Length - 1; i++)
        {
            // An option normally runs up to the label of the next option ...
            string optHtml = Regex.Match(optsHtml, optionsKeys[i] + delimiter + lazyBody + optionsKeys[i + 1] + delimiter).Value;
            int trailerLength = labelLength;
            if (string.IsNullOrEmpty(optHtml))
            {
                // ... while the last option runs up to the answer marker.
                optHtml = Regex.Match(optsHtml, optionsKeys[i] + delimiter + lazyBody + AnswerTag).Value;
                trailerLength = AnswerTag.Length;
            }
            if (string.IsNullOrEmpty(optHtml))
            {
                continue;
            }

            // Keep only the text between the option's own label and the trailer that ended the match.
            optHtml = optHtml.Substring(labelLength, optHtml.Length - labelLength - trailerLength);
            optHtml = DoUselessTag(optHtml);
            testInfo.Option.Add(new CodeValue { Code = optionsKeys[i], Value = optHtml });
        }

        // Question text: remove the option block and the markers.
        // string.Replace throws for an empty search string, so a question without options
        // must skip the first replacement.
        string question = testInfo.Question;
        if (!string.IsNullOrEmpty(optsHtml))
        {
            question = question.Replace(optsHtml, "");
        }
        testInfo.Question = DoUselessTag(question.Replace(SummaryTag, "").Replace(AnswerTag, ""));

        // Answers: strip the markers and keep each distinct character once.
        HashSet<string> ans = new HashSet<string>();
        foreach (string rawAnswer in testInfo.Answer)
        {
            string cleaned = rawAnswer.Replace(AnswerTag, "").Replace(AnalysisTag, "").Trim();
            foreach (char letter in cleaned)
            {
                ans.Add(letter.ToString());
            }
        }
        testInfo.Answer = ans.ToList();

        // Explanation: drop the markers and tidy the paragraph tags at the edges.
        testInfo.Explain = testInfo.Explain.Replace(AnalysisTag, "").Replace(EndedTag, "");
        testInfo.Explain = DoUselessTag(testInfo.Explain);
    }
    return testInfos;
}
/// <summary>
/// Turns raw question fragments into exercises by cutting each fragment at the section markers.
/// For the "Complete" type the question text runs up to the explanation marker and no answer is
/// assigned; for every other type question, answer and explanation are cut separately. Answers of
/// the "Single", "Multiple" and "Judge" types are reduced to their inner text.
/// </summary>
/// <param name="tests">Raw HTML fragments, one per question.</param>
/// <param name="TypeKey">Question type key stored on every resulting exercise.</param>
/// <returns>One exercise per fragment, in the same order.</returns>
public static List<ExerciseDto> ConvertTestInfo(List<string> tests, string TypeKey)
{
    const string lazyBody = "([\\s\\S]*?)";
    List<ExerciseDto> parsed = new List<ExerciseDto>();
    foreach (string fragment in tests)
    {
        ExerciseDto exercise = new ExerciseDto();
        exercise.Type = TypeKey;

        bool isComplete = TypeKey.Equals("Complete");

        // Question text ends at the answer marker, or at the explanation marker for "Complete".
        string questionEnd = isComplete ? AnalysisTag : AnswerTag;
        exercise.Question = Regex.Match(fragment, SummaryTag + lazyBody + questionEnd).Value;

        if (!isComplete)
        {
            string answerText = Regex.Match(fragment, AnswerTag + lazyBody + AnalysisTag).Value;
            bool isChoiceType = TypeKey.Equals("Single") || TypeKey.Equals("Multiple") || TypeKey.Equals("Judge");
            if (isChoiceType)
            {
                // Choice answers are plain letters, so the HTML markup around them is dropped.
                HtmlDocument answerDocument = new HtmlDocument();
                answerDocument.LoadHtml(answerText);
                answerText = answerDocument.DocumentNode.InnerText;
            }
            exercise.Answer = new List<string>() { answerText };
        }

        exercise.Explain = Regex.Match(fragment, AnalysisTag + lazyBody + EndedTag).Value;
        parsed.Add(exercise);
    }
    return parsed;
}
/// <summary>
/// Splits the HTML of one section into raw question fragments. A fragment starts at a question
/// marker and ends with the first end marker that follows it; both markers are part of the fragment.
/// </summary>
/// <param name="testHtml">HTML of the section. Null or empty input yields an empty list.</param>
/// <returns>
/// The fragments in document order. A question whose end marker is missing is not returned.
/// </returns>
public static List<string> ConvertTest(string testHtml)
{
    List<string> tests = new List<string>();
    if (string.IsNullOrEmpty(testHtml))
    {
        return tests;
    }

    int cursor = 0;
    while (true)
    {
        // ">= 0" on purpose: a question that starts at the very beginning of the text counts too.
        int indexStart = testHtml.IndexOf(SummaryTag, cursor, StringComparison.Ordinal);
        if (indexStart < 0)
        {
            break;
        }

        // Look for the end marker only after the start marker, so a stray end marker earlier in
        // the text cannot produce a negative length.
        int indexEnd = testHtml.IndexOf(EndedTag, indexStart + SummaryTag.Length, StringComparison.Ordinal);
        if (indexEnd < 0)
        {
            // Unterminated question: nothing more can be cut reliably.
            break;
        }

        cursor = indexEnd + EndedTag.Length;
        tests.Add(testHtml.Substring(indexStart, cursor - indexStart));
    }
    return tests;
}
- }
/// <summary>
/// A pair of strings describing one textual replacement.
/// </summary>
internal class ReplaceDto
{
    /// <summary>The text to look for.</summary>
    public string oldstr { get; set; }

    /// <summary>The text to put in its place.</summary>
    public string newstr { get; set; }
}
- }
|