|
@@ -0,0 +1,360 @@
|
|
|
+using DocumentFormat.OpenXml.Packaging;
|
|
|
+using HtmlAgilityPack;
|
|
|
+using Microsoft.AspNetCore.Http;
|
|
|
+using OpenXmlPowerTools;
|
|
|
+using System;
|
|
|
+using System.Collections.Generic;
|
|
|
+using System.Drawing.Imaging;
|
|
|
+using System.IO;
|
|
|
+using System.Linq;
|
|
|
+using System.Text;
|
|
|
+using System.Text.RegularExpressions;
|
|
|
+using System.Threading.Tasks;
|
|
|
+using System.Xml.Linq;
|
|
|
+using TEAMModelOS.Model.Evaluation.Dtos.Own;
|
|
|
+using TEAMModelOS.SDK.Context.Configuration;
|
|
|
+using TEAMModelOS.SDK.Context.Constant;
|
|
|
+using TEAMModelOS.SDK.Extension.SnowFlake;
|
|
|
+using TEAMModelOS.SDK.Helper.Common.CollectionHelper;
|
|
|
+using TEAMModelOS.SDK.Helper.Common.FileHelper;
|
|
|
+using TEAMModelOS.SDK.Helper.Common.StringHelper;
|
|
|
+using TEAMModelOS.SDK.Helper.Security.ShaHash;
|
|
|
+using TEAMModelOS.SDK.Module.AzureBlob.Container;
|
|
|
+using TEAMModelOS.SDK.Module.AzureBlob.Interfaces;
|
|
|
+using TEAMModelOS.SDK.Module.AzureTable.Interfaces;
|
|
|
+using TEAMModelOS.Service.Models.Core;
|
|
|
+using TEAMModelOS.Service.Models.Evaluation.Dtos.Own;
|
|
|
+using TEAMModelOS.Service.Services.Evaluation.Interfaces;
|
|
|
+namespace TEAMModelOS.Service.Services.Evaluation.Implements
|
|
|
+{
|
|
|
+ public class HtmlAnalyzeService : IHtmlAnalyzeService
|
|
|
+ {
|
|
|
// Markers that delimit the parts of a question inside the converted Word HTML.
// They are read-only: every parser in this class depends on them, so they must not be reassigned at runtime.

/// <summary>Stem marker; it is stripped from question text wherever it appears.</summary>
private static readonly string SummaryTag = "【题文】";

/// <summary>Marker that ends the stem/options part and starts the answer part.</summary>
private static readonly string AnswerTag = "【答案】";

/// <summary>Marker that ends the answer part and starts the explanation part.</summary>
private static readonly string AnalysisTag = "【解析】";

/// <summary>Marker that closes an ordinary (non-compose) question.</summary>
private static readonly string EndedTag = "【结束】";

/// <summary>Candidate option letters, in the order they are searched for.</summary>
private static readonly string Options = "ABCDEFGHIJ";

/// <summary>Opening bracket around an inline fill-in-the-blank answer.</summary>
private static readonly string CompleteStart = "【";

/// <summary>Closing bracket around an inline fill-in-the-blank answer.</summary>
private static readonly string CompleteEnd = "】";

/// <summary>Marker that opens a compose (multi-part) question.</summary>
private static readonly string ComposeStart = "【综合题】";

/// <summary>Marker that ends the shared stem of a compose question.</summary>
private static readonly string ComposeEnd = "【综合题-题干】";

/// <summary>Prefix of the markers used by sub-questions nested in a compose question.</summary>
private static readonly string ComposeTag = "【综合题-";

/// <summary>
/// Maps each question type key to its "startTag|endTag" pair. The insertion order
/// is the order in which question types are scanned and returned.
/// </summary>
private static readonly Dictionary<string, string> TestType = new Dictionary<string, string>
{
    { "Single", "【单选题】|【结束】" },
    { "Multiple", "【多选题】|【结束】" },
    { "Judge", "【判断题】|【结束】" },
    { "Complete", "【填空题】|【结束】" },
    { "Subjective", "【问答题】|【结束】" },
    { "Compose", "【综合题】|【完结】" }
};
|
|
|
/// <summary>
/// Parses tagged question HTML into exercises, grouped and ordered by question type.
/// Despite the Async suffix the method runs synchronously.
/// </summary>
/// <param name="html">Question HTML containing the type/answer/analysis markers.</param>
/// <param name="Lang">Language code; only forwarded to compose-question parsing.</param>
/// <returns>
/// All exercises found, each with <c>PShaCode</c> set to its own <c>ShaCode</c>.
/// An empty list is returned when <paramref name="html"/> is null or empty.
/// </returns>
public List<ExerciseDto> AnalyzeWordAsync(string html, string Lang)
{
    List<ExerciseDto> tests = new List<ExerciseDto>();

    // Regex.Replace throws ArgumentNullException on null input; no markup simply means no questions.
    if (string.IsNullOrEmpty(html))
    {
        return tests;
    }

    // Strip class attributes and span tags so they cannot split the markers.
    html = Regex.Replace(html, "class=\"([^\"]*)\"", "");
    html = Regex.Replace(html, "<span([^>]{0,})>", "");
    html = html.Replace("\t", " ").Replace("<span>", "").Replace("</span>", "").Replace("dir=\"ltr\"", "");

    foreach (KeyValuePair<string, List<string>> group in ConvertTest(html))
    {
        List<ExerciseDto> parsed;
        switch (group.Key)
        {
            case "Single":
                parsed = SingleConvert(group.Key, group.Value);
                break;
            case "Multiple":
                parsed = MultipleConvert(group.Key, group.Value);
                break;
            case "Judge":
                parsed = JudgeConvert(group.Key, group.Value);
                break;
            case "Complete":
                parsed = CompleteConvert(group.Key, group.Value);
                break;
            case "Subjective":
                parsed = SubjectiveConvert(group.Key, group.Value);
                break;
            case "Compose":
                parsed = ComposeConvert(group.Key, group.Value, Lang);
                break;
            default:
                // Unknown type keys are ignored.
                continue;
        }

        // A top-level exercise is its own parent.
        foreach (ExerciseDto exercise in parsed)
        {
            exercise.PShaCode = exercise.ShaCode;
        }
        tests.AddRange(parsed);
    }

    return tests;
}
|
|
|
+
|
|
|
/// <summary>Parses single-choice question fragments; delegates to OptionProcess.</summary>
/// <param name="TypeKey">Question type key passed through to the parser.</param>
/// <param name="list">Raw HTML fragments, one per question.</param>
/// <returns>The parsed exercises.</returns>
private List<ExerciseDto> SingleConvert(string TypeKey, List<string> list) => OptionProcess(TypeKey, list);
|
|
|
+
|
|
|
/// <summary>Parses multiple-choice question fragments; delegates to OptionProcess.</summary>
/// <param name="TypeKey">Question type key passed through to the parser.</param>
/// <param name="list">Raw HTML fragments, one per question.</param>
/// <returns>The parsed exercises.</returns>
private List<ExerciseDto> MultipleConvert(string TypeKey, List<string> list) => OptionProcess(TypeKey, list);
|
|
|
+
|
|
|
/// <summary>Parses true/false question fragments; delegates to OptionProcess.</summary>
/// <param name="TypeKey">Question type key passed through to the parser.</param>
/// <param name="list">Raw HTML fragments, one per question.</param>
/// <returns>The parsed exercises.</returns>
private List<ExerciseDto> JudgeConvert(string TypeKey, List<string> list) => OptionProcess(TypeKey, list);
|
|
|
+
|
|
|
+
|
|
|
/// <summary>Parses fill-in-the-blank question fragments; delegates to CompleteProcess.</summary>
/// <param name="TypeKey">Question type key passed through to the parser.</param>
/// <param name="list">Raw HTML fragments, one per question.</param>
/// <returns>The parsed exercises.</returns>
private List<ExerciseDto> CompleteConvert(string TypeKey, List<string> list) => CompleteProcess(TypeKey, list);
|
|
|
/// <summary>
/// Parses fill-in-the-blank questions. Every bracketed answer in the stem is collected
/// into <c>Answer</c> and replaced by a numbered underline placeholder.
/// </summary>
/// <param name="TypeKey">Question type key passed to ConvertTestInfo.</param>
/// <param name="tests">Raw HTML fragments, one per question.</param>
/// <returns>The parsed exercises with Question, Answer, Explain and ShaCode filled in.</returns>
private List<ExerciseDto> CompleteProcess(string TypeKey, List<string> tests)
{
    List<ExerciseDto> testInfos = ConvertTestInfo(tests, TypeKey);
    HtmlDocument doc = new HtmlDocument();
    string blankPattern = CompleteStart + "([\\s\\S]*?)" + CompleteEnd;

    foreach (ExerciseDto testInfo in testInfos)
    {
        List<string> ans = new List<string>();
        string question = testInfo.Question.Replace(AnalysisTag, "").Replace(SummaryTag, "").Replace(AnswerTag, "");

        // Replace the blanks in a single left-to-right pass. Replacing by answer text
        // would give two blanks with the same answer the same index.
        int index = 0;
        question = Regex.Replace(question, blankPattern, m =>
        {
            string an = m.Groups[1].Value;
            ans.Add(an);
            index++;

            // Size the placeholder from the tag-free answer text.
            doc.LoadHtml(an);
            int length = System.Text.Encoding.Default.GetBytes(doc.DocumentNode.InnerText).Length;

            // NOTE(review): the padding unit is the single blank that appears in this source;
            // confirm whether a non-breaking space was intended.
            string padding = string.Concat(Enumerable.Repeat(" ", length * 3));
            return "<underline data=\"" + index + "\"><u>" + padding + "</u></underline>";
        });

        // Clean the markup once, after every blank has been located, so the clean-up cannot
        // alter an answer before it is replaced. It also runs for stems without blanks.
        question = HtmlHelper.DoUselessTag(question);
        testInfo.Question = question;

        // NOTE(review): the original comments say the hash should exclude the underline markers,
        // but the previous code hashed the stem with the placeholders in place. That input is
        // kept here so hashes stay comparable; confirm the intended behaviour.
        testInfo.ShaCode = ShaHashHelper.GetSHA1(HtmlHelper.DoTextImg(question));

        // Explanation: drop the delimiting markers, then clean the markup.
        testInfo.Explain = HtmlHelper.DoUselessTag(testInfo.Explain.Replace(AnalysisTag, "").Replace(EndedTag, ""));
        testInfo.Answer.AddRange(ans);
    }

    return testInfos;
}
|
|
|
+
|
|
|
/// <summary>
/// Parses option-based questions (single choice, multiple choice, true/false):
/// splits the option block into options, strips it from the stem, and breaks the
/// answer text into individual characters.
/// </summary>
/// <param name="typeKey">Question type key passed to ConvertTestInfo.</param>
/// <param name="list">Raw HTML fragments, one per question.</param>
/// <returns>The parsed exercises with Option, Question, Answer, Explain and ShaCode filled in.</returns>
private List<ExerciseDto> OptionProcess(string typeKey, List<string> list)
{
    // One separator character that follows an option letter.
    // NOTE(review): as written the alternation lists '.' and ':' twice; the duplicates may
    // have been full-width variants originally. Confirm against the upstream file.
    const string Separator = "(\\.|\\.|\\、|\\:|\\:)";
    const string Lazy = "([\\s\\S]*?)";

    string[] optionsKeys = Options.Select(s => s.ToString()).ToArray();
    List<ExerciseDto> testInfos = ConvertTestInfo(list, typeKey);

    foreach (ExerciseDto testInfo in testInfos)
    {
        // The option block runs from the first option letter up to the answer marker.
        string optsHtml = Regex.Match(testInfo.Question, optionsKeys[0] + Separator + Lazy + AnswerTag).Value;

        // Hash input: text and images of every option, then of the stem.
        StringBuilder textImg = new StringBuilder();

        // Every letter is visited, including the last one, so the final key can be captured too.
        for (int i = 0; i < optionsKeys.Length; i++)
        {
            string optHtml = "";
            int trailing = 0;

            // An option normally ends where the next option letter starts.
            if (i + 1 < optionsKeys.Length)
            {
                optHtml = Regex.Match(optsHtml, optionsKeys[i] + Separator + Lazy + optionsKeys[i + 1] + Separator).Value;
                trailing = 2; // next letter + its separator
            }

            // The last option ends at the answer marker instead.
            if (string.IsNullOrEmpty(optHtml))
            {
                optHtml = Regex.Match(optsHtml, optionsKeys[i] + Separator + Lazy + AnswerTag).Value;
                trailing = AnswerTag.Length;
            }

            if (string.IsNullOrEmpty(optHtml))
            {
                continue;
            }

            // Drop the leading "X." and the trailing delimiter, keeping only the option content.
            optHtml = optHtml.Substring(2, optHtml.Length - 2 - trailing);
            optHtml = HtmlHelper.DoUselessTag(optHtml);
            textImg.Append(HtmlHelper.DoTextImg(optHtml));
            testInfo.Option.Add(new CodeValue { Code = optionsKeys[i], Value = optHtml });
        }

        // Stem: remove the option block and the markers.
        // string.Replace throws ArgumentException for an empty search string, which happens
        // when the question has no option block at all.
        string question = testInfo.Question;
        if (optsHtml.Length > 0)
        {
            question = question.Replace(optsHtml, "");
        }
        question = question.Replace(SummaryTag, "").Replace(AnswerTag, "");
        testInfo.Question = HtmlHelper.DoUselessTag(question);
        textImg.Append(HtmlHelper.DoTextImg(testInfo.Question));
        testInfo.ShaCode = ShaHashHelper.GetSHA1(textImg.ToString());

        // Answers: strip the markers and split into distinct single characters.
        HashSet<string> ans = new HashSet<string>();
        foreach (string raw in testInfo.Answer)
        {
            string cleaned = raw.Replace(AnswerTag, "").Replace(AnalysisTag, "").Trim();
            foreach (char letter in cleaned)
            {
                ans.Add(letter.ToString());
            }
        }
        testInfo.Answer = ans.ToList();

        // Explanation: drop the delimiting markers, then clean the markup.
        testInfo.Explain = HtmlHelper.DoUselessTag(testInfo.Explain.Replace(AnalysisTag, "").Replace(EndedTag, ""));
    }

    return testInfos;
}
|
|
|
+
|
|
|
+
|
|
|
/// <summary>
/// Parses free-response questions: strips the markers from stem, answers and
/// explanation, cleans their markup, and hashes the stem's text and images.
/// </summary>
/// <param name="TypeKey">Question type key passed to ConvertTestInfo.</param>
/// <param name="tests">Raw HTML fragments, one per question.</param>
/// <returns>The parsed exercises.</returns>
private List<ExerciseDto> SubjectiveConvert(string TypeKey, List<string> tests)
{
    List<ExerciseDto> exercises = ConvertTestInfo(tests, TypeKey);

    foreach (ExerciseDto exercise in exercises)
    {
        // Stem
        string stem = exercise.Question.Replace(AnalysisTag, "").Replace(SummaryTag, "").Replace(AnswerTag, "");
        exercise.Question = HtmlHelper.DoUselessTag(stem);
        exercise.ShaCode = ShaHashHelper.GetSHA1(new StringBuilder(HtmlHelper.DoTextImg(exercise.Question)).ToString());

        // Answers
        int position = 0;
        while (position < exercise.Answer.Count)
        {
            string answer = exercise.Answer[position].Replace(AnswerTag, "").Replace(AnalysisTag, "");
            exercise.Answer[position] = answer;
            exercise.Answer[position] = HtmlHelper.DoUselessTag(exercise.Answer[position]);
            position++;
        }

        // Explanation
        string explanation = exercise.Explain.Replace(AnalysisTag, "").Replace(EndedTag, "");
        exercise.Explain = HtmlHelper.DoUselessTag(explanation);
    }

    return exercises;
}
|
|
|
+
|
|
|
/// <summary>
/// Parses compose (multi-part) questions: the shared stem becomes the parent exercise,
/// and the rest is re-parsed as ordinary questions that become its children.
/// </summary>
/// <param name="TypeKey">Question type key assigned to each parent exercise.</param>
/// <param name="list">Raw HTML fragments, one per compose question.</param>
/// <param name="Lang">Language code forwarded to the nested AnalyzeWordAsync call.</param>
/// <returns>One parent exercise per fragment, with any parsed sub-questions in Children.</returns>
private List<ExerciseDto> ComposeConvert(string TypeKey, List<string> list, string Lang)
{
    string stemPattern = ComposeStart + "([\\s\\S]*?)" + ComposeEnd;
    List<ExerciseDto> parents = new List<ExerciseDto>();

    foreach (string html in list)
    {
        ExerciseDto parent = new ExerciseDto() { Type = TypeKey };

        // Shared stem: the text between the compose start marker and the stem-end marker.
        string stem = Regex.Match(html, stemPattern).Value;
        stem = stem.Replace(ComposeStart, "").Replace(ComposeEnd, "");
        parent.Question = HtmlHelper.DoUselessTag(stem);

        // Remove the stem, then turn the nested "compose-" markers into ordinary ones.
        string remainder = Regex.Replace(html, stemPattern, "");
        remainder = remainder.Replace(ComposeTag, CompleteStart);

        // The parent hash covers the stem's text/images plus every child's hash.
        string hashInput = HtmlHelper.DoTextImg(parent.Question);
        List<ExerciseDto> children = AnalyzeWordAsync(remainder, Lang);
        if (children.IsNotEmpty())
        {
            foreach (ExerciseDto child in children)
            {
                hashInput = hashInput + child.ShaCode;
            }
            parent.ShaCode = ShaHashHelper.GetSHA1(hashInput);

            foreach (ExerciseDto child in children)
            {
                child.PShaCode = parent.ShaCode;
            }
            parent.Children.AddRange(children);
        }

        parents.Add(parent);
    }

    return parents;
}
|
|
|
/// <summary>
/// Splits each raw question fragment into stem, answer and explanation by locating
/// the text between the corresponding markers.
/// </summary>
/// <param name="tests">Raw HTML fragments, one per question.</param>
/// <param name="TypeKey">Question type key; must be a key of TestType.</param>
/// <returns>
/// One exercise per fragment. Question, Answer and Explain still contain their
/// delimiting markers, except that the type marker is removed from Question.
/// For "Complete" questions Answer is not assigned here.
/// </returns>
public static List<ExerciseDto> ConvertTestInfo(List<string> tests, string TypeKey)
{
    const string Lazy = "([\\s\\S]*?)";
    List<ExerciseDto> parsed = new List<ExerciseDto>();

    foreach (string html in tests)
    {
        string typeTag = TestType[TypeKey].Split("|")[0];
        bool isComplete = TypeKey == "Complete";

        ExerciseDto test = new ExerciseDto();
        test.Type = TypeKey;

        // Stem: fill-in-the-blank questions have no answer section, so their stem
        // runs up to the analysis marker instead of the answer marker.
        string stemEnd = isComplete ? AnalysisTag : AnswerTag;
        test.Question = Regex.Match(html, typeTag + Lazy + stemEnd).Value.Replace(typeTag, "");

        if (!isComplete)
        {
            string answer = Regex.Match(html, AnswerTag + Lazy + AnalysisTag).Value;

            // Choice and true/false answers are reduced to plain text.
            if (TypeKey == "Single" || TypeKey == "Multiple" || TypeKey == "Judge")
            {
                HtmlDocument doc = new HtmlDocument();
                doc.LoadHtml(answer);
                answer = doc.DocumentNode.InnerText;
            }
            test.Answer = new List<string>() { answer };
        }

        test.Explain = Regex.Match(html, AnalysisTag + Lazy + EndedTag).Value;
        parsed.Add(test);
    }

    return parsed;
}
|
|
|
/// <summary>
/// Groups the raw question fragments found in the HTML by question type.
/// </summary>
/// <param name="testHtml">HTML containing questions delimited by the type markers.</param>
/// <returns>
/// A dictionary with one entry per key of TestType (possibly an empty list); each
/// fragment includes its start and end markers.
/// </returns>
public static Dictionary<string, List<string>> ConvertTest(string testHtml)
{
    Dictionary<string, List<string>> grouped = new Dictionary<string, List<string>>();

    foreach (KeyValuePair<string, string> entry in TestType)
    {
        string[] tags = entry.Value.Split("|");
        string opening = tags[0];
        string closing = tags[1];

        List<string> fragments = new List<string>();
        for (Match hit = Regex.Match(testHtml, opening + "([\\s\\S]*?)" + closing); hit.Success; hit = hit.NextMatch())
        {
            fragments.Add(opening + hit.Groups[1].ToString() + closing);
        }

        grouped.Add(entry.Key, fragments);
    }

    return grouped;
}
|
|
|
+ }
|
|
|
/// <summary>
/// A pending text substitution: the text to look for and the text to put in its place.
/// </summary>
internal class ReplaceDto
{
    /// <summary>Text to search for.</summary>
    public string oldstr { get; set; }

    /// <summary>Text that replaces <see cref="oldstr"/>.</summary>
    public string newstr { get; set; }
}
|
|
|
+}
|