123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360 |
- using DocumentFormat.OpenXml.Packaging;
- using HtmlAgilityPack;
- using Microsoft.AspNetCore.Http;
- using OpenXmlPowerTools;
- using System;
- using System.Collections.Generic;
- using System.Drawing.Imaging;
- using System.IO;
- using System.Linq;
- using System.Text;
- using System.Text.RegularExpressions;
- using System.Threading.Tasks;
- using System.Xml.Linq;
- using TEAMModelOS.Model.Evaluation.Dtos.Own;
- using TEAMModelOS.SDK.Context.Configuration;
- using TEAMModelOS.SDK.Context.Constant;
- using TEAMModelOS.SDK.Extension.SnowFlake;
- using TEAMModelOS.SDK.Helper.Common.CollectionHelper;
- using TEAMModelOS.SDK.Helper.Common.FileHelper;
- using TEAMModelOS.SDK.Helper.Common.StringHelper;
- using TEAMModelOS.SDK.Helper.Security.ShaHash;
- using TEAMModelOS.SDK.Module.AzureBlob.Container;
- using TEAMModelOS.SDK.Module.AzureBlob.Interfaces;
- using TEAMModelOS.SDK.Module.AzureTable.Interfaces;
- using TEAMModelOS.Service.Models.Core;
- using TEAMModelOS.Service.Models.Evaluation.Dtos.Own;
- using TEAMModelOS.Service.Services.Evaluation.Interfaces;
- namespace TEAMModelOS.Service.Services.Evaluation.Implements
- {
- public class HtmlAnalyzeService : IHtmlAnalyzeService
- {
// Marker tags that delimit the parts of a question inside the HTML converted from Word.
// The literals are matched verbatim against the document text and must not be altered.
// They are readonly because they are shared by static methods and are never reassigned.

/// <summary>Marker "question text"; it is stripped from question stems.</summary>
private static readonly string SummaryTag = "【题文】";

/// <summary>Marker "answer"; ends the stem/options and starts the answer section.</summary>
private static readonly string AnswerTag = "【答案】";

/// <summary>Marker "analysis"; ends the answer and starts the explanation section.</summary>
private static readonly string AnalysisTag = "【解析】";

/// <summary>Marker "end"; closes an individual question.</summary>
private static readonly string EndedTag = "【结束】";

/// <summary>Option labels; each character is used as one option code.</summary>
private static readonly string Options = "ABCDEFGHIJ";

/// <summary>Opening bracket that wraps a fill-in-the-blank answer inside the stem.</summary>
private static readonly string CompleteStart = "【";

/// <summary>Closing bracket that wraps a fill-in-the-blank answer inside the stem.</summary>
private static readonly string CompleteEnd = "】";

/// <summary>Marker "comprehensive question"; opens a comprehensive (multi-part) question.</summary>
private static readonly string ComposeStart = "【综合题】";

/// <summary>Marker "comprehensive question - stem"; closes the shared stem of a comprehensive question.</summary>
private static readonly string ComposeEnd = "【综合题-题干】";

/// <summary>Prefix carried by the type markers of sub-questions nested in a comprehensive question.</summary>
private static readonly string ComposeTag = "【综合题-";

/// <summary>
/// Maps each question-type key to its "startTag|endTag" pair
/// (single choice, multiple choice, true/false, fill-in-the-blank, essay, comprehensive).
/// </summary>
private static readonly Dictionary<string, string> TestType = new Dictionary<string, string>
{
    { "Single", "【单选题】|【结束】" },
    { "Multiple", "【多选题】|【结束】" },
    { "Judge", "【判断题】|【结束】" },
    { "Complete", "【填空题】|【结束】" },
    { "Subjective", "【问答题】|【结束】" },
    { "Compose", "【综合题】|【完结】" }
};
/// <summary>
/// Parses the HTML converted from a Word document into a list of exercises.
/// The HTML is first stripped of class attributes and span tags, then split by
/// question type and handed to the converter for each type.
/// </summary>
/// <param name="html">Document HTML; null or empty yields an empty list.</param>
/// <param name="Lang">Language code; only forwarded to the comprehensive-question converter.</param>
/// <returns>All parsed exercises, grouped in the order the type keys are enumerated.</returns>
public List<ExerciseDto> AnalyzeWordAsync(string html, string Lang)
{
    List<ExerciseDto> tests = new List<ExerciseDto>();

    // Regex.Replace throws ArgumentNullException on null input. An empty document
    // already produces an empty result, so null is treated the same way.
    if (string.IsNullOrEmpty(html))
    {
        return tests;
    }

    // Remove class attributes and span tags so they do not get in the way of tag matching.
    html = Regex.Replace(html, "class=\"([^\"]*)\"", "");
    html = Regex.Replace(html, "<span([^>]{0,})>", "");
    html = html.Replace("\t", " ").Replace("<span>", "").Replace("</span>", "").Replace("dir=\"ltr\"", "");

    foreach (KeyValuePair<string, List<string>> group in ConvertTest(html))
    {
        List<ExerciseDto> exercises;
        switch (group.Key)
        {
            case "Single":
                exercises = SingleConvert(group.Key, group.Value);
                break;
            case "Multiple":
                exercises = MultipleConvert(group.Key, group.Value);
                break;
            case "Judge":
                exercises = JudgeConvert(group.Key, group.Value);
                break;
            case "Complete":
                exercises = CompleteConvert(group.Key, group.Value);
                break;
            case "Subjective":
                exercises = SubjectiveConvert(group.Key, group.Value);
                break;
            case "Compose":
                exercises = ComposeConvert(group.Key, group.Value, Lang);
                break;
            default:
                // Unknown type key: nothing to convert.
                continue;
        }

        // Exercises returned from this method use their own hash as the parent hash.
        exercises.ForEach(x => { x.PShaCode = x.ShaCode; });
        tests.AddRange(exercises);
    }

    return tests;
}
/// <summary>
/// Converts single-choice question fragments by delegating to <see cref="OptionProcess"/>.
/// </summary>
/// <param name="TypeKey">Question-type key assigned to every resulting exercise.</param>
/// <param name="list">Raw HTML fragments, one per question.</param>
/// <returns>The exercises produced by the option-based parser.</returns>
private List<ExerciseDto> SingleConvert(string TypeKey, List<string> list)
    => OptionProcess(TypeKey, list);
/// <summary>
/// Converts multiple-choice question fragments by delegating to <see cref="OptionProcess"/>.
/// </summary>
/// <param name="TypeKey">Question-type key assigned to every resulting exercise.</param>
/// <param name="list">Raw HTML fragments, one per question.</param>
/// <returns>The exercises produced by the option-based parser.</returns>
private List<ExerciseDto> MultipleConvert(string TypeKey, List<string> list)
    => OptionProcess(TypeKey, list);
/// <summary>
/// Converts true/false question fragments by delegating to <see cref="OptionProcess"/>.
/// </summary>
/// <param name="TypeKey">Question-type key assigned to every resulting exercise.</param>
/// <param name="list">Raw HTML fragments, one per question.</param>
/// <returns>The exercises produced by the option-based parser.</returns>
private List<ExerciseDto> JudgeConvert(string TypeKey, List<string> list)
    => OptionProcess(TypeKey, list);
/// <summary>
/// Converts fill-in-the-blank question fragments by delegating to <see cref="CompleteProcess"/>.
/// </summary>
/// <param name="TypeKey">Question-type key assigned to every resulting exercise.</param>
/// <param name="list">Raw HTML fragments, one per question.</param>
/// <returns>The exercises produced by the fill-in-the-blank parser.</returns>
private List<ExerciseDto> CompleteConvert(string TypeKey, List<string> list)
    => CompleteProcess(TypeKey, list);
/// <summary>
/// Parses fill-in-the-blank questions. Every bracketed answer in the stem is collected
/// as an answer and replaced by an underlined blank whose width follows the answer length.
/// </summary>
/// <param name="TypeKey">Question-type key assigned to every resulting exercise.</param>
/// <param name="tests">Raw HTML fragments, one per question.</param>
/// <returns>The exercises with stem, answers, explanation and hash filled in.</returns>
private List<ExerciseDto> CompleteProcess(string TypeKey, List<string> tests)
{
    List<ExerciseDto> testInfos = ConvertTestInfo(tests, TypeKey);
    HtmlDocument doc = new HtmlDocument();

    // Loop-invariant: lazily matches anything wrapped in the answer brackets.
    string blankPattern = CompleteStart + "([\\s\\S]*?)" + CompleteEnd;

    foreach (ExerciseDto testInfo in testInfos)
    {
        List<string> ans = new List<string>();
        testInfo.Question = testInfo.Question.Replace(AnalysisTag, "").Replace(SummaryTag, "").Replace(AnswerTag, "");

        List<ReplaceDto> replaces = new List<ReplaceDto>();
        int index = 1;
        for (Match m = Regex.Match(testInfo.Question, blankPattern); m.Success; m = m.NextMatch())
        {
            string an = m.Groups[1].Value;

            // Measure the answer as plain text so markup does not widen the blank.
            doc.LoadHtml(an);
            string anstr = doc.DocumentNode.InnerText;

            // Use UTF-8 explicitly: Encoding.Default differs between .NET Framework
            // (system ANSI code page) and .NET Core (UTF-8), which made the blank width
            // and therefore the stored question platform-dependent.
            int length = System.Text.Encoding.UTF8.GetBytes(anstr).Length;

            // Three padding characters per byte, built in one allocation.
            string nbsp = new string(' ', length * 3);

            replaces.Add(new ReplaceDto
            {
                oldstr = CompleteStart + an + CompleteEnd,
                newstr = "<underline data=\"" + index + "\"><u>" + nbsp + "</u></underline>"
            });
            ans.Add(an);
            index++;
        }

        string textImg = testInfo.Question;

        // Remove the answers from the stem.
        foreach (ReplaceDto replace in replaces)
        {
            testInfo.Question = testInfo.Question.Replace(replace.oldstr, replace.newstr);
            testInfo.Question = HtmlHelper.DoUselessTag(testInfo.Question);

            // The original comment says the hash input should hold only stem text and
            // images, without the underline markup.
            // NOTE(review): the answer has already been replaced in Question above, so this
            // Replace presumably finds nothing and textImg keeps the underline markup.
            // Left unchanged because altering it would change ShaCode values - confirm
            // before fixing.
            textImg = testInfo.Question.Replace(replace.oldstr, "");
        }

        textImg = HtmlHelper.DoTextImg(textImg);
        testInfo.ShaCode = ShaHashHelper.GetSHA1(textImg);

        // Clean up the explanation.
        testInfo.Explain = testInfo.Explain.Replace(AnalysisTag, "").Replace(EndedTag, "");
        testInfo.Explain = HtmlHelper.DoUselessTag(testInfo.Explain);
        testInfo.Answer.AddRange(ans);
    }

    return testInfos;
}
/// <summary>
/// Parses option-based questions (single choice, multiple choice, true/false):
/// extracts the options, the stem, the answer letters and the explanation.
/// </summary>
/// <param name="typeKey">Question-type key assigned to every resulting exercise.</param>
/// <param name="list">Raw HTML fragments, one per question.</param>
/// <returns>The exercises with options, stem, answers, explanation and hash filled in.</returns>
private List<ExerciseDto> OptionProcess(string typeKey, List<string> list)
{
    // Separator that follows an option label, and a lazy "anything" group.
    const string separator = "(\\.|\\.|\\、|\\:|\\:)";
    const string anyText = "([\\s\\S]*?)";

    string[] optionsKeys = Options.Select(s => s.ToString()).ToArray();
    List<ExerciseDto> testInfos = ConvertTestInfo(list, typeKey);
    foreach (ExerciseDto testInfo in testInfos)
    {
        // Everything from the first option label up to and including the answer tag.
        string optsHtml = Regex.Match(testInfo.Question, optionsKeys[0] + separator + anyText + AnswerTag).Value;

        // Extract the options.
        // NOTE(review): the loop stops before the last label, so the final key in
        // Options is never extracted as an option of its own.
        StringBuilder textImg = new StringBuilder();
        for (int i = 0; i < optionsKeys.Length - 1; i++)
        {
            // An option normally runs up to the next label; the match then ends with
            // that label and its separator (2 characters).
            string optHtml = Regex.Match(optsHtml, optionsKeys[i] + separator + anyText + optionsKeys[i + 1] + separator).Value;
            int trailing = 2;
            if (string.IsNullOrEmpty(optHtml))
            {
                // The last option runs up to the answer tag instead.
                optHtml = Regex.Match(optsHtml, optionsKeys[i] + separator + anyText + AnswerTag).Value;
                trailing = AnswerTag.Length;
            }
            if (string.IsNullOrEmpty(optHtml))
            {
                continue;
            }

            // Drop the leading label and separator (2 characters) and the trailing delimiter.
            optHtml = optHtml.Substring(2, optHtml.Length - 2 - trailing);
            optHtml = HtmlHelper.DoUselessTag(optHtml);
            textImg.Append(HtmlHelper.DoTextImg(optHtml));
            testInfo.Option.Add(new CodeValue { Code = optionsKeys[i], Value = optHtml });
        }

        // Build the stem. string.Replace throws ArgumentException when the search string
        // is empty, so the option block is only removed when one was found.
        string question = testInfo.Question;
        if (!string.IsNullOrEmpty(optsHtml))
        {
            question = question.Replace(optsHtml, "");
        }
        question = question.Replace(SummaryTag, "").Replace(AnswerTag, "");
        testInfo.Question = HtmlHelper.DoUselessTag(question);
        textImg.Append(HtmlHelper.DoTextImg(testInfo.Question));
        testInfo.ShaCode = ShaHashHelper.GetSHA1(textImg.ToString());

        // Build the answers: every character of the cleaned answer text becomes one
        // distinct answer entry.
        HashSet<string> ans = new HashSet<string>();
        foreach (string raw in testInfo.Answer)
        {
            string cleaned = raw.Replace(AnswerTag, "").Replace(AnalysisTag, "").TrimStart().TrimEnd();
            foreach (char c in cleaned)
            {
                ans.Add(c.ToString());
            }
        }
        testInfo.Answer = ans.ToList();

        // Clean up the explanation.
        testInfo.Explain = testInfo.Explain.Replace(AnalysisTag, "").Replace(EndedTag, "");
        testInfo.Explain = HtmlHelper.DoUselessTag(testInfo.Explain);
    }

    return testInfos;
}
/// <summary>
/// Parses essay questions: strips the marker tags from stem, answers and explanation,
/// and hashes the cleaned stem.
/// </summary>
/// <param name="TypeKey">Question-type key assigned to every resulting exercise.</param>
/// <param name="tests">Raw HTML fragments, one per question.</param>
/// <returns>The exercises with stem, answers, explanation and hash filled in.</returns>
private List<ExerciseDto> SubjectiveConvert(string TypeKey, List<string> tests)
{
    List<ExerciseDto> parsed = ConvertTestInfo(tests, TypeKey);
    foreach (ExerciseDto item in parsed)
    {
        // Stem: drop the marker tags, then the useless markup.
        string stem = item.Question.Replace(AnalysisTag, "").Replace(SummaryTag, "").Replace(AnswerTag, "");
        item.Question = HtmlHelper.DoUselessTag(stem);

        // A null hash input is hashed as the empty string.
        string hashInput = HtmlHelper.DoTextImg(item.Question) ?? string.Empty;
        item.ShaCode = ShaHashHelper.GetSHA1(hashInput);

        // Answers: same clean-up, applied in place.
        for (int k = 0; k < item.Answer.Count; k++)
        {
            string answer = item.Answer[k].Replace(AnswerTag, "").Replace(AnalysisTag, "");
            item.Answer[k] = HtmlHelper.DoUselessTag(answer);
        }

        // Explanation.
        string explain = item.Explain.Replace(AnalysisTag, "").Replace(EndedTag, "");
        item.Explain = HtmlHelper.DoUselessTag(explain);
    }

    return parsed;
}
/// <summary>
/// Parses comprehensive questions: a shared stem followed by nested sub-questions.
/// The sub-questions are parsed by re-running <see cref="AnalyzeWordAsync"/> on the
/// remainder of the fragment.
/// </summary>
/// <param name="TypeKey">Question-type key assigned to every parent exercise.</param>
/// <param name="list">Raw HTML fragments, one per comprehensive question.</param>
/// <param name="Lang">Language code forwarded to the nested parse.</param>
/// <returns>
/// One parent exercise per fragment. The hash and children are only set when at least
/// one sub-question was parsed.
/// </returns>
private List<ExerciseDto> ComposeConvert(string TypeKey, List<string> list, string Lang)
{
    // Matches the shared stem, from the opening tag to the stem-end tag.
    string stemPattern = ComposeStart + "([\\s\\S]*?)" + ComposeEnd;

    List<ExerciseDto> parents = new List<ExerciseDto>();
    foreach (string html in list)
    {
        ExerciseDto parent = new ExerciseDto() { Type = TypeKey };

        string stem = Regex.Match(html, stemPattern).Value.Replace(ComposeStart, "").Replace(ComposeEnd, "");
        parent.Question = HtmlHelper.DoUselessTag(stem);

        // Remove the stem and turn the prefixed sub-question tags into ordinary type tags.
        string childHtml = Regex.Replace(html, stemPattern, "").Replace(ComposeTag, CompleteStart);

        // The parent hash is built from the stem text plus the hash of every sub-question.
        string hashInput = HtmlHelper.DoTextImg(parent.Question);
        List<ExerciseDto> children = AnalyzeWordAsync(childHtml, Lang);
        if (children.IsNotEmpty())
        {
            foreach (ExerciseDto child in children)
            {
                hashInput = hashInput + child.ShaCode;
            }
            parent.ShaCode = ShaHashHelper.GetSHA1(hashInput);

            foreach (ExerciseDto child in children)
            {
                child.PShaCode = parent.ShaCode;
            }
            parent.Children.AddRange(children);
        }

        parents.Add(parent);
    }

    return parents;
}
/// <summary>
/// Splits each raw question fragment into stem, answer and explanation using the marker tags.
/// </summary>
/// <param name="tests">Raw HTML fragments, one per question.</param>
/// <param name="TypeKey">Question-type key; must be a key of the type table.</param>
/// <returns>
/// One exercise per fragment. The extracted parts still contain their marker tags,
/// except that the type tag is removed from the stem. For the "Complete" type no
/// answer is extracted here.
/// </returns>
public static List<ExerciseDto> ConvertTestInfo(List<string> tests, string TypeKey)
{
    const string anyText = "([\\s\\S]*?)";

    List<ExerciseDto> parsed = new List<ExerciseDto>();
    foreach (string html in tests)
    {
        string typeTag = TestType[TypeKey].Split("|")[0];
        bool isComplete = TypeKey.Equals("Complete");

        ExerciseDto item = new ExerciseDto();
        item.Type = TypeKey;

        // Stem: for fill-in-the-blank it runs up to the analysis tag, because the
        // answers are embedded in the stem; otherwise it runs up to the answer tag.
        string stemEnd = isComplete ? AnalysisTag : AnswerTag;
        item.Question = Regex.Match(html, typeTag + anyText + stemEnd).Value.Replace(typeTag, "");

        if (!isComplete)
        {
            string answer = Regex.Match(html, AnswerTag + anyText + AnalysisTag).Value;

            // For single choice, multiple choice and true/false the answer is reduced
            // to its text content.
            bool plainTextAnswer = TypeKey.Equals("Single") || TypeKey.Equals("Multiple") || TypeKey.Equals("Judge");
            if (plainTextAnswer)
            {
                HtmlDocument doc = new HtmlDocument();
                doc.LoadHtml(answer);
                answer = doc.DocumentNode.InnerText;
            }
            item.Answer = new List<string>() { answer };
        }

        // Explanation: from the analysis tag to the end tag.
        item.Explain = Regex.Match(html, AnalysisTag + anyText + EndedTag).Value;

        parsed.Add(item);
    }

    return parsed;
}
/// <summary>
/// Splits the document HTML into raw question fragments grouped by question type.
/// </summary>
/// <param name="testHtml">Document HTML containing the question marker tags.</param>
/// <returns>
/// A dictionary with one entry per known type key. Each value lists the fragments
/// found for that type, including their start and end tags; it is empty when none match.
/// </returns>
public static Dictionary<string, List<string>> ConvertTest(string testHtml)
{
    Dictionary<string, List<string>> grouped = new Dictionary<string, List<string>>();
    foreach (KeyValuePair<string, string> entry in TestType)
    {
        // Each table value is "startTag|endTag".
        string[] tags = entry.Value.Split("|");
        string pattern = tags[0] + "([\\s\\S]*?)" + tags[1];

        List<string> fragments = new List<string>();
        for (Match m = Regex.Match(testHtml, pattern); m.Success; m = m.NextMatch())
        {
            fragments.Add(tags[0] + m.Groups[1].ToString() + tags[1]);
        }

        grouped.Add(entry.Key, fragments);
    }

    return grouped;
}
- }
/// <summary>
/// A pending text substitution: the text to look for and the text to put in its place.
/// </summary>
internal class ReplaceDto
{
    /// <summary>Text to be replaced.</summary>
    public string oldstr { get; set; }

    /// <summary>Replacement text.</summary>
    public string newstr { get; set; }
}
- }
|