using DocumentFormat.OpenXml.Packaging;
using HtmlAgilityPack;
using Microsoft.AspNetCore.Http;
using OpenXmlPowerTools;
using System;
using System.Collections.Generic;
using System.Drawing.Imaging;
using System.IO;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using System.Xml.Linq;
using TEAMModelOS.Model.Core.Dtos;
using TEAMModelOS.Model.Core.Models;
using TEAMModelOS.Model.Evaluation.Dtos.Own;
using TEAMModelOS.Model.Evaluation.Models;
using TEAMModelOS.SDK.Context.Configuration;
using TEAMModelOS.SDK.Extension.DataResult.JsonRpcRequest;
using TEAMModelOS.SDK.Extension.SnowFlake;
using TEAMModelOS.SDK.Helper.Common.CollectionHelper;
using TEAMModelOS.SDK.Helper.Common.FileHelper;
using TEAMModelOS.SDK.Helper.Common.JsonHelper;
using TEAMModelOS.SDK.Helper.Common.StringHelper;
using TEAMModelOS.SDK.Helper.Network.HttpHelper;
using TEAMModelOS.SDK.Helper.Security.ShaHash;
using TEAMModelOS.SDK.Module.AzureBlob.Container;
using TEAMModelOS.SDK.Module.AzureBlob.Interfaces;
using TEAMModelOS.SDK.Module.AzureTable.Interfaces;
using TEAMModelOS.Service.Core.Implements;
using TEAMModelOS.Service.Evaluation.Interfaces;

// NOTE(review): the text this file was reconstructed from had every "<...>" span stripped
// (generic type arguments and markup inside string literals). Type arguments below were
// restored from how the values are used; each inference that could not be fully confirmed
// is marked NOTE(review). String literals are reproduced exactly as they appeared.
namespace TEAMModelOS.Service.Evaluation.Implements
{
    /// <summary>
    /// Imports exercises from Word documents: converts an uploaded .docx to HTML, splits the
    /// HTML into question-type sections using the marker tags defined below, parses each
    /// exercise (question, options, answers, explanation) and stores the parsed pieces.
    /// </summary>
    public class ImportExerciseService : BaseService, IImportExerciseService
    {
        // Marker tags that delimit the parts of one exercise inside the converted HTML.
        private const string SummaryTag = "【题文】";
        private const string AnswerTag = "【答案】";
        private const string AnalysisTag = "【解析】";
        private const string EndedTag = "【结束】";

        // Option letters recognised for choice questions.
        private const string Options = "ABCDEFGHIJ";

        // Delimiters around an inline answer of a fill-in-the-blank question.
        private const string CompleteStart = "【";
        private const string CompleteEnd = "】";

        // Lazy "anything, including newlines" fragment shared by the tag-to-tag patterns.
        private const string LazyAny = "([\\s\\S]*?)";

        // Separator expected after an option letter.
        // NOTE(review): two alternatives appear twice; the duplicates were presumably
        // full-width punctuation originally - confirm against the real source.
        private const string OptionSeparator = "(\\.|\\.|\\、|\\:|\\:)";

        // Question type -> "start heading|end heading" delimiting that type's section.
        private static readonly Dictionary<string, string> TestType = new Dictionary<string, string>
        {
            { "Single", "单选题|多选题" },
            { "Multiple", "多选题|判断题" },
            { "Judge", "判断题|填空题" },
            { "Complete", "填空题|主观题" },
            { "Subjective", "主观题|【完结】" }
        };

        private readonly IHttpContextAccessor httpContextAccessor;
        private readonly IAzureBlobDBRepository azureBlobDBRepository;

        /// <summary>Creates the service with the HTTP context accessor and blob repository it uses.</summary>
        public ImportExerciseService(IHttpContextAccessor _httpContextAccessor, IAzureBlobDBRepository _azureBlobDBRepository)
        {
            azureBlobDBRepository = _azureBlobDBRepository;
            httpContextAccessor = _httpContextAccessor;
        }

        /// <summary>
        /// Converts an uploaded Word file to HTML and uploads the HTML through the blob
        /// repository. When a record with the same SHA-1 already exists, its stored HTML is
        /// fetched from the recorded blob URL instead of converting again.
        /// </summary>
        /// <param name="file">The uploaded Word document.</param>
        /// <returns>A dictionary with the entries "HtmlString" and "Sha1Code".</returns>
        /// <exception cref="ArgumentNullException"><paramref name="file"/> is null.</exception>
        // NOTE(review): the dictionary value type was lost in extraction; object is inferred
        // and must match IImportExerciseService - confirm.
        public async Task<Dictionary<string, object>> UploadWord(IFormFile file)
        {
            if (file == null)
            {
                throw new ArgumentNullException(nameof(file));
            }

            Dictionary<string, object> resdict = new Dictionary<string, object>();

            string shaCode;
            using (Stream hashStream = file.OpenReadStream())
            {
                shaCode = ShaHashHelper.GetSHA1(hashStream);
            }

            Dictionary<string, object> dict = new Dictionary<string, object> { { "Sha1Code", shaCode } };
            // NOTE(review): the type argument is inferred from the members used on the result.
            List<AzureBlobModel> models = await FindListByDict<AzureBlobModel>(dict);
            if (models.IsNotEmpty())
            {
                resdict.Add("HtmlString", HttpHelper.HttpGet(models[0].BlobUrl));
                resdict.Add("Sha1Code", models[0].Sha1Code);
                return resdict;
            }

            string folder = BaseConfigModel.ContentRootPath + "/Upload/" + IdWorker.getInstance().NextId();
            System.IO.Directory.CreateDirectory(folder);
            try
            {
                // Keep only the file-name part so a client-supplied name cannot point outside
                // the working folder; Path.Combine picks the platform's separator.
                string filePath = Path.Combine(folder, Path.GetFileName(file.FileName));
                using (var stream = new FileStream(filePath, FileMode.Create))
                {
                    await file.CopyToAsync(stream);
                }

                var htmlInfo = ConvertDocxToHtml(filePath, folder);
                string blobPath = htmlInfo.blobPath;
                string htmlString = htmlInfo.htmlString;

                AzureBlobModel model = await azureBlobDBRepository.UploadPath(blobPath);
                model.Sha1Code = shaCode;
                await Save(model);

                resdict.Add("HtmlString", htmlString);
                resdict.Add("Sha1Code", shaCode);
                return resdict;
            }
            finally
            {
                // Clean up only this request's folder (the shared Upload root may be in use by
                // concurrent uploads), and do so even when conversion or upload fails.
                FileHelper.DeleteDirAndFiles(folder);
            }
        }

        /// <summary>
        /// Converts a .docx file to an HTML document with images embedded as base64 data URIs
        /// and writes it to "index.html" inside <paramref name="folder"/>.
        /// </summary>
        /// <param name="filePath">Path of the .docx file to convert.</param>
        /// <param name="folder">Folder that receives the generated index.html.</param>
        /// <returns>An object exposing <c>htmlString</c> (the HTML) and <c>blobPath</c> (the written file path).</returns>
        public static dynamic ConvertDocxToHtml(string filePath, string folder)
        {
            byte[] byteArray = File.ReadAllBytes(filePath);
            using (MemoryStream memoryStream = new MemoryStream())
            {
                // Copy into an expandable stream because the document is opened as editable.
                memoryStream.Write(byteArray, 0, byteArray.Length);
                using (WordprocessingDocument doc = WordprocessingDocument.Open(memoryStream, true))
                {
                    WmlToHtmlConverterSettings settings = new WmlToHtmlConverterSettings()
                    {
                        PageTitle = "My Page Title",
                        AdditionalCss = "body { margin: 1cm auto; max-width: 20cm; padding: 0; }",
                        FabricateCssClasses = true,
                        CssClassPrefix = "pt-",
                        RestrictToSupportedLanguages = false,
                        RestrictToSupportedNumberingFormats = false,
                        ImageHandler = imageInfo =>
                        {
                            ImageFormat imageFormat = ResolveImageFormat(imageInfo.ContentType);
                            if (imageFormat == null)
                            {
                                return null;
                            }

                            string base64;
                            try
                            {
                                using (MemoryStream ms = new MemoryStream())
                                {
                                    imageInfo.Bitmap.Save(ms, imageFormat);
                                    base64 = System.Convert.ToBase64String(ms.ToArray());
                                }
                            }
                            catch (System.Runtime.InteropServices.ExternalException)
                            {
                                // The image could not be encoded; omit it from the output.
                                return null;
                            }

                            ImageFormat format = imageInfo.Bitmap.RawFormat;
                            ImageCodecInfo codec = ImageCodecInfo.GetImageDecoders()
                                .FirstOrDefault(c => c.FormatID == format.Guid);
                            if (codec == null)
                            {
                                // No decoder reports this raw format, so no MIME type is known.
                                return null;
                            }

                            string imageSource = string.Format("data:{0};base64,{1}", codec.MimeType, base64);
                            return new XElement(Xhtml.img,
                                new XAttribute(NoNamespace.src, imageSource),
                                imageInfo.ImgStyleAttribute,
                                imageInfo.AltText != null ? new XAttribute(NoNamespace.alt, imageInfo.AltText) : null);
                        }
                    };

                    XElement htmlElement = WmlToHtmlConverter.ConvertToHtml(doc, settings);
                    var htmls = new XDocument(new XDocumentType("html", null, null, null), htmlElement);
                    var htmlString = htmls.ToString(SaveOptions.DisableFormatting);

                    // Include the MathJax plugin.
                    // NOTE(review): the appended literal is empty here; the markup it carried
                    // appears to have been lost in extraction - restore from the real source.
                    htmlString = htmlString + "";

                    File.WriteAllText(folder + "/" + "index.html", htmlString);
                    return new { htmlString, blobPath = folder + "/" + "index.html" };
                }
            }
        }

        /// <summary>
        /// Maps an image content type such as "image/png" to the format used for re-encoding,
        /// or returns null when the type is missing, malformed or not handled.
        /// </summary>
        private static ImageFormat ResolveImageFormat(string contentType)
        {
            if (string.IsNullOrEmpty(contentType))
            {
                return null;
            }

            string[] parts = contentType.Split('/');
            if (parts.Length < 2)
            {
                return null;
            }

            switch (parts[1].ToLowerInvariant())
            {
                case "png": return ImageFormat.Png;
                case "gif": return ImageFormat.Gif;
                case "bmp": return ImageFormat.Bmp;
                case "jpeg": return ImageFormat.Jpeg;
                case "tiff": return ImageFormat.Gif; // TIFF images are re-encoded as GIF
                case "x-wmf": return ImageFormat.Wmf;
                default: return null;
            }
        }

        /// <summary>
        /// Obtains the HTML for a document (from stored content when available, otherwise from
        /// the configured Word-to-HTML endpoint), parses every question-type section into
        /// exercises and stores the parsed pieces.
        /// </summary>
        /// <param name="dict">Document information; its ShaCode identifies stored content.</param>
        /// <param name="Lang">Language key, used as the partition key of stored rows.</param>
        /// <returns>All exercises parsed from the document.</returns>
        public async Task<List<ExerciseDto>> AnalyzeWordAsync(DocInfoDto dict, string Lang)
        {
            string html = CleanHtml(await LoadHtmlAsync(dict, Lang));

            List<ExerciseDto> tests = new List<ExerciseDto>();
            foreach (KeyValuePair<string, string> section in TestType)
            {
                string[] tags = section.Value.Split("|");
                string sectionHtml = Regex.Match(html, tags[0] + LazyAny + tags[1]).Value;
                tests.AddRange(ConvertSection(section.Key, sectionHtml));
            }

            // Awaited so that a failed save reaches the caller instead of being lost.
            await PersistExercisesAsync(tests, Lang);
            return tests;
        }

        /// <summary>
        /// Returns stored HTML for the document when present; otherwise posts the document
        /// information to the configured endpoint and stores the returned HTML.
        /// </summary>
        private async Task<string> LoadHtmlAsync(DocInfoDto dict, string Lang)
        {
            // NOTE(review): the type argument is inferred from the members used on the result.
            List<ContentVerify> contents = await this.FindListByKey<ContentVerify>("DigestCode", dict.ShaCode);
            if (contents.IsNotEmpty())
            {
                return contents[0].Content;
            }

            string word2HtmlUrl = BaseConfigModel.Configuration["HaBookAuth:Word2html"];
            // NOTE(review): the request's type argument is inferred from the value assigned
            // to @params - confirm against the JosnRPCRequest declaration.
            JosnRPCRequest<DocInfoDto> request = new JosnRPCRequest<DocInfoDto>();
            request.@params = dict;
            string jsondata = MessagePackHelper.ObjectToJson(request);
            string html = await HttpHelper.HttpPostAsync(word2HtmlUrl, jsondata);

            ContentVerify content = new ContentVerify
            {
                RowKey = dict.ShaCode,
                PartitionKey = Lang,
                AlgorithmType = "Sha1",
                Content = html,
                DigestCode = dict.ShaCode
            };
            await this.SaveOrUpdate(content);
            return html;
        }

        /// <summary>Normalises the converted HTML before it is split into sections.</summary>
        private static string CleanHtml(string html)
        {
            // NOTE(review): the two empty-string replacements and the second pattern below
            // appear truncated (markup lost in extraction). As written, Replace("", ...)
            // throws ArgumentException - restore the original literals from the real source.
            html = html.Replace("\t", " ").Replace("", "").Replace("", "");

            // Remove class attributes.
            string classpattern = "class=\"([^\"]*)\"";
            html = Regex.Replace(html, classpattern, "");

            string pattern = "]{0,})>";
            html = Regex.Replace(html, pattern, "");
            return html;
        }

        /// <summary>Dispatches one section to the converter for its question type.</summary>
        private static List<ExerciseDto> ConvertSection(string typeKey, string sectionHtml)
        {
            switch (typeKey)
            {
                case "Single": return SingleConvert(typeKey, sectionHtml);
                case "Multiple": return MultipleConvert(typeKey, sectionHtml);
                case "Judge": return JudgeConvert(typeKey, sectionHtml);
                case "Complete": return CompleteConvert(typeKey, sectionHtml);
                case "Subjective": return SubjectiveConvert(typeKey, sectionHtml);
                default: return new List<ExerciseDto>();
            }
        }

        /// <summary>
        /// Stores the parsed exercises. Kept as <c>async void</c> so existing callers still
        /// compile; failures cannot be observed through this entry point.
        /// </summary>
        /// <param name="exercises">Exercises to store.</param>
        /// <param name="Lang">Language key, used as the partition key.</param>
        public async void SaveExercise(List<ExerciseDto> exercises, string Lang)
        {
            await PersistExercisesAsync(exercises, Lang);
        }

        /// <summary>
        /// Builds the verification rows for every exercise - question (type 0), options
        /// (type 1), answers (type 2) and explanation (type 3) - and saves them together.
        /// </summary>
        private async Task PersistExercisesAsync(List<ExerciseDto> exercises, string Lang)
        {
            if (exercises == null || exercises.Count == 0)
            {
                return;
            }

            string tmdid = "";
            List<string> ids = HttpContextHelper.GetLoginUser(httpContextAccessor, "id");
            if (ids.IsNotEmpty())
            {
                tmdid = ids[0];
            }

            List<ExerciseVerify> exerciseVerifies = new List<ExerciseVerify>();
            foreach (ExerciseDto x in exercises)
            {
                exerciseVerifies.Add(new ExerciseVerify
                {
                    TeamModelId = tmdid,
                    RowKey = x.ShaCode,
                    Type = 0,
                    PartitionKey = Lang,
                    Content = x.Question,
                    SummaryCode = x.ShaCode,
                    Status = 1
                });

                if (x.Option.IsNotEmpty())
                {
                    string opt = MessagePackHelper.ObjectToJson(x.Option);
                    exerciseVerifies.Add(new ExerciseVerify
                    {
                        TeamModelId = tmdid,
                        RowKey = x.ShaCode + "-" + ShaHashHelper.GetSHA1(opt),
                        Type = 1,
                        PartitionKey = Lang,
                        Content = opt,
                        SummaryCode = x.ShaCode,
                        Status = 1
                    });
                }

                if (x.Answer.IsNotEmpty())
                {
                    string ans = MessagePackHelper.ObjectToJson(x.Answer);
                    exerciseVerifies.Add(new ExerciseVerify
                    {
                        TeamModelId = tmdid,
                        RowKey = x.ShaCode + "-" + ShaHashHelper.GetSHA1(ans),
                        Type = 2,
                        PartitionKey = Lang,
                        Content = ans,
                        SummaryCode = x.ShaCode,
                        Status = 1
                    });
                }

                exerciseVerifies.Add(new ExerciseVerify
                {
                    TeamModelId = tmdid,
                    RowKey = x.ShaCode + "-" + ShaHashHelper.GetSHA1(x.Explain),
                    Type = 3,
                    PartitionKey = Lang,
                    Content = x.Explain,
                    SummaryCode = x.ShaCode,
                    Status = 1
                });
            }

            await this.SaveOrUpdateAll(exerciseVerifies);
        }

        /// <summary>Parses a single-choice section.</summary>
        public static List<ExerciseDto> SingleConvert(string TypeKey, string testHtml) => OptionProcess(TypeKey, testHtml);

        /// <summary>Parses a multiple-choice section.</summary>
        public static List<ExerciseDto> MultipleConvert(string TypeKey, string testHtml) => OptionProcess(TypeKey, testHtml);

        /// <summary>Parses a true/false section.</summary>
        public static List<ExerciseDto> JudgeConvert(string TypeKey, string testHtml) => OptionProcess(TypeKey, testHtml);

        /// <summary>Parses a fill-in-the-blank section.</summary>
        public static List<ExerciseDto> CompleteConvert(string TypeKey, string testHtml) => CompleteProcess(TypeKey, testHtml);

        /// <summary>
        /// Parses a subjective-question section: strips the marker tags from question,
        /// answers and explanation and derives the exercise hash from the question.
        /// </summary>
        public static List<ExerciseDto> SubjectiveConvert(string TypeKey, string testHtml)
        {
            List<string> tests = ConvertTest(testHtml);
            List<ExerciseDto> testInfos = ConvertTestInfo(tests, TypeKey);
            foreach (ExerciseDto testInfo in testInfos)
            {
                testInfo.Question = testInfo.Question.Replace(AnalysisTag, "").Replace(SummaryTag, "").Replace(AnswerTag, "");
                testInfo.Question = HtmlHelper.DoUselessTag(testInfo.Question);
                testInfo.ShaCode = ShaHashHelper.GetSHA1(HtmlHelper.DoTextImg(testInfo.Question));

                for (int i = 0; i < testInfo.Answer.Count; i++)
                {
                    testInfo.Answer[i] = testInfo.Answer[i].Replace(AnswerTag, "").Replace(AnalysisTag, "");
                    testInfo.Answer[i] = HtmlHelper.DoUselessTag(testInfo.Answer[i]);
                }

                testInfo.Explain = testInfo.Explain.Replace(AnalysisTag, "").Replace(EndedTag, "");
                testInfo.Explain = HtmlHelper.DoUselessTag(testInfo.Explain);
            }
            return testInfos;
        }

        /// <summary>
        /// Parses a fill-in-the-blank section. Each bracketed span in the question is taken as
        /// an answer and replaced in the question text by padding sized from the answer text.
        /// </summary>
        public static List<ExerciseDto> CompleteProcess(string TypeKey, string testHtml)
        {
            List<string> tests = ConvertTest(testHtml);
            List<ExerciseDto> testInfos = ConvertTestInfo(tests, TypeKey);
            HtmlDocument doc = new HtmlDocument();
            string blankPattern = CompleteStart + LazyAny + CompleteEnd;

            foreach (ExerciseDto testInfo in testInfos)
            {
                List<string> ans = new List<string>();
                List<ReplaceDto> replaces = new List<ReplaceDto>();
                testInfo.Question = testInfo.Question.Replace(AnalysisTag, "").Replace(SummaryTag, "").Replace(AnswerTag, "");

                for (Match m = Regex.Match(testInfo.Question, blankPattern); m.Success; m = m.NextMatch())
                {
                    string an = m.Groups[1].ToString();
                    doc.LoadHtml(an);
                    string anstr = doc.DocumentNode.InnerText;

                    // Padding is three units per encoded byte of the answer's plain text.
                    // NOTE(review): Encoding.Default differs between runtimes/platforms.
                    int length = System.Text.Encoding.Default.GetByteCount(anstr);
                    // NOTE(review): the padding unit and the wrapper around it look truncated
                    // (markup/entities presumably lost in extraction); kept as they appear.
                    string nbsp = string.Concat(Enumerable.Repeat(" ", length * 3));

                    replaces.Add(new ReplaceDto
                    {
                        oldstr = CompleteStart + an + CompleteEnd,
                        newstr = "" + nbsp + ""
                    });
                    ans.Add(an);
                }

                string textImg = testInfo.Question;
                // Remove the answers from the question text.
                foreach (ReplaceDto replace in replaces)
                {
                    testInfo.Question = testInfo.Question.Replace(replace.oldstr, replace.newstr);
                    testInfo.Question = HtmlHelper.DoUselessTag(testInfo.Question);
                    // Intended: keep only stem text and images, without the blank markup.
                    // NOTE(review): oldstr has already been replaced in Question on the line
                    // above, so this Replace has no effect. Left unchanged because altering it
                    // would change the ShaCode computed below for every such exercise.
                    textImg = testInfo.Question.Replace(replace.oldstr, "");
                }
                textImg = HtmlHelper.DoTextImg(textImg);
                testInfo.ShaCode = ShaHashHelper.GetSHA1(textImg);

                // Clean the explanation.
                testInfo.Explain = testInfo.Explain.Replace(AnalysisTag, "").Replace(EndedTag, "");
                testInfo.Explain = HtmlHelper.DoUselessTag(testInfo.Explain);
                testInfo.Answer.AddRange(ans);
            }
            return testInfos;
        }

        /// <summary>
        /// Parses a choice-style section (single, multiple, true/false): extracts the lettered
        /// options, cleans the question stem, reduces the answers to distinct characters and
        /// derives the exercise hash from the options followed by the stem.
        /// </summary>
        /// <param name="TypeKey">Question type key stored on every parsed exercise.</param>
        /// <param name="testHtml">HTML of the section.</param>
        /// <returns>The parsed exercises.</returns>
        public static List<ExerciseDto> OptionProcess(string TypeKey, string testHtml)
        {
            List<string> tests = ConvertTest(testHtml);
            string[] optionsKeys = Options.Select(s => s.ToString()).ToArray();
            List<ExerciseDto> testInfos = ConvertTestInfo(tests, TypeKey);

            foreach (ExerciseDto testInfo in testInfos)
            {
                // Everything from the first option letter up to and including the answer tag.
                string optsRgex = optionsKeys[0] + OptionSeparator + LazyAny + AnswerTag;
                string optsHtml = Regex.Match(testInfo.Question, optsRgex).Value;

                // Extract the options.
                StringBuilder textImg = new StringBuilder();
                for (int i = 0; i < optionsKeys.Length - 1; i++)
                {
                    // An option normally ends where the next letter + separator begins ...
                    string optRgex = optionsKeys[i] + OptionSeparator + LazyAny + optionsKeys[i + 1] + OptionSeparator;
                    string optHtml = Regex.Match(optsHtml, optRgex).Value;
                    int trailing = 2; // next letter + its separator

                    if (string.IsNullOrEmpty(optHtml))
                    {
                        // ... while the last option ends at the answer tag.
                        optRgex = optionsKeys[i] + OptionSeparator + LazyAny + AnswerTag;
                        optHtml = Regex.Match(optsHtml, optRgex).Value;
                        trailing = AnswerTag.Length;
                    }

                    if (string.IsNullOrEmpty(optHtml))
                    {
                        continue;
                    }

                    // Drop the leading "letter + separator" and the trailing delimiter.
                    optHtml = optHtml.Substring(2, optHtml.Length - 2 - trailing);
                    optHtml = HtmlHelper.DoUselessTag(optHtml);
                    textImg.Append(HtmlHelper.DoTextImg(optHtml));
                    testInfo.Option.Add(new CodeValue { Code = optionsKeys[i], Value = optHtml });
                }

                // Clean the question stem. string.Replace rejects an empty search string, so
                // the option block is only removed when one was actually found.
                string question = testInfo.Question;
                if (optsHtml.Length > 0)
                {
                    question = question.Replace(optsHtml, "");
                }
                testInfo.Question = HtmlHelper.DoUselessTag(question.Replace(SummaryTag, "").Replace(AnswerTag, ""));
                textImg.Append(HtmlHelper.DoTextImg(testInfo.Question));
                testInfo.ShaCode = ShaHashHelper.GetSHA1(textImg.ToString());

                // Reduce the answers to their distinct characters.
                HashSet<string> ans = new HashSet<string>();
                foreach (string rawAnswer in testInfo.Answer)
                {
                    string cleaned = rawAnswer.Replace(AnswerTag, "").Replace(AnalysisTag, "").Trim();
                    foreach (char c in cleaned)
                    {
                        ans.Add(c.ToString());
                    }
                }
                testInfo.Answer = ans.ToList();

                // Clean the explanation.
                testInfo.Explain = testInfo.Explain.Replace(AnalysisTag, "").Replace(EndedTag, "");
                testInfo.Explain = HtmlHelper.DoUselessTag(testInfo.Explain);
            }
            return testInfos;
        }

        /// <summary>
        /// Splits each exercise's HTML into question, answer and explanation by the marker
        /// tags. Type "Complete" has no separate answer part: its question runs up to the
        /// analysis tag. For "Single", "Multiple" and "Judge" the answer is reduced to its
        /// inner text.
        /// </summary>
        /// <param name="tests">HTML fragments, one per exercise.</param>
        /// <param name="TypeKey">Question type key stored on every parsed exercise.</param>
        /// <returns>One exercise per fragment; the delimiting tags are still included in each part.</returns>
        public static List<ExerciseDto> ConvertTestInfo(List<string> tests, string TypeKey)
        {
            List<ExerciseDto> testInfos = new List<ExerciseDto>();
            bool isComplete = TypeKey.Equals("Complete");
            bool stripAnswerHtml = TypeKey.Equals("Single") || TypeKey.Equals("Multiple") || TypeKey.Equals("Judge");

            foreach (string html in tests)
            {
                ExerciseDto test = new ExerciseDto();
                test.Type = TypeKey;

                if (isComplete)
                {
                    test.Question = MatchBetween(html, SummaryTag, AnalysisTag);
                }
                else
                {
                    test.Question = MatchBetween(html, SummaryTag, AnswerTag);

                    string answer = MatchBetween(html, AnswerTag, AnalysisTag);
                    if (stripAnswerHtml)
                    {
                        // Choice and true/false answers are stored without HTML tags.
                        HtmlDocument doc = new HtmlDocument();
                        doc.LoadHtml(answer);
                        answer = doc.DocumentNode.InnerText;
                    }
                    test.Answer = new List<string>() { answer };
                }

                test.Explain = MatchBetween(html, AnalysisTag, EndedTag);
                testInfos.Add(test);
            }
            return testInfos;
        }

        /// <summary>
        /// Returns the first span running from <paramref name="startTag"/> through
        /// <paramref name="endTag"/> (both included), or an empty string when there is none.
        /// </summary>
        private static string MatchBetween(string html, string startTag, string endTag)
        {
            return Regex.Match(html, startTag + LazyAny + endTag).Value;
        }

        /// <summary>
        /// Cuts a section into exercise fragments, each running from a question tag through
        /// the next end tag (both included).
        /// </summary>
        /// <param name="testHtml">HTML of one section; null or empty yields no fragments.</param>
        /// <returns>The fragments in document order. Scanning stops at the first question tag that has no end tag after it.</returns>
        public static List<string> ConvertTest(string testHtml)
        {
            List<string> tests = new List<string>();
            if (string.IsNullOrEmpty(testHtml))
            {
                return tests;
            }

            int cursor = 0;
            while (true)
            {
                // ">= 0" semantics: a fragment that starts at the very first character counts.
                int indexStart = testHtml.IndexOf(SummaryTag, cursor, StringComparison.Ordinal);
                if (indexStart < 0)
                {
                    break;
                }

                // Look for the end tag only after the start tag, so a stray earlier end tag
                // cannot produce a negative length.
                int indexEnd = testHtml.IndexOf(EndedTag, indexStart + SummaryTag.Length, StringComparison.Ordinal);
                if (indexEnd < 0)
                {
                    break;
                }

                tests.Add(testHtml.Substring(indexStart, indexEnd - indexStart + EndedTag.Length));
                cursor = indexEnd + EndedTag.Length;
            }
            return tests;
        }
    }

    /// <summary>A pending text substitution: the text to find and the text to put in its place.</summary>
    class ReplaceDto
    {
        public string oldstr { get; set; }
        public string newstr { get; set; }
    }
}