ImportController.cs 17 KB


  1. using Azure.Storage.Blobs.Models;
  2. using Azure.Storage.Sas;
  3. using HTEXLib;
  4. using HTEXLib.Builders;
  5. using HTEXLib.Helpers.ShapeHelpers;
  6. using Microsoft.AspNetCore.Hosting;
  7. using Microsoft.AspNetCore.Http;
  8. using Microsoft.AspNetCore.Mvc;
  9. using System;
  10. using System.Collections.Generic;
  11. using System.Globalization;
  12. using System.IdentityModel.Tokens.Jwt;
  13. using System.IO;
  14. using System.Linq;
  15. using System.Net.Http;
  16. using System.Text;
  17. using System.Text.Json;
  18. using System.Threading.Tasks;
  19. using TEAMModelOS.SDK.Models;
  20. using TEAMModelOS.Models.Dto;
  21. using TEAMModelOS.SDK;
  22. using TEAMModelOS.SDK.DI;
  23. using TEAMModelOS.SDK.Extension;
  24. using TEAMModelOS.SDK.Module.AzureBlob.Configuration;
  25. using TEAMModelOS.SDK.Module.AzureBlob.Container;
  26. using TEAMModelOS.Services;
  27. using HTEXLib.Translator;
  28. using HTEXLib.DOCX.Models;
  29. using System.Collections.Concurrent;
  30. using TEAMModelOS.Filter;
  31. using Ionic.Zip;
  32. namespace TEAMModelOS.Controllers
  33. {
  34. [Route("import")]
  35. [ApiController]
  36. public class ImportController : ControllerBase
  37. {
  // Collaborators handed in through the constructor below.
  public readonly DingDing _dingDing;
  public PPTX2HTEXTranslator _PPTX2HTEXTranslator { get; set; }
  public DOXC2HTMLTranslator _DOXC2HTMLTranslator { get; set; }
  public AzureStorageFactory _azureStorage { get; set; }
  private readonly IHttpClientFactory _clientFactory;
  public HTML2ITEMV3Translator _HTML2ITEMV3Translator { get; set; }

  /// <summary>
  /// Captures every injected collaborator on the controller instance.
  /// </summary>
  /// <param name="PPTX2HTEXTranslator">Translator stored in <c>_PPTX2HTEXTranslator</c>.</param>
  /// <param name="clientFactory">HTTP client factory stored in <c>_clientFactory</c>.</param>
  /// <param name="DOXC2HTMLTranslator">Translator stored in <c>_DOXC2HTMLTranslator</c>.</param>
  /// <param name="HTML2ITEMV3Translator">Translator stored in <c>_HTML2ITEMV3Translator</c>.</param>
  /// <param name="azureStorage">Storage factory stored in <c>_azureStorage</c>.</param>
  /// <param name="ding">Instance stored in <c>_dingDing</c>.</param>
  public ImportController(
      PPTX2HTEXTranslator PPTX2HTEXTranslator, IHttpClientFactory clientFactory,
      DOXC2HTMLTranslator DOXC2HTMLTranslator, HTML2ITEMV3Translator HTML2ITEMV3Translator, AzureStorageFactory azureStorage, DingDing ding)
  {
      // Infrastructure services.
      this._clientFactory = clientFactory;
      this._azureStorage = azureStorage;
      this._dingDing = ding;

      // Document translators.
      this._PPTX2HTEXTranslator = PPTX2HTEXTranslator;
      this._DOXC2HTMLTranslator = DOXC2HTMLTranslator;
      this._HTML2ITEMV3Translator = HTML2ITEMV3Translator;
  }
  /// <summary>
  /// Returns <paramref name="input"/> with only its final occurrence of
  /// <paramref name="oldValue"/> swapped for <paramref name="newValue"/>.
  /// </summary>
  /// <param name="input">Text to search.</param>
  /// <param name="oldValue">Substring whose last occurrence is replaced.</param>
  /// <param name="newValue">Replacement text.</param>
  /// <returns>The rewritten text, or <paramref name="input"/> itself when no occurrence exists.</returns>
  private static string ReplaceLast(string input, string oldValue, string newValue)
  {
      int matchStart = input.LastIndexOf(oldValue);
      if (matchStart < 0)
      {
          // Nothing to replace.
          return input;
      }

      int tailStart = matchStart + oldValue.Length;
      var result = new StringBuilder(input.Length - oldValue.Length + newValue.Length);
      result.Append(input, 0, matchStart);
      result.Append(newValue);
      result.Append(input, tailStart, input.Length - tailStart);
      return result.ToString();
  }
  /// <summary>
  /// Imports a file that is already stored in blob storage.
  /// Request body: {"file":"www....xxxx.pptx","scope":"private/school"}
  /// </summary>
  /// <param name="request">
  /// JSON object. "file" is the blob URL (URL-decoded before use). When "scope" is the
  /// string "school" the caller's school is used as the target container instead of the caller's id.
  /// </param>
  /// <returns>
  /// 200 with <c>{ index }</c>: the index URL for pptx/xml/htex files, or an empty string for any
  /// other extension. 400 when "file" is missing, is not a string, or does not yield a valid blob name.
  /// </returns>
  [HttpPost("parse-doc")]
  //[RequestSizeLimit(102_400_000_00)] // roughly 10000 MB at most
  [AuthToken(Roles = "admin,teacher")]
  public async Task<IActionResult> ParseDoc(JsonElement request)
  {
      const string invalidLink = "不是正确的Blob链接!";

      var (id, _, _, school) = HttpContext.GetAuthTokenInfo();

      // TryGetProperty throws on anything that is not a JSON object.
      if (request.ValueKind != JsonValueKind.Object)
      {
          return BadRequest(invalidLink);
      }

      var containerid = id;
      if (request.TryGetProperty("scope", out JsonElement jscope)
          && jscope.ValueKind == JsonValueKind.String
          && string.Equals(jscope.GetString(), "school", StringComparison.Ordinal))
      {
          containerid = school;
      }

      // A missing or non-string "file" used to fall through and fail deep inside the URL parsing.
      if (!request.TryGetProperty("file", out JsonElement code) || code.ValueKind != JsonValueKind.String)
      {
          return BadRequest(invalidLink);
      }

      string azureBlobSAS = System.Web.HttpUtility.UrlDecode(code.GetString(), Encoding.UTF8);

      // BlobUrlString skips an 8-character scheme prefix and then expects host/container/blob.
      if (string.IsNullOrWhiteSpace(azureBlobSAS)
          || azureBlobSAS.Length <= 8
          || azureBlobSAS.Substring(8).Split("/").Length < 3)
      {
          return BadRequest(invalidLink);
      }

      var (ContainerName, BlobName) = BlobUrlString(azureBlobSAS);
      if (!IsBlobName(BlobName))
      {
          return BadRequest(invalidLink);
      }

      // File name and extension are taken from the last path segment of the URL.
      var codes = azureBlobSAS.Split("/");
      string lastSegment = codes[codes.Length - 1];
      var file = lastSegment.Split(".");
      var ext = file[file.Length - 1];
      var FileName = ReplaceLast(lastSegment, "." + ext, "");

      BlobAuth blobAuth = _azureStorage.GetBlobSasUriRead(ContainerName, BlobName);

      // The response owns the network stream; dispose it once the translators are done.
      using (HttpResponseMessage response = await _clientFactory.CreateClient().GetAsync(new Uri(blobAuth.url)))
      {
          response.EnsureSuccessStatusCode();
          Stream stream = await response.Content.ReadAsStreamAsync();

          if (string.Equals(ext, "pptx", StringComparison.OrdinalIgnoreCase)
              || string.Equals(ext, "xml", StringComparison.OrdinalIgnoreCase))
          {
              string index = await PPTXTranslator(containerid, FileName, stream);
              return Ok(new { index = index });
          }

          if (string.Equals(ext, "htex", StringComparison.OrdinalIgnoreCase))
          {
              var index = await HTEXTranslator(containerid, FileName, stream);
              return Ok(new { index = index });
          }

          // docx/doc and every other extension are not converted here.
          return Ok(new { index = "" });
      }
  }
  /// <summary>
  /// Unpacks an HTEX zip archive and uploads every file entry under "res" as
  /// "<paramref name="FileName"/>/&lt;entry name&gt;" in the given container.
  /// </summary>
  /// <param name="containerid">Target container passed to the storage upload call.</param>
  /// <param name="FileName">Folder name prefixed to every entry.</param>
  /// <param name="stream">Zip archive content; closed by this method.</param>
  /// <returns>
  /// The URL-decoded upload URL containing "<paramref name="FileName"/>/index.json",
  /// or null when the archive has no such entry.
  /// </returns>
  private async Task<string> HTEXTranslator(string containerid, string FileName, Stream stream)
  {
      Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);
      // Entry names are read as GB2312 to avoid garbled Chinese file names.
      Encoding encoding = Encoding.GetEncoding("GB2312");
      var options = new ReadOptions { Encoding = encoding };
      string indexMarker = $"{FileName}/index.json";

      bool hasindex = false;
      var names = new List<string>();
      var streams = new List<Stream>();
      try
      {
          // Phase 1: buffer every entry in memory. No upload has started yet, so a corrupt
          // archive fails here without leaving half-finished uploads behind.
          using (ZipFile zip = ZipFile.Read(stream, options))
          {
              zip.AlternateEncoding = encoding;
              foreach (var f in zip.Entries)
              {
                  if (f.IsDirectory)
                  {
                      continue;
                  }

                  string name = FileName + "/" + f.FileName;
                  var blobstream = new MemoryStream();
                  streams.Add(blobstream);
                  using (var entryStream = f.OpenReader())
                  {
                      // CopyTo loops until the entry is exhausted; one Read call may return fewer bytes.
                      entryStream.CopyTo(blobstream);
                  }
                  blobstream.Position = 0;
                  names.Add(name);

                  if (name.Contains(indexMarker))
                  {
                      hasindex = true;
                  }
              }
          }

          // Phase 2: upload everything and wait for completion before the buffers are closed.
          var tasks = new List<Task<string>>(names.Count);
          for (int i = 0; i < names.Count; i++)
          {
              tasks.Add(_azureStorage.UploadFileByContainer(containerid, streams[i], "res", names[i], false));
          }
          string[] uploaded = await Task.WhenAll(tasks);

          if (!hasindex)
          {
              return null;
          }

          string index = null;
          foreach (string raw in uploaded)
          {
              var url = System.Web.HttpUtility.UrlDecode(raw, Encoding.UTF8);
              if (url.Contains(indexMarker))
              {
                  index = url;
              }
          }
          return index;
      }
      finally
      {
          // Release resources.
          stream.Close();
          streams.ForEach(x => { x.Close(); });
      }
  }
  /// <summary>
  /// Splits a blob URL into its container name and blob path.
  /// The first 8 characters (the length of "https://") are skipped; what follows is
  /// read as host/container/blob-path.
  /// </summary>
  /// <param name="sasUrl">Blob URL to split.</param>
  /// <returns>
  /// (container, blob path). Parts that cannot be determined are returned as empty
  /// strings instead of throwing.
  /// </returns>
  private static (string, string) BlobUrlString(string sasUrl)
  {
      const int schemeLength = 8;
      if (string.IsNullOrEmpty(sasUrl) || sasUrl.Length <= schemeLength)
      {
          return (string.Empty, string.Empty);
      }

      string path = sasUrl.Substring(schemeLength);

      int hostEnd = path.IndexOf('/');
      if (hostEnd < 0)
      {
          // Host only: neither container nor blob.
          return (string.Empty, string.Empty);
      }

      int containerEnd = path.IndexOf('/', hostEnd + 1);
      if (containerEnd < 0)
      {
          // host/container without a blob path.
          return (path.Substring(hostEnd + 1), string.Empty);
      }

      string ContainerName = path.Substring(hostEnd + 1, containerEnd - hostEnd - 1);
      // Cut by position: only the leading host/container/ prefix is dropped, so a blob
      // path that happens to repeat that text stays intact.
      string blob = path.Substring(containerEnd + 1);
      return (ContainerName, blob);
  }
  /// <summary>
  /// Checks whether a blob name is acceptable: 1-255 characters, none of \ : &lt; &gt; * ? " |,
  /// no leading or trailing whitespace, and not one of the lowercase reserved names
  /// con, prn, aux, nul, com1-9, lpt1-9 (alone or followed by an extension).
  /// Matching is case-sensitive because no regex options are supplied.
  /// </summary>
  /// <param name="BlobName">Name to validate; null is treated as invalid.</param>
  /// <returns>true when the name passes every check.</returns>
  public static bool IsBlobName(string BlobName)
  {
      if (BlobName == null)
      {
          return false;
      }

      // Verbatim string: one backslash per regex escape. The previous pattern doubled them,
      // which turned "\." into "backslash + any char" and "\s" into "backslash + s", so the
      // extension and whitespace rules could never match.
      return System.Text.RegularExpressions.Regex.IsMatch(BlobName,
          @"(?!((^(con)$)|^(con)\..*|(^(prn)$)|^(prn)\..*|(^(aux)$)|^(aux)\..*|(^(nul)$)|^(nul)\..*|(^(com)[1-9]$)|^(com)[1-9]\..*|(^(lpt)[1-9]$)|^(lpt)[1-9]\..*)|^\s+|.*\s$)(^[^\\:<>*?""|]{1,255}$)");
  }
  /// <summary>
  /// Converts an uploaded pptx/xml file through <c>PPTXTranslator</c> and returns the index URL.
  /// </summary>
  /// <param name="file">Multipart form file; only the "pptx" and "xml" extensions are accepted.</param>
  /// <returns>200 with <c>{ index }</c>; 400 when the file is missing or has another extension.</returns>
  [HttpPost("upload-pptx")]
  [RequestSizeLimit(102_400_000_00)] // roughly 10000 MB at most
  public async Task<IActionResult> UploadPPTX([FromForm] IFormFile file)
  {
      // NOTE(review): unlike parse-doc this action carries no [AuthToken] attribute, yet it
      // reads the auth token info - confirm that is intended.
      var (id, _, _, _) = HttpContext.GetAuthTokenInfo();

      if (file == null)
      {
          return BadRequest("type is not pptx or xml !");
      }

      string extension = FileType.GetExtention(file.FileName);
      bool supported = string.Equals(extension, "pptx", StringComparison.OrdinalIgnoreCase)
          || string.Equals(extension, "xml", StringComparison.OrdinalIgnoreCase);
      if (!supported)
      {
          return BadRequest("type is not pptx or xml !");
      }

      // Strip only the extension, as parse-doc does. Splitting at the first dot mapped
      // "lesson.v1.pptx" and "lesson.v2.pptx" to the same folder.
      string FileName = Path.GetFileNameWithoutExtension(file.FileName);

      using (Stream streamFile = file.OpenReadStream())
      {
          string index = await PPTXTranslator(id, FileName, streamFile);
          return Ok(new { index = index });
      }
  }
  /// <summary>
  /// Translates an uploaded docx file with the DOCX-to-HTML translator.
  /// </summary>
  /// <param name="file">Multipart form file; must have the "docx" extension.</param>
  /// <returns>
  /// 200 with <c>{ htmlString }</c>; 400 with a msg/code dictionary when the file is
  /// missing or is not a docx.
  /// </returns>
  [HttpPost("upload-word")]
  [RequestSizeLimit(102_400_000_00)] // roughly 10000 MB at most
  public IActionResult UploadWord([FromForm] IFormFile file)
  {
      // A missing form file used to surface as a NullReferenceException.
      if (file == null || !string.Equals(FileType.GetExtention(file.FileName), "docx", StringComparison.OrdinalIgnoreCase))
      {
          return BadRequest(new Dictionary<string, object> { { "msg", "type is not docx!" }, { "code", ResponseCode.FAILED } });
      }

      using (Stream upload = file.OpenReadStream())
      {
          var doc = _DOXC2HTMLTranslator.Translate(upload);
          return Ok(new { htmlString = doc });
      }
  }
  /// <summary>
  /// Converts an uploaded Word document directly into question items:
  /// docx to HTML first, then HTML to items.
  /// </summary>
  /// <param name="file">Multipart form file; must have the "docx" extension.</param>
  /// <returns>
  /// 200 with <c>{ tests, emferror }</c>; 400 with a msg/code dictionary when the file is
  /// missing or is not a docx.
  /// </returns>
  [HttpPost("parse-word")]
  public IActionResult ParseWord([FromForm] IFormFile file)
  {
      // A missing form file used to surface as a NullReferenceException.
      if (file == null || !string.Equals(FileType.GetExtention(file.FileName), "docx", StringComparison.OrdinalIgnoreCase))
      {
          return BadRequest(new Dictionary<string, object> { { "msg", "type is not docx!" }, { "code", ResponseCode.FAILED } });
      }

      using (Stream upload = file.OpenReadStream())
      {
          var doc = _DOXC2HTMLTranslator.Translate(upload);
          (List<HTEXLib.DOCX.Models.ItemInfo> tests, List<string> error) = _HTML2ITEMV3Translator.Translate(doc);
          return Ok(new { tests, emferror = error });
      }
  }
  /// <summary>
  /// Converts an uploaded Word document to HTML.
  /// </summary>
  /// <param name="file">Multipart form file; must have the "docx" extension.</param>
  /// <returns>
  /// 200 with <c>{ html }</c>; 400 with a msg/code dictionary when the file is missing
  /// or is not a docx.
  /// </returns>
  [HttpPost("parse-docx")]
  public IActionResult ParseDocx([FromForm] IFormFile file)
  {
      // A missing form file used to surface as a NullReferenceException.
      if (file == null || !string.Equals(FileType.GetExtention(file.FileName), "docx", StringComparison.OrdinalIgnoreCase))
      {
          return BadRequest(new Dictionary<string, object> { { "msg", "type is not docx!" }, { "code", ResponseCode.FAILED } });
      }

      using (Stream upload = file.OpenReadStream())
      {
          var doc = _DOXC2HTMLTranslator.Translate(upload);
          return Ok(new { html = doc });
      }
  }
  /// <summary>
  /// Converts HTML into question items.
  /// </summary>
  /// <param name="request">JSON object whose "html" property holds the HTML text.</param>
  /// <returns>
  /// 200 with <c>{ tests, emferror }</c>; 400 when the body is not an object or "html"
  /// is missing or not a string.
  /// </returns>
  [HttpPost("parse-html")]
  public IActionResult AnalyzeHtml(JsonElement request)
  {
      // TryGetProperty throws on non-objects and GetString throws on non-string values,
      // so both are checked up front and reported as a bad request.
      if (request.ValueKind != JsonValueKind.Object
          || !request.TryGetProperty("html", out JsonElement html)
          || html.ValueKind != JsonValueKind.String)
      {
          return BadRequest();
      }

      (List<HTEXLib.DOCX.Models.ItemInfo> tests, List<string> error) = _HTML2ITEMV3Translator.Translate(html.GetString());
      return Ok(new { tests, emferror = error });
  }
  /// <summary>
  /// Translates a PPTX stream to HTEX, uploads one JSON blob per slide plus every
  /// base64-embedded resource under "res/<paramref name="FileName"/>/", then uploads
  /// an index.json that lists the slides.
  /// </summary>
  /// <param name="containerid">Target container; "teammodelos" is used when null or blank.</param>
  /// <param name="FileName">Presentation name, also used as the blob folder.</param>
  /// <param name="streamFile">PPTX content. Not closed here; the caller owns it.</param>
  /// <returns>The URL-decoded URL returned by the index.json upload.</returns>
  private async Task<string> PPTXTranslator(string containerid, string FileName, Stream streamFile)
  {
      if (string.IsNullOrWhiteSpace(containerid))
      {
          containerid = "teammodelos";
      }

      string shaCode = Guid.NewGuid().ToString();
      HTEXLib.Htex htex = _PPTX2HTEXTranslator.Translate(streamFile);
      htex.name = FileName;
      HTEXIndex index = new HTEXIndex() { name = FileName, size = htex.size, thumbnail = htex.thumbnail, id = shaCode };

      // Serialize every slide first, each under a freshly generated blob name.
      List<KeyValuePair<string, string>> blobslidenames = new List<KeyValuePair<string, string>>();
      foreach (var slide in htex.slides)
      {
          string json = JsonHelper.ToJson(slide, ignoreNullValue: false);
          string guid = Guid.NewGuid().ToString();
          blobslidenames.Add(new KeyValuePair<string, string>(guid, json));
      }

      List<Sld> slds = new List<Sld>();
      List<Task<string>> tasks = new List<Task<string>>();
      foreach (var key in blobslidenames)
      {
          slds.Add(new Sld { type = "normal", url = $"{key.Key}.json", scoring = null });
          tasks.Add(_azureStorage.UploadFileByContainer(containerid, key.Value, "res", $"{FileName}/{key.Key}.json", false));
      }
      await Task.WhenAll(tasks);

      List<Task> tasksFiles = new List<Task>();
      // Iterate over a snapshot of the keys: entries are removed inside the loop, and
      // mutating a collection while enumerating it is not safe on every runtime.
      foreach (var key in htex.stores.Keys.ToList())
      {
          if (key.EndsWith(".wdp", StringComparison.Ordinal) || key.EndsWith(".xlsx", StringComparison.Ordinal))
          {
              htex.stores.Remove(key);
              continue;
          }

          var store = htex.stores[key];
          // Only eagerly loaded resources with a known content type and an inline base64 payload are uploaded.
          if (!store.isLazy && store.contentType != null && ContentTypeDict.extdict.TryGetValue(store.contentType, out string _) && store.url.Contains(";base64,"))
          {
              string[] strs = store.url.Split(',');
              Stream stream = new MemoryStream(Convert.FromBase64String(strs[1]));
              // The blob name is the store key with its slashes removed.
              var name = key.Replace("/", "");
              tasksFiles.Add(_azureStorage.UploadFileByContainer(containerid, stream, "res", $"{FileName}/{name}", false));
          }
      }
      await Task.WhenAll(tasksFiles);

      index.slides = slds;
      var BlobUrl = await _azureStorage.UploadFileByContainer(containerid, JsonHelper.ToJson(index, ignoreNullValue: false), "res", FileName + "/" + "index.json", false);
      return System.Web.HttpUtility.UrlDecode(BlobUrl, Encoding.UTF8);
  }
  347. }
  /// <summary>
  /// Content of the index.json document uploaded next to a converted presentation.
  /// </summary>
  public class HTEXIndex
  {
      /// <summary>Identifier of the index.</summary>
      public string id { get; set; }

      /// <summary>Format version; defaults to "1.0.20201210".</summary>
      public string version { get; set; } = "1.0.20201210";

      /// <summary>Name of the presentation.</summary>
      public string name { get; set; }

      /// <summary>Size taken from the translated HTEX.</summary>
      public HTEXLib.HtexSize size { get; set; }

      /// <summary>Slide entries, one per page.</summary>
      public List<Sld> slides { get; set; }

      /// <summary>Thumbnail.</summary>
      public string thumbnail { get; set; }

      // public int page { get; set; }
      // public Dictionary<string, Store> stores { get; set; }

      public List<string> knowledge { get; set; }
      public string periodId { get; set; }
      public List<string> gradeIds { get; set; }
      public string subjectId { get; set; }
      public string subjectName { get; set; }
      public string score { get; set; }
      public string code { get; set; }
      public string scope { get; set; }
      public int? multipleRule { get; set; }
  }
  /// <summary>
  /// One entry of <c>HTEXIndex.slides</c>.
  /// </summary>
  public class Sld
  {
      /// <summary>
      /// Entry kind: normal (ordinary HTE page), single (single-choice question),
      /// multiple (multiple-choice question), judge (true/false question),
      /// complete (fill-in-the-blank question), subjective (open-ended question),
      /// compose (composite question).
      /// </summary>
      public string type { get; set; }

      /// <summary>
      /// Link to the parsed HTEX of a single PPTX page, or link to a question.
      /// </summary>
      public string url { get; set; }

      /// <summary>
      /// Score allocation of the question; null when type is normal or compose.
      /// </summary>
      public Scoring scoring { get; set; }

      /// <summary>
      /// Thumbnail of the single PPTX page's HTEX.
      /// </summary>
      public string thumbnail { get; set; }
  }
  /// <summary>
  /// Score value together with a list of answers.
  /// </summary>
  public class Scoring
  {
      /// <summary>Score value.</summary>
      public double score { get; set; }

      /// <summary>Answers; starts as an empty list rather than null.</summary>
      public List<string> ans { get; set; } = new List<string>();
  }
  393. }