ImportController.cs 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414
  1. using Azure.Storage.Blobs.Models;
  2. using Azure.Storage.Sas;
  3. using HTEXLib;
  4. using HTEXLib.Builders;
  5. using HTEXLib.Helpers.ShapeHelpers;
  6. using Microsoft.AspNetCore.Hosting;
  7. using Microsoft.AspNetCore.Http;
  8. using Microsoft.AspNetCore.Mvc;
  9. using System;
  10. using System.Collections.Generic;
  11. using System.Globalization;
  12. using System.IdentityModel.Tokens.Jwt;
  13. using System.IO;
  14. using System.Linq;
  15. using System.Net.Http;
  16. using System.Text;
  17. using System.Text.Json;
  18. using System.Threading.Tasks;
  19. using TEAMModelOS.SDK.Models;
  20. using TEAMModelOS.Models.Dto;
  21. using TEAMModelOS.SDK;
  22. using TEAMModelOS.SDK.Context.Constant;
  23. using TEAMModelOS.SDK.Context.Constant.Common;
  24. using TEAMModelOS.SDK.DI;
  25. using TEAMModelOS.SDK.Extension;
  26. using TEAMModelOS.SDK.Module.AzureBlob.Configuration;
  27. using TEAMModelOS.SDK.Module.AzureBlob.Container;
  28. using TEAMModelOS.Services;
  29. using HTEXLib.Translator;
  30. using HTEXLib.DOCX.Models;
  31. using System.Collections.Concurrent;
  32. using TEAMModelOS.Filter;
  33. using Ionic.Zip;
  34. namespace TEAMModelOS.Controllers
  35. {
  36. [Route("import")]
  37. [ApiController]
  38. public class ImportController : ControllerBase
  39. {
  /// <summary>Translates an uploaded PPTX/XML stream into an HTEX document.</summary>
  public PPTX2HTEXTranslator _PPTX2HTEXTranslator { get; set; }

  /// <summary>Translates an uploaded DOCX stream into HTML.</summary>
  public DOXC2HTMLTranslator _DOXC2HTMLTranslator { get; set; }

  /// <summary>Factory used for every blob storage operation in this controller.</summary>
  public AzureStorageFactory _azureStorage { get; set; }

  // Used to download the source document from its blob URL.
  private readonly IHttpClientFactory _clientFactory;

  /// <summary>Translates HTML into a list of question items plus a list of errors.</summary>
  public HTML2ITEMV3Translator _HTML2ITEMV3Translator { get; set; }

  /// <summary>
  /// Creates the controller; every dependency is supplied by the caller and stored as-is.
  /// </summary>
  public ImportController(
      PPTX2HTEXTranslator PPTX2HTEXTranslator, IHttpClientFactory clientFactory,
      DOXC2HTMLTranslator DOXC2HTMLTranslator, HTML2ITEMV3Translator HTML2ITEMV3Translator, AzureStorageFactory azureStorage)
  {
      this._PPTX2HTEXTranslator = PPTX2HTEXTranslator;
      this._DOXC2HTMLTranslator = DOXC2HTMLTranslator;
      this._HTML2ITEMV3Translator = HTML2ITEMV3Translator;
      this._azureStorage = azureStorage;
      this._clientFactory = clientFactory;
  }
  /// <summary>
  /// Replaces the last occurrence of <paramref name="oldValue"/> in <paramref name="input"/>
  /// with <paramref name="newValue"/>.
  /// </summary>
  /// <param name="input">Text to search; returned unchanged when null or empty.</param>
  /// <param name="oldValue">Text to look for; when null or empty nothing is replaced.</param>
  /// <param name="newValue">Replacement text; null is treated as an empty string.</param>
  /// <returns>The input with its last match replaced, or the input itself when there is no match.</returns>
  private static string ReplaceLast(string input, string oldValue, string newValue)
  {
      if (string.IsNullOrEmpty(input) || string.IsNullOrEmpty(oldValue))
      {
          return input;
      }

      // Ordinal search: a culture-sensitive search can "find" ignorable characters at
      // positions that make the slicing below run past the end of the string.
      int index = input.LastIndexOf(oldValue, StringComparison.Ordinal);
      if (index < 0)
      {
          return input;
      }

      return input.Substring(0, index) + newValue + input.Substring(index + oldValue.Length);
  }
  /// <summary>
  /// Downloads a document from blob storage and converts it according to its extension:
  /// "pptx"/"xml" go through <c>PPTXTranslator</c>, "htex" through <c>HTEXTranslator</c>;
  /// every other extension (including "docx"/"doc") yields an empty index.
  /// Request body example: {"file":"www....xxxx.pptx","scope":"private/school"}
  /// </summary>
  /// <param name="request">
  /// JSON object with a required string property "file" (URL of the blob) and an optional
  /// "scope"; when scope equals "school" the output container is the caller's school,
  /// otherwise the caller's own id.
  /// </param>
  /// <returns>
  /// 200 with <c>{ index }</c>, or 400 when "file" is missing or is not a usable blob URL.
  /// </returns>
  [HttpPost("parse-doc")]
  //[RequestSizeLimit(102_400_000_00)] // roughly 10000 MB at most
  [AuthToken(Roles = "admin,teacher")]
  public async Task<IActionResult> ParseDoc(JsonElement request)
  {
      var (id, _, _, school) = HttpContext.GetAuthTokenInfo();
      var containerid = id;

      if (request.ValueKind != JsonValueKind.Object)
      {
          return BadRequest("不是正确的Blob链接!");
      }

      // A null or non-string scope is ignored instead of throwing.
      if (request.TryGetProperty("scope", out JsonElement jscope)
          && jscope.ValueKind == JsonValueKind.String
          && string.Equals(jscope.GetString(), "school", StringComparison.Ordinal))
      {
          containerid = school;
      }

      if (!request.TryGetProperty("file", out JsonElement code)
          || code.ValueKind != JsonValueKind.String
          || string.IsNullOrWhiteSpace(code.GetString()))
      {
          return BadRequest("不是正确的Blob链接!");
      }

      string azureBlobSAS = System.Web.HttpUtility.UrlDecode(code.GetString(), Encoding.UTF8);
      var codes = azureBlobSAS.Split("/");

      // Expect scheme://host/container/blob, i.e. at least five "/"-separated parts.
      // Anything shorter would make the URL slicing in BlobUrlString fail, so reject it here.
      bool hasHttpScheme = azureBlobSAS.StartsWith("https://", StringComparison.OrdinalIgnoreCase)
          || azureBlobSAS.StartsWith("http://", StringComparison.OrdinalIgnoreCase);
      if (!hasHttpScheme || codes.Length < 5)
      {
          return BadRequest("不是正确的Blob链接!");
      }

      (string, string) a = BlobUrlString(azureBlobSAS);
      string ContainerName = a.Item1;
      string BlobName = a.Item2;
      if (!IsBlobName(BlobName))
      {
          return BadRequest("不是正确的Blob链接!");
      }

      // The extension is whatever follows the last "." of the last URL segment.
      string lastSegment = codes[codes.Length - 1];
      var file = lastSegment.Split(".");
      var ext = file[file.Length - 1];
      var FileName = ReplaceLast(lastSegment, "." + ext, "");

      BlobAuth blobAuth = _azureStorage.GetBlobSasUriRead(ContainerName, BlobName);
      using (HttpResponseMessage response = await _clientFactory.CreateClient().GetAsync(new Uri(blobAuth.url)))
      {
          response.EnsureSuccessStatusCode();
          using (Stream stream = await response.Content.ReadAsStreamAsync())
          {
              if (ext.Equals("pptx", StringComparison.OrdinalIgnoreCase) || ext.Equals("xml", StringComparison.OrdinalIgnoreCase))
              {
                  string index = await PPTXTranslator(containerid, FileName, stream);
                  return Ok(new { index = index });
              }

              if (ext.Equals("htex", StringComparison.OrdinalIgnoreCase))
              {
                  var index = await HTEXTranslator(containerid, FileName, stream);
                  return Ok(new { index = index });
              }

              // "docx"/"doc" and unknown extensions are not converted here.
              return Ok(new { index = "" });
          }
      }
  }
  /// <summary>
  /// Unpacks an HTEX (zip) package and uploads every file entry to
  /// <c>res/{FileName}/...</c> in the given container, after deleting <c>res/{FileName}</c>.
  /// </summary>
  /// <param name="containerid">Target blob container.</param>
  /// <param name="FileName">Package name without extension; used as the folder name.</param>
  /// <param name="stream">Zip content; it is closed once all entries have been read.</param>
  /// <returns>
  /// The URL-decoded address of the uploaded <c>index.json</c>, or null when the package
  /// contains no such entry.
  /// </returns>
  private async Task<string> HTEXTranslator(string containerid, string FileName, Stream stream)
  {
      await _azureStorage.GetBlobServiceClient().DelectBlobs(containerid, new List<string> { $"res/{FileName}" });

      Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);
      // Entry names are read as GB2312 so that Chinese file names are not garbled.
      Encoding encoding = Encoding.GetEncoding("GB2312");
      var options = new ReadOptions { Encoding = encoding };

      string indexMarker = $"{FileName}/index.json";
      bool hasindex = false;
      List<Task<string>> tasks = new List<Task<string>>();
      List<Stream> streams = new List<Stream>();
      try
      {
          using (ZipFile zip = ZipFile.Read(stream, options))
          {
              zip.AlternateEncoding = encoding;
              foreach (var f in zip.Entries)
              {
                  if (f.IsDirectory)
                  {
                      continue;
                  }

                  string name = FileName + "/" + f.FileName;

                  // Copy the whole entry: a single Read call may return fewer bytes than
                  // requested, which would upload a truncated file.
                  var blobstream = new MemoryStream();
                  streams.Add(blobstream);
                  using (var entryStream = f.OpenReader())
                  {
                      entryStream.CopyTo(blobstream);
                  }
                  blobstream.Position = 0;

                  tasks.Add(_azureStorage.UploadFileByContainer(containerid, blobstream, "res", name, false));
                  if (name.Contains(indexMarker))
                  {
                      hasindex = true;
                  }
              }
          }
          stream.Close();

          // Always wait for the uploads: the buffers they read from are closed below.
          string[] uploaded = await Task.WhenAll(tasks);

          string index = null;
          if (hasindex)
          {
              foreach (string raw in uploaded)
              {
                  var url = System.Web.HttpUtility.UrlDecode(raw, Encoding.UTF8);
                  if (url != null && url.Contains(indexMarker))
                  {
                      index = url;
                  }
              }
          }
          return index;
      }
      finally
      {
          // Release the in-memory copies of the entries.
          streams.ForEach(x => { x.Close(); });
      }
  }
  /// <summary>
  /// Splits a blob URL of the form <c>scheme://host/container/blob/path</c> into its
  /// container name and blob name.
  /// </summary>
  /// <param name="sasUrl">Absolute blob URL.</param>
  /// <returns>
  /// (container, blob). The blob part is empty when the URL has no blob path, and both
  /// parts are empty when the URL has no scheme separator or no container segment.
  /// </returns>
  private static (string, string) BlobUrlString(string sasUrl)
  {
      if (string.IsNullOrEmpty(sasUrl))
      {
          return (string.Empty, string.Empty);
      }

      int schemeEnd = sasUrl.IndexOf("://", StringComparison.Ordinal);
      if (schemeEnd < 0)
      {
          return (string.Empty, string.Empty);
      }

      // host / container / everything else. Limiting the split to three parts keeps the
      // blob path intact even when it repeats the "host/container/" text further in.
      string[] parts = sasUrl.Substring(schemeEnd + 3).Split(new[] { '/' }, 3);
      if (parts.Length < 2)
      {
          return (string.Empty, string.Empty);
      }

      string ContainerName = parts[1];
      string blob = parts.Length > 2 ? parts[2] : string.Empty;
      return (ContainerName, blob);
  }
  // Validation pattern for IsBlobName. The lookaheads reject, in order: a reserved device
  // name (alone or followed by an extension), leading whitespace, trailing whitespace.
  // The final class limits the name to 1-255 characters other than \ : < > * ? " |
  // The string is verbatim, so each regex escape is written with a single backslash.
  private static readonly System.Text.RegularExpressions.Regex BlobNamePattern =
      new System.Text.RegularExpressions.Regex(
          @"^(?!(?:con|prn|aux|nul|com[1-9]|lpt[1-9])(?:$|\.))(?!\s)(?!.*\s$)[^\\:<>*?""|]{1,255}$");

  /// <summary>
  /// Checks whether <paramref name="BlobName"/> is an acceptable blob name: 1-255 characters,
  /// none of <c>\ : &lt; &gt; * ? " |</c>, no leading or trailing whitespace, and not a
  /// reserved device name (con, prn, aux, nul, com1-9, lpt1-9) with or without an extension.
  /// </summary>
  /// <param name="BlobName">Blob name (path inside the container) to validate.</param>
  /// <returns>True when the name is acceptable; false otherwise, including for null.</returns>
  public static bool IsBlobName(string BlobName)
  {
      return BlobName != null && BlobNamePattern.IsMatch(BlobName);
  }
  /// <summary>
  /// Converts an uploaded PPTX (or XML) file through <c>PPTXTranslator</c> and stores the
  /// result in the container named after the caller's id.
  /// </summary>
  /// <param name="file">The uploaded form file; its extension must be "pptx" or "xml".</param>
  /// <returns>
  /// 200 with <c>{ index }</c> (the value returned by <c>PPTXTranslator</c>), or 400 when
  /// no file was sent or its extension is not supported.
  /// </returns>
  [HttpPost("upload-pptx")]
  [RequestSizeLimit(102_400_000_00)] // roughly 10000 MB at most
  public async Task<IActionResult> UploadPPTX([FromForm] IFormFile file)
  {
      var (id, _, _, _) = HttpContext.GetAuthTokenInfo();

      // A request without the form field binds to null; answer 400 instead of crashing.
      if (file == null)
      {
          return BadRequest("type is not pptx or xml !");
      }

      string extension = FileType.GetExtention(file.FileName);
      bool supported = string.Equals(extension, "pptx", StringComparison.OrdinalIgnoreCase)
          || string.Equals(extension, "xml", StringComparison.OrdinalIgnoreCase);
      if (!supported)
      {
          return BadRequest("type is not pptx or xml !");
      }

      // Strip only the final extension so "a.b.pptx" becomes "a.b", the same rule
      // ParseDoc applies to blob names.
      string FileName = Path.GetFileNameWithoutExtension(file.FileName);
      using (Stream streamFile = file.OpenReadStream())
      {
          string index = await PPTXTranslator(id, FileName, streamFile);
          return Ok(new { index = index });
      }
  }
  /// <summary>
  /// Converts an uploaded DOCX file to HTML with the DOCX translator.
  /// </summary>
  /// <param name="file">The uploaded form file; its extension must be "docx".</param>
  /// <returns>
  /// 200 with <c>{ htmlString }</c>, or 400 with a "msg"/"code" payload when no file was
  /// sent or it is not a docx.
  /// </returns>
  [HttpPost("upload-word")]
  [RequestSizeLimit(102_400_000_00)] // roughly 10000 MB at most
  public IActionResult UploadWord([FromForm] IFormFile file)
  {
      // A missing form field binds to null and is reported like a wrong file type.
      if (file == null || !string.Equals(FileType.GetExtention(file.FileName), "docx", StringComparison.OrdinalIgnoreCase))
      {
          return BadRequest(new Dictionary<string, object> { { "msg", "type is not docx!" }, { "code", ResponseCode.FAILED } });
      }

      using (Stream docStream = file.OpenReadStream())
      {
          var doc = _DOXC2HTMLTranslator.Translate(docStream);
          return Ok(new { htmlString = doc });
      }
  }
  /// <summary>
  /// Converts an uploaded DOCX file straight into question items: DOCX to HTML, then
  /// HTML to items.
  /// </summary>
  /// <param name="file">The uploaded form file; its extension must be "docx".</param>
  /// <returns>
  /// 200 with <c>{ tests, emferror }</c> (the items and the error list produced by the
  /// HTML translator), or 400 with a "msg"/"code" payload when no file was sent or it is
  /// not a docx.
  /// </returns>
  [HttpPost("parse-word")]
  public IActionResult ParseWord([FromForm] IFormFile file)
  {
      // A missing form field binds to null and is reported like a wrong file type.
      if (file == null || !string.Equals(FileType.GetExtention(file.FileName), "docx", StringComparison.OrdinalIgnoreCase))
      {
          return BadRequest(new Dictionary<string, object> { { "msg", "type is not docx!" }, { "code", ResponseCode.FAILED } });
      }

      using (Stream docStream = file.OpenReadStream())
      {
          var doc = _DOXC2HTMLTranslator.Translate(docStream);
          (List<HTEXLib.DOCX.Models.ItemInfo> tests, List<string> error) = _HTML2ITEMV3Translator.Translate(doc);
          return Ok(new { tests, emferror = error });
      }
  }
  /// <summary>
  /// Converts an uploaded DOCX file to HTML.
  /// </summary>
  /// <param name="file">The uploaded form file; its extension must be "docx".</param>
  /// <returns>
  /// 200 with <c>{ html }</c>, or 400 with a "msg"/"code" payload when no file was sent
  /// or it is not a docx.
  /// </returns>
  [HttpPost("parse-docx")]
  public IActionResult ParseDocx([FromForm] IFormFile file)
  {
      // A missing form field binds to null and is reported like a wrong file type.
      if (file == null || !string.Equals(FileType.GetExtention(file.FileName), "docx", StringComparison.OrdinalIgnoreCase))
      {
          return BadRequest(new Dictionary<string, object> { { "msg", "type is not docx!" }, { "code", ResponseCode.FAILED } });
      }

      using (Stream docStream = file.OpenReadStream())
      {
          var doc = _DOXC2HTMLTranslator.Translate(docStream);
          return Ok(new { html = doc });
      }
  }
  /// <summary>
  /// Converts an HTML string into question items.
  /// </summary>
  /// <param name="request">JSON object with a string property "html".</param>
  /// <returns>
  /// 200 with <c>{ tests, emferror }</c> (the items and the error list produced by the
  /// HTML translator), or 400 when the body is not an object or "html" is missing or not
  /// a string.
  /// </returns>
  [HttpPost("parse-html")]
  public IActionResult AnalyzeHtml(JsonElement request)
  {
      // TryGetProperty throws on non-object bodies and GetString throws on non-string
      // values; check the kinds first so bad input becomes a 400.
      if (request.ValueKind != JsonValueKind.Object
          || !request.TryGetProperty("html", out JsonElement html)
          || html.ValueKind != JsonValueKind.String)
      {
          return BadRequest();
      }

      (List<HTEXLib.DOCX.Models.ItemInfo> tests, List<string> error) = _HTML2ITEMV3Translator.Translate(html.GetString());
      return Ok(new { tests, emferror = error });
  }
  /// <summary>
  /// Translates a PPTX stream to HTEX and uploads the result under <c>res/{FileName}/</c>:
  /// one JSON blob per slide, one blob per inline (base64) media store, and an
  /// <c>index.json</c> that lists the slides. <c>res/{FileName}</c> is deleted first.
  /// </summary>
  /// <param name="containerid">Target container; "teammodelos" is used when blank.</param>
  /// <param name="FileName">Document name without extension; used as the folder name.</param>
  /// <param name="streamFile">PPTX content to translate.</param>
  /// <returns>The URL-decoded address of the uploaded <c>index.json</c>.</returns>
  private async Task<string> PPTXTranslator(string containerid, string FileName, Stream streamFile)
  {
      if (string.IsNullOrWhiteSpace(containerid))
      {
          containerid = "teammodelos";
      }
      await _azureStorage.GetBlobServiceClient().DelectBlobs(containerid, new List<string> { $"res/{FileName}" });

      HTEXLib.Htex htex = _PPTX2HTEXTranslator.Translate(streamFile);
      htex.name = FileName;
      HTEXIndex index = new HTEXIndex() { name = FileName, size = htex.size, thumbnail = htex.thumbnail, id = Guid.NewGuid().ToString() };

      // Serialize every slide first, then start the uploads, so a serialization failure
      // leaves no upload running.
      var serialized = new List<(string Id, string Json)>();
      foreach (var slide in htex.slides)
      {
          string json = JsonHelper.ToJson(slide, ignoreNullValue: false);
          serialized.Add((Guid.NewGuid().ToString(), json));
      }

      List<Sld> slds = new List<Sld>();
      List<Task<string>> tasks = new List<Task<string>>();
      foreach (var entry in serialized)
      {
          slds.Add(new Sld { type = "normal", url = $"{entry.Id}.json", scoring = null });
          tasks.Add(_azureStorage.UploadFileByContainer(containerid, entry.Json, "res", $"{FileName}/{entry.Id}.json", false));
      }
      await Task.WhenAll(tasks);

      List<Task> tasksFiles = new List<Task>();
      List<Stream> mediaStreams = new List<Stream>();
      try
      {
          // Iterate over a snapshot of the keys: entries are removed from htex.stores
          // inside the loop, which must not happen while enumerating the live collection.
          foreach (var key in htex.stores.Keys.ToList())
          {
              if (key.EndsWith(".wdp", StringComparison.Ordinal) || key.EndsWith(".xlsx", StringComparison.Ordinal))
              {
                  htex.stores.Remove(key);
                  continue;
              }

              var store = htex.stores[key];
              bool isInlineMedia = !store.isLazy
                  && store.contentType != null
                  && ContentTypeDict.extdict.TryGetValue(store.contentType, out string _)
                  && store.url != null
                  && store.url.Contains(";base64,");
              if (!isInlineMedia)
              {
                  continue;
              }

              // Data URL: the payload is everything after the first comma.
              string[] strs = store.url.Split(',');
              Stream stream = new MemoryStream(Convert.FromBase64String(strs[1]));
              mediaStreams.Add(stream);
              var name = key.Replace("/", "");
              tasksFiles.Add(_azureStorage.UploadFileByContainer(containerid, stream, "res", $"{FileName}/{name}", false));
          }
          await Task.WhenAll(tasksFiles);
      }
      finally
      {
          foreach (var media in mediaStreams)
          {
              media.Dispose();
          }
      }

      index.slides = slds;
      var BlobUrl = await _azureStorage.UploadFileByContainer(containerid, JsonHelper.ToJson(index, ignoreNullValue: false), "res", FileName + "/" + "index.json", false);
      return System.Web.HttpUtility.UrlDecode(BlobUrl, Encoding.UTF8);
  }
  345. }
  /// <summary>
  /// Content of the <c>index.json</c> written next to the slide blobs of a converted document.
  /// Property names are lower-case because they are serialized as-is.
  /// </summary>
  public class HTEXIndex
  {
      /// <summary>Identifier of the converted document.</summary>
      public string id { get; set; }

      /// <summary>Format version of the index; defaults to "1.0.20201210".</summary>
      public string version { get; set; } = "1.0.20201210";

      /// <summary>Name of the document.</summary>
      public string name { get; set; }

      /// <summary>Size of the document as reported by the HTEX translator.</summary>
      public HTEXLib.HtexSize size { get; set; }

      /// <summary>One entry per slide, in slide order.</summary>
      public List<Sld> slides { get; set; }

      /// <summary>Thumbnail of the document.</summary>
      public string thumbnail { get; set; }

      // The members below are not assigned anywhere in this file.
      public List<string> knowledge { get; set; }
      public string periodId { get; set; }
      public List<string> gradeIds { get; set; }
      public string subjectId { get; set; }
      public string subjectName { get; set; }
      public string score { get; set; }
      public string code { get; set; }
      public string scope { get; set; }
      public int? multipleRule { get; set; }
  }
  /// <summary>
  /// One slide entry of an <see cref="HTEXIndex"/>.
  /// </summary>
  public class Sld
  {
      /// <summary>
      /// Kind of page: "normal" (ordinary HTE page), "single" (single choice),
      /// "multiple" (multiple choice), "judge" (true/false), "complete" (fill in the blank),
      /// "subjective" (open question), "compose" (composite question).
      /// </summary>
      public string type { get; set; }

      /// <summary>
      /// Link to the parsed single-page PPTX HTEX, or to a question.
      /// </summary>
      public string url { get; set; }

      /// <summary>
      /// Score allocation of the question; null when <see cref="type"/> is "normal" or "compose".
      /// </summary>
      public Scoring scoring { get; set; }

      /// <summary>
      /// Thumbnail of the single-page PPTX HTEX.
      /// </summary>
      public string thumbnail { get; set; }
  }
  /// <summary>
  /// Score allocation attached to a slide entry.
  /// </summary>
  public class Scoring
  {
      /// <summary>Points for the question.</summary>
      public double score { get; set; }

      /// <summary>Answer list; starts out as an empty list, never null by default.</summary>
      public List<string> ans { get; set; } = new List<string>();
  }
  391. }