CrazyIter_Bin 7 hónapja
szülő
commit
19346de24f

+ 42 - 13
HTEX.DataETL/Controllers/LessonRecordController.cs

@@ -137,7 +137,7 @@ namespace HTEX.DataETL.Controllers
                     await System.IO.File.WriteAllTextAsync(Path.Combine(path, $"student-analysis.json"), studentLessonDatas.ToJsonString());
                     string jsons = await System.IO.File.ReadAllTextAsync($"{lessonPath}\\analysis\\analysis.json");
                     LessonDataAnalysisCluster lessonDataAnalysis = jsons.ToObject<LessonDataAnalysisCluster>();
-                    var lessonItems=  LessonETLService.ProcessStudentData(studentLessonDatas, lessonDataAnalysis);
+                    var lessonItems=  LessonETLService.ProcessStudentDataV1(studentLessonDatas, lessonDataAnalysis);
                     XmlDocument xmlDocument = new XmlDocument();
                     var runtimePath=  System.IO.Path.GetDirectoryName(System.Reflection.Assembly.GetExecutingAssembly().Location);
                     xmlDocument.Load($"{runtimePath}\\summary.xml");
@@ -148,14 +148,45 @@ namespace HTEX.DataETL.Controllers
         }
 
         /// <summary>
-        /// 课例数据ETL处理过程
+        /// Re-runs the student ETL over stored lessons: for each lesson file returned for "-local.json", reads the sibling "-stu.json", computes the per-student items with ProcessStudentDataV1 and exports them to a sibling "-stu.xlsx". Responds with the "-stu.json" paths that were not found. The request body is not read.
         /// </summary>
         /// <param name="json"></param>
         /// <returns></returns>
         [HttpPost("process-history-students")]
-        public async Task<IActionResult> ProcessHistoryStudents(JsonElement json) 
+        public async Task<IActionResult> ProcessHistoryStudents(JsonElement json)
         {
-            return Ok();
+            string? lessonBasePath = _configuration.GetValue<string>("LessonPath"); // base folder comes from the "LessonPath" configuration value
+            string? pathLessons = $"{lessonBasePath}\\lessons";
+            string? pathAnalysis = $"{lessonBasePath}\\analysis";
+            string jsons = await System.IO.File.ReadAllTextAsync($"{pathAnalysis}\\analysis.json"); // shared analysis data, read once and reused for every lesson
+            LessonDataAnalysisCluster lessonDataAnalysis = jsons.ToObject<LessonDataAnalysisCluster>();
+            List<string>  filesLessons = FileHelper.ListAllFiles(pathLessons, "-local.json"); // presumably the files whose names match "-local.json" — confirm against FileHelper.ListAllFiles
+            var runtimePath = System.IO.Path.GetDirectoryName(System.Reflection.Assembly.GetExecutingAssembly().Location);
+            XmlDocument xmlDocument = new XmlDocument();
+            xmlDocument.Load($"{runtimePath}\\summary.xml"); // summary.xml is loaded from the directory of the executing assembly
+            List<string> noStujson = new List<string>(); // collects the expected "-stu.json" paths that do not exist
+            foreach (string fileLesson in filesLessons) 
+            {
+                try {
+                    var stuFile = fileLesson.Replace("-local.json", "-stu.json"); // Replace rewrites every occurrence of "-local.json" in the path, not only the suffix
+                    var excleFile = fileLesson.Replace("-local.json", "-stu.xlsx"); // Excel output is written next to the lesson file
+                    if (System.IO.File.Exists(stuFile))
+                    {
+                        string stuJson = await System.IO.File.ReadAllTextAsync(stuFile);
+                        List<StudentLessonData> studentLessonDatas = stuJson.ToObject<List<StudentLessonData>>();
+                        var lessonItems = LessonETLService.ProcessStudentDataV1(studentLessonDatas, lessonDataAnalysis);
+                        await LessonETLService.ExportToExcel(lessonItems, excleFile, xmlDocument);
+                    }
+                    else
+                    {
+                        noStujson.Add(stuFile); // lesson has no student file: report it instead of failing
+                    }
+                }
+                catch (Exception ex) {
+                   throw new Exception($"{fileLesson},{ex.Message}",ex); // prefixes the message with the lesson file path; rethrowing ends the loop, so later files are not processed
+                }
+            }
+            return Ok(noStujson); // response body is the list of missing "-stu.json" paths
         }
 
 
@@ -337,17 +368,18 @@ namespace HTEX.DataETL.Controllers
                 lessonDataAnalysisCluster.interactNormal=LessonETLService. CleanDataBySDThreshold(lessonDataAnalysisCluster.interactNormal.OrderBy(x => x), thresholdMultiplier);
                 lessonDataAnalysisCluster.stuCowork=LessonETLService.CleanDataBySDThreshold(lessonDataAnalysisCluster.stuCowork.OrderBy(x=>x), thresholdMultiplier);
                 lessonDataAnalysisCluster.groupCowork=LessonETLService.CleanDataBySDThreshold(lessonDataAnalysisCluster.groupCowork.OrderBy(x => x), thresholdMultiplier);
-                List<KeyValuePair<double, List<int>>> clustersDataInteract = new();
-                var clusterInteract = KMeansService.KMeans(lessonDataAnalysisCluster.interactNormal.Select(x => (int)x).OrderBy(x => x));
+                var d = lessonDataAnalysisCluster.interactNormal.Select(x => x).OrderBy(x => x).ToArray();
+                //  设置0 为自动规划聚类,11 则为自动规划后得到的数字。
+                var clusterInteract = KMeansService.KMeansOptimized(d,11,10);
                 //foreach (var item in clusterInteract)
                 //{
                 //    Console.WriteLine($"dp:{item.Key} ,avg: {item.Value.Average()}, count: {item.Value.Count}, min:{item.Value.Min()}, max:{item.Value.Max()}");
                 //}
-                foreach (var s in clusterInteract.OrderBy(x => x.Key))
+                foreach (var item in clusterInteract.OrderBy(x => x.Key))
                 {
-                    clustersDataInteract.Add(s);
+                    lessonDataAnalysisCluster.clustersInteract.Add(new KeyValuePair<double, List<double>>(item.Value.Average(), item.Value));
+                    //Console.WriteLine($"dp:{item.Key} ,avg: {item.Value.Average()}, count: {item.Value.Count}, min:{item.Value.Min()}, max:{item.Value.Max()},weight:{item.Value.Count*1.0/d.Count()}");
                 }
-                lessonDataAnalysisCluster.clustersInteract= clustersDataInteract;
                 System.IO.File.WriteAllText(Path.Combine(pathAnalysis, "analysis.json"), lessonDataAnalysisCluster.ToJsonString());
             }
             return Ok(new { yearMonth });
@@ -385,10 +417,7 @@ namespace HTEX.DataETL.Controllers
                         count.smartRatingCount= lessonLocal.smartRatingDatas.Count;
                         count.timeCount=lessonLocal.sokratesDatas.Where(x => !ignore.Contains(x.Event)  &&  !x.Event.Contains("End", StringComparison.OrdinalIgnoreCase)).GroupBy(x => x.Event).Select(x => new CodeLong() { code=x.Key, value= x.ToList().Count }).ToList();
                         
-                        if (string.IsNullOrWhiteSpace("636725021468921856"))
-                        {
-                            int d = 1;
-                        }
+                        
                         if (lessonLocal.lessonRecord!=null)
                         {
 

+ 224 - 0
TEAMModelOS.Extension/HTEX.Lib/ETL/KMeansService.cs

@@ -1,4 +1,5 @@
 using System;
+using System.Collections;
 using System.Collections.Generic;
 using System.Linq;
 using System.Text;
@@ -8,6 +9,229 @@ namespace HTEX.Lib.ETL
 {
     public class KMeansService
     {
+
+        /// <summary>
+        /// Clusters one-dimensional values with an iterative K-means loop.
+        /// Initial centroids come from <c>InitializeCentroids</c> (highest local density first).
+        /// </summary>
+        /// <param name="datas">Values to cluster. The sequence is copied once, so a lazy query is evaluated a single time.</param>
+        /// <param name="optimalK">Number of clusters to build. When 0 or negative, the count is chosen by <c>FindOptimalK</c> (silhouette coefficient).</param>
+        /// <param name="maxClusters">Largest cluster count tried when <paramref name="optimalK"/> is chosen automatically.</param>
+        /// <returns>
+        /// The clusters of the last iteration, keyed by the centroid the points were assigned to.
+        /// The key is the centroid used for the assignment, which is not necessarily the mean of the
+        /// returned points. An empty dictionary is returned for empty input.
+        /// </returns>
+        public static Dictionary<double, List<double>> KMeansOptimized(IEnumerable<double> datas, int optimalK = 0, int maxClusters = 10)
+        {
+            // Materialize once: the values are walked on every iteration and by the helpers.
+            List<double> points = datas.ToList();
+            if (points.Count == 0)
+            {
+                return new Dictionary<double, List<double>>();
+            }
+
+            if (optimalK <= 0)
+            {
+                optimalK = FindOptimalK(points, maxClusters);
+            }
+
+            List<double> centroids = InitializeCentroids(points, optimalK);
+            Dictionary<double, List<double>> clustersResults = new Dictionary<double, List<double>>();
+
+            const int maxIterations = 100;
+            for (int iteration = 0; iteration < maxIterations; iteration++)
+            {
+                // Assign every point to its nearest centroid. The shared helper iterates the values as
+                // double; the previous inline loop declared the loop variable as int, which silently
+                // truncated every value before it was clustered.
+                Dictionary<double, List<double>> clusters = AssignPointsToClusters(points, centroids);
+
+                // New centroid of each cluster = mean of its members.
+                List<double> newCentroids = clusters.Values
+                    .Select(members => members.Sum() / members.Count)
+                    .ToList();
+
+                clustersResults = clusters;
+
+                // Compare as sorted sequences: dictionary order follows the first point assigned to each
+                // cluster and need not match the order of the centroid list.
+                if (newCentroids.OrderBy(c => c).SequenceEqual(centroids.OrderBy(c => c)))
+                {
+                    break;
+                }
+                centroids = newCentroids;
+            }
+            return clustersResults;
+        }
+
+        /// <summary>
+        /// Chooses the cluster count in the range 2..<paramref name="maxClusters"/> whose clustering has the
+        /// highest silhouette coefficient. Each candidate is scored after a single assignment of the points
+        /// to the initial centroids; no iterative refinement is performed here.
+        /// </summary>
+        /// <param name="datas">Values to cluster; copied once so the sequence is enumerated a single time.</param>
+        /// <param name="maxClusters">Largest cluster count to try.</param>
+        /// <returns>
+        /// The best-scoring cluster count. Falls back to 2 when no candidate produces a comparable score
+        /// (for example when <paramref name="maxClusters"/> is below 2 or every score is NaN).
+        /// </returns>
+        private static int FindOptimalK(IEnumerable<double> datas, int maxClusters)
+        {
+            List<double> points = datas.ToList();
+
+            // Track the best candidate directly instead of going through an index-shifted array: the
+            // previous version stored the score of k at index k-1 but read it back as i+2, returning the
+            // best k plus one (and possibly a value above maxClusters).
+            int optimalK = 2;
+            double maxCoefficient = double.MinValue;
+
+            for (int k = 2; k <= maxClusters; k++) // at least two clusters are required
+            {
+                var centroids = InitializeCentroids(points, k);
+                var clusters = AssignPointsToClusters(points, centroids);
+                double coefficient = CalculateSilhouetteCoefficient(clusters);
+
+                // A NaN score never satisfies '>' and is therefore skipped.
+                if (coefficient > maxCoefficient)
+                {
+                    maxCoefficient = coefficient;
+                    optimalK = k;
+                }
+            }
+
+            return optimalK;
+        }
+
+        /// <summary>
+        /// Computes the mean silhouette value of a clustering: for every element, (b - a) / max(a, b), where
+        /// a is its average distance to the other members of its own cluster and b is the smallest average
+        /// distance to the members of any other cluster.
+        /// </summary>
+        /// <param name="clusters">Clusters keyed by centroid.</param>
+        /// <returns>
+        /// The average silhouette value over all elements. Returns 0 when there are fewer than two clusters
+        /// or no elements; elements of single-member clusters contribute 0.
+        /// </returns>
+        private static double CalculateSilhouetteCoefficient(Dictionary<double, List<double>> clusters)
+        {
+            int totalElements = clusters.Sum(cluster => cluster.Value.Count);
+
+            // With fewer than two clusters there is no "other" cluster to compare against, and with no
+            // elements the final division would be 0/0. Both cases previously produced NaN.
+            if (clusters.Count < 2 || totalElements == 0)
+            {
+                return 0;
+            }
+
+            double silhouetteSum = 0;
+            foreach (var cluster in clusters)
+            {
+                // A single-member cluster has no within-cluster distance (0/0); its silhouette is taken as 0.
+                if (cluster.Value.Count < 2)
+                {
+                    continue;
+                }
+
+                foreach (var element in cluster.Value)
+                {
+                    double a = AverageDistanceWithinCluster(element, cluster.Value);
+                    double b = Double.PositiveInfinity;
+                    foreach (var otherCluster in clusters.Where(c => !c.Key.Equals(cluster.Key)))
+                    {
+                        double dist = AverageDistanceBetweenClusters(element, otherCluster.Value);
+                        if (dist < b)
+                        {
+                            b = dist;
+                        }
+                    }
+
+                    // max(a, b) == 0 means the element coincides with everything it was compared to;
+                    // count it as 0 rather than dividing by zero.
+                    double scale = Math.Max(a, b);
+                    if (scale > 0)
+                    {
+                        silhouetteSum += (b - a) / scale;
+                    }
+                }
+            }
+
+            return silhouetteSum / totalElements;
+        }
+        /// <summary>
+        /// Average absolute distance from <paramref name="element"/> to the other members of its cluster.
+        /// </summary>
+        /// <param name="element">The value being scored; expected to be a member of <paramref name="cluster"/>.</param>
+        /// <param name="cluster">All members of the cluster, including <paramref name="element"/>.</param>
+        /// <returns>Sum of distances divided by (member count - 1); 0 when the cluster has at most one member.</returns>
+        private static double AverageDistanceWithinCluster(double element, List<double> cluster)
+        {
+            // With a single member the divisor is 0 and the result used to be NaN (0.0 / 0).
+            if (cluster.Count <= 1)
+            {
+                return 0;
+            }
+
+            // Members equal to the element are skipped; they would add a distance of 0 anyway.
+            double sum = cluster
+                .Where(e => !e.Equals(element))
+                .Sum(e => Math.Abs(element - e));
+            return sum / (cluster.Count - 1);
+        }
+        /// <summary>
+        /// Average absolute distance from <paramref name="element"/> to every member of another cluster.
+        /// </summary>
+        /// <param name="element">The value being scored.</param>
+        /// <param name="otherCluster">Members of the cluster to measure against.</param>
+        /// <returns>Sum of the absolute differences divided by the member count.</returns>
+        private static double AverageDistanceBetweenClusters(double element, List<double> otherCluster)
+        {
+            double totalDistance = otherCluster.Sum(member => Math.Abs(element - member));
+            return totalDistance / otherCluster.Count;
+        }
+        /// <summary>
+        /// 随机方法初始化质心
+        /// </summary>
+        /// <param name="datas"></param>
+        /// <param name="k"></param>
+        /// <returns></returns>
+        //private static List<int> InitializeCentroids(IEnumerable<int> datas, int k)
+        //{
+        //    // K-means++ 初始化
+        //    var centroids = new List<int>();
+        //    var random = new Random();
+        //    //  centroids.Add(datas.ElementAt(random.Next(datas.Count())));
+        //    centroids.Add(1);
+        //    while (centroids.Count < k)
+        //    {
+        //        int[] distancesSquared = datas.Select(dataPoint =>
+        //            centroids.Min(centroid => (dataPoint - centroid) * (dataPoint - centroid))).ToArray();
+        //        double totalDistance = distancesSquared.Sum();
+        //        //double r = random.NextDouble() * totalDistance;
+        //        double r = totalDistance;
+        //        double accumulator = 0;
+        //        for (int i = 0; i < distancesSquared.Length; i++)
+        //        {
+        //            accumulator += distancesSquared[i];
+        //            if (accumulator >= r)
+        //            {
+        //                centroids.Add(datas.ElementAt(i));
+        //                break;
+        //            }
+        //        }
+        //    }
+        //    return centroids;
+        //}
+
+
+
+        /// <summary>
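+        /// Density-based centroid initialization: picks the k values with the highest local density (see CalculateLocalDensities). NOTE: this comment originally described the quantile method kept in the commented-out block directly below, which places the initial centroids at evenly spaced percentiles of the sorted data so that they are spread across the whole data range.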
+        /// </summary>
+        /// <param name="datas"></param>
+        /// <param name="k"></param>
+        /// <returns></returns>
+        //private static List<int> InitializeCentroids(IEnumerable<int> datas, int k)
+        //{
+        //    // 排序数据
+        //    var sortedData = datas.OrderBy(d => d).ToArray();
+
+        //    // 计算每个质心的位置
+        //    List<int> centroids = new List<int>();
+        //    for (int i = 0; i < k; i++)
+        //    {
+        //        // 计算第i个质心应该位于哪个位置
+        //        int index = (sortedData.Length * i + 1) / k - 1;
+        //        if (index < 0 || index >= sortedData.Length) continue; // 防止索引越界
+        //        centroids.Add(sortedData[index]);
+        //    }
+
+        //    return centroids;
+        //}
+        private static List<double> InitializeCentroids(IEnumerable<double> datas, int k)
+        {
+            // Rank the distinct values by local density (neighbour count) and keep the k densest ones as
+            // the starting centroids. Fewer than k centroids are returned when there are fewer distinct values.
+            return CalculateLocalDensities(datas)
+                .OrderByDescending(entry => entry.Value)
+                .Take(k)
+                .Select(entry => entry.Key)
+                .ToList();
+        }
+
+        /// <summary>
+        /// Counts, for every distinct value, how many input values lie within <paramref name="radius"/> of it.
+        /// </summary>
+        /// <param name="datas">Values to analyse; copied once so the sequence is enumerated a single time.</param>
+        /// <param name="radius">Largest absolute difference (inclusive) for two values to count as neighbours.</param>
+        /// <returns>
+        /// Map from each distinct value to its neighbour count. For a non-negative radius the count includes
+        /// the value itself and any duplicates of it.
+        /// </returns>
+        private static Dictionary<double, int> CalculateLocalDensities(IEnumerable<double> datas, int radius = 10)
+        {
+            // The inner count walks the whole input for every point, so take a snapshot instead of
+            // re-running a possibly lazy source sequence each time.
+            double[] points = datas.ToArray();
+
+            var densities = new Dictionary<double, int>();
+            foreach (double dataPoint in points)
+            {
+                // A repeated value has the same neighbourhood as its first occurrence; skip the recount.
+                if (densities.ContainsKey(dataPoint))
+                {
+                    continue;
+                }
+                densities[dataPoint] = points.Count(p => Math.Abs(p - dataPoint) <= radius);
+            }
+
+            return densities;
+        }
+
+        /// <summary>
+        /// Groups every value under the centroid it is closest to (absolute difference).
+        /// </summary>
+        /// <param name="datas">Values to assign.</param>
+        /// <param name="centroids">Candidate centroids; on a tie the one appearing first in the list wins.</param>
+        /// <returns>Map from centroid to the values assigned to it; centroids that attract no value are absent.</returns>
+        private static Dictionary<double, List<double>> AssignPointsToClusters(IEnumerable<double> datas, List<double> centroids)
+        {
+            var assignment = new Dictionary<double, List<double>>();
+            foreach (var value in datas)
+            {
+                // OrderBy is a stable sort, so equally distant centroids keep their list order.
+                double closest = centroids.OrderBy(c => Math.Abs(value - c)).First();
+                if (!assignment.TryGetValue(closest, out List<double> members))
+                {
+                    members = new List<double>();
+                    assignment[closest] = members;
+                }
+                members.Add(value);
+            }
+            return assignment;
+        }
+
         public static Dictionary<double, List<double>> KMeans(IEnumerable<double> datas)
         {
 

+ 238 - 6
TEAMModelOS.Extension/HTEX.Lib/ETL/Lesson/LessonETLService.cs

@@ -1387,7 +1387,239 @@ namespace HTEX.Lib.ETL.Lesson
             //排名 = (积分 - 最低积分) / (最高积分 - 最低积分) * (最大排名 - 最小排名) + 最小排名
             return x==0 ? 0 : max-min!=0 ? (x - min)*1.0 / (max - min) * (maxRank - minRank) + minRank : (x)*1.0 / (max) * (maxRank - minRank) + minRank;
         }
+        /// <summary>
+        /// Builds one <c>StudentLessonItem</c> per student from the lesson's raw records: interaction, exam,
+        /// task, rating and cowork indices, plus two overall values. <c>xx_cx</c> averages hd_cx, pc_df,
+        /// rw_cx, pj_nl and xz_cx; <c>xx_cy</c> averages hd_cy, pc_zd, rw_cy, pj_nl and xz_cy. Each average
+        /// divides by the number of its components that are greater than 0.
+        /// </summary>
+        /// <param name="studentLessonDatas">Per-student lesson records. An element's <c>appraise</c> is overwritten when the student has rating records.</param>
+        /// <param name="lessonDataAnalysis">Historical reference data (score sets, interaction clusters, task set) used to scale the indices.</param>
+        /// <returns>One item per input student, in input order.</returns>
+        public static List<StudentLessonItem> ProcessStudentDataV2(List<StudentLessonData> studentLessonDatas, LessonDataAnalysisCluster lessonDataAnalysis)
+        {
+
+            // Historical personal-score set, after outliers were removed with the 2-standard-deviation rule.
+            // NOTE(review): max_q, max_t and max_h are not read below. They are kept so the method still
+            // throws on an empty set exactly as before (Max() on an empty sequence throws).
+            var max_q = lessonDataAnalysis.pscore.Max();
+            // Historical interaction-score set, after outliers were removed with the 2-standard-deviation rule.
+            var max_t = lessonDataAnalysis.tscore.Max();
+            // Historical group-score set, after outliers were removed with the 2-standard-deviation rule.
+            var max_h = lessonDataAnalysis.gscore.Max();
+            var j = InteractWeight.T1;
+            double t = InteractWeight.TT;
+            List<StudentLessonItem> lessonItems = new List<StudentLessonItem>();
+            foreach (var studentLessonData in studentLessonDatas)
+            {
+                StudentLessonItem lessonItem = new StudentLessonItem() { studentId= studentLessonData.id! };
+                // u feeds the participation index below; it is 100 only when attend == 1
+                // (presumably "present" — confirm against the attend status codes).
+                double u = 0.0;
+                if (studentLessonData.attend==1)
+                {
+                    u=100.0;
+                }
+                // c: personal-score index, d: interaction-score index, e: group-score index (e is never assigned).
+                double c = 0, d = 0, e = 0;
+                {
+                    // Interaction-related scoring.
+                    // Number of interactions in this lesson.
+                    double n = studentLessonData.interactRecord.interactRecords.Count()*1.0;
+                    if (n>0)
+                    {
+                        // Background from the original author: the interaction clusters come from historical
+                        // lessons of the IES mainland production site (2024-03-01 to 2024-10-08, interaction or
+                        // learning-method index yellow/green, excluding Habook and test schools, lessons longer
+                        // than 5 minutes, 10,680 records). The IRS interaction + buzz-in + pick-a-student counts
+                        // had outliers removed with the 2-standard-deviation rule and were split by K-Means into
+                        // low and high interaction-frequency sets; m is taken from the set matching this
+                        // lesson's interaction count.
+                        // NOTE(review): the original comment says m is the centroid of that set, but the code
+                        // uses the set's maximum value — confirm which is intended.
+                        var m = n<=lessonDataAnalysis.clustersInteract.First().Value.Max() ? lessonDataAnalysis.clustersInteract.First().Value.Max()*1.0 : lessonDataAnalysis.clustersInteract.Last().Value.Max() *1.0;
+                        // Number of interactions the student answered.
+                        var w = studentLessonData.interactRecord.interactRecords.Where(x => x.resultWeight>=InteractWeight.T1).Count()*1.0;
+                        // Number of correct answers (including partially correct).
+                        var r = studentLessonData.interactRecord.interactRecords.Where(x => x.resultWeight>InteractWeight.T1).Count()*1.0;
+                        // Sum of the weights of answered items (original note: 60 <= k(x) <= 100).
+                        var kw = studentLessonData.interactRecord.interactRecords.Where(x => x.resultWeight>=InteractWeight.T1).Sum(x => x.resultWeight*1.0);
+                        // Sum of the weights of scored items (original note: 60 < e(x) <= 100).
+                        var er = studentLessonData.interactRecord.interactRecords.Where(x => x.resultWeight>InteractWeight.T1).Sum(x => x.resultWeight*1.0);
+                        // All interaction scores of this lesson.
+                        var i = studentLessonData.interactRecord.interactRecords.Sum(x => x.itemScore*1.0);
+                        // Personal score given manually by the teacher in this lesson.
+                        var s = studentLessonData.pscore;
+                        // Personal-score index (earlier formula: s*1.0/max_q).
+                        c =  GetPersent(lessonDataAnalysis.pscore, s).persent/100;
+                        // Interaction-score index (earlier formula: i*1.0/max_t).
+                        d =  GetPersent(lessonDataAnalysis.tscore, i).persent/100;
+                        // Interaction effectiveness index.
+                        var a = (d+w*kw/(j*m)+r*er/(j*m))*1.0/n;
+                        // Interaction participation index.
+                        var b = ((w*w)/m+(r*r)/m)*1.0/n;
+                        // Learning effectiveness: c + a = personal-score index + interaction effectiveness index,
+                        // passed through a logistic curve.
+                        var f1 = Math.Round(190*1.0/(1+Math.Exp(-(c+a)))-95, 4);
+                        lessonItem.hd_cx=f1;
+                        // Learning attitude: participation index plus the attendance term.
+                        var f2 = Math.Round(200*1.0/(1+Math.Exp(-(b+u/100)))-100, 4);
+                        lessonItem.hd_cy=f2;
+                        lessonItem.hd_cyc=w;
+                        lessonItem.hd_fqc=n;
+                        lessonItem.hd_zqc=r;
+                        lessonItem.gr_jf=s;
+                    }
+                }
+                {
+                    // Exam-related indices.
+                    double n = studentLessonData.examRecords.Count()*1.0;
+                    if (n>0)
+                    {
+                        // Number of questions (not used in the formulas below).
+                        double nq = studentLessonData.examRecords.Sum(x => x.qcount)*1.0;
+                        // Sum of score rates.
+                        double sum_s = studentLessonData.examRecords.Sum(x => x.scoreRate);
+                        // Sum of answer rates.
+                        double sum_a = studentLessonData.examRecords.Sum(x => x.answerRate);
+                        // Mean score rate and mean answer rate, as percentages.
+                        double f8 = Math.Round(sum_s/n*100, 4);
+                        double f9 = Math.Round(sum_a/n*100, 4);
+                        lessonItem.pc_df=f8;
+                        lessonItem.pc_zd=f9;
+                    }
+                }
+                {
+                    // Group-related indices (nothing is computed here).
+                }
+                {
+                    // Task-related indices.
+                    double n = studentLessonData.taskRecord.itemRecords.Count()*1.0;
+                    if (n>0)
+                    {
+                        double max_m = lessonDataAnalysis.task.Max();
+                        // Number of tasks the student took part in.
+                        double w = studentLessonData.taskRecord.itemRecords.Where(x => x.resultWeight>0).Count()*1.0;
+                        double y = (10 *w/n+(j/t) *w)/max_m;
+                        double l = max_m*(w*w/n+(j/t) * w)/n;
+                        // f4: task index, f5: task participation index.
+                        double f4 = Math.Round(190*1.0/(1+Math.Exp(-(y)))-95, 4);
+                        double f5 = Math.Round(200*1.0/(1+Math.Exp(-(l)))-100, 4);
+                        lessonItem.rw_fqc =n;
+                        lessonItem.rw_cyc =w;
+                        lessonItem.rw_cx =f4;
+                        lessonItem.rw_cy =f5;
+                    }
 
+                }
+                {
+                    // Rating-related indices.
+
+                    double n = studentLessonData.rateingRecord.itemRecords.Count()*1.0;
+                    if (n>0)
+                    {
+                        var v = studentLessonData.rateingRecord.itemRecords.Where(x => x.itemType.Equals("Voting"));
+                        double vc = v.Count()*1.0;
+                        var g = studentLessonData.rateingRecord.itemRecords.Where(x => x.itemType.Equals("GrandRating"));
+                        double gc = g.Count()*1.0;
+                        var p = studentLessonData.rateingRecord.itemRecords.Where(x => x.itemType.Equals("PeerAssessment"));
+                        double pc = p.Count()*1.0;
+
+                        // For each rating type: share of that type among all ratings, times (score + option count).
+                        var vg = v.Sum(x => x.itemScore);
+                        var vo = v.Sum(x => x.optCount);
+                        double vs = vc/n* (vg+ vo);
+
+                        var gg = g.Sum(x => x.itemScore);
+                        var go = g.Sum(x => x.optCount);
+                        double gs = gc/n* (gg+ go);
+
+                        var pg = p.Sum(x => x.itemScore);
+                        var po = p.Sum(x => x.optCount);
+                        double ps = pc/n* (pg+ po);
+                        double h = vs+ps+gs;
+                        // f3: rating ability.
+                        double f3 = Math.Round(190*1.0/(1+Math.Exp(-(h)))-95, 4);
+                        // Side effect: the input record itself is updated as well.
+                        studentLessonData.appraise=f3;
+
+                        lessonItem.pj_nl =f3;
+                        lessonItem.pj_cs =n;
+                        lessonItem.pj_vc =vc;
+                        lessonItem.pj_vg =vg;
+                        lessonItem.pj_vo =vo;
+                        lessonItem.pj_gc =gc;
+                        lessonItem.pj_gg =gg;
+                        lessonItem.pj_go =go;
+                        lessonItem.pj_pc =pc;
+                        lessonItem.pj_pg =pg;
+                        lessonItem.pj_po =po;
+                    }
+                }
+                {
+                    // Cowork-related indices.
+                    var n = studentLessonData.coworkRecord.itemRecords.Count()*1.0;
+
+                    if (n>0)
+                    {
+
+                        // Cowork items the student contributed to.
+
+                        var w = studentLessonData.coworkRecord.itemRecords.Where(x => x.resultWeight>0);
+                        double ss = w.Sum(x => x.itemScore)*1.0;
+                        double sw = w.Sum(x => x.resultWeight)*1.0;
+                        double wc = w.Count()*1.0;
+                        double x = 0.0;
+                        if (wc>0)
+                        {
+                            x=sw/(j *wc);
+                        }
+                        double max_xzcg = 40;
+                        double k = (wc*wc/n+x)/n+ wc*(ss/max_xzcg)* (wc/n);
+                        // f6: cowork index, f7: cowork participation index (both derived from k).
+                        double f6 = Math.Round(190*1.0/(1+Math.Exp(-(k)))-95, 4);
+                        double f7 = Math.Round(200*1.0/(1+Math.Exp(-(k)))-100, 4);
+                        lessonItem.xz_fqc =n;
+                        lessonItem.xz_cyc =wc;
+                        lessonItem.xz_cgf =ss;
+                        lessonItem.xz_cx =f6;
+                        lessonItem.xz_cy =f7;
+                    }
+                }
+
+                double xx_cx = 0, xx_cy = 0;
+                int avg_cx = 0, avg_cy = 0;
+                if (lessonItem.xz_cx>0)
+                {
+                    avg_cx+=1;
+                }
+                if (lessonItem.pj_nl>0)
+                {
+                    avg_cx+=1;
+                }
+                if (lessonItem.rw_cx>0)
+                {
+                    avg_cx+=1;
+                }
+                if (lessonItem.pc_df>0)
+                {
+                    avg_cx+=1;
+
+                }
+                if (lessonItem.hd_cx>0)
+                {
+                    avg_cx+=1;
+                }
+                // Divide only when at least one component is positive. With a divisor of 0 the expression
+                // evaluated to NaN (0.0 / 0) and that NaN was stored in the result; such students now get 0.
+                if (avg_cx>0)
+                {
+                    xx_cx+=lessonItem.hd_cx * 1.0/avg_cx+ lessonItem.pc_df* 1.0/avg_cx+ lessonItem.rw_cx* 1.0/avg_cx+ lessonItem.pj_nl* 1.0/avg_cx+ lessonItem.xz_cx* 1.0/avg_cx;
+                }
+
+                if (lessonItem.xz_cy>0)
+                {
+                    avg_cy+=1;
+                }
+                if (lessonItem.pj_nl>0)
+                {
+                    avg_cy+=1;
+                }
+                if (lessonItem.rw_cy>0)
+                {
+                    avg_cy+=1;
+                }
+                if (lessonItem.pc_zd>0)
+                {
+                    avg_cy+=1;
+                }
+                if (lessonItem.hd_cy>0)
+                {
+                    avg_cy+=1;
+                }
+                // Same guard as above for the participation average.
+                if (avg_cy>0)
+                {
+                    xx_cy+=lessonItem.hd_cy * 1.0/avg_cy+ lessonItem.pc_zd* 1.0/avg_cy+ lessonItem.rw_cy* 1.0/avg_cy+ lessonItem.pj_nl* 1.0/avg_cy+ lessonItem.xz_cy* 1.0/avg_cy;
+                }
+                lessonItem.xx_cx=xx_cx;
+                lessonItem.xx_cy=xx_cy;
+                lessonItems.Add(lessonItem);
+            }
+            return lessonItems;
+        }
 
 
         /// <summary>
@@ -1396,7 +1628,7 @@ namespace HTEX.Lib.ETL.Lesson
         /// <param name="studentLessonDatas"></param>
         /// <param name="lessonDataAnalysis"></param>
         /// <returns></returns>
-        public static List<StudentLessonItem>    ProcessStudentData(List<StudentLessonData> studentLessonDatas, LessonDataAnalysisCluster lessonDataAnalysis)
+        public static List<StudentLessonItem>    ProcessStudentDataV1(List<StudentLessonData> studentLessonDatas, LessonDataAnalysisCluster lessonDataAnalysis)
         {
             //历史记录的个人计分集合,通过“2倍标准差规则”移除异常值后得到的集合
             var max_q = lessonDataAnalysis.pscore.Max();
@@ -1439,9 +1671,9 @@ namespace HTEX.Lib.ETL.Lesson
                         //本节课教师手动给学生的个人计分
                         var s = studentLessonData.pscore;
                         //个人计分指数
-                        c =  GetPersent(lessonDataAnalysis.pscore, s)/100;// s*1.0/max_q;
+                        c =  GetPersent(lessonDataAnalysis.pscore, s).persent/100;// s*1.0/max_q;
                         //互动计分指数
-                        d =  GetPersent(lessonDataAnalysis.tscore, i)/100; //i*1.0/max_t;
+                        d =  GetPersent(lessonDataAnalysis.tscore, i).persent/100; //i*1.0/max_t;
                         //互动成效指数
                         var a = (d+w*kw/(j*m)+r*er/(j*m))*1.0/n;
                         //互动参与指数
@@ -1715,7 +1947,7 @@ namespace HTEX.Lib.ETL.Lesson
         /// <param name="nums"></param>
         /// <param name="curr"></param>
         /// <returns></returns>
-        public static double GetPersent(IEnumerable<double> nums, double curr)
+        public static (double persent,int count ) GetPersent(IEnumerable<double> nums, double curr)
         {
             int count = 0;
             foreach (var op in nums.OrderBy(x => x))
@@ -1733,7 +1965,7 @@ namespace HTEX.Lib.ETL.Lesson
                     break;
                 }
             }
-            return count *1.0/ nums.Count() * 100;
+            return (count *1.0/ nums.Count() * 100,count);
         }
     }
     /// <summary>
@@ -1877,7 +2109,7 @@ namespace HTEX.Lib.ETL.Lesson
         /// <summary>
         /// 
         /// </summary>
-        public List<KeyValuePair<double, List<int>>> clustersInteract { get; set; } = new List<KeyValuePair<double, List<int>>>();
+        public List<KeyValuePair<double, List<double>>> clustersInteract { get; set; } = new List<KeyValuePair<double, List<double>>>();
         ///// <summary>
         ///// 
         ///// </summary>

+ 9 - 1
TEAMModelOS.Extension/HTEX.Lib/summary.xml

@@ -8155,6 +8155,14 @@
             <param name="worksheet">Worksheet document to add</param>
             <returns>Worksheet part just added</returns>
         </member>
+        <member name="M:HTEX.Lib.ETL.KMeansService.InitializeCentroids(System.Collections.Generic.IEnumerable{System.Double},System.Int32)">
+            <summary>
+            分位数法 来初始化质心,将数据按照一定的百分位数划分来确定初始质心的位置。确保质心在整个数据范围内的分布更加均匀
+            </summary>
+            <param name="datas"></param>
+            <param name="k"></param>
+            <returns></returns>
+        </member>
         <member name="P:HTEX.Lib.ETL.Lesson.LocalStudent.attend">
             <summary>
             出席状态 1出席,6公假,5事假,4病假,2缺席,0未签到
@@ -8274,7 +8282,7 @@
             </summary>
             <returns></returns>
         </member>
-        <member name="M:HTEX.Lib.ETL.Lesson.LessonETLService.ProcessStudentData(System.Collections.Generic.List{TEAMModelOS.SDK.Models.StudentLessonData},HTEX.Lib.ETL.Lesson.LessonDataAnalysisCluster)">
+        <member name="M:HTEX.Lib.ETL.Lesson.LessonETLService.ProcessStudentDataV1(System.Collections.Generic.List{TEAMModelOS.SDK.Models.StudentLessonData},HTEX.Lib.ETL.Lesson.LessonDataAnalysisCluster)">
             <summary>
             计算学生的学习成效,学习态度,合作能力,协作能力,评价能力
             </summary>

+ 1 - 0
TEAMModelOS.Extension/HTEX.Test/HTEX.Test.csproj

@@ -16,6 +16,7 @@
 
   <ItemGroup>
     <ProjectReference Include="..\..\TEAMModelOS.SDK\TEAMModelOS.SDK.csproj" />
+    <ProjectReference Include="..\HTEX.Lib\HTEX.Lib.csproj" />
   </ItemGroup>
 
 </Project>

+ 110 - 55
TEAMModelOS.Extension/HTEX.Test/Program.cs

@@ -1,7 +1,9 @@
 using Azure;
 using Bogus;
+using HTEX.Lib.ETL.Lesson;
 using HTEX.Test.Service;
 using MathNet.Numerics;
+using MathNet.Numerics.LinearAlgebra;
 using System;
 using System.Configuration;
 using System.Diagnostics;
@@ -12,6 +14,7 @@ using System.Text.RegularExpressions;
 using TEAMModelOS.SDK;
 using TEAMModelOS.SDK.DI;
 using TEAMModelOS.SDK.Extension;
+using TEAMModelOS.SDK.Helper.Common.FileHelper;
 using TEAMModelOS.SDK.Models;
 using TEAMModelOS.SDK.Models.Cosmos.Student;
 using static HTEX.Test.Controllers.LessonRecordController;
@@ -23,7 +26,7 @@ namespace HTEX.Test
     {
         public static void Main(string[] args)
         {
-            Test();
+            Test2();
 
 
             var builder = WebApplication.CreateBuilder(args);
@@ -54,78 +57,130 @@ namespace HTEX.Test
 
         public static string Test2() 
         {
-            // 创建一个包含三万个元素的数组
-            int[] data = Enumerable.Range(1, 30000).Select(i => new Random().Next(1, 31)).ToArray();
-
-            // 设置聚类的数量
-            int k = 2;
-            // 初始化质心,可以选择数据中的k个随机点作为初始质心
-            List<int> centroids = new List<int>();
-            Random rand = new Random();
-            for (int i = 0; i < k; i++)
+            string? pathAnalysis = $"F:\\lesson-local\\analysis";
+            var filesAnalysis = FileHelper.ListAllFiles(pathAnalysis);
+            List<LessonDataAnalysisMonth> lessonDataAnalysisMonths = new List<LessonDataAnalysisMonth>();
+            LessonDataAnalysisCluster lessonDataAnalysisCluster = new LessonDataAnalysisCluster();
+            foreach (var file in filesAnalysis)
             {
-                centroids.Add(rand.Next(1, 31));
-            }
-            centroids.Sort(); // 对质心进行排序以避免重复
-
-            Dictionary<int, List<int>> clustersD= new Dictionary<int, List<int>>();
-            // 迭代次数
-            int maxIterations = 100;
-            for (int iteration = 0; iteration < maxIterations; iteration++)
-            {
-                // 将数据点分配给最近的质心
-                Dictionary<int, List<int>> clusters = new Dictionary<int, List<int>>();
-                foreach (int point in data)
+                //读取每月的数据
+                if (file.EndsWith("-m-analysis.json"))
                 {
-                    int nearestCentroid = centroids.OrderBy(c => Math.Abs(point - c)).First();
-                    if (!clusters.ContainsKey(nearestCentroid))
+                    string jsons =   System.IO.File.ReadAllText(file);
+                    LessonDataAnalysisMonth lessonDataAnalysisMonth = jsons.ToObject<LessonDataAnalysisMonth>();
+
+                    lessonDataAnalysisMonths.Add(lessonDataAnalysisMonth);
+                    if (lessonDataAnalysisMonth.task.IsNotEmpty())
+                    {
+                        lessonDataAnalysisCluster.task.AddRange(lessonDataAnalysisMonth.task);
+                    }
+                    if (lessonDataAnalysisMonth.irs.IsNotEmpty())
+                    {
+                        lessonDataAnalysisCluster.irs.AddRange(lessonDataAnalysisMonth.irs);
+                    }
+                    if (lessonDataAnalysisMonth.interactNormal.IsNotEmpty())
+                    {
+                        lessonDataAnalysisCluster.interactNormal.AddRange(lessonDataAnalysisMonth.interactNormal);
+                    }
+                    if (lessonDataAnalysisMonth.pscore.IsNotEmpty())
+                    {
+                        lessonDataAnalysisCluster.pscore.AddRange(lessonDataAnalysisMonth.pscore);
+                    }
+                    if (lessonDataAnalysisMonth.gscore.IsNotEmpty())
+                    {
+                        lessonDataAnalysisCluster.gscore.AddRange(lessonDataAnalysisMonth.gscore);
+                    }
+                    if (lessonDataAnalysisMonth.tscore.IsNotEmpty())
+                    {
+                        lessonDataAnalysisCluster.tscore.AddRange(lessonDataAnalysisMonth.tscore);
+                    }
+                    if (lessonDataAnalysisMonth.stuCowork.IsNotEmpty())
                     {
-                        clusters[nearestCentroid] = new List<int>();
+                        lessonDataAnalysisCluster.stuCowork.AddRange(lessonDataAnalysisMonth.stuCowork);
+                    }
+                    if (lessonDataAnalysisMonth.groupCowork.IsNotEmpty())
+                    {
+                        lessonDataAnalysisCluster.groupCowork.AddRange(lessonDataAnalysisMonth.groupCowork);
                     }
-                    clusters[nearestCentroid].Add(point);
                 }
+            }
+          
+            //标准差偏差N倍,视为异常数据
+            double thresholdMultiplier =2;
+            lessonDataAnalysisCluster.pscore= LessonETLService.CleanDataBySDThreshold(lessonDataAnalysisCluster.pscore.OrderBy(x => x), thresholdMultiplier);
+            lessonDataAnalysisCluster.gscore= LessonETLService.CleanDataBySDThreshold(lessonDataAnalysisCluster.gscore.OrderBy(x => x), thresholdMultiplier);
+            lessonDataAnalysisCluster.tscore= LessonETLService.CleanDataBySDThreshold(lessonDataAnalysisCluster.tscore.OrderBy(x => x), thresholdMultiplier);
+            lessonDataAnalysisCluster.irs = LessonETLService.CleanDataBySDThreshold(lessonDataAnalysisCluster.irs.OrderBy(x => x), thresholdMultiplier);
+            lessonDataAnalysisCluster.interactNormal=LessonETLService.CleanDataBySDThreshold(lessonDataAnalysisCluster.interactNormal.OrderBy(x => x), thresholdMultiplier);
+            lessonDataAnalysisCluster.stuCowork=LessonETLService.CleanDataBySDThreshold(lessonDataAnalysisCluster.stuCowork.OrderBy(x => x), thresholdMultiplier);
+            lessonDataAnalysisCluster.groupCowork=LessonETLService.CleanDataBySDThreshold(lessonDataAnalysisCluster.groupCowork.OrderBy(x => x), thresholdMultiplier);
+             //超过60 80的
+
+
+            var d = lessonDataAnalysisCluster.interactNormal.Select(x =>  x).OrderBy(x => x).ToArray();
+            double n = d.Max()+1;
+            var clusterInteract = HTEX.Lib.ETL. KMeansService.KMeansOptimized(d,3);
+            foreach (var item in clusterInteract.OrderBy(x => x.Key))
+            {
+                lessonDataAnalysisCluster.clustersInteract.Add(new KeyValuePair<double, List<double>>(item.Value.Average(), item.Value));
+                Console.WriteLine($"dp:{item.Key} ,avg: {item.Value.Average()}, count: {item.Value.Count}, min:{item.Value.Min()}, max:{item.Value.Max()},weight:{item.Value.Count*1.0/d.Count()}");
+            }
 
-                // 更新质心位置
-                List<int> newCentroids = new List<int>();
-                foreach (KeyValuePair<int, List<int>> cluster in clusters)
+            IEnumerable<double> all = lessonDataAnalysisCluster.clustersInteract.SelectMany(x => x.Value);
+            int pass = 0;
+            for (var i = 1; i<n; i++) 
+            {
+                var p = LessonETLService.GetPersent(all, i);
+                if (p.persent>=60) 
                 {
-                    int sum = cluster.Value.Sum();
-                    int count = cluster.Value.Count;
-                    int newCentroid = sum / count; // 计算均值
-                    newCentroids.Add(newCentroid);
-
-                    // 输出当前聚类的信息
-                    Console.WriteLine($"Cluster with centroid {cluster.Key}:");
-                    Console.WriteLine($"Min: {cluster.Value.Min()}, Max: {cluster.Value.Max()}, Average: {sum / count}");
+                    pass = i;
+                    break;
                 }
-
-                // 检查质心是否改变
-                newCentroids.Sort();
-                clustersD=clusters;
-                if (newCentroids.SequenceEqual(centroids))
+            }
+            int good = 0;
+            for (var i = 1; i<n; i++)
+            {
+                var p = LessonETLService.GetPersent(all, i);
+                if (p.persent>=80)
                 {
-                   
+                    good = i;
                     break;
                 }
-                centroids = newCentroids;
             }
+            double ss = 0;
+            for (var i = 1; i<n; i++) 
+            {
+               // KeyValuePair<double, List<int>> curr = new KeyValuePair<double, List<int>>();
+                var s = lessonDataAnalysisCluster.clustersInteract.FindAll(x => x.Value.Min()<=i  && x.Value.Max()>=i).MinBy(x=>x.Key);
+                var p = LessonETLService.GetPersent(all, i);
+                var l= i<pass?pass:i<good?pass:good;
+                var e = (i*1.0/l) *(p.persent)  *  (s.Value.Count*1.0/d.Count());
+                ss+= e;
+                Console.WriteLine($"n: {i},l: {l}, persent: {p.persent},count: {p.count},s-min: {s.Value.Min()},s-max: {s.Value.Max()},value: {e}");
+            }
+
+            //foreach (var s in clusterInteract.OrderBy(x => x.Key))
+            //{
+            //    clustersDataInteract.Add(s);
+            //}
+            //lessonDataAnalysisCluster.clustersInteract= clustersDataInteract;
+            System.IO.File.WriteAllText(Path.Combine(pathAnalysis, "analysis.json"), lessonDataAnalysisCluster.ToJsonString());
             return "";
         }
 
         public static string Test()
         {
-             
-            string jsons =   System.IO.File.ReadAllText( $"F:\\lesson-local\\analysis.json");
-            LessonDataAnalysis lessonDataAnlysis = jsons.ToObject<LessonDataAnalysis>();
-            var n = 5;
-            //聚类分数量大和数量小的类群。判断当前出题数在两个类群的最大范围内,再取其质心。
-            var m = n<=lessonDataAnlysis.clustersInteract.First().Value.Max() ? lessonDataAnlysis.clustersInteract.First().Value: lessonDataAnlysis.clustersInteract.Last().Value;
-            var persent = GetPersent(lessonDataAnlysis.interactNormal, n);
-            //Console.WriteLine(persent);
+            
+            // 创建一个随机数生成器
+            Random random = new Random();
+
+            // 示例数据集
+            var data = Enumerable.Range(1, 25000).Select(i => Vector<double>.Build.Dense(i % 10, i % 10)).ToArray();
+          
             return "Hello World!";
         }
-
-        static bool[] MarkAnomalies(List<int> array)
+         
+    static bool[] MarkAnomalies(List<int> array)
         {
             if (array.Count == 0) return new bool[0];
 
@@ -188,4 +243,4 @@ namespace HTEX.Test
             public double Weight { get; set; }
         }
     }
-}
+}