// KMeansService.cs (4.6 KB)
  1. using System;
  2. using System.Collections.Generic;
  3. using System.Linq;
  4. using System.Text;
  5. using System.Threading.Tasks;
  6. namespace HTEX.Lib.ETL
  7. {
  8. public class KMeansService
  9. {
  10. public static Dictionary<double, List<double>> KMeans(IEnumerable<double> datas)
  11. {
  12. // 初始化质心,可以选择数据中的k个随机点作为初始质心
  13. List<double> centroids = new List<double>();
  14. centroids.Add(datas.Min());
  15. centroids.Add(datas.Max());
  16. centroids.Sort(); // 对质心进行排序以避免重复
  17. Dictionary<double, List<double>> clustersResults = new Dictionary<double, List<double>>();
  18. // 迭代次数
  19. int maxIterations = 100;
  20. for (int iteration = 0; iteration < maxIterations; iteration++)
  21. {
  22. // 将数据点分配给最近的质心
  23. Dictionary<double, List<double>> clusters = new Dictionary<double, List<double>>();
  24. foreach (int point in datas)
  25. {
  26. double nearestCentroid = centroids.OrderBy(c => Math.Abs(point - c)).First();
  27. if (!clusters.ContainsKey(nearestCentroid))
  28. {
  29. clusters[nearestCentroid] = new List<double>();
  30. }
  31. clusters[nearestCentroid].Add(point);
  32. }
  33. // 更新质心位置
  34. List<double> newCentroids = new List<double>();
  35. foreach (KeyValuePair<double, List<double>> cluster in clusters)
  36. {
  37. double sum = cluster.Value.Sum();
  38. int count = cluster.Value.Count;
  39. double newCentroid = sum *1.0/ count; // 计算均值
  40. newCentroids.Add(newCentroid);
  41. // 输出当前聚类的信息
  42. //Console.WriteLine($"Cluster with centroid {cluster.Key}:");
  43. //Console.WriteLine($"Min: {cluster.Value.Min()}, Max: {cluster.Value.Max()}, Average: {sum / count}");
  44. }
  45. // 检查质心是否改变
  46. newCentroids.Sort();
  47. clustersResults=clusters;
  48. if (newCentroids.SequenceEqual(centroids))
  49. {
  50. break;
  51. }
  52. centroids = newCentroids;
  53. }
  54. return clustersResults;
  55. }
  56. public static Dictionary<double, List<int>> KMeans(IEnumerable<int> datas)
  57. {
  58. // 初始化质心,可以选择数据中的k个随机点作为初始质心
  59. List<double> centroids = new List<double>();
  60. centroids.Add(datas.Min());
  61. centroids.Add(datas.Max());
  62. centroids.Sort(); // 对质心进行排序以避免重复
  63. Dictionary<double, List<int>> clustersResults = new Dictionary<double, List<int>>();
  64. // 迭代次数
  65. int maxIterations = 100;
  66. for (int iteration = 0; iteration < maxIterations; iteration++)
  67. {
  68. // 将数据点分配给最近的质心
  69. Dictionary<double, List<int>> clusters = new Dictionary<double, List<int>>();
  70. foreach (int point in datas)
  71. {
  72. double nearestCentroid = centroids.OrderBy(c => Math.Abs(point - c)).First();
  73. if (!clusters.ContainsKey(nearestCentroid))
  74. {
  75. clusters[nearestCentroid] = new List<int>();
  76. }
  77. clusters[nearestCentroid].Add(point);
  78. }
  79. // 更新质心位置
  80. List<double> newCentroids = new List<double>();
  81. foreach (KeyValuePair<double, List<int>> cluster in clusters)
  82. {
  83. int sum = cluster.Value.Sum();
  84. int count = cluster.Value.Count;
  85. double newCentroid = sum *1.0/ count; // 计算均值
  86. newCentroids.Add(newCentroid);
  87. // 输出当前聚类的信息
  88. //Console.WriteLine($"Cluster with centroid {cluster.Key}:");
  89. //Console.WriteLine($"Min: {cluster.Value.Min()}, Max: {cluster.Value.Max()}, Average: {sum / count}");
  90. }
  91. // 检查质心是否改变
  92. newCentroids.Sort();
  93. clustersResults=clusters;
  94. if (newCentroids.SequenceEqual(centroids))
  95. {
  96. break;
  97. }
  98. centroids = newCentroids;
  99. }
  100. return clustersResults;
  101. }
  102. }
  103. }