123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119 |
- using System;
- using System.Collections.Generic;
- using System.Linq;
- using System.Text;
- using System.Threading.Tasks;
- namespace HTEX.Lib.ETL
- {
/// <summary>
/// One-dimensional k-means clustering with k = 2.
/// The two initial centroids are the minimum and the maximum of the input.
/// </summary>
public class KMeansService
{
    /// <summary>Upper bound on assignment/update rounds before giving up on convergence.</summary>
    private const int MaxIterations = 100;

    /// <summary>
    /// Splits <paramref name="datas"/> into at most two clusters.
    /// </summary>
    /// <param name="datas">Values to cluster. Enumerated exactly once.</param>
    /// <returns>
    /// A map from the centroid used in the final assignment round to the values assigned to it.
    /// Values keep their original (non-truncated) magnitude and input order.
    /// Empty when <paramref name="datas"/> is empty.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="datas"/> is null.</exception>
    public static Dictionary<double, List<double>> KMeans(IEnumerable<double> datas)
    {
        return Cluster(datas, value => value);
    }

    /// <summary>
    /// Splits <paramref name="datas"/> into at most two clusters.
    /// </summary>
    /// <param name="datas">Values to cluster. Enumerated exactly once.</param>
    /// <returns>
    /// A map from the centroid used in the final assignment round to the values assigned to it,
    /// in input order. Empty when <paramref name="datas"/> is empty.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="datas"/> is null.</exception>
    public static Dictionary<double, List<int>> KMeans(IEnumerable<int> datas)
    {
        return Cluster(datas, value => (double)value);
    }

    /// <summary>
    /// Shared implementation for both overloads: iterates assignment and centroid update
    /// until the centroids stop changing or <see cref="MaxIterations"/> is reached.
    /// </summary>
    private static Dictionary<double, List<T>> Cluster<T>(IEnumerable<T> datas, Func<T, double> toDouble)
    {
        if (datas == null)
        {
            throw new ArgumentNullException(nameof(datas));
        }

        // Materialize once: the source may be lazy, and it is read on every iteration.
        List<T> points = datas.ToList();
        Dictionary<double, List<T>> result = new Dictionary<double, List<T>>();
        if (points.Count == 0)
        {
            return result;
        }

        List<double> values = points.Select(toDouble).ToList();

        // Min <= Max, so the list is already in ascending order.
        List<double> centroids = new List<double> { values.Min(), values.Max() };

        for (int iteration = 0; iteration < MaxIterations; iteration++)
        {
            // Assignment step: attach every point to its nearest centroid.
            Dictionary<double, List<T>> clusters = new Dictionary<double, List<T>>();
            for (int i = 0; i < points.Count; i++)
            {
                double nearest = FindNearestCentroid(centroids, values[i]);
                List<T> members;
                if (!clusters.TryGetValue(nearest, out members))
                {
                    members = new List<T>();
                    clusters[nearest] = members;
                }
                members.Add(points[i]);
            }

            // Update step: each new centroid is the mean of its cluster.
            // Summing as double avoids the checked-int overflow of Enumerable.Sum(IEnumerable<int>).
            List<double> newCentroids = new List<double>(clusters.Count);
            foreach (KeyValuePair<double, List<T>> cluster in clusters)
            {
                double sum = cluster.Value.Select(toDouble).Sum();
                newCentroids.Add(sum / cluster.Value.Count);
            }
            newCentroids.Sort();

            result = clusters;

            // Exact comparison is intentional: unchanged membership reproduces the same means bit-for-bit.
            if (newCentroids.SequenceEqual(centroids))
            {
                break;
            }
            centroids = newCentroids;
        }

        return result;
    }

    /// <summary>
    /// Returns the centroid closest to <paramref name="value"/>; on a tie the earlier
    /// (lower, since the list is sorted ascending) centroid wins.
    /// </summary>
    private static double FindNearestCentroid(List<double> centroids, double value)
    {
        double nearest = centroids[0];
        double bestDistance = Math.Abs(value - nearest);
        for (int i = 1; i < centroids.Count; i++)
        {
            double distance = Math.Abs(value - centroids[i]);
            if (distance < bestDistance)
            {
                bestDistance = distance;
                nearest = centroids[i];
            }
        }
        return nearest;
    }
}
- }
|