# kmeans.py -- forked from campusx-official/100-days-of-machine-learning
import random
import numpy as np
class KMeans:
    """Minimal k-means clustering (Lloyd's algorithm) on NumPy arrays.

    Attributes:
        n_clusters: Number of clusters to form.
        max_iter: Upper bound on the number of centroid updates.
        centroids: Array with one row per cluster centre, or ``None``
            until ``fit_predict`` has been called.
    """

    def __init__(self, n_clusters=2, max_iter=100):
        self.n_clusters = n_clusters
        self.max_iter = max_iter
        self.centroids = None

    def fit_predict(self, X):
        """Cluster the rows of ``X`` and return one label per row.

        Initial centroids are ``n_clusters`` distinct rows of ``X`` drawn
        with ``random.sample``; seed the ``random`` module for
        reproducible results.

        Args:
            X: Array-like whose first axis indexes the samples.

        Returns:
            Integer array of cluster indices, one per sample, consistent
            with the final ``self.centroids``.

        Raises:
            ValueError: Propagated from ``random.sample`` when
                ``n_clusters`` is negative or exceeds the number of samples.
        """
        X = np.asarray(X)
        random_index = random.sample(range(X.shape[0]), self.n_clusters)
        self.centroids = X[random_index]

        # Assign once up front so a label array exists even when
        # max_iter is 0 and the loop body never runs.
        cluster_group = self.assign_clusters(X)
        for _ in range(self.max_iter):
            old_centroids = self.centroids
            self.centroids = self.move_centroids(X, cluster_group)
            # Converged: the centroids did not move, so the current labels
            # are already the ones the final centroids would produce.
            if np.array_equal(old_centroids, self.centroids):
                break
            cluster_group = self.assign_clusters(X)
        return cluster_group

    def assign_clusters(self, X):
        """Return, for every row of ``X``, the index of its nearest centroid.

        Distance is Euclidean; ties go to the lowest centroid index.

        Args:
            X: Array-like whose first axis indexes the samples.

        Returns:
            Integer array of centroid indices, one per sample.
        """
        X = np.asarray(X)
        centroids = np.asarray(self.centroids)
        # Broadcast to (n_samples, n_centroids, ...) pairwise differences.
        diff = X[:, None] - centroids[None]
        # Sum squares over the feature axes only (none when X is 1-D).
        # The square root is skipped: it is monotonic and does not change
        # which centroid is closest.
        sq_dist = (diff ** 2).sum(axis=tuple(range(2, diff.ndim)))
        return sq_dist.argmin(axis=1)

    def move_centroids(self, X, cluster_group):
        """Recompute every centroid as the mean of its assigned samples.

        A cluster with no assigned samples keeps its current centroid, so
        the result always has one row per existing centroid and row ``k``
        still corresponds to label ``k``.

        Args:
            X: Array-like whose first axis indexes the samples.
            cluster_group: Cluster index for each sample of ``X``.

        Returns:
            Array of updated centroids, one row per current centroid.
        """
        X = np.asarray(X)
        cluster_group = np.asarray(cluster_group)
        new_centroids = []
        for cluster_id in range(len(self.centroids)):
            members = X[cluster_group == cluster_id]
            if len(members):
                new_centroids.append(members.mean(axis=0))
            else:
                # Empty cluster: keep the previous position rather than
                # dropping the row (which would misalign labels) or taking
                # the mean of nothing (which would yield NaN).
                new_centroids.append(self.centroids[cluster_id])
        return np.array(new_centroids)