06: Učení bez učitele
In [1]:
Copied!
import numpy as np
from sklearn.cluster import KMeans
from sklearn import datasets
import matplotlib.pyplot as plt
import numpy as np
from sklearn.cluster import KMeans
from sklearn import datasets
import matplotlib.pyplot as plt
Data
In [2]:
Copied!
# Experiment configuration: number of clusters to generate/fit and the
# total number of sample points.
n_clusters, n_samples = 5, 500
In [3]:
Copied!
# Synthetic data set: `n_samples` points drawn around `n_clusters` centres.
# The fixed random_state keeps the generated points identical between runs.
X, y_true = datasets.make_blobs(
    n_samples=n_samples,
    centers=n_clusters,
    cluster_std=0.4,
    random_state=0,
)
# Alternative: pass explicit centres, e.g. centers=[[2, 2], [4, 5], [3, 10]]
# Alternative: data generated as circles
# X, y_true = datasets.make_circles(n_samples=n_samples, factor=0.5, noise=0.05)
K-Means
In [5]:
Copied!
# K-Means estimator configured for `n_clusters` clusters.
# The seed is fixed (same value as used for the data generation) so that the
# centroid initialisation -- and with it the cluster numbering and every
# number/plot derived from it -- is reproducible on Restart & Run All.
kmeans = KMeans(n_clusters=n_clusters, random_state=0)
In [6]:
Copied!
# Fit the K-Means estimator on the generated points X.
# The call is the last expression of the cell, so its return value (the
# estimator itself, see the KMeans(...) repr in the output) is displayed.
kmeans.fit(X)
kmeans.fit(X)
Out[6]:
KMeans(n_clusters=5)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
KMeans(n_clusters=5)
In [8]:
Copied!
# Read the fitted centroids from the estimator and print them; the printed
# array has one row per cluster.
centers = kmeans.cluster_centers_
print(centers)
centers = kmeans.cluster_centers_
print(centers)
[[ 0.96056297 4.34505773] [ 9.28510086 -2.29391247] [-1.26927837 7.78360769] [ 1.9876471 0.85602104] [-1.58354718 2.87334064]]
Kontrola kvality - Analýza siluet
In [10]:
Copied!
from sklearn.metrics import silhouette_samples, silhouette_score
import matplotlib.cm as cm
from sklearn.metrics import silhouette_samples, silhouette_score
import matplotlib.cm as cm
In [11]:
Copied!
# Cluster index assigned to every sample of X by the fitted model.
# `y_pred` was previously never defined in the notebook (the defining cell is
# missing), so this cell raised NameError on a fresh kernel.
y_pred = kmeans.labels_

# Mean silhouette coefficient over all samples: a single-number summary of
# how well the samples sit inside their assigned clusters.
silhouette_avg = silhouette_score(X, y_pred)
print(f'Prumerna hodnota analyzy siluet pro {n_clusters} shluku je {silhouette_avg}')
Prumerna hodnota analyzy siluet pro 5 shluku je 0.8126659651729236
In [12]:
Copied!
# Per-sample silhouette coefficients for X under the labelling `y_pred`
# (one value per sample, used for the silhouette plot).
silhouette_scores = silhouette_samples(X, labels=y_pred)
In [13]:
Copied!
# Sanity check: the mean of the per-sample coefficients should match the
# value reported by silhouette_score above.
silhouette_scores.mean()
Out[13]:
np.float64(0.8126659651729236)
In [14]:
Copied!
# Two panels side by side: silhouette plot (left) and the clustered data (right).
fig, (ax1, ax2) = plt.subplots(1, 2)
fig.set_figwidth(15)

# Left panel: horizontal axis is the silhouette coefficient, clipped to [-0.2, 1].
ax1.set_xlim([-0.2, 1])
ax1.set_xlabel("Koeficient siluet")
ax1.set_ylabel("Shluk")

# One colour per sample, derived from its cluster index, so the scatter plot
# uses the same colours as the silhouette bands.
colour = cm.inferno(y_pred / n_clusters)

# Vertical gap (in plot rows) left below the first band and between bands.
band_gap = 10

y_lower = band_gap
for i in range(n_clusters):
    # Coefficients of the samples assigned to cluster i, in ascending order.
    # np.sort returns a new array instead of sorting a temporary in place.
    silhouette_scores_i = np.sort(silhouette_scores[y_pred == i])

    # Each sample occupies one row, so the band spans [y_lower, y_upper).
    y_upper = y_lower + len(silhouette_scores_i)
    ax1.fill_betweenx(
        np.arange(y_lower, y_upper),
        0,
        silhouette_scores_i,
        facecolor=cm.inferno(i / n_clusters),
    )

    # Label the band with its cluster index at the band's vertical centre.
    ax1.text(-0.1, y_lower + 0.5 * len(silhouette_scores_i), str(i))

    # Start of the next band.
    y_lower = y_upper + band_gap

# Row positions carry no meaning, so hide the y ticks; mark the overall mean.
ax1.set_yticks([])
ax1.axvline(silhouette_avg, color='red', linestyle='--')

# Right panel: the data points coloured by assigned cluster.
ax2.scatter(X[:, 0], X[:, 1], c=colour)
ax2.set_xticks([])
ax2.set_yticks([])
In [ ]:
Copied!