How can we cluster/classify data into different categories and show the result visually, such that when the data changes, the change is reflected in the visualization as well?

Like this?

```
from sklearn.cluster import KMeans
import numpy as np
import streamlit as st
import matplotlib.pyplot as plt
from matplotlib.patches import Ellipse
import matplotlib.transforms as transforms
def confidence_ellipse(x, y, ax, n_std=3.0, facecolor="none", **kwargs):
    """Add the covariance confidence ellipse of *x* and *y* to *ax*.

    Args:
        x: Array-like of x coordinates.
        y: Array-like of y coordinates; must hold as many values as *x*.
        ax: Matplotlib axes the ellipse patch is added to.
        n_std: Number of standard deviations used to scale the ellipse.
        facecolor: Fill colour passed to the ellipse patch.
        **kwargs: Forwarded unchanged to ``matplotlib.patches.Ellipse``.

    Returns:
        Whatever ``ax.add_patch`` returns for the new ellipse.

    Raises:
        ValueError: If *x* and *y* do not contain the same number of values.
    """
    # Accept plain sequences too: ``.size`` only exists on ndarrays, so a
    # list argument would otherwise fail with AttributeError.
    x = np.asarray(x)
    y = np.asarray(y)
    if x.size != y.size:
        raise ValueError("x and y must be the same size")

    cov = np.cov(x, y)
    pearson = cov[0, 1] / np.sqrt(cov[0, 0] * cov[1, 1])
    # Rounding can push the coefficient marginally outside [-1, 1] for
    # collinear data, which would make one of the radii below NaN.
    pearson = np.clip(pearson, -1.0, 1.0)

    # Radii of the ellipse for unit-variance data with this correlation.
    ell_radius_x = np.sqrt(1 + pearson)
    ell_radius_y = np.sqrt(1 - pearson)
    ellipse = Ellipse(
        (0, 0),
        width=ell_radius_x * 2,
        height=ell_radius_y * 2,
        facecolor=facecolor,
        **kwargs
    )

    # Stretch by n_std standard deviations per axis and centre on the mean.
    scale_x = np.sqrt(cov[0, 0]) * n_std
    scale_y = np.sqrt(cov[1, 1]) * n_std
    mean_x = np.mean(x)
    mean_y = np.mean(y)

    transf = (
        transforms.Affine2D()
        .rotate_deg(45)
        .scale(scale_x, scale_y)
        .translate(mean_x, mean_y)
    )
    ellipse.set_transform(transf + ax.transData)
    return ax.add_patch(ellipse)
# ``st.cache`` is deprecated in favour of ``st.cache_data``; use the new
# decorator when this Streamlit release has it, otherwise fall back.
_cache_data = getattr(st, "cache_data", None) or st.cache


@_cache_data
def data():
    """Return 500 two-dimensional points drawn from a standard normal.

    The function is wrapped in Streamlit's cache decorator so the random
    draw is not repeated on every script rerun.
    """
    # 1000 samples reshaped into rows of (x, y) pairs.
    return np.random.normal(0, 1, 1000).reshape(-1, 2)
# --- Data and clustering ---------------------------------------------------
X = data()

cluster_slider = st.slider(
    min_value=1, max_value=6, value=2, label="Number of clusters: "
)
kmeans = KMeans(n_clusters=cluster_slider, random_state=0).fit(X)
labels = kmeans.labels_

# --- Display options -------------------------------------------------------
selectbox = st.selectbox("Visualize confidence bounds", [False, True])
stdbox = st.selectbox("Number of standard deviations: ", [1, 2, 3])

# One colour per possible cluster (the slider allows at most six).
clrs = ["red", "seagreen", "orange", "blue", "yellow", "purple"]
# np.unique returns the distinct labels in sorted order, so the subplot
# order is deterministic.
cluster_ids = np.unique(labels)
n_labels = len(cluster_ids)

individual = st.selectbox("Individual subplots?", [False, True])

# --- Plot ------------------------------------------------------------------
if individual:
    # squeeze=False always yields an array of axes; without it a single
    # cluster returns one bare Axes and ``ax[i]`` below raises TypeError.
    fig, ax = plt.subplots(ncols=n_labels, squeeze=False)
    ax = ax.ravel()
else:
    fig, ax = plt.subplots()

for i, yi in enumerate(cluster_ids):
    a = ax[i] if individual else ax

    xi = X[labels == yi]
    x_pts = xi[:, 0]
    y_pts = xi[:, 1]
    a.scatter(x_pts, y_pts, c=clrs[yi])

    if selectbox:
        confidence_ellipse(
            x=x_pts,
            y=y_pts,
            ax=a,
            edgecolor="black",
            facecolor=clrs[yi],
            alpha=0.2,
            n_std=stdbox,
        )

# Lay out this figure explicitly instead of relying on pyplot's implicit
# "current figure".
fig.tight_layout()
st.write(fig)
# A new figure is created on every rerun; close it so pyplot does not keep
# accumulating open figures.
plt.close(fig)
```

Updated with individual plots:

1 Like

Yeah, something like this. Thank you.

But can we make it more categorical visually?

For example, categorizing the data into 4 blocks, so that we can check the homogeneous data in each category/block?

1 Like

Updated. Is this what you meant?

Obviously you’ll have to think of a dynamic subplot layout that fits your data best, but you get the point.

1 Like

Yeah, I got it. Thanks.