Color in alt.Chart

# Synthetic data: N points generated around three 2-D cluster centres.
N = 150
centers = [[2, 3], [5, 5], [1, 8]]
n_classes = len(centers)
data, labels = make_blobs(N, centers=np.array(centers), random_state=1)

# Bucket the points by cluster label as [x, y, color, nhom] rows, where both
# "color" and "nhom" hold the 1-based cluster number.  Iterating over the
# generated samples themselves (instead of a hard-coded range(150)) keeps the
# loop in step with N if the sample count is ever changed.
nhom_0 = []
nhom_1 = []
nhom_2 = []
for (px, py), label in zip(data, labels):
    if label == 0:
        nhom_0.append([px, py, 1, 1])
    elif label == 1:
        nhom_1.append([px, py, 2, 2])
    else:
        nhom_2.append([px, py, 3, 3])
nhom_0 = np.array(nhom_0)
nhom_1 = np.array(nhom_1)
nhom_2 = np.array(nhom_2)

# One row per point, grouped cluster by cluster.
df = pd.DataFrame(
    [*nhom_0, *nhom_1, *nhom_2],
    columns=['x', 'y', 'color', 'nhom'],
)

# Scatter plot of the points, coloured by the "color" column.
c = alt.Chart(df).mark_circle().encode(
    x='x',
    y='y',
    color='color',
    tooltip=['x', 'y', 'color', 'nhom'],
)
st.altair_chart(c, use_container_width=True)

# Field names are case-sensitive, so they must match the lower-case DataFrame
# columns ('x', 'y', 'color'); the previous 'X', 'Y' and 'Color' matched no
# column.  These two line charts are built but not rendered in this snippet.
base = alt.Chart(df).encode(alt.X('x:O'))
chart_test_count = base.mark_line().encode(alt.Y('y:N'))
chart_test_failures = base.mark_line().encode(alt.Y('color:N'))

If I use numbers in the ‘color’ column, it shows this:


But if I use ‘red’, ‘blue’, … in the ‘color’ column, it shows this:

How can I fix this?

1 Like

Hi @loctp2003, can you also share the code for your make_blobs function?

import streamlit as st
from sklearn.datasets import make_blobs
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
import numpy as np
import pandas as pd
import altair as alt
# Synthetic data: N points generated around three 2-D cluster centres.
N = 150
centers = [[2, 3], [5, 5], [1, 8]]
n_classes = len(centers)
data, labels = make_blobs(N, centers=np.array(centers), random_state=1)

# Build every [x, y, color, nhom] row at once; "color" and "nhom" both hold
# the 1-based cluster number.  Working on whole arrays removes the hard-coded
# range(150), so the code follows N automatically.
cluster_number = labels + 1
rows = np.column_stack([data, cluster_number, cluster_number])

# Split the rows per cluster.  As before, anything that is not label 0 or 1
# lands in the third group.
nhom_0 = rows[labels == 0]
nhom_1 = rows[labels == 1]
nhom_2 = rows[~np.isin(labels, (0, 1))]

# One row per point, grouped cluster by cluster.
df = pd.DataFrame(
    np.vstack((nhom_0, nhom_1, nhom_2)),
    columns=['x', 'y', 'color', 'nhom'],
)

# Scatter plot of the points, coloured by the "color" column.
c = alt.Chart(df).mark_circle().encode(
    x='x',
    y='y',
    color='color',
    tooltip=['x', 'y', 'color', 'nhom'],
)
st.altair_chart(c, use_container_width=True)

# Field names are case-sensitive, so they must match the lower-case DataFrame
# columns ('x', 'y', 'color'); the previous 'X', 'Y' and 'Color' matched no
# column.  These two line charts are built but not rendered in this script.
base = alt.Chart(df).encode(alt.X('x:O'))
chart_test_count = base.mark_line().encode(alt.Y('y:N'))
chart_test_failures = base.mark_line().encode(alt.Y('color:N'))

The keys seem to be:

  1. Use scale=None with the color to use the literal values you pass as colors
  2. Use :Q instead of :N with x and y to specify them as continuous variables
import altair as alt
import numpy as np
import pandas as pd
import streamlit as st
from sklearn.datasets import make_blobs

# Synthetic data: N points generated around three 2-D cluster centres.
N = 150
centers = [[2, 3], [5, 5], [1, 8]]
n_classes = len(centers)
data, labels = make_blobs(N, centers=np.array(centers), random_state=1)

# Literal colour name for each cluster label (0, 1, 2).
palette = np.array(["red", "green", "blue"])

# Build the table column by column so each column keeps its own dtype.
# Passing rows that mix floats and colour strings through np.array() would
# coerce every cell to a string, so x, y and nhom would arrive as text.
points = pd.DataFrame(
    {
        "x": data[:, 0],
        "y": data[:, 1],
        "color": palette[labels],
        "nhom": labels + 1,  # 1-based cluster number
    }
)

# Per-cluster slices, then stacked so rows stay grouped cluster by cluster.
nhom_0 = points[labels == 0]
nhom_1 = points[labels == 1]
nhom_2 = points[labels == 2]
df = pd.concat([nhom_0, nhom_1, nhom_2], ignore_index=True)
st.expander("Show data").write(df)

c = (
    alt.Chart(df)
    .mark_circle()
    .encode(
        x="x:Q",
        y="y:Q",
        # scale=None makes Altair use the column values as literal colours
        # instead of mapping them through a colour scale.
        color=alt.Color("color", scale=None),
        tooltip=["x", "y", "color", "nhom"],
    )
)
st.altair_chart(c, use_container_width=True)
1 Like

How can I draw lines like this in altair_chart?

@loctp2003 You can accomplish this by creating new Chart objects for each line/point and adding them to your original chart, like so:

def _straight_line(y_start, y_end, **mark_options):
    """Return a line chart from (0, y_start) to (8, y_end).

    Any keyword arguments are forwarded to ``mark_line`` (for example
    ``strokeDash`` for a dashed line).
    """
    endpoints = pd.DataFrame({"x": [0, 8], "y": [y_start, y_end]})
    segment = alt.Chart(endpoints).mark_line(**mark_options)
    return segment.encode(x="x:Q", y="y:Q")


# One solid line with a dashed line of the same slope above and below it.
line1 = _straight_line(8, 0)
line2 = _straight_line(10, 2, strokeDash=[4, 2])
line3 = _straight_line(6, -2, strokeDash=[4, 2])

lines = line1 + line2 + line3

# Layer the three lines on top of the existing chart `c` and render it.
combined = c + lines
st.altair_chart(combined, use_container_width=True)

Which makes the chart look like this:

The line overflows the chart like this. How can I fix this?


The correct chart looks like this:

import numpy as np
import streamlit as st;
from matplotlib import pyplot as plt
from sklearn.linear_model import LinearRegression
import pandas as pd
import altair as alt
def main():
    """Render a noisy linear data set with its fitted regression line.

    Draws 1000 random points following ``y = 4 + 3*x`` plus Gaussian noise,
    shows them in an expander and as a scatter chart, fits a
    ``LinearRegression`` model and layers the fitted line (from x=0 to x=1)
    over the scatter chart.
    """
    X = np.random.rand(1000)
    y = 4 + 3 * X + .5*np.random.randn(1000)
    z = "blue"

    # Build the table once and reuse it for both the data view and the chart.
    points = pd.DataFrame({"x": X, "y": y, "z": z, "t": 2})
    st.expander("Show data").write(points)
    c = (
        alt.Chart(points)
        .mark_circle(size=50)
        .encode(
            x="x:Q",
            y="y:Q",
            color=alt.Color("z", scale=None),
        )
    )

    # Turn the 1-D sample array into a column of single-feature samples.
    # The target stays 1-D: with a 2-D target, intercept_ and coef_[0] come
    # back as one-element arrays, which would put arrays (not numbers) into
    # the "y" column of the line's DataFrame.
    features = X.reshape(-1, 1)
    model = LinearRegression()
    model.fit(features, y)
    w0 = float(model.intercept_)
    w1 = float(model.coef_[0])

    # Two endpoints are enough to draw the fitted straight line.
    x0 = 0
    y0 = w1*x0 + w0
    x1 = 1
    y1 = w1*x1 + w0

    # Declare both axes quantitative, matching the scatter layer.
    lines = (
        alt.Chart(pd.DataFrame({"x": [x0, x1], "y": [y0, y1]}))
        .mark_line()
        .encode(x="x:Q", y="y:Q")
    )

    st.altair_chart(c+lines, use_container_width=True)


if __name__ == '__main__':
    main()