Adding and Displaying Limit Lines in Scatter Plots with Interactive Features in Altair and streamlit

s.vahishta · July 5, 2024, 6:53pm

I am running the app locally
Streamlit Version 1.35.0 and Python version: 3.10.4

The following code creates scatter plots of all variables and allows users to highlight specific data points with different colors and sizes. I added three lines to include lower and upper limit columns for each variable:

main_col = get_base_column_name(col)
max_lim_col = f'{main_col}MaxLim'
min_lim_col = f'{main_col}MinLim'

The columns are named in the format: col_1, col_1_MaxLim, col_1_MinLim, col_2, col_2_MaxLim, col_2_MinLim, etc. This part of the code adds a mark line for the maximum and minimum limit variables to the current plot. However, when using the interactive features such as zooming or moving the data points, the highlighted data points remain in the same position while the rest of the data points move:

if max_lim_col in filtered_df.columns:
	step_chart = alt.Chart(filtered_df).mark_line(interpolate='cardinal-closed', color='red', strokeDash=[5, 5]).encode(x=col,y=alt.Y(max_lim_col, title='Step Values'),size=alt.value(2))
	combined_plot += step_chart
                
if min_lim_col in filtered_df.columns:
	step_chart = alt.Chart(filtered_df).mark_line(interpolate='cardinal-closed', color='red', strokeDash=[5, 5]).encode(x=col,y=alt.Y(min_lim_col, title='Step Values'),size=alt.value(2)).properties(width=800, height=400)
	combined_plot += step_chart

The whole Code:

def generate_plot(data_option, df, x_col, columns, sn=None, start_date=None, end_date=None, hours_range=None, cs=None):
    df['Date'] = pd.to_datetime(df['Date'], errors='coerce')
    df["rgn"] = np.where(df['HDC'].str[0] == '3', "NML",np.where(df['HDC'].str[0] == 'B', "NM", "Unknown"))  
    lim_columns = [col for col in df.columns if col.endswith("MaxLim") or col.endswith("MinLim")]
    
    if sn:
        if sn[-1] == 'W':   
            temp_df = df[(df['sn'].str[-1] =='W')]
        else:
            temp_df = df[(df['sn'].str[-1] =='H')]
        df_sn = temp_df[temp_df['sn'] == sn]
        
        if not df_sn.empty and hours_range:
            s_date = df_sn['Date'].iloc[0]
            start_date = s_date - pd.Timedelta(hours=hours_range)
            end_date = s_date + pd.Timedelta(hours=hours_range)
            filtered_df = temp_df[(temp_df['Date'] >= start_date) & (temp_df['Date'] <= end_date)]
        else:
            st.write(f"sn {sn} not found!")
            filtered_df = temp_df
    else:
        temp_df = df.copy()
        if start_date:
            temp_df = temp_df[temp_df['Date'] >= pd.Timestamp(start_date)]
        if end_date:
            temp_df = temp_df[temp_df['Date'] <= pd.Timestamp(end_date)]
        filtered_df = temp_df
        if filtered_df.empty:
            st.write("No data found in this date range!")

    if cs:
        c_map = {
            "c 1": ["1", "2"],
            "c 2": ["3", "4"],
            "c 3": ["5", "6"],
            "c 4": ["7", "8"]
        }
        selected_c = [num for c in cs for num in c_map[c]]
        selected_cols = ["sn", "Date", 'HDC',"rank","stat","rgn"] + selected_c
        cols = filtered_df.columns[filtered_df.columns.str.contains("|".join(selected_cols))]
        filtered_df = filtered_df[cols]
        filtered_columns = [col for col in columns if any(f"{i}" in col for i in selected_c) or "stat" in col]
        stat_col = [col for col in filtered_columns if 'stat' in col]   
    else:
        filtered_columns = columns
    
    plots = []
    filtered_df['c'] = filtered_df['sn'].apply(lambda x: 3 if x.endswith('W') else (4 if x.endswith('H') else 0))
    filtered_df.reset_index(drop=True, inplace=True)
    if sn:
        output_file = f"anomalies_{data_option}.csv"  
        sigma_threshold = 1.5 
        window_size = 30              
        output = calculate_std(filtered_df, output_file,sn,window_size, sigma_threshold)
        st.dataframe(output[output['Sigma'] >= sigma_threshold])
        st.write("-" * 50)
    
    for col in filtered_columns:
        if not filtered_df[col].dropna().empty: 
            main_col = get_base_column_name(col)
            max_lim_col = f'{main_col}MaxLim'
            min_lim_col = f'{main_col}MinLim'
            title = f"{col} - sn: {sn}" if sn else f"{col}"
            dmax = max_lim_col if max_lim_col in filtered_df.columns else filtered_df[col].max()
            dmin = min_lim_col if min_lim_col in filtered_df.columns else filtered_df[col].min()     
            base = alt.Chart(filtered_df).mark_point(size=20).encode(
                x=x_col,
                y=alt.Y(col, scale=alt.Scale(domain=[dmin, dmax])),
                tooltip=['sn', 
                         alt.Tooltip('Date:T', title='Date', format='%Y-%m-%d %H:%M:%S'), 
                         col, 
                         'rgn'],
                shape=alt.Shape('rgn:N', scale=alt.Scale(domain=['NM', 'NML'], range=['triangle', 'diamond']),legend=alt.Legend(title='Country')),
                stroke = alt.Stroke('c:N', scale=alt.Scale(range=['blue', 'green']), legend=None)
            ).properties(title=title, width=800, height=400)
  
            # highlight specific sn
            if sn:
                highlight = filtered_df[filtered_df['sn'] == sn]
                highlight_plot = alt.Chart(highlight).mark_circle(filled=True, size=100, opacity=0.7).encode(
                    x=x_col,
                    y=alt.Y(col, scale=alt.Scale(domain=[filtered_df[col].min(), filtered_df[col].max()])),
                    tooltip=[
                        'sn',
                        alt.Tooltip('Date:T', title='Date', format='%Y-%m-%d %H:%M:%S'),
                        alt.Tooltip(col, title=col),
                        alt.Tooltip('rank', title='Date Order'),
                        alt.Tooltip(stat_col[0], title='Part stat')
                    ],
                    size=alt.Size('rank:N', scale=alt.Scale(range=[50, 300]), legend=None),
                    color=alt.Color('rank:N', scale=alt.Scale(domain=list(range(1, filtered_df['rank'].max() + 1)), 
                                                              range=['red', 'yellow', 'black', 'orange', 'purple']), legend=alt.Legend(title='Date Order'))
                ).properties(title=f"{col} - sn: {sn} Highlight")
                combined_plot = base + highlight_plot

            # highlight for date range 
            elif start_date and end_date:
                highlight = filtered_df[filtered_df[stat_col[0]] == 'R']
                highlight_plot = alt.Chart(highlight).mark_circle(filled=True, size=100, opacity=0.7).encode(
                    x=x_col,
                    y=alt.Y(col, scale=alt.Scale(domain=[filtered_df[col].min(), filtered_df[col].max()])),
                    tooltip=[
                        'sn',
                        alt.Tooltip('Date:T', title='Date', format='%Y-%m-%d %H:%M:%S'),
                        alt.Tooltip(col, title=col),
                        'rank', stat_col[0]
                    ],
                    size=alt.Size('rank:N', scale=alt.Scale(range=[50, 300]),legend=None),
                    color=alt.Color('rank:N', scale=alt.Scale(domain=list(range(1, filtered_df['rank'].max() + 1)), range=['red', 'yellow', 'black', 'orange', 'purple']), legend=alt.Legend(title='Date Order'))                 
                ).properties(title=f"{col}")
                combined_plot = base + highlight_plot
            else:
                combined_plot = base
            
            # if max_lim_col in filtered_df.columns:
            #     step_chart = alt.Chart(filtered_df).mark_line(interpolate='cardinal-closed', color='red', strokeDash=[5, 5]).encode(x=col,y=alt.Y(max_lim_col, title='Step Values'),size=alt.value(2))
            #     # combined_plot = base + highlight_plot
            #     combined_plot += step_chart
                
            # if min_lim_col in filtered_df.columns:
            #     step_chart = alt.Chart(filtered_df).mark_line(interpolate='cardinal-closed', color='red', strokeDash=[5, 5]).encode(x=col,y=alt.Y(min_lim_col, title='Step Values'),size=alt.value(2)).properties(width=800, height=400)
            #     combined_plot += step_chart
            
            combined_plot = combined_plot.configure_range(category={'scheme': 'dark2'}).interactive().configure_axis(
                domain=True,
                domainOpacity=0.8,
                tickOpacity=0.7,
                grid=True,
                gridColor='gray',
                gridDash=[4, 2],
                gridOpacity=0.5,
                gridWidth=0.5,
                gridCap='round'
            )
            plots.append(combined_plot)
        

    return plots

system · January 1, 2025, 6:53pm

This topic was automatically closed 180 days after the last reply. New replies are no longer allowed.

Topic		Replies	Views
Maximum row length for Altair chart? Using Streamlit discussion	1	127	March 21, 2025
St.line_chart Using Streamlit	3	12577	November 19, 2021
Can I animate a scatter plot? Using Streamlit matplotlib	5	7534	November 19, 2021
Line in altair_chart is overflow in streamlit Using Streamlit	2	601	December 6, 2023
Appending to scatter chart sub-plots Using Streamlit matplotlib	5	4507	November 19, 2021

Adding and Displaying Limit Lines in Scatter Plots with Interactive Features in Altair and streamlit

Related topics

Hello there 👋🏻

Cookie settings

Strictly necessary cookies

Performance cookies

Functional cookies

Targeting cookies