Adding and Displaying Limit Lines in Scatter Plots with Interactive Features in Altair and Streamlit

  1. I am running the app locally
  2. Streamlit Version 1.35.0 and Python version: 3.10.4

The following code creates scatter plots of all variables and allows users to highlight specific data points with different colors and sizes. I added three lines to include lower and upper limit columns for each variable:

# Derive the names of the upper/lower limit columns that belong to `col`.
# NOTE(review): no separator is inserted before "MaxLim"/"MinLim" here, so this
# presumably relies on get_base_column_name() returning a name that already
# ends with "_" -- confirm against the helper.
main_col = get_base_column_name(col)
max_lim_col = f'{main_col}MaxLim'
min_lim_col = f'{main_col}MinLim'

The columns are named in the format: col_1, col_1_MaxLim, col_1_MinLim, col_2, col_2_MaxLim, col_2_MinLim, etc. The following part of the code adds a dashed line for the maximum and minimum limit variables to the current plot. However, when I use the interactive features (zooming or panning), the highlighted data points stay in the same position while the rest of the data points move:

# Overlay the upper-limit column as a dashed red line when that column exists.
# NOTE(review): this layer encodes x from `col`, while the point layers in
# generate_plot encode x from `x_col`; 'cardinal-closed' also closes the path.
# Both look unintended for a limit line -- confirm.
if max_lim_col in filtered_df.columns:
	step_chart = alt.Chart(filtered_df).mark_line(interpolate='cardinal-closed', color='red', strokeDash=[5, 5]).encode(x=col,y=alt.Y(max_lim_col, title='Step Values'),size=alt.value(2))
	combined_plot += step_chart
                
# Same overlay for the lower-limit column.
if min_lim_col in filtered_df.columns:
	step_chart = alt.Chart(filtered_df).mark_line(interpolate='cardinal-closed', color='red', strokeDash=[5, 5]).encode(x=col,y=alt.Y(min_lim_col, title='Step Values'),size=alt.value(2)).properties(width=800, height=400)
	combined_plot += step_chart

The whole code:

def _y_domain(frame, col, min_lim_col, max_lim_col):
    """Return ``[low, high]`` spanning the data in *col* and any limit columns present."""
    low = frame[col].min()
    high = frame[col].max()
    if min_lim_col in frame.columns:
        limit_low = frame[min_lim_col].min()
        # An all-NaN limit column yields NaN; ignore it instead of poisoning the domain.
        if pd.notna(limit_low):
            low = min(low, limit_low)
    if max_lim_col in frame.columns:
        limit_high = frame[max_lim_col].max()
        if pd.notna(limit_high):
            high = max(high, limit_high)
    return [low, high]


def _highlight_layer(points, x_col, col, y_scale, tooltip, max_rank, title):
    """Build the circle layer that emphasises *points*, sized and coloured by ``rank``."""
    rank_colors = alt.Scale(
        domain=list(range(1, max_rank + 1)),
        range=['red', 'yellow', 'black', 'orange', 'purple'],
    )
    return alt.Chart(points).mark_circle(filled=True, size=100, opacity=0.7).encode(
        x=x_col,
        y=alt.Y(col, scale=y_scale),
        tooltip=tooltip,
        size=alt.Size('rank:N', scale=alt.Scale(range=[50, 300]), legend=None),
        color=alt.Color('rank:N', scale=rank_colors, legend=alt.Legend(title='Date Order')),
    ).properties(title=title)


def _limit_line(frame, x_col, limit_col, axis_title, y_scale):
    """Build a dashed red line layer for *limit_col*, plotted against the shared x field."""
    return alt.Chart(frame).mark_line(color='red', strokeDash=[5, 5]).encode(
        x=x_col,
        y=alt.Y(limit_col, title=axis_title, scale=y_scale),
        size=alt.value(2),
    )


def generate_plot(data_option, df, x_col, columns, sn=None, start_date=None, end_date=None, hours_range=None, cs=None, show_limits=False):
    """Build one interactive Altair scatter chart per plottable column.

    Args:
        data_option: Label used only to name the anomalies CSV
            (``anomalies_{data_option}.csv``) passed to ``calculate_std``.
        df: Source frame. Must contain ``Date``, ``HDC`` and ``sn``; ``rank`` is
            read when points are highlighted. NOTE: ``Date`` is converted to
            datetime and an ``rgn`` column is added on this frame in place.
        x_col: Field (or Altair shorthand) used for the x encoding of every layer.
        columns: Candidate column names to plot on the y axis.
        sn: Optional serial number. When given, rows are restricted to serials
            with the same last letter (``W``, otherwise ``H``) and the rows of
            this serial are highlighted.
        start_date: Optional lower bound on ``Date`` (used when ``sn`` is not given).
        end_date: Optional upper bound on ``Date`` (used when ``sn`` is not given).
        hours_range: With ``sn``, keep rows within +/- this many hours of the
            first ``Date`` of that serial.
        cs: Optional list of keys ``"c 1"`` .. ``"c 4"`` restricting the columns.
        show_limits: When true, overlay ``<base>MaxLim`` / ``<base>MinLim``
            columns (if present) as dashed red lines.

    Returns:
        A list of configured, interactive Altair charts, one per column in the
        selection that has at least one non-null value.
    """
    # These two assignments intentionally keep the original in-place behaviour on `df`.
    df['Date'] = pd.to_datetime(df['Date'], errors='coerce')
    df["rgn"] = np.where(df['HDC'].str[0] == '3', "NML", np.where(df['HDC'].str[0] == 'B', "NM", "Unknown"))

    if sn:
        suffix = 'W' if sn[-1] == 'W' else 'H'
        temp_df = df[df['sn'].str[-1] == suffix]
        df_sn = temp_df[temp_df['sn'] == sn]

        if df_sn.empty:
            st.write(f"sn {sn} not found!")
            filtered_df = temp_df
        elif hours_range:
            anchor = df_sn['Date'].iloc[0]
            window = pd.Timedelta(hours=hours_range)
            in_window = (temp_df['Date'] >= anchor - window) & (temp_df['Date'] <= anchor + window)
            filtered_df = temp_df[in_window]
        else:
            # Serial found but no time window requested: keep every row of the family.
            filtered_df = temp_df
    else:
        filtered_df = df
        if start_date:
            filtered_df = filtered_df[filtered_df['Date'] >= pd.Timestamp(start_date)]
        if end_date:
            filtered_df = filtered_df[filtered_df['Date'] <= pd.Timestamp(end_date)]
        if filtered_df.empty:
            st.write("No data found in this date range!")

    if cs:
        c_map = {
            "c 1": ["1", "2"],
            "c 2": ["3", "4"],
            "c 3": ["5", "6"],
            "c 4": ["7", "8"]
        }
        selected_c = [num for c in cs for num in c_map[c]]
        selected_cols = ["sn", "Date", 'HDC', "rank", "stat", "rgn"] + selected_c
        cols = filtered_df.columns[filtered_df.columns.str.contains("|".join(selected_cols))]
        filtered_df = filtered_df[cols]
        filtered_columns = [col for col in columns if any(i in col for i in selected_c) or "stat" in col]
    else:
        filtered_columns = columns

    # Resolve the status column for every call path (it used to be bound only
    # when `cs` was given); fall back to the frame's own columns.
    stat_candidates = [col for col in filtered_columns if 'stat' in col]
    if not stat_candidates:
        stat_candidates = [col for col in filtered_df.columns if 'stat' in str(col)]
    stat_field = stat_candidates[0] if stat_candidates else None

    plots = []
    # Work on an independent copy so the column assignment below never targets
    # a slice of the caller's frame.
    filtered_df = filtered_df.copy()
    filtered_df['c'] = filtered_df['sn'].apply(lambda x: 3 if x.endswith('W') else (4 if x.endswith('H') else 0))
    filtered_df.reset_index(drop=True, inplace=True)
    if sn:
        output_file = f"anomalies_{data_option}.csv"
        sigma_threshold = 1.5
        window_size = 30
        output = calculate_std(filtered_df, output_file, sn, window_size, sigma_threshold)
        st.dataframe(output[output['Sigma'] >= sigma_threshold])
        st.write("-" * 50)

    for col in filtered_columns:
        if filtered_df[col].dropna().empty:
            continue

        main_col = get_base_column_name(col)
        max_lim_col = f'{main_col}MaxLim'
        min_lim_col = f'{main_col}MinLim'
        title = f"{col} - sn: {sn}" if sn else f"{col}"

        # One scale object shared by every layer, so all layers declare the
        # same numeric y-domain (data extent widened by the limits, if any).
        y_scale = alt.Scale(domain=_y_domain(filtered_df, col, min_lim_col, max_lim_col))

        base = alt.Chart(filtered_df).mark_point(size=20).encode(
            x=x_col,
            y=alt.Y(col, scale=y_scale),
            tooltip=['sn',
                     alt.Tooltip('Date:T', title='Date', format='%Y-%m-%d %H:%M:%S'),
                     col,
                     'rgn'],
            shape=alt.Shape('rgn:N', scale=alt.Scale(domain=['NM', 'NML'], range=['triangle', 'diamond']), legend=alt.Legend(title='Country')),
            stroke=alt.Stroke('c:N', scale=alt.Scale(range=['blue', 'green']), legend=None)
        ).properties(title=title, width=800, height=400)

        combined_plot = base

        if sn:
            # Highlight the rows of the requested serial number.
            tooltip = [
                'sn',
                alt.Tooltip('Date:T', title='Date', format='%Y-%m-%d %H:%M:%S'),
                alt.Tooltip(col, title=col),
                alt.Tooltip('rank', title='Date Order'),
            ]
            if stat_field is not None:
                tooltip.append(alt.Tooltip(stat_field, title='Part stat'))
            combined_plot = base + _highlight_layer(
                filtered_df[filtered_df['sn'] == sn], x_col, col, y_scale, tooltip,
                filtered_df['rank'].max(), f"{col} - sn: {sn} Highlight",
            )
        elif start_date and end_date and stat_field is not None:
            # Highlight rows whose status column equals 'R' within the date range.
            tooltip = [
                'sn',
                alt.Tooltip('Date:T', title='Date', format='%Y-%m-%d %H:%M:%S'),
                alt.Tooltip(col, title=col),
                'rank', stat_field,
            ]
            combined_plot = base + _highlight_layer(
                filtered_df[filtered_df[stat_field] == 'R'], x_col, col, y_scale, tooltip,
                filtered_df['rank'].max(), f"{col}",
            )

        if show_limits:
            # Layers must be added before any .configure_*() call below.
            for limit_col in (max_lim_col, min_lim_col):
                if limit_col in filtered_df.columns:
                    combined_plot = combined_plot + _limit_line(filtered_df, x_col, limit_col, col, y_scale)

        combined_plot = combined_plot.configure_range(category={'scheme': 'dark2'}).interactive().configure_axis(
            domain=True,
            domainOpacity=0.8,
            tickOpacity=0.7,
            grid=True,
            gridColor='gray',
            gridDash=[4, 2],
            gridOpacity=0.5,
            gridWidth=0.5,
            gridCap='round'
        )
        plots.append(combined_plot)

    return plots

This topic was automatically closed 180 days after the last reply. New replies are no longer allowed.