- I am running the app locally
- Streamlit Version 1.35.0 and Python version: 3.10.4
The following code creates scatter plots of all variables and allows users to highlight specific data points with different colors and sizes. I added three lines to include lower and upper limit columns for each variable:
main_col = get_base_column_name(col)
max_lim_col = f'{main_col}MaxLim'
min_lim_col = f'{main_col}MinLim'
The columns are named in the format: col_1, col_1_MaxLim, col_1_MinLim, col_2, col_2_MaxLim, col_2_MinLim, etc. This part of the code adds a mark line for the maximum and minimum limit variables to the current plot. However, when using the interactive features such as zooming or moving the data points, the highlighted data points remain in the same position while the rest of the data points move:
if max_lim_col in filtered_df.columns:
step_chart = alt.Chart(filtered_df).mark_line(interpolate='cardinal-closed', color='red', strokeDash=[5, 5]).encode(x=col,y=alt.Y(max_lim_col, title='Step Values'),size=alt.value(2))
combined_plot += step_chart
if min_lim_col in filtered_df.columns:
step_chart = alt.Chart(filtered_df).mark_line(interpolate='cardinal-closed', color='red', strokeDash=[5, 5]).encode(x=col,y=alt.Y(min_lim_col, title='Step Values'),size=alt.value(2)).properties(width=800, height=400)
combined_plot += step_chart
The whole Code:
def generate_plot(data_option, df, x_col, columns, sn=None, start_date=None, end_date=None, hours_range=None, cs=None):
df['Date'] = pd.to_datetime(df['Date'], errors='coerce')
df["rgn"] = np.where(df['HDC'].str[0] == '3', "NML",np.where(df['HDC'].str[0] == 'B', "NM", "Unknown"))
lim_columns = [col for col in df.columns if col.endswith("MaxLim") or col.endswith("MinLim")]
if sn:
if sn[-1] == 'W':
temp_df = df[(df['sn'].str[-1] =='W')]
else:
temp_df = df[(df['sn'].str[-1] =='H')]
df_sn = temp_df[temp_df['sn'] == sn]
if not df_sn.empty and hours_range:
s_date = df_sn['Date'].iloc[0]
start_date = s_date - pd.Timedelta(hours=hours_range)
end_date = s_date + pd.Timedelta(hours=hours_range)
filtered_df = temp_df[(temp_df['Date'] >= start_date) & (temp_df['Date'] <= end_date)]
else:
st.write(f"sn {sn} not found!")
filtered_df = temp_df
else:
temp_df = df.copy()
if start_date:
temp_df = temp_df[temp_df['Date'] >= pd.Timestamp(start_date)]
if end_date:
temp_df = temp_df[temp_df['Date'] <= pd.Timestamp(end_date)]
filtered_df = temp_df
if filtered_df.empty:
st.write("No data found in this date range!")
if cs:
c_map = {
"c 1": ["1", "2"],
"c 2": ["3", "4"],
"c 3": ["5", "6"],
"c 4": ["7", "8"]
}
selected_c = [num for c in cs for num in c_map[c]]
selected_cols = ["sn", "Date", 'HDC',"rank","stat","rgn"] + selected_c
cols = filtered_df.columns[filtered_df.columns.str.contains("|".join(selected_cols))]
filtered_df = filtered_df[cols]
filtered_columns = [col for col in columns if any(f"{i}" in col for i in selected_c) or "stat" in col]
stat_col = [col for col in filtered_columns if 'stat' in col]
else:
filtered_columns = columns
plots = []
filtered_df['c'] = filtered_df['sn'].apply(lambda x: 3 if x.endswith('W') else (4 if x.endswith('H') else 0))
filtered_df.reset_index(drop=True, inplace=True)
if sn:
output_file = f"anomalies_{data_option}.csv"
sigma_threshold = 1.5
window_size = 30
output = calculate_std(filtered_df, output_file,sn,window_size, sigma_threshold)
st.dataframe(output[output['Sigma'] >= sigma_threshold])
st.write("-" * 50)
for col in filtered_columns:
if not filtered_df[col].dropna().empty:
main_col = get_base_column_name(col)
max_lim_col = f'{main_col}MaxLim'
min_lim_col = f'{main_col}MinLim'
title = f"{col} - sn: {sn}" if sn else f"{col}"
dmax = max_lim_col if max_lim_col in filtered_df.columns else filtered_df[col].max()
dmin = min_lim_col if min_lim_col in filtered_df.columns else filtered_df[col].min()
base = alt.Chart(filtered_df).mark_point(size=20).encode(
x=x_col,
y=alt.Y(col, scale=alt.Scale(domain=[dmin, dmax])),
tooltip=['sn',
alt.Tooltip('Date:T', title='Date', format='%Y-%m-%d %H:%M:%S'),
col,
'rgn'],
shape=alt.Shape('rgn:N', scale=alt.Scale(domain=['NM', 'NML'], range=['triangle', 'diamond']),legend=alt.Legend(title='Country')),
stroke = alt.Stroke('c:N', scale=alt.Scale(range=['blue', 'green']), legend=None)
).properties(title=title, width=800, height=400)
# highlight specific sn
if sn:
highlight = filtered_df[filtered_df['sn'] == sn]
highlight_plot = alt.Chart(highlight).mark_circle(filled=True, size=100, opacity=0.7).encode(
x=x_col,
y=alt.Y(col, scale=alt.Scale(domain=[filtered_df[col].min(), filtered_df[col].max()])),
tooltip=[
'sn',
alt.Tooltip('Date:T', title='Date', format='%Y-%m-%d %H:%M:%S'),
alt.Tooltip(col, title=col),
alt.Tooltip('rank', title='Date Order'),
alt.Tooltip(stat_col[0], title='Part stat')
],
size=alt.Size('rank:N', scale=alt.Scale(range=[50, 300]), legend=None),
color=alt.Color('rank:N', scale=alt.Scale(domain=list(range(1, filtered_df['rank'].max() + 1)),
range=['red', 'yellow', 'black', 'orange', 'purple']), legend=alt.Legend(title='Date Order'))
).properties(title=f"{col} - sn: {sn} Highlight")
combined_plot = base + highlight_plot
# highlight for date range
elif start_date and end_date:
highlight = filtered_df[filtered_df[stat_col[0]] == 'R']
highlight_plot = alt.Chart(highlight).mark_circle(filled=True, size=100, opacity=0.7).encode(
x=x_col,
y=alt.Y(col, scale=alt.Scale(domain=[filtered_df[col].min(), filtered_df[col].max()])),
tooltip=[
'sn',
alt.Tooltip('Date:T', title='Date', format='%Y-%m-%d %H:%M:%S'),
alt.Tooltip(col, title=col),
'rank', stat_col[0]
],
size=alt.Size('rank:N', scale=alt.Scale(range=[50, 300]),legend=None),
color=alt.Color('rank:N', scale=alt.Scale(domain=list(range(1, filtered_df['rank'].max() + 1)), range=['red', 'yellow', 'black', 'orange', 'purple']), legend=alt.Legend(title='Date Order'))
).properties(title=f"{col}")
combined_plot = base + highlight_plot
else:
combined_plot = base
# if max_lim_col in filtered_df.columns:
# step_chart = alt.Chart(filtered_df).mark_line(interpolate='cardinal-closed', color='red', strokeDash=[5, 5]).encode(x=col,y=alt.Y(max_lim_col, title='Step Values'),size=alt.value(2))
# # combined_plot = base + highlight_plot
# combined_plot += step_chart
# if min_lim_col in filtered_df.columns:
# step_chart = alt.Chart(filtered_df).mark_line(interpolate='cardinal-closed', color='red', strokeDash=[5, 5]).encode(x=col,y=alt.Y(min_lim_col, title='Step Values'),size=alt.value(2)).properties(width=800, height=400)
# combined_plot += step_chart
combined_plot = combined_plot.configure_range(category={'scheme': 'dark2'}).interactive().configure_axis(
domain=True,
domainOpacity=0.8,
tickOpacity=0.7,
grid=True,
gridColor='gray',
gridDash=[4, 2],
gridOpacity=0.5,
gridWidth=0.5,
gridCap='round'
)
plots.append(combined_plot)
return plots