Hi all,
I have a Streamlit app with:
- Uploading a DataFrame.
- A scatter plot allowing data point selection.
- A filtered DataFrame displayed below the selection.
Currently, when I change the “Units” column of the DataFrame by typing a value into the “Change Unit” text box and pressing “Confirm”, the scatter plot has to be recreated from scratch. I’d like the column and the corresponding plot to update dynamically instead.
Questions:
- Updating the DataFrame and scatter plot:
  - Should I use Streamlit session state to store the DataFrame and plot for dynamic updates?
  - How can I efficiently implement this approach?
- Preventing the scatter plot from refreshing:
  - How can I stop the scatter plot from refreshing the legend after each data point selection?
Additional Information:
- Please let me know if you need code snippets for the DataFrame creation, plot generation, selection handling, or text box/button implementation.
Desired Outcome:
- User enters text in the “Change Unit” box and clicks “Confirm”.
- DataFrame’s “Units” column is updated.
- Scatter plot reflects the updated data without redrawing.
- Legend remains static after data point selection.
Any help would be much appreciated!
import streamlit as st
import pandas as pd
import numpy as np
import plotly.express as px
from streamlit_plotly_events import plotly_events
import plotly.io as pio
import os
import io
# Module-level page configuration; Streamlit re-executes this on every rerun.
pio.templates.default = "plotly"
st.set_page_config(layout="wide")
# NOTE(review): this option only concerns st.pyplot, which this script never
# calls; confirm it is still accepted by the Streamlit version in use.
st.set_option('deprecation.showPyplotGlobalUse', False)
# Initialise the selection slot only once per session. An unconditional
# assignment here would wipe the stored selection on every rerun, which
# defeats the purpose of keeping it in session state.
if 'selected_data' not in st.session_state:
    st.session_state.selected_data = None
def _source_token(source):
    """Return a hashable identity for the CSV source (path string or upload)."""
    if isinstance(source, str):
        return ("path", source)
    # NOTE(review): the id attribute name differs between Streamlit versions
    # (presumably ``id`` on older releases, ``file_id`` on newer ones), so
    # name and size are included as a fallback.
    return (
        "upload",
        getattr(source, "file_id", None) or getattr(source, "id", None),
        getattr(source, "name", None),
        getattr(source, "size", None),
    )


def _rows_for_points(df, category_col, fig, points):
    """Translate plotly selection events into positional row numbers of *df*.

    ``px.scatter(..., color=category_col)`` draws one trace per category, so
    the ``pointIndex`` of an event counts points inside its trace
    (``curveNumber``), not rows of the whole DataFrame.  Each trace is mapped
    back to the rows whose category label equals the trace name.

    NOTE(review): this assumes plotly express names each trace ``str(value)``
    and keeps the DataFrame row order inside a trace - confirm against the
    installed plotly version, in particular for missing category values.
    """
    labels = df[category_col].astype(str).to_numpy()
    rows_by_trace = {}
    rows = []
    for point in points:
        curve = point.get("curveNumber", 0)
        if curve not in rows_by_trace:
            rows_by_trace[curve] = np.flatnonzero(labels == fig.data[curve].name)
        trace_rows = rows_by_trace[curve]
        index = point["pointIndex"]
        # Ignore events that do not fit the current figure (stale selection).
        if 0 <= index < len(trace_rows):
            rows.append(int(trace_rows[index]))
    return rows


def main():
    """Semi Automated ML App with Streamlit.

    Loads a CSV (an upload or the bundled example), draws a lasso-selectable
    scatter plot, lists the selected rows, and lets the user overwrite the
    "Units" value of those rows from the sidebar.

    The DataFrame is kept in ``st.session_state`` so edits survive reruns.
    A confirmed edit is applied *before* the figure is built, which means the
    plot drawn in that same run already shows the new values.
    """
    st.title("Geotechnical Data Analysis")
    st.text("Using Streamlit == 1.11.0")
    activities = ["EDA"]
    choice = st.sidebar.selectbox("Select Activities", activities)
    if choice != 'EDA':
        return

    st.subheader("Exploratory Data Analysis")
    st.sidebar.subheader("Table options")
    uploaded_file = st.file_uploader(
        "Upload CSV with Location, Units and Depth as headers for full functionality", type=".csv")
    use_example_file = st.checkbox("Use example file", False,
                                   help="Use in-built example file to demo the app")

    # Per-session state; each key is created once and then reused.
    state = st.session_state
    if 'selected_data' not in state:
        state['selected_data'] = []
    if 'selected_rows' not in state:
        state['selected_rows'] = []
    if 'changed_indices' not in state:
        state['changed_indices'] = set()
    if 'edit_count' not in state:
        state['edit_count'] = 0

    if use_example_file:
        uploaded_file = "iris_modified.csv"
    if not uploaded_file:
        return

    # Parse the CSV only when the source changes; otherwise keep working on
    # the stored (possibly edited) DataFrame instead of discarding the edits.
    source_id = _source_token(uploaded_file)
    if 'df_source' not in state or state['df_source'] != source_id:
        state['df'] = pd.read_csv(uploaded_file)
        state['df_source'] = source_id
        state['selected_rows'] = []
        state['changed_indices'] = set()
        state['edit_count'] = 0
    df = state['df']

    numeric_columns = list(df.select_dtypes(include=[np.number]).columns)
    category_columns = list(df.select_dtypes('object').columns)
    if not numeric_columns or not category_columns:
        # An empty selectbox returns None, which would crash further down.
        st.warning("The CSV needs at least one numeric column and one text column to plot.")
        return

    x_choice = st.sidebar.selectbox('Select your X-Axis:', numeric_columns)
    y_choice = st.sidebar.selectbox('Select your Y-Axis:', numeric_columns)
    category_choice = st.sidebar.selectbox('Select your Category:', category_columns)
    change_unit = st.sidebar.text_input('Change Unit')
    confirm = st.sidebar.button('Confirm')

    if confirm:
        # Rows selected during the previous run; only these are edited, so an
        # earlier edit is never overwritten by a later, different unit.
        rows = [row for row in state['selected_rows'] if 0 <= row < len(df)]
        if rows:
            # Convert positions to labels and index with a list (newer pandas
            # rejects set indexers).
            df.loc[df.index[rows], 'Units'] = change_unit
            state['df'] = df
            state['changed_indices'].update(rows)
            state['selected_rows'] = []
            # A new component key (see below) drops the now outdated selection.
            state['edit_count'] += 1
            # Save the modified DataFrame to a CSV file
            df.to_csv('modified_data.csv', index=False)
            st.sidebar.success(f"Updated {len(rows)} row(s).")
        else:
            st.sidebar.warning("Select points on the plot before confirming.")

    fig = px.scatter(df, x=x_choice, y=y_choice, color=category_choice, opacity=0.5,
                     render_mode='auto')
    fig.update_yaxes(autorange="reversed")
    fig.update_layout(
        xaxis={'side': 'top'},
        xaxis_title=x_choice.title(),
        yaxis_title=y_choice.title(),
        title=f'{y_choice.title()} vs {x_choice.title()}',
        title_x=0.0,
        dragmode="lasso",
        # Asks Plotly to keep zoom and legend state while this value is
        # unchanged. NOTE(review): whether the plotly_events component honours
        # it on rerun is an assumption - confirm in the browser.
        uirevision=f"{x_choice}|{y_choice}|{category_choice}",
    )

    # The key changes after every edit and category switch: both can regroup
    # the traces, which would make a remembered selection point at other rows.
    points = plotly_events(
        fig,
        select_event=True,
        key=f"lasso_selection_{state['edit_count']}_{category_choice}",
    ) or []
    state['selected_data'] = points
    selected_rows = _rows_for_points(df, category_choice, fig, points)
    state['selected_rows'] = selected_rows

    st.subheader("Selected Data Table")
    if selected_rows:
        st.dataframe(df.iloc[selected_rows])

    # Offered on every run (not only right after Confirm) so the button does
    # not vanish on the next rerun; to_csv() without a path returns a string.
    st.sidebar.download_button(
        label="Download data as CSV",
        data=df.to_csv(index=False),
        file_name='large_df.csv',
        mime='text/csv',
    )
# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
type or paste code here