I would like to implement a stateful ML app that doesn't rerun on every interaction with a widget.
Flow of the app: Step 1: Enter a file path and load the data into a dataframe on the click of a button. Step 2: Show sample data. Step 3: Show descriptive stats. Step 4: Plot a histogram for the selected feature, updating dynamically when the selection in a selectbox changes.
When I select a variable other than the default selection for the step 4 plot, the entire script reruns. How do I save state information so that when I am at step 4, nothing above it gets rerun, i.e. steps 1, 2 and 3 shouldn't be called again, only what I am interacting with in step 4? Kindly help me out.
import numpy as np
import pandas as pd
import sklearn as sk
import matplotlib.pyplot as plt
import streamlit as st
import pyspark
from pyspark import *
from PIL import Image
from io import StringIO
import st_state_patch
def load_data(ss, uploaded_file):
    """Load a CSV file through the reader of the given Spark session.

    Args:
        ss: Object exposing a ``read`` attribute; ``main`` passes a
            ``SparkSession``.
        uploaded_file: Path handed to the reader's ``load`` call.

    Returns:
        Whatever the reader's ``load`` returns (a Spark DataFrame when ``ss``
        is a ``SparkSession``).
    """
    csv_reader = ss.read.format('csv')
    # The first line of the file supplies the column names.
    csv_reader = csv_reader.option('header', 'true')
    return csv_reader.load(uploaded_file)
def sample_data(df, widget):
    """Show the first five rows of ``df`` inside ``widget``.

    Args:
        df: Object providing ``head(n)`` (a list of row tuples) and
            ``columns`` (the column names); ``main`` passes a Spark
            DataFrame.
        widget: Object providing ``dataframe(data)``; ``main`` passes an
            ``st.empty()`` placeholder.
    """
    # Pass the column names to the constructor instead of assigning them
    # afterwards: when the source has no rows, the frame built from an empty
    # list has zero columns and a later ``columns = ...`` assignment raises a
    # length-mismatch ValueError.
    df_sample = pd.DataFrame(df.head(5), columns=df.columns)
    widget.dataframe(df_sample)
def descriptive_stats(df, widget):
    """Render the summary statistics of ``df`` inside ``widget``.

    Args:
        df: Object providing ``summary()`` whose result has ``toPandas()``;
            ``main`` passes a Spark DataFrame.
        widget: Object providing ``dataframe(data)``; ``main`` passes an
            ``st.empty()`` placeholder.
    """
    # Convert the summary to pandas before handing it to the widget.
    widget.dataframe(df.summary().toPandas())
def hist_plot(df, col, widget):
    """Draw a 50-bin histogram of one column of ``df`` inside ``widget``.

    Args:
        df: Object providing ``select(col).toPandas()``; ``main`` passes a
            Spark DataFrame.
        col: Name of the column to plot.
        widget: Object providing ``pyplot(fig)``; ``main`` passes an
            ``st.empty()`` placeholder.
    """
    # Missing values cannot be binned, so drop them before plotting.
    values = df.select(col).toPandas().iloc[:, 0].dropna()

    # ``load_data`` reads the CSV without schema inference, so numeric columns
    # arrive as strings and would be binned as categories. Convert when every
    # value parses as a number; otherwise keep the values as they are.
    try:
        values = pd.to_numeric(values)
    except (ValueError, TypeError):
        pass

    fig, ax = plt.subplots()
    try:
        ax.hist(values, density=False, bins=50)
        widget.pyplot(fig)
    finally:
        # pyplot keeps every figure alive until it is closed; without this a
        # new figure would pile up on each Streamlit rerun.
        plt.close(fig)
def main():
    """Render the "No-Code ML Spark Pipeline" page.

    Streamlit executes this function from the top on every widget
    interaction, and ``st.button`` returns True only for the single run
    triggered by the click. The loaded DataFrame is therefore kept in
    ``st.session_state`` so that changing the histogram feature (or any other
    widget) keeps the sample, the statistics and the plot on screen instead
    of resetting the page to its empty state.

    NOTE(review): the display helpers are still invoked on every rerun; only
    the loaded DataFrame is persisted between runs.
    """
    sparkapp = (
        pyspark.sql.SparkSession.builder
        .master('local[4]')
        .appName('No-code Spark Pipeline')
        .getOrCreate()
    )

    st.title("No-Code ML Spark Pipeline")

    # Step 1: load the file only on the run triggered by the button click and
    # remember the result for all later reruns of this browser session.
    st.subheader('1. Upload file (csv)')
    uploaded_file = st.text_input("Provide local file path")
    upload_button1 = st.button('Upload')
    if upload_button1:
        if uploaded_file:
            st.session_state['df'] = load_data(sparkapp, uploaded_file)
        else:
            st.warning('Please provide a file path before uploading.')
    df = st.session_state['df'] if 'df' in st.session_state else None

    # Placeholder picture shown until data is available. Fall back to a
    # generated blank image when the file is missing on this machine
    # (600x240 is taken from the file name -- TODO confirm).
    try:
        white_background = Image.open('C:/Users/hp/Desktop/white_600_240.png')
    except OSError:
        white_background = Image.new('RGB', (600, 240), 'white')

    st.caption('Sample data')
    upload_cont1 = st.empty()
    upload_cont1.image(white_background)
    if df is not None:
        sample_data(df, upload_cont1)

    # Step 2: exploratory data analytics.
    st.subheader('2. Exploratory Data Analytics')
    st.caption('Descriptive statistics')
    eda_cont1 = st.empty()
    eda_cont1.image(white_background, use_column_width=True)
    if df is not None:
        descriptive_stats(df, eda_cont1)

    st.caption('Histogram / Frequency plot')
    # No data yet means no columns to choose from.
    feature_options = df.columns if df is not None else []
    eda_sel_feat = st.selectbox('Select feature to be displayed', options=feature_options)
    eda_cont2 = st.empty()
    eda_cont2.image(white_background, use_column_width=True)
    # The selectbox yields None when there is nothing to select.
    if df is not None and eda_sel_feat is not None:
        hist_plot(df, eda_sel_feat, eda_cont2)
# Build the page only when this file is run as a script, not when imported.
if __name__ == "__main__":
    main()