Selectbox BUG on "chaining" forms!

@Jessica_Smith @andfanilo @thiago @okld @vdonato @kmcgrady @Charly_Wargnier @asehmi

import streamlit as st
from streamlit_pandas_profiling import st_profile_report
from streamlit import session_state as session

import numpy as np
import pandas as pd
from pandas_profiling import ProfileReport

def cluster_duplicates(df, col_name, dis_num, dis_non_alphanum, sim, aff):
    """Show the chosen column name, a preview of ``df`` and that column's unique values.

    ``dis_num``, ``dis_non_alphanum``, ``sim`` and ``aff`` are accepted but are
    not used by this body.
    """
    st.write(col_name)
    preview = df.head()
    st.write(preview)
    distinct_values = df[col_name].unique()
    st.write(distinct_values)

def profiler(file, delim):
    """Read the uploaded CSV, show a profile report and render the clustering form.

    Args:
        file: Overwritten on the first line with ``st.session_state.upload``,
            so the value passed in is never used.
        delim: Not used; the delimiter is re-read from ``st.session_state.delim``.
    """
    file = st.session_state.upload
    # Labels look like "comma (,)": take the "(,)" token and strip the parentheses.
    delimiter = st.session_state.delim.split(" ")[1][1:-1]
    df = pd.read_csv(file, sep=delimiter, engine="python")
    file_info = {"Filename": file.name, "FileType": file.type, "FileSize": file.size}
    st.write(file_info)
    pr = ProfileReport(df, explorative=True)
    st_profile_report(pr)

    with st.form(key="cluster_duplicates"):
        cols = [val for val in df.columns]
        col_name = st.selectbox("Select column for clustering", cols, key="col_name")
        dis_num = st.checkbox("discard_numeric", key="dis_num")
        dis_non_alphanum = st.checkbox("discard_nonalpha_numeric", key="dis_non_alphanum")
        similarity = st.radio(label="Select Similarity Measure",
                              options=["levenshtein (recommended)", "cosine", "jaro_winkler", "trigram",
                                       "levenshtein_partial"], key="similarity")
        affinity = st.radio(label="Select Distance Measure",
                       options=["euclidean", "l1", "l2", "manhattan", "cosine", "precomputed"], key="affinity")
        # NOTE(review): the session values below are read here, while the form is
        # being built, and the resulting tuple is what ``on_click`` receives. They
        # are therefore the values from BEFORE "Cluster Duplicates" is clicked,
        # which looks like the cause of the stale selectbox value -- confirm.
        method_args = (df, session.col_name, session.dis_num, session.dis_non_alphanum, session.similarity, session.affinity)
        submit_btn = st.form_submit_button(label = "Cluster Duplicates", on_click=cluster_duplicates,
                                           args=method_args)

def data_uploader_form():
    """Render the upload form; its "Profile Data" button runs ``profiler`` as a callback."""
    separator_labels = ["pipe (|)", r"tab (\t)", "comma (,)", "semicolon (;)"]
    with st.form(key="file_upload"):
        st.file_uploader("Upload File", type=['csv', 'xlsx'], key="upload")
        st.selectbox("Select File Seperator/Delimiter", separator_labels, key="delim")
        # Read the widget values back through session state only after both
        # widgets have been created.
        callback_args = (session.upload, session.delim)
        st.form_submit_button(label='Profile Data', on_click=profiler, args=callback_args)

# Script entry point: write the heading text, then render the upload form.
if __name__ =="__main__":
    #st.set_page_config(layout="wide")
    st.write("Data Profiler :wave:")
    data_uploader_form()

After submitting the 2nd form (key = “cluster_duplicates”), the callback always receives the first option of the 2nd selectbox, even when I select a different one.
See the gif below: I selected “Variety” in the 2nd selectbox, but “Review #” is still the value that gets used.
output

Hi @kart2k15,

I believe the problem you’re encountering is you’re nesting forms, which isn’t allowed. The nesting in your code occurs implicitly via the callback chain, and Streamlit doesn’t warn you under that nesting modality. The solution I found was to lift forms chaining up to the top level (into main) and control the forms invocation using session state variables.

Here’s your code sample, which I adjusted to work that way. (Note: I put the profile report generation behind a memo cache and set minimal reporting to help speed it up, as it was taking way too long on some large files I used.)

import streamlit as st
from streamlit_pandas_profiling import st_profile_report
from streamlit import session_state as session

import numpy as np
import pandas as pd
from pandas_profiling import ProfileReport

# Make sure both "form was submitted" flags exist (defaulting to False)
# before any later code reads them.
for _flag in ('data_uploader_submitted', 'cluster_duplicates_submitted'):
    if _flag not in session:
        setattr(session, _flag, False)

def cluster_duplicates(df, col_name, dis_num, dis_non_alphanum, sim, aff):
    """Write the selected column name, the head of ``df`` and the column's unique values.

    The remaining parameters (``dis_num``, ``dis_non_alphanum``, ``sim``,
    ``aff``) are accepted but not used by this body.
    """
    st.write(col_name)
    head_rows = df.head()
    st.write(head_rows)
    unique_values = df[col_name].unique()
    st.write(unique_values)

@st.experimental_memo(show_spinner=True, persist='disk')
def get_profile_report(file_info, df):
    """Build a memoized ``ProfileReport`` for ``df`` with minimal, lazy settings.

    ``file_info`` is not used in the body.
    NOTE(review): it presumably exists so the memo key also depends on the
    uploaded file's metadata -- confirm.
    """
    report_options = {"explorative": True, "lazy": True, "minimal": True}
    return ProfileReport(df, **report_options)

def profiler(file, delim):
    """Profile the uploaded file and render the "cluster duplicates" form.

    The parsed DataFrame is stored in ``session['df']``. When the second form
    is submitted, ``session.cluster_duplicates_submitted`` is set to ``True``.

    Args:
        file: The uploaded file object from the ``upload`` widget, or ``None``
            when the form was submitted without a file.
        delim: Delimiter label such as ``"comma (,)"``; the separator is the
            text between the parentheses.
    """
    if file is None:
        # The upload form can be submitted empty; ``pd.read_csv(None)`` would raise.
        st.warning("Please upload a file before profiling.")
        return

    # "comma (,)" -> "(,)" -> ","
    delimiter = delim.split(" ")[1][1:-1]
    # Rewind first: this function runs on every script rerun, and the buffer
    # may already have been consumed by an earlier read.
    file.seek(0)
    df = pd.read_csv(file, sep=delimiter, engine="python")
    session['df'] = df
    file_info = {"Filename": file.name, "FileType": file.type, "FileSize": file.size}
    st.write(file_info)
    st_profile_report(get_profile_report(file_info, df))

    cluster_duplicates_form = st.form(key="cluster_duplicates")
    with cluster_duplicates_form:
        # Widget values are read later through their session-state keys, so the
        # return values are not kept here.
        st.selectbox("Select column for clustering", list(df.columns), key="col_name")
        st.checkbox("discard_numeric", key="dis_num")
        st.checkbox("discard_nonalpha_numeric", key="dis_non_alphanum")
        st.radio(
            label="Select Similarity Measure",
            options=["levenshtein (recommended)", "cosine", "jaro_winkler", "trigram",
                     "levenshtein_partial"],
            key="similarity",
        )
        st.radio(
            label="Select Distance Measure",
            options=["euclidean", "l1", "l2", "manhattan", "cosine", "precomputed"],
            key="affinity",
        )
        if cluster_duplicates_form.form_submit_button(label="Cluster Duplicates"):
            session.cluster_duplicates_submitted = True

def data_uploader_form():
    """Render the upload form and flag in session state when it has been submitted."""
    separator_labels = ["pipe (|)", r"tab (\t)", "comma (,)", "semicolon (;)"]
    upload_form = st.form(key="file_upload")
    with upload_form:
        st.file_uploader("Upload File", type=['csv', 'xlsx'], key="upload")
        st.selectbox("Select File Seperator/Delimiter", separator_labels, key="delim")
        was_submitted = upload_form.form_submit_button(label='Profile Data')
        if was_submitted:
            # Remembered across reruns so the profiling stage keeps being rendered.
            session.data_uploader_submitted = True

# Top-level controller: each stage is rendered only once the form before it
# has been submitted.
if __name__ == "__main__":
    #st.set_page_config(layout="wide")
    st.write("Data Profiler :wave:")
    data_uploader_form()
    if session.data_uploader_submitted:
        profiler(session.upload, session.delim)
    if session.cluster_duplicates_submitted:
        arg_names = ("df", "col_name", "dis_num", "dis_non_alphanum", "similarity", "affinity")
        method_args = tuple(getattr(session, name) for name in arg_names)
        print(method_args)
        print(list(session.items()))
        cluster_duplicates(*method_args)
2 Likes

I got it resolved here: Forms/callback "chaining" not working:-- · Issue #4164 · streamlit/streamlit · GitHub

And yes you can chain forms/callbacks & create “dynamic” session variables, no need to create global session variables such as “data_uploader_submitted” or “cluster_duplicates_submitted”

Anyways thanks for taking a look into it, and KUDOS to @vdonato, who helped me with this promptly

So, where’s the error in your original post’s code above? And do you want successive forms to layer above each other or below each other? Please post the corrected solution. Thanks.

In the method_args variable, I’m accessing the session_state values BEFORE the button is clicked rather than AFTER. At the base level, my problem has been chaining forms/callbacks.

Read these two issues for the “full-story”:–

The “correct” solution is:–

import pandas as pd
import streamlit as st
from streamlit import session_state as session

def cluster_duplicates(df):
    """Callback of the "Cluster Duplicates" button: show the picked column, then ``df``."""
    # Read from session state inside the callback, so this is the value
    # selected in the second form.
    selected_column = session.col_name
    st.write(selected_column)
    st.write(df)
def profiler():
    """Callback of the "Profile Data" button: load the upload and render the second form.

    Reads the uploaded file and the separator from session state, stores the
    DataFrame under ``session['dataframe']`` if that key is not present yet,
    then shows the file details, the head of the data and the clustering form.
    """
    upload = session.upload
    df = pd.read_csv(upload, sep=session.delim, engine="python")
    if 'dataframe' not in session:
        session['dataframe'] = df
    st.write({"Filename": upload.name, "FileType": upload.type, "FileSize": upload.size})
    st.write(df.head())

    with st.form(key="cluster_duplicates"):
        st.selectbox('Enter Column Name', list(df.columns), key="col_name")
        # ``args`` must be a tuple: ``(df)`` is just ``df``, whereas ``(df,)``
        # is a one-element tuple.
        st.form_submit_button(label="Cluster Duplicates", on_click=cluster_duplicates, args=(df,))

def data_uploader_form():
    """Render the upload form; its "Profile Data" button runs ``profiler`` as a callback."""
    separators = ["|", r"\t", ",", ";"]
    with st.form(key="file_upload"):
        st.file_uploader("Upload File", type=['csv', 'xlsx'], key="upload")
        st.selectbox("Select File Seperator/Delimiter", separators, key="delim")
        # No ``args``: the callback reads both values from session state itself.
        st.form_submit_button(label='Profile Data', on_click=profiler)

# Script entry point: write the heading text, then render the upload form.
if __name__ =="__main__":
    #st.set_page_config(layout="wide")
    st.write("Data Profiler :wave:")
    data_uploader_form()

“cluster_duplicates” is chained AFTER “profiler”. Also, profiler takes no “args”, but “cluster_duplicates” takes one, which NEEDS to be passed as a one-element tuple: (df,). I originally passed args=(df), and it threw an error, because (df) without the trailing comma is just df, so it evaluated to args=df.

Good it’s solved to your satisfaction. Stylistically, I prefer to have a top-level controller and then form “views” which get laid out on the page in the order they were called. With just two forms, though, callback chaining is manageable and won’t result in a tangle, but the layout order is inverted, and in your solution the profile report and forms disappear when cluster_duplicates is finally called (perhaps this is what you want).