@Jessica_Smith @andfanilo @thiago @okld @vdonato @kmcgrady @Charly_Wargnier @asehmi
import streamlit as st
from streamlit_pandas_profiling import st_profile_report
from streamlit import session_state as session
import numpy as np
import pandas as pd
from pandas_profiling import ProfileReport
def cluster_duplicates(df, col_name, dis_num, dis_non_alphanum, sim, aff):
st.write(col_name)
st.write(df.head())
st.write(df[col_name].unique())
def profiler(file, delim):
file = st.session_state.upload
delimiter = st.session_state.delim.split(" ")[1][1:-1]
df = pd.read_csv(file, sep=delimiter, engine="python")
file_info = {"Filename": file.name, "FileType": file.type, "FileSize": file.size}
st.write(file_info)
pr = ProfileReport(df, explorative=True)
st_profile_report(pr)
with st.form(key="cluster_duplicates"):
cols = [val for val in df.columns]
col_name = st.selectbox("Select column for clustering", cols, key="col_name")
dis_num = st.checkbox("discard_numeric", key="dis_num")
dis_non_alphanum = st.checkbox("discard_nonalpha_numeric", key="dis_non_alphanum")
similarity = st.radio(label="Select Similarity Measure",
options=["levenshtein (recommended)", "cosine", "jaro_winkler", "trigram",
"levenshtein_partial"], key="similarity")
affinity = st.radio(label="Select Distance Measure",
options=["euclidean", "l1", "l2", "manhattan", "cosine", "precomputed"], key="affinity")
method_args = (df, session.col_name, session.dis_num, session.dis_non_alphanum, session.similarity, session.affinity)
submit_btn = st.form_submit_button(label = "Cluster Duplicates", on_click=cluster_duplicates,
args=method_args)
def data_uploader_form():
with st.form(key="file_upload"):
data_file = st.file_uploader("Upload File", type=['csv', 'xlsx'], key="upload")
delim_list = ["pipe (|)", r"tab (\t)", "comma (,)", "semicolon (;)"]
delim = st.selectbox("Select File Seperator/Delimiter", delim_list, key="delim")
submit_file_btn = st.form_submit_button(label='Profile Data', on_click=profiler, args=(session.upload, session.delim))
if __name__ =="__main__":
#st.set_page_config(layout="wide")
st.write("Data Profiler :wave:")
data_uploader_form()
After submitting the 2nd form (key = “cluster_duplicates”)–it always fetches the first argument in the 2nd selectbox even when I select another one
See gif below-- I selected “Variety” in the 2nd selectbox but it still selects “Review #”