I tried running my streamlit app both locally and when deployed but it is so slow and it can’t process its supposed function. I’d like to ask for help to know what’s wrong
Here’s my app:
import streamlit as st
import pandas as pd
import numpy as np
st.title("Merge Master Sample (Products to Merge)")
# Upload CSV
csv_file = st.file_uploader("Upload csv file here:")
if csv_file is not None:
csv_file = pd.read_csv(csv_file)
st.subheader("Preview of Uploaded File")
st.dataframe(csv_file)
# Applicability Symbol Input
applicability_symbol = st.text_input("Input used applicability symbol:")
# Step 1 -- Total of Applicable Parts per Product -- Additional Column
def count_occurrences(row, value):
return (row==value).sum()
# Apply the custom function to each row of the DataFrame
csv_file['Total Applicable C'] = csv_file.apply(lambda row: count_occurrences(row, applicability_symbol), axis=1)
# Step 2 -- Delete all rows with the maximum value in the Total Applicable column
drop_row_condition = csv_file['Total Applicable C'] == csv_file['Total Applicable C'].max()
csv_file = csv_file[~drop_row_condition]
# Step 3 -- Total of Applicable Products per Part -- Additional Row
# Transpose dataframe
csv_file = csv_file.transpose()
# Add another column -- Total Applicable R
def count_occurrences(row, value):
return (row==value).sum()
# Apply the custom function to each row of the DataFrame
csv_file['Total Applicable R'] = csv_file.apply(lambda row: count_occurrences(row, applicability_symbol), axis=1)
# Transpose dataframe
csv_file = csv_file.transpose()
# Step 4 - Print the column name of the row with highest value of Total Applicable R
mother_product = csv_file.iloc[-1].idxmax()
# Step 5 -- Drop unnecessary columns
# Multiselect box to choose columns to drop
columns_to_drop = st.multiselect("Select columns to drop:", csv_file.columns)
branch_column = st.selectbox("Select Branch Column:", csv_file.columns)
# Drop selected columns
csv_file = csv_file.drop(columns=columns_to_drop)
# Step 6 -- Final, Final
# Initialize a list to store the deleted values in the 'Part' column and the chosen column name for each iteration
deleted_values_per_iteration = []
# Continue the process until the DataFrame is empty
while not csv_file.empty:
# Identify the column with the highest count of non-NaN values
column_with_max_count = csv_file.iloc[:, 1:].count().idxmax()
# Remove rows with non-NaN values in the identified column
deleted_values = csv_file.loc[~csv_file[column_with_max_count].isna(), branch_column].tolist()
# Append the chosen column name to the list
deleted_values_per_iteration.append({'Column': column_with_max_count, 'DeletedValues': deleted_values})
# Update the DataFrame by removing rows with non-NaN values in the identified column
csv_file = csv_file[csv_file[column_with_max_count].isna()]
# Display the deleted values and the chosen column for each iteration
st.title("Products Needed to Form the Merge Master Sample")
for i, values_dict in enumerate(deleted_values_per_iteration, start=1):
column_name = values_dict['Column']
deleted_values = values_dict['DeletedValues']
# Highlight the first iteration as the "Mother Product"
if i == 1:
st.subheader(f'{i}: Mother Product: {column_name} \n Branches: {deleted_values}')
else:
st.subheader(f'{i}: Additional Product: {column_name} \n Branches: {deleted_values}')