Hey everyone! Brand new programmer here using streamlit to make an app that helps you understand US electricity cost growth over time. I used Matlab in the past while in mechanical engineering school, but thatās been about a decade. Streamlit is such an awesome tool for folks like me who want to produce something usable and donāt want to learn both a back end and front end language/framework.
So hereās my question:
Iād like the app to produce a URL with the current inputs so that you could easily share your results and then the app would interpret that URL to populate those same inputs.
Iāve played around with st.experimental_get_query_params and st.experimental_set_query_params a bunch, to no avail. Pretty confused with how to use it. Anyone want to help take a crack at it with me? I feel like this is a pretty simple thing that Iām just too green to feel confident enough doing.
The six inputs that matter are:
- states (a multiselect of state abbreviations)
- ownership_types (a multiselect of words/phrases such as āMunicipalā or āInvestor Ownedā
- utilities (a multiselect of electricity utility names such as āPacific Gas & Electric Co.ā or āSouthern California Edison Coā
- service_types (a multiselect of āBundledā, āDeliveryā, or āEnergyā)
- rate_types (a multiselect of āResidential Rateā, āCommercial Rateā, or āIndustrial Rateā)
- year_range (a slider that generates start_year and end_year)
Hereās my current version of the app, without any permalink functionality:
www.wattarchive.com
This is my code:
import streamlit as st
import pandas as pd
import plotly.express as px
import base64
import jsonSet the page layout to wide
st.set_page_config(layout=ācenteredā)
Hide menu bars CSS
hide_default_format = āā"
#MainMenu {visibility: hidden; }
footer {visibility: hidden;}
āā"Function to load the data from the CSV file
@st.cache_data
def load_data():
data = pd.read_csv(āEIA861_sales_ult_cust_1990_2021.csvā)
return datadef calculate_cagr(group):
try:
initial_value = group.iat[0]
final_value = group.iat[-1]
num_years = len(group) - 1
cagr_value = (final_value / initial_value) ** (1/num_years) - 1
return ā{:.2%}ā.format(cagr_value)
except ZeroDivisionError:
return ā0.00%āMain function to run the application
def main():
# Load the data
df = load_data()# Create a mapping of Utility Number to a single Utility Name utility_name_mapping = df.groupby('Utility Number')['Utility Name'].first() # Set the title of the application st.title('š WattArchive') # Simple explainer st.caption('Search and compare average electricity costs over time by utility, service type, and customer class.') st.header('Search') tab1, tab2 = st.tabs(["Utilities", "Data Types"]) with tab1: # Allow user to filter by state and ownership before selecting utility states = st.multiselect("Filter by states", sorted(df['State'].unique())) ownership_types = st.multiselect("Filter by ownership types", df['Ownership'].unique()) # Filter the DataFrame based on the selected states and ownership types df_filtered = df.copy() if states: df_filtered = df_filtered[df_filtered['State'].isin(states)] if ownership_types: df_filtered = df_filtered[df_filtered['Ownership'].isin(ownership_types)] # Allow user to select utilities from the filtered data if 'utilities' in st.session_state: utilities = st.multiselect("Select utilities", df_filtered['Utility Name'].unique(), default=st.session_state.utilities) else: utilities = st.multiselect("Select utilities", df_filtered['Utility Name'].unique()) st.session_state.utilities = utilities with tab2: # Allow user to select service types service_types = df_filtered['Service Type'].unique() selected_service_types = st.multiselect("Service Types", service_types) # Allow user to select rate types rate_types = ['Residential Rate', 'Commercial Rate', 'Industrial Rate'] selected_rates = st.multiselect("Rate types", rate_types) # Year range selection if not df_filtered.empty: year_range = st.slider("Year Range", int(df_filtered["Data Year"].min()), int(df_filtered["Data Year"].max()), (int(df_filtered["Data Year"].min()), int(df_filtered["Data Year"].max()))) start_year, end_year = year_range df_filtered = df_filtered[(df_filtered['Data Year'] >= start_year) & (df_filtered['Data Year'] <= end_year)] if utilities and selected_rates and selected_service_types: # Get the utility numbers for the selected utilities utility_numbers = df_filtered[df_filtered['Utility Name'].isin(utilities)]['Utility Number'].unique() # Further refine DataFrame based on the selected data df_selected = df_filtered[df_filtered['Utility Number'].isin(utility_numbers)] df_selected = df_selected[df_selected['Service Type'].isin(selected_service_types)] df_selected = df_selected[['Data Year', 'Utility Number', 'Utility Name', 'Service Type'] + selected_rates] # Sort the DataFrame by 'Data Year' df_selected.sort_values(by='Data Year', inplace=True) # Replace 'Utility Name' in the selected data with the single name from our mapping df_selected['Utility Name'] = df_selected['Utility Number'].map(utility_name_mapping) # Prepare the data for plotting df_melted = df_selected.melt(id_vars=['Data Year', 'Utility Name', 'Service Type'], value_vars=selected_rates, var_name='Rate Type', value_name='Rate') # Combine 'Utility Name', 'Rate Type', 'Service Type' into one column for the legend df_melted['Legend'] = df_melted['Utility Name'] + ', ' + df_melted['Rate Type'] + ', ' + df_melted['Service Type'] # Pivot the DataFrame for use with st.line_chart pivot_df = df_melted.pivot_table(index='Data Year', columns='Legend', values='Rate') # Calc CAGR st.header('KPIs') cagr_values = {} for rate_type in selected_rates: if rate_type in df_selected.columns: cagr_values[rate_type] = df_selected.groupby(['Utility Name', 'Service Type'])[rate_type].apply(calculate_cagr) cagr_df = pd.DataFrame(cagr_values).reset_index() # Melt cagr_df to long format cagr_df = cagr_df.melt(id_vars=['Utility Name', 'Service Type'], value_vars=list(cagr_values.keys()), var_name='Rate Type', value_name='CAGR') # Calculate earliest rate for each unique combination of utility, service type, and rate type earliest_year = df_selected['Data Year'].min() earliest_rate_values = df_selected[df_selected['Data Year'] == earliest_year].groupby(['Utility Name', 'Service Type'])[selected_rates].min() earliest_rate_df = earliest_rate_values.reset_index().melt(id_vars=['Utility Name', 'Service Type'], value_vars=selected_rates, var_name='Rate Type', value_name='Earliest Rate') # Calculate percentile for the most recent year for each unique combination of utility, service type, and rate type most_recent_year = df_selected['Data Year'].max() df_most_recent_year = df[df['Data Year'] == most_recent_year] df_most_recent_year = df_most_recent_year.melt(id_vars=['Data Year', 'Utility Number', 'Service Type'], value_vars=rate_types, var_name='Rate Type', value_name='Rate') df_most_recent_year['Percentile Rank'] = df_most_recent_year.groupby(['Service Type', 'Rate Type'])['Rate'].rank(pct=True) # Filter this DataFrame to show only the rows that match the user's selections df_rankings = df_most_recent_year[df_most_recent_year['Utility Number'].isin(df_selected['Utility Number'].unique())] df_rankings = df_rankings[df_rankings['Service Type'].isin(selected_service_types)] df_rankings = df_rankings[df_rankings['Rate Type'].isin(selected_rates)] df_rankings['Utility Name'] = df_rankings['Utility Number'].map(utility_name_mapping) # map utility number to utility name df_rankings.drop(columns=['Utility Number'], inplace=True) # now you can drop the utility number df_rankings['Data Year'] = df_rankings['Data Year'].apply(lambda x: '{:,}'.format(x)) # Remove comma from 'Data Year' df_rankings['Percentile Rank'] = df_rankings['Percentile Rank'].apply(lambda x: '{:.1f}'.format(x * 100)) # Format 'Percentile Rank' as one decimal place # Merge df_rankings, cagr_df, and earliest_rate_df and reformat df_kpis = df_rankings.merge(cagr_df, on=['Utility Name', 'Service Type', 'Rate Type'], how='left') df_kpis = df_kpis.merge(earliest_rate_df, on=['Utility Name', 'Service Type', 'Rate Type'], how='left') df_kpis.drop(columns=['Data Year'], inplace=True) # Drop 'Data Year' df_kpis.rename(columns={'Rate': 'Latest Rate', 'Percentile Rank': 'Percentile'}, inplace=True) # Rename 'Rate' and 'Percentile Rank' df_kpis = df_kpis[['Utility Name', 'Service Type', 'Rate Type', 'Percentile', 'CAGR', 'Latest Rate', 'Earliest Rate']] # Reorder columns df_kpis['Percentile'] = df_kpis['Percentile'].apply(lambda x: str(x) + '%') # add % symbol to percentile st.dataframe(df_kpis) # Drop 'Utility Number' as it's not needed anymore df_selected.drop(columns=['Utility Number'], inplace=True) # remove commma from data year df_selected['Data Year'] = df_selected['Data Year'].apply(lambda x: '{:,}'.format(x)) # Remove comma from 'Data Year' st.header('Historical Rates') tab1, tab2, tab3 = st.tabs(["Chart", "Table", "Download"]) # Display the chart with tab1: st.line_chart(pivot_df) # Display the rates data with tab2: st.dataframe(df_selected) # Allow for CSV download of the selected data with tab3: csv = df_selected.to_csv(index=False).encode('utf-8') st.download_button( label="Download data", data=csv, file_name='utility_rates.csv', mime='text/csv', ) else: st.error("Please select at least one utliity and data types.")
if name == āmainā:
main()