Help with creating a simple permalink

Hey everyone! Brand new programmer here using streamlit to make an app that helps you understand US electricity cost growth over time. I used Matlab in the past while in mechanical engineering school, but that’s been about a decade. Streamlit is such an awesome tool for folks like me who want to produce something usable and don’t want to learn both a back end and front end language/framework.

So here’s my question:

I’d like the app to produce a URL with the current inputs so that you could easily share your results and then the app would interpret that URL to populate those same inputs.

I’ve played around with st.experimental_get_query_params and st.experimental_set_query_params a bunch, to no avail. Pretty confused with how to use it. Anyone want to help take a crack at it with me? I feel like this is a pretty simple thing that I’m just too green to feel confident enough doing.

The six inputs that matter are:

  1. states (a multiselect of state abbreviations)
  2. ownership_types (a multiselect of words/phrases such as ā€œMunicipalā€ or ā€œInvestor Ownedā€
  3. utilities (a multiselect of electricity utility names such as ā€œPacific Gas & Electric Co.ā€ or ā€œSouthern California Edison Coā€
  4. service_types (a multiselect of ā€œBundledā€, ā€œDeliveryā€, or ā€œEnergyā€)
  5. rate_types (a multiselect of ā€œResidential Rateā€, ā€œCommercial Rateā€, or ā€œIndustrial Rateā€)
  6. year_range (a slider that generates start_year and end_year)

Here’s my current version of the app, without any permalink functionality:
www.wattarchive.com

This is my code:

import streamlit as st
import pandas as pd
import plotly.express as px
import base64
import json

Set the page layout to wide

st.set_page_config(layout=ā€˜centered’)

Hide menu bars CSS

hide_default_format = ā€œā€"

#MainMenu {visibility: hidden; }
footer {visibility: hidden;}

ā€œā€"

Function to load the data from the CSV file

@st.cache_data
def load_data():
data = pd.read_csv(ā€˜EIA861_sales_ult_cust_1990_2021.csv’)
return data

def calculate_cagr(group):
try:
initial_value = group.iat[0]
final_value = group.iat[-1]
num_years = len(group) - 1
cagr_value = (final_value / initial_value) ** (1/num_years) - 1
return ā€˜{:.2%}’.format(cagr_value)
except ZeroDivisionError:
return ā€˜0.00%’

Main function to run the application

def main():
# Load the data
df = load_data()

# Create a mapping of Utility Number to a single Utility Name
utility_name_mapping = df.groupby('Utility Number')['Utility Name'].first()

# Set the title of the application
st.title('šŸ“ˆ WattArchive')

# Simple explainer
st.caption('Search and compare average electricity costs over time by utility, service type, and customer class.')

st.header('Search')
tab1, tab2 = st.tabs(["Utilities", "Data Types"])

with tab1: 
    # Allow user to filter by state and ownership before selecting utility
    states = st.multiselect("Filter by states", sorted(df['State'].unique()))
    ownership_types = st.multiselect("Filter by ownership types", df['Ownership'].unique())

    # Filter the DataFrame based on the selected states and ownership types
    df_filtered = df.copy()
    if states:
        df_filtered = df_filtered[df_filtered['State'].isin(states)]
    if ownership_types:
        df_filtered = df_filtered[df_filtered['Ownership'].isin(ownership_types)]

    # Allow user to select utilities from the filtered data
    if 'utilities' in st.session_state:
        utilities = st.multiselect("Select utilities", df_filtered['Utility Name'].unique(), default=st.session_state.utilities)
    else:
        utilities = st.multiselect("Select utilities", df_filtered['Utility Name'].unique())
        st.session_state.utilities = utilities

with tab2:      
    # Allow user to select service types
    service_types = df_filtered['Service Type'].unique()
    selected_service_types = st.multiselect("Service Types", service_types)

    # Allow user to select rate types
    rate_types = ['Residential Rate', 'Commercial Rate', 'Industrial Rate']
    selected_rates = st.multiselect("Rate types", rate_types)

    # Year range selection
    if not df_filtered.empty:
        year_range = st.slider("Year Range", int(df_filtered["Data Year"].min()), int(df_filtered["Data Year"].max()), (int(df_filtered["Data Year"].min()), int(df_filtered["Data Year"].max())))
        start_year, end_year = year_range
        df_filtered = df_filtered[(df_filtered['Data Year'] >= start_year) & (df_filtered['Data Year'] <= end_year)]

if utilities and selected_rates and selected_service_types:

    # Get the utility numbers for the selected utilities
    utility_numbers = df_filtered[df_filtered['Utility Name'].isin(utilities)]['Utility Number'].unique()
    
    # Further refine DataFrame based on the selected data
    df_selected = df_filtered[df_filtered['Utility Number'].isin(utility_numbers)]
    df_selected = df_selected[df_selected['Service Type'].isin(selected_service_types)]
    df_selected = df_selected[['Data Year', 'Utility Number', 'Utility Name', 'Service Type'] + selected_rates]
    
    # Sort the DataFrame by 'Data Year'
    df_selected.sort_values(by='Data Year', inplace=True)

    # Replace 'Utility Name' in the selected data with the single name from our mapping
    df_selected['Utility Name'] = df_selected['Utility Number'].map(utility_name_mapping)       
    
    # Prepare the data for plotting
    df_melted = df_selected.melt(id_vars=['Data Year', 'Utility Name', 'Service Type'], 
                                 value_vars=selected_rates, 
                                 var_name='Rate Type', 
                                 value_name='Rate')
    
    # Combine 'Utility Name', 'Rate Type', 'Service Type' into one column for the legend
    df_melted['Legend'] = df_melted['Utility Name'] + ', ' + df_melted['Rate Type'] + ', ' + df_melted['Service Type']
    
    # Pivot the DataFrame for use with st.line_chart
    pivot_df = df_melted.pivot_table(index='Data Year', columns='Legend', values='Rate')
       
    # Calc CAGR
    st.header('KPIs')
    cagr_values = {}
    
    for rate_type in selected_rates:
        if rate_type in df_selected.columns:
            cagr_values[rate_type] = df_selected.groupby(['Utility Name', 'Service Type'])[rate_type].apply(calculate_cagr)
    
    cagr_df = pd.DataFrame(cagr_values).reset_index()
    
    # Melt cagr_df to long format
    cagr_df = cagr_df.melt(id_vars=['Utility Name', 'Service Type'], value_vars=list(cagr_values.keys()), var_name='Rate Type', value_name='CAGR')
    
    # Calculate earliest rate for each unique combination of utility, service type, and rate type
    earliest_year = df_selected['Data Year'].min()
    earliest_rate_values = df_selected[df_selected['Data Year'] == earliest_year].groupby(['Utility Name', 'Service Type'])[selected_rates].min()
    
    earliest_rate_df = earliest_rate_values.reset_index().melt(id_vars=['Utility Name', 'Service Type'], value_vars=selected_rates, var_name='Rate Type', value_name='Earliest Rate')
    
    # Calculate percentile for the most recent year for each unique combination of utility, service type, and rate type
    most_recent_year = df_selected['Data Year'].max()
    df_most_recent_year = df[df['Data Year'] == most_recent_year]
    df_most_recent_year = df_most_recent_year.melt(id_vars=['Data Year', 'Utility Number', 'Service Type'], value_vars=rate_types, var_name='Rate Type', value_name='Rate')
    df_most_recent_year['Percentile Rank'] = df_most_recent_year.groupby(['Service Type', 'Rate Type'])['Rate'].rank(pct=True)
    
    # Filter this DataFrame to show only the rows that match the user's selections
    df_rankings = df_most_recent_year[df_most_recent_year['Utility Number'].isin(df_selected['Utility Number'].unique())]
    df_rankings = df_rankings[df_rankings['Service Type'].isin(selected_service_types)]
    df_rankings = df_rankings[df_rankings['Rate Type'].isin(selected_rates)]
    df_rankings['Utility Name'] = df_rankings['Utility Number'].map(utility_name_mapping) # map utility number to utility name
    df_rankings.drop(columns=['Utility Number'], inplace=True) # now you can drop the utility number
    df_rankings['Data Year'] = df_rankings['Data Year'].apply(lambda x: '{:,}'.format(x))  # Remove comma from 'Data Year' 
    df_rankings['Percentile Rank'] = df_rankings['Percentile Rank'].apply(lambda x: '{:.1f}'.format(x * 100))  # Format 'Percentile Rank' as one decimal place
    
    # Merge df_rankings, cagr_df, and earliest_rate_df and reformat
    df_kpis = df_rankings.merge(cagr_df, on=['Utility Name', 'Service Type', 'Rate Type'], how='left')
    df_kpis = df_kpis.merge(earliest_rate_df, on=['Utility Name', 'Service Type', 'Rate Type'], how='left')
    df_kpis.drop(columns=['Data Year'], inplace=True) # Drop 'Data Year'
    df_kpis.rename(columns={'Rate': 'Latest Rate', 'Percentile Rank': 'Percentile'}, inplace=True) # Rename 'Rate' and 'Percentile Rank'
    df_kpis = df_kpis[['Utility Name', 'Service Type', 'Rate Type', 'Percentile', 'CAGR', 'Latest Rate', 'Earliest Rate']] # Reorder columns
    df_kpis['Percentile'] = df_kpis['Percentile'].apply(lambda x: str(x) + '%') # add % symbol to percentile
    st.dataframe(df_kpis)

    # Drop 'Utility Number' as it's not needed anymore
    df_selected.drop(columns=['Utility Number'], inplace=True)  

    # remove commma from data year
    df_selected['Data Year'] = df_selected['Data Year'].apply(lambda x: '{:,}'.format(x))  # Remove comma from 'Data Year'

    st.header('Historical Rates')
    tab1, tab2, tab3 = st.tabs(["Chart", "Table", "Download"])
    
    # Display the chart
    with tab1:
        st.line_chart(pivot_df)

    # Display the rates data
    with tab2:
            st.dataframe(df_selected)

    # Allow for CSV download of the selected data
    with tab3:
        csv = df_selected.to_csv(index=False).encode('utf-8')
        st.download_button(
            label="Download data",
            data=csv,
            file_name='utility_rates.csv',
            mime='text/csv',

    )
else:
    st.error("Please select at least one utliity and data types.")

if name == ā€œmainā€:
main()

1 Like

Hi @Duncan_Campbell maybe the st.experimental_get_query_params utility is what you need.

That’s exactly what I’ve been trying to use, but I don’t think I have my head wrapped around it. The documentation is kind of confusing to me. Any good examples of it being used out there?

I figured it out! Here’s the code section that did it:

def main():
# Load the data
df = load_data()

# Create a mapping of Utility Number to a single Utility Name
utility_name_mapping = df.groupby('Utility Number')['Utility Name'].first()

# Set the title of the application
st.title('šŸ“ˆ WattArchive')

# Simple explainer
st.caption('Query and compare average electricity costs over time by utility, service type, and customer class.')

st.header('Query')
tab1, tab2 = st.tabs(["Utilities", "Data Types"])

# Get current query parameters
query_params = st.experimental_get_query_params()

with tab1: 
    # Allow user to filter by state and ownership before selecting utility
    states = st.multiselect("Filter by states", sorted(df['State'].unique()))
    ownership_types = st.multiselect("Filter by ownership types", df['Ownership'].unique())

    # Filter the DataFrame based on the selected states and ownership types
    df_filtered = df.copy()
    if states:
        df_filtered = df_filtered[df_filtered['State'].isin(states)]
    if ownership_types:
        df_filtered = df_filtered[df_filtered['Ownership'].isin(ownership_types)]

    # Allow user to select utilities from the filtered data
    if 'utilities' in query_params:
        utilities = query_params['utilities']
    else:
        utilities = st.multiselect("Select utilities", df_filtered['Utility Name'].unique())

with tab2:      
    # Allow user to select service types
    service_types = df_filtered['Service Type'].unique()
    selected_service_types = query_params['service_types'] if 'service_types' in query_params else st.multiselect("Service Types", service_types)

    # Allow user to select rate types
    rate_types = ['Residential Rate', 'Commercial Rate', 'Industrial Rate']
    selected_rates = query_params['rate_types'] if 'rate_types' in query_params else st.multiselect("Rate types", rate_types)

    # Year range selection
    if not df_filtered.empty:
        min_year = int(df_filtered["Data Year"].min())
        max_year = int(df_filtered["Data Year"].max())
        if 'start_year' in query_params and 'end_year' in query_params:
            year_range = (int(query_params['start_year'][0]), int(query_params['end_year'][0]))
        else:
            year_range = st.slider("Year Range", min_year, max_year, (min_year, max_year))
        start_year, end_year = year_range
        df_filtered = df_filtered[(df_filtered['Data Year'] >= start_year) & (df_filtered['Data Year'] <= end_year)]


if utilities and selected_rates and selected_service_types:    
       
    # Create share permalink
    data = {
        'utilities': utilities,
        'service_types': selected_service_types,
        'rate_types': selected_rates,
        'start_year': year_range[0],
        'end_year': year_range[1],
    }
    query_string = urllib.parse.urlencode(data, doseq=True)
    url = f'https://wattarchive.streamlit.app/?run=1&{query_string}&autoupdate=true'
    st.markdown(f'[šŸ”— Share these rates]({url})', unsafe_allow_html=True)

Awesome.
I tried it with https://wattarchive.streamlit.app/?run=1&utilities=Chugach+Electric+Assn+Inc&utilities=Golden+Valley+Elec+Assn+Inc&service_types=Bundled&rate_types=Commercial+Rate&start_year=2001&end_year=2022&autoupdate=true
That’s really nice :+1:

1 Like

Great trick, @Duncan_Campbell! :raised_hands:

I will definitely use it!

Best,
Charly

This topic was automatically closed 2 days after the last reply. New replies are no longer allowed.