Plotly Performance Issues Despite Caching

Hi,

Thanks for your fast response!

I tried out your sample code from last year, and for me, it seems that this was fixed. At least the time differences aren’t that big for me anymore:

I adapted your code and increased the dataframe size to 1.32 MB to better showcase my problem (this is only a small-scale example). The elapsed time is not my problem (it is “only” roughly 2 seconds); the problem is the time it takes to show/render the plot:

Adapted Code
import functools
import time

import streamlit as st
import pandas as pd
import plotly.express as px
from plotly.graph_objects import Figure

def timer(func):
    """Wrap ``func`` so that each call reports its duration in the app.

    The duration is measured with ``time.perf_counter`` around the call and
    written to the page with ``st.write``. The wrapped function's return
    value is passed through unchanged.
    See: https://realpython.com/python-timer/#creating-a-python-timer-decorator
    """

    @functools.wraps(func)
    def _timed_call(*args, **kwargs):
        started = time.perf_counter()
        result = func(*args, **kwargs)
        elapsed_time = time.perf_counter() - started
        st.write(f"Elapsed time: **{elapsed_time:0.4f} seconds**")
        return result

    return _timed_call


@st.cache_resource
def generate_dataframe(convert_to_string_dates: bool) -> pd.DataFrame:
    """Build the demo dataframe of hourly timestamps and their month numbers.

    The result is cached with ``st.cache_resource`` to avoid re-generating it
    every time the app is run.

    Args:
        convert_to_string_dates: When true, the ``date`` column is converted
            from datetimes to strings before the dataframe is returned.

    Returns:
        A dataframe with a ``date`` column (hourly steps from 2015-01-01 to
        2024-06-01) and a ``value`` column holding each timestamp's month.
    """
    df = pd.DataFrame(
        {"date": pd.date_range(start="2015-01-01", end="2024-06-01", freq="h")}
    )

    # Vectorised month extraction instead of calling a Python lambda per row.
    # The explicit cast pins the column to int64 so the dtype (and therefore
    # the reported dataframe size) does not depend on the pandas version.
    df["value"] = df["date"].dt.month.astype("int64")

    if convert_to_string_dates:
        df["date"] = df["date"].astype(str)

    return df


@st.cache_resource
def generate_figure(df: pd.DataFrame) -> Figure:
    """Build a Plotly Express line chart of ``value`` over ``date``.

    The figure is cached with ``st.cache_resource`` so that it is not
    re-generated every time the app is run.
    """
    return px.line(data_frame=df, x="date", y="value")


@timer
def st_fig_show(fig: Figure) -> None:
    """Hand a Plotly figure to Streamlit for display.

    Because of the ``timer`` decorator, the elapsed time of this call is
    written to the page as well. That time covers only the
    ``st.plotly_chart`` call made here.
    """
    st.plotly_chart(
        fig,
        use_container_width=True,
    )


def _render_column(title: str, convert_to_string_dates: bool) -> None:
    """Write one comparison column: heading, dtypes, size and timed chart."""
    # Explicit st.write calls instead of bare "magic" string expressions, so
    # the output does not depend on Streamlit's magic feature being enabled.
    st.write(title)

    df = generate_dataframe(convert_to_string_dates=convert_to_string_dates)
    dtypes_str = "\n" + "".join(
        f"- {label}: `{dtype}`\n" for label, dtype in df.dtypes.items()
    )
    st.write(f"**Data types** {dtypes_str}")
    st.write(f"Size of dataframe: {df.memory_usage().sum() / 1024:.2f} KB")

    st_fig_show(generate_figure(df))


def main():
    """Render the two-column comparison page.

    The left column plots the dataframe with real datetimes, the right column
    plots the same data with the dates converted to strings. A full-width
    button below the columns lets the user trigger another run of the script.
    """
    left, right = st.columns(2)

    with left:
        _render_column(
            "## Render figure using datetimes",
            convert_to_string_dates=False,
        )

    with right:
        _render_column(
            "## Render figure using string-datetimes",
            convert_to_string_dates=True,
        )

    # Clicking a button already makes Streamlit rerun the script, so an extra
    # st.rerun() here would execute (and render) the whole page a second time.
    st.button("Rerun `st.plotly_chart`", use_container_width=True)


# Script entry point: build the page when this file is run as the main module.
if __name__ == "__main__":
    main()

So sometimes I see these gray boxes loading, sometimes the area just stays white, and sometimes scrolling freezes, even though the code execution (elapsed time) has already finished.

In conclusion, changing to string datetimes would slightly improve the “elapsed time” (2 s), but not the overall time (16 s) it takes to load my page.