Hi,
thanks for your fast response!
I tried out your sample code from last year, and it seems that the issue has been fixed for me. At least the time differences are no longer that large:
I adapted your code and increased the dataframe size to 1.32 MB to better showcase my problem (this is only a small-scale example). The elapsed time is not my problem (it is “only” roughly 2 seconds); the problem is the time it takes to show/render the plot:
Adapted Code
import functools
import time
import streamlit as st
import pandas as pd
import plotly.express as px
from plotly.graph_objects import Figure
def timer(func):
    """Wrap *func* so that every call reports its duration in the app.

    The duration is measured with ``time.perf_counter`` around the call and
    written out through ``st.write``; the wrapped function's return value is
    passed through unchanged.

    See: https://realpython.com/python-timer/#creating-a-python-timer-decorator
    """

    @functools.wraps(func)
    def wrapper_timer(*args, **kwargs):
        started = time.perf_counter()
        result = func(*args, **kwargs)
        elapsed_time = time.perf_counter() - started
        st.write(f"Elapsed time: **{elapsed_time:0.4f} seconds**")
        return result

    return wrapper_timer
@st.cache_resource
def generate_dataframe(convert_to_string_dates: bool) -> pd.DataFrame:
    """Build an hourly series from 2015-01-01 to 2024-06-01.

    The frame has a ``date`` column and a ``value`` column holding the month
    number of each timestamp. When *convert_to_string_dates* is true, the
    ``date`` column is converted to strings with ``astype(str)``.

    The function is decorated with ``st.cache_resource`` so the dataframe is
    not re-generated every time the app is run.
    """
    hourly = pd.date_range(start="2015-01-01", end="2024-06-01", freq="h")
    frame = pd.DataFrame({"date": hourly})
    frame["value"] = frame["date"].apply(lambda stamp: stamp.month)
    if not convert_to_string_dates:
        return frame
    frame["date"] = frame["date"].astype(str)
    return frame
@st.cache_resource
def generate_figure(df: pd.DataFrame) -> Figure:
    """Return a Plotly Express line chart of ``value`` over ``date`` for *df*.

    The function is decorated with ``st.cache_resource`` so the figure is not
    re-generated every time the app is run.
    """
    return px.line(df, x="date", y="value")
@timer
def st_fig_show(fig: Figure) -> None:
    """Hand a Plotly figure to Streamlit for display, stretched to the
    container width.

    Because of the ``@timer`` decorator, the elapsed time of this call is
    written to the app. That figure covers only the Python-side
    ``st.plotly_chart`` call, not any rendering that happens afterwards.
    """
    chart_options = {"use_container_width": True}
    st.plotly_chart(fig, **chart_options)
def _render_column(title: str, convert_to_string_dates: bool) -> None:
    """Render one comparison column: heading, dtypes, size, and timed chart."""
    st.write(title)
    df = generate_dataframe(convert_to_string_dates=convert_to_string_dates)
    # Markdown bullet list of "column: dtype"; the leading newline separates
    # the list from the bold label that precedes it.
    dtypes_str = "\n" + "".join(
        f"- {label}: `{dtype}`\n" for label, dtype in df.dtypes.items()
    )
    st.write(f"**Data types** {dtypes_str}")
    st.write(f"Size of dataframe: {df.memory_usage().sum() / 1024:.2f} KB")
    st_fig_show(generate_figure(df))


def main():
    """Show the same chart side by side for datetime and string dates.

    The left column uses real datetimes, the right column uses dates
    converted to strings. Each column shows the dataframe's dtypes, its
    memory footprint in KB, the chart itself, and the elapsed time reported
    by ``st_fig_show``. A full-width button below triggers ``st.rerun()`` so
    the charts can be rendered again.

    Text is written with explicit ``st.write`` calls rather than bare string
    expressions, so the output does not depend on Streamlit's "magic"
    feature being enabled.
    """
    datetime_col, string_col = st.columns(2)
    with datetime_col:
        _render_column(
            "## Render figure using datetimes",
            convert_to_string_dates=False,
        )
    with string_col:
        _render_column(
            "## Render figure using string-datetimes",
            convert_to_string_dates=True,
        )
    # NOTE(review): the pasted source lost its indentation; the button is
    # assumed to sit below both columns rather than inside the second one.
    if st.button("Rerun `st.plotly_chart`", use_container_width=True):
        st.rerun()
# Build the page only when this file is executed as the entry script.
if __name__ == "__main__":
    main()
So sometimes I see these gray boxes loading, sometimes the area just stays white, and sometimes scrolling freezes, even though the code execution (elapsed time) has already finished.
In conclusion, switching to string datetimes would slightly improve the “elapsed time” (2s) but not the overall time (16s) it takes to load my page.