That is interesting. I do not have an explanation for why that happens, but I was able to replicate your findings, so I thought it would be worth reporting. Indeed, `st.plotly_chart` takes 10x longer to render a figure with datetimes than the same figure with the dates stored as strings.
Code
import functools
import time
import streamlit as st
import pandas as pd
import plotly.express as px
from plotly.graph_objects import Figure
def timer(func):
    """Wrap *func* so that every call reports its duration in the app.

    The call is bracketed with ``time.perf_counter`` readings and the
    difference is written to the page with ``st.write``. The wrapped
    function's return value is passed through unchanged.

    See: https://realpython.com/python-timer/#creating-a-python-timer-decorator
    """

    @functools.wraps(func)
    def wrapper_timer(*args, **kwargs):
        started = time.perf_counter()
        result = func(*args, **kwargs)
        elapsed_time = time.perf_counter() - started
        st.write(f"Elapsed time: **{elapsed_time:0.4f} seconds**")
        return result

    return wrapper_timer
@st.cache_resource
def generate_dataframe(convert_to_string_dates: bool) -> pd.DataFrame:
    """Build the benchmark dataframe of hourly timestamps and their month.

    The frame has a ``date`` column running from 2015-01-01 to 2024-06-01 at
    hourly frequency and a ``value`` column holding the month number of each
    timestamp. The result is cached so it is not re-generated every time the
    app is run.

    Args:
        convert_to_string_dates: When true, the ``date`` column is converted
            to strings before the frame is returned.

    Returns:
        The generated dataframe with ``date`` and ``value`` columns.
    """
    df = pd.DataFrame(
        {"date": pd.date_range(start="2015-01-01", end="2024-06-01", freq="h")}
    )
    # Vectorized month extraction instead of a Python-level call per row.
    # The cast keeps the column at int64 so the dtype and memory size shown
    # in the app stay the same as with the row-wise version.
    df["value"] = df["date"].dt.month.astype("int64")
    if convert_to_string_dates:
        df["date"] = df["date"].astype(str)
    return df
@st.cache_resource
def generate_figure(df: pd.DataFrame) -> Figure:
    """Create a line chart of the ``value`` column against ``date``.

    The figure is cached so it is not rebuilt every time the app is run.
    """
    return px.line(df, x="date", y="value")
@timer
def st_fig_show(fig: Figure) -> None:
    """Display *fig* in the app with ``st.plotly_chart`` at container width.

    The ``@timer`` decorator writes the elapsed time of this call to the
    page, so the reported duration covers only the ``st.plotly_chart`` call
    made here.
    """
    st.plotly_chart(
        fig,
        use_container_width=True,
    )
def _render_column(title: str, *, convert_to_string_dates: bool) -> None:
    """Write one benchmark column: heading, dataframe summary and timed chart."""
    st.write(title)
    df = generate_dataframe(convert_to_string_dates=convert_to_string_dates)
    dtype_lines = "".join(
        f"- {label}: `{dtype}`\n" for label, dtype in df.dtypes.items()
    )
    st.write(f"**Data types** \n{dtype_lines}")
    st.write(f"Size of dataframe: {df.memory_usage().sum() / 1024:.2f} KB")
    st_fig_show(generate_figure(df))


def main():
    """Render the datetime and string-date benchmarks side by side.

    The left column charts the dataframe with real datetimes and the right
    column charts the same data with the dates converted to strings. Each
    column shows the column dtypes, the dataframe size in KB and the time
    reported by ``st_fig_show``. A button below reruns the script so the
    measurement can be repeated.
    """
    datetime_col, string_col = st.columns(2)
    with datetime_col:
        _render_column(
            "## Render figure using datetimes",
            convert_to_string_dates=False,
        )
    with string_col:
        _render_column(
            "## Render figure using string-datetimes",
            convert_to_string_dates=True,
        )
    # Explicit rerun trigger so both charts are rendered and timed again.
    if st.button("Rerun `st.plotly_chart`", use_container_width=True):
        st.rerun()
# Build the page only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
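I would guess that Streamlit makes a copy of the data contained in the Plotly Figure, transforms the datetime data into strings (to produce valid JSON), adds other custom bits to that JSON, and then passes that to Plotly. The profile of the datetime case below is consistent with this: most of the time goes to `__deepcopy__` of the NumPy arrays and to `datetime.isoformat` calls.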
------- df_datetimes -------
2978924 function calls (2813219 primitive calls) in 1.480 seconds
Ordered by: internal time
ncalls tottime percall cumtime percall filename:lineno(function)
2 0.507 0.253 0.709 0.355 {method '__deepcopy__' of 'numpy.ndarray' objects}
165549/0 0.213 0.000 0.000 copy.py:118(deepcopy)
82539 0.127 0.000 0.440 0.000 utils.py:85(default)
82537 0.079 0.000 0.079 0.000 {method 'isoformat' of 'datetime.datetime' objects}
578731 0.070 0.000 0.070 0.000 {method 'get' of 'dict' objects}
PS:
I tested converting the Plotly Figure to HTML and rendering it using `streamlit.components.v1.html`. The performance difference is still the same, so perhaps the issue is not within Streamlit but within Plotly.