Summary
I have created an app which loads a CSV file hosted on GitHub and five images hosted on ImageKit. Every day the CSV file is updated with new data and new images are uploaded to ImageKit.
Steps to reproduce
1. After new data is added to the CSV stored on GitHub, the Streamlit app does not pick up the latest version of the file. Sometimes stopping and restarting the app helps.
2. Image loading is very slow (five images, about 400 KB).
Code snippet:

```python
import streamlit as st
import pandas as pd
import numpy as np
from datetime import date
import datetime
import streamlit.components.v1 as components
import calendar
st.set_page_config(layout="wide")
st.title('Meteor Scattering')
col1, col2 = st.columns([2,2], gap="medium")
DATE_COLUMN = 'date/time'
#DATA_URL = ('https://s3-us-west-2.amazonaws.com/'
#'streamlit-demo-data/uber-raw-data-sep14.csv.gz')
DATA_URL = ("https://raw.githubusercontent.com/snowformatics/SuperMeteor/master/supermeteor/test.csv")
DATA_URL_top5 = ("https://raw.githubusercontent.com/snowformatics/SuperMeteor/master/supermeteor/image_out.csv")
#DATA_URL = ("test.csv")
today = date.today()
f = '%Y-%m-%d'
f2 = '%H%M%S'
f3 = f + f2
pd.set_option('display.max_columns', None)
def get_top5_meteors(data):
    data['h'] = pd.to_numeric(data['h'], errors='coerce')
    data['w'] = pd.to_numeric(data['w'], errors='coerce')
    # Group the DataFrame by 'date'
    grouped_df = data.groupby('date')
    # Extract the five largest 'w' and 'h' values per date
    largest_objects_per_day = grouped_df.apply(lambda x: x.nlargest(5, ['w', 'h'])).reset_index(drop=True)
    return largest_objects_per_day
@st.cache_data
def load_data(nrows):
    data = pd.read_csv(DATA_URL, delimiter='\t', dtype={'time': str})
    data['date'] = data["timestemp"].str.slice(stop=10)
    data['date'] = pd.to_datetime(data['date'], format=f)
    data['time'] = pd.to_datetime(data['time'], format=f2)
    data['date'] = data['date'].astype(str)
    data['time'] = data['time'].astype(str)
    data['time'] = data['time'].str.slice(10)
    data[DATE_COLUMN] = pd.to_datetime(data['date'].astype(str) +
                                       data['time'].astype(str))
    return data
def ChangeWidgetFontSize(wgt_txt, wch_font_size='12px'):
    htmlstr = """<script>var elements = window.parent.document.querySelectorAll('*'), i;
                 for (i = 0; i < elements.length; ++i) { if (elements[i].innerText == |wgt_txt|)
                     { elements[i].style.fontSize='""" + wch_font_size + """';} } </script> """
    htmlstr = htmlstr.replace('|wgt_txt|', "'" + wgt_txt + "'")
    components.html(f"{htmlstr}", height=0, width=0)
#data_load_state = st.text('Loading data...')
data = load_data(10000)
largest_objects_per_day = get_top5_meteors(data)
#print (largest_objects_per_day)
with col1:
    date_input1 = st.date_input(
        "Choose a date",
        # default to yesterday (today.day - 1 would fail on the 1st of a month)
        today - datetime.timedelta(days=1))
    # Filter by date
    st.subheader('Number of meteors by hour')
    data2 = data[data['date'] == str(date_input1)]
    df_stats2 = data2[['h', 'w']].describe(include="all").transpose()
    if st.checkbox('Show raw data', key=2):
        st.subheader('Raw data')
        st.write(data2)
    if st.checkbox('Show statistics', key=3):
        st.subheader('Statistics')
        st.write(df_stats2)
    hist_values1 = np.histogram(data2[DATE_COLUMN].dt.hour, bins=24, range=(0, 24))[0]
    st.bar_chart(hist_values1)
with col2:
    st.subheader('Top 5 Meteor images')
    top_meteors = pd.read_csv(DATA_URL_top5, header=None)
    top_meteors.columns = ['url']
    top_meteor_list = []
    # Match the selected day's meteors against the top-5 image URLs by their
    # shared filename prefix and show each matching image once.
    for index, row in data2.iterrows():
        id_all = row['image_file'][0:25]
        for index1, row1 in top_meteors.iterrows():
            id_top5 = row1['url'].split('/')[4][0:25]
            if id_all == id_top5:
                if id_top5 not in top_meteor_list:
                    top_meteor_list.append(id_top5)
                    st.image(row1['url'], width=600)
ChangeWidgetFontSize('Show raw data', '22px')
ChangeWidgetFontSize('Show statistics', '22px')
ChangeWidgetFontSize('Choose a date', '22px')
```
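
Regarding the stale CSV and the slow images described above, here is a minimal sketch of two possible workarounds (not what the deployed app currently does); the names `load_data_with_ttl` and `fetch_image_bytes` are made up for illustration:

```python
import io

import pandas as pd
import requests
import streamlit as st

# Sketch 1: re-download the CSV at most once per hour instead of keeping the
# first download for the whole lifetime of the app process.
@st.cache_data(ttl=3600)
def load_data_with_ttl(url):
    return pd.read_csv(url, delimiter='\t', dtype={'time': str})

# Sketch 2: cache the raw image bytes so that widget interactions (which rerun
# the whole script) do not re-fetch the five images from ImageKit every time.
@st.cache_data
def fetch_image_bytes(url):
    resp = requests.get(url, timeout=10)
    resp.raise_for_status()
    return resp.content

# st.image accepts a BytesIO object as well as a URL:
# st.image(io.BytesIO(fetch_image_bytes(url)), width=600)
```

With `ttl` set, `st.cache_data` discards the cached DataFrame after the given number of seconds, so the next rerun fetches the updated file from GitHub; without it, the cached value is kept until the app restarts or the cache is cleared.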
Expected behavior:
The app should show the latest data as soon as the CSV file on GitHub gets updated.
Actual behavior:
Data and images are not updated.
Debug info
- Streamlit version: 1.24.0
- Python version: 3.9
- Environment: Conda
- OS version: Windows 10
- Browser version: Chrome 114.0.5735.90
Links
- Link to your GitHub repo: https://github.com/snowformatics/SuperMeteor
- Link to your deployed app: https://snowformatics-supermeteor-meteor-app-344cin.streamlit.app/
Notes
Data are available in the CSV file up to 28.6.2023, but the app only shows data up to 26.6.2023. Select a date before 26.6.2023 to see some data.
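
A sketch that might help confirm the caching theory (not part of the current app): a manual refresh button that clears the `st.cache_data` store, which should make `load_data` re-download the CSV on the next rerun.

```python
import streamlit as st

# Hypothetical manual refresh control: st.cache_data.clear() empties all
# @st.cache_data caches, so load_data() would hit GitHub again on the rerun.
if st.button("Reload data"):
    st.cache_data.clear()
    st.experimental_rerun()  # rerun immediately with a cold cache
```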
Thanks