Summary
I’m trying to load around 13 images in a row, which loads ok. but it does not load more than 2 pages via the + sign, where each page has 10 rows. although I can achieve it via reloading the streamlit back to page 1 and entering the number of the page that I want to go to. but I have to refresh it every time. can somebody help me with a fix? My DB is around 15 GB
Large image dataset not loading and throws error as below
Connection error Failed to process a Websocket message (RangeError: Invalid string length)
Steps to reproduce
Code snippet:
import json
import logging
import datetime
import streamlit as st
import sqlite3
import requests
import pandas as pd
import streamlit as st
from st_aggrid import GridOptionsBuilder, AgGrid, JsCode
from st_aggrid.shared import ColumnsAutoSizeMode
st.set_page_config(
page_title="Visual Search Log Dashboard",
page_icon="🧊",
layout="wide",
initial_sidebar_state="expanded",
menu_items={
'Get Help': 'https://www.extremelycoolapp.com/help',
'Report a bug': "https://www.extremelycoolapp.com/bug",
'About': "Interact with Catalyst "
}
)
st.markdown('<div style="text-align: center;font-size:30px;">Visual Search Log</div>', unsafe_allow_html=True)
st.markdown("""---""")
st.markdown('')
st.markdown('')
st.cache_data.clear()
start_date = st.sidebar.date_input(
"start-date",(datetime.datetime.today()),min_value=datetime.date(2023,7,26),max_value=datetime.datetime.today())
end_date = st.sidebar.date_input(
"end-date",(datetime.datetime.today()),min_value=datetime.date(2023,7,26),max_value=datetime.datetime.today())
if not start_date or not end_date:
st.error("Please provide a date range....",icon="❌")
image_names =[]
input_image_bytes =[]
segmented_image_bytes =[]
skus =[]
scores =[]
pred_img_bytes =[]
dates =[]
input_coordinates_from_api = []
internal_coordinates =[]
db_path = 'visual_search_log.db'
conn = sqlite3.connect(db_path)
cursor = conn.cursor()
current_page = st.sidebar.number_input("Page", min_value=1, value=1)
page_size = 10
start_offset = (current_page - 1) * page_size
query = f"""
SELECT *
FROM logs
LIMIT {page_size} OFFSET {start_offset}
"""
data = cursor.execute(query).fetchall()
cursor.close()
conn.close()
sku_score_image =[]
prediction_image_1 =[]
prediction_image_2 =[]
prediction_image_3 =[]
prediction_image_4 =[]
prediction_image_5 =[]
prediction_image_6 =[]
prediction_image_7 =[]
prediction_image_8 =[]
prediction_image_9 =[]
prediction_image_10 =[]
prediction_sku_score_1 =[]
prediction_sku_score_2 =[]
prediction_sku_score_3 =[]
prediction_sku_score_4 =[]
prediction_sku_score_5 =[]
prediction_sku_score_6 =[]
prediction_sku_score_7 =[]
prediction_sku_score_8 =[]
prediction_sku_score_9 =[]
prediction_sku_score_10 =[]
full_skus = []
is_found = []
retrain =[]
if data:
st.write(f"Displaying records {1 + start_offset} to {start_offset + len(data)}")
if current_page > 1:
# st.button("Previous", key="prev") I have commented this line as this button does not do anything
st.write(f"Page {current_page}")
if len(data) == page_size:
pass
# st.button("Next", key="next") I have commented this line as this button does not do anything
for row in data:
try:
dates.append(row[0])
image_names.append(row[1])
extn = row[1].split('.')[-1]
input_image_bytes.append(f"data:image/{extn};base64,{row[2]}")
segmented_image_bytes.append(f"data:image/{extn};base64,{row[3]}")
input_coordinates_from_api.append(f"data:image/{extn};base64,{row[4]}")
internal_coordinates.append(f"data:image/{extn};base64,{row[5]}")
full_skus.append(row[6])
is_found.append(row[7])
retrain.append(row[8])
except Exception as e:
logging.info(f'error for the row {row} {str(e)}')
data_set = {"date":dates,"image_name":image_names,"input_image":input_image_bytes,"segmented_input_image_result":segmented_image_bytes,
'bbox_from_api':input_coordinates_from_api,'bbox_from_internal':internal_coordinates,
'full_sku':full_skus,"is_found":is_found,"want_to_retrain":retrain,
}
df = pd.DataFrame(data_set)
ob = GridOptionsBuilder.from_dataframe(df)
ob.configure_column(
"input_image", "input_image",
cellRenderer=JsCode("""
class UrlCellRenderer {
init(params) {
//this.eGui = document.createElement('a');
//this.eGui.innerText = 'SomeText';
//this.eGui.setAttribute('href', params.value);
//this.eGui.setAttribute('style', "text-decoration:none");
//this.eGui.setAttribute('target', "_blank");
this.eGui = document.createElement('img');
this.eGui.setAttribute('src', params.value);
this.eGui.setAttribute('width', '300');
this.eGui.setAttribute('height', '300');
}
getGui() {
return this.eGui;
}
}
""")
)
ob.configure_column(
"segmented_input_image_result", "segmented_input_image_result",
cellRenderer=JsCode("""
class UrlCellRenderer {
init(params) {
//this.eGui = document.createElement('a');
//this.eGui.innerText = 'SomeText';
//this.eGui.setAttribute('href', params.value);
//this.eGui.setAttribute('style', "text-decoration:none");
//this.eGui.setAttribute('target', "_blank");
this.eGui = document.createElement('img');
this.eGui.setAttribute('src', params.value);
this.eGui.setAttribute('width', '300');
this.eGui.setAttribute('height', '300');
}
getGui() {
return this.eGui;
}
}
""")
)
ob.configure_column(
"bbox_from_api", "bbox_from_api",
cellRenderer=JsCode("""
class UrlCellRenderer {
init(params) {
//this.eGui = document.createElement('a');
//this.eGui.innerText = 'SomeText';
//this.eGui.setAttribute('href', params.value);
//this.eGui.setAttribute('style', "text-decoration:none");
//this.eGui.setAttribute('target', "_blank");
this.eGui = document.createElement('img');
this.eGui.setAttribute('src', params.value);
this.eGui.setAttribute('width', '300');
this.eGui.setAttribute('height', '300');
}
getGui() {
return this.eGui;
}
}
""")
)
ob.configure_column(
"bbox_from_internal", "bbox_from_internal",
cellRenderer=JsCode("""
class UrlCellRenderer {
init(params) {
//this.eGui = document.createElement('a');
//this.eGui.innerText = 'SomeText';
//this.eGui.setAttribute('href', params.value);
//this.eGui.setAttribute('style', "text-decoration:none");
//this.eGui.setAttribute('target', "_blank");
this.eGui = document.createElement('img');
this.eGui.setAttribute('src', params.value);
this.eGui.setAttribute('width', '300');
this.eGui.setAttribute('height', '300');
}
getGui() {
return this.eGui;
}
}
""")
)
low_supply = JsCode("""
function(params) {
if (params.value == "not_found") {
return {
'color': 'red',
'font-size': '20px'
};
}
else {
return {
'color': 'green',
'font-size': '20px'
};
}
}""")
ob.configure_column("is_found", cellStyle=low_supply)
ob.configure_grid_options(rowHeight=300)
ob.configure_column(field="image_name", header_name="image_name", cellStyle={"white-space": 'normal'})
ob.configure_column(field='full_sku',editable=True)
ob.configure_column(field="want_to_retrain",editable=True)
for col in df.columns:
if col not in ['full_sku',"want_to_retrain"]:
ob.configure_column(col, editable=False)
ob.configure_column("full_sku", editable=True, groupable=True)
ob.configure_column("want_to_retrain", editable=True, groupable=True)
ob.configure_selection(selection_mode='multiple')
ob.configure_pagination(enabled=True)
grid_options = ob.build()
st.markdown('#### Data')
grid_return = AgGrid(
df,
grid_options,
allow_unsafe_jscode=True,
key='products',
custom_css={'.ag-row .ag-cell': {'display': 'flex',
'justify-content': 'center',
'align-items': 'center',
},
'.ag-header-cell-label': {'justify-content': 'center'},
"#gridToolBar": {"padding-bottom": "0px !important"}}
)
st.markdown("""---""")
sel_rows = grid_return["selected_rows"]
if st.button('Update', key=2):
conn = sqlite3.connect(db_path)
cur = conn.cursor()
for row in sel_rows:
id = row['image_name']
retrain = row['want_to_retrain']
cur.execute('UPDATE logs SET retrain=? WHERE image_name=?', (retrain, id))
conn.commit()
cur.close()
conn.close()
st.experimental_rerun()
st.success("Updated the SKU")
st.markdown("""---""")
if you already have large image db you might need this command to run streamlit
streamlit run --server.maxMessageSize=1200 dashboard.py as it does not take the regular run cmd
Expected behavior:
I want the data frame that is loading for the first 2 pages the same should work for all the pages.
Actual behavior:
loads only the first 2 pages or needs to refresh to page 1 and enter the page number in st.sidebar.number_input
Debug info
- Streamlit version: (get it with
$ 1.25
) - Python version: (get it with
$ 3.10.6
) - Using pip
- Ubuntu:
- Chrome: