Large image dataset not loading; throws "Connection error: Failed to process a Websocket message (RangeError: Invalid string length)"

Summary

I’m trying to load around 13 images per row, which loads OK, but it does not load more than 2 pages via the + sign, where each page has 10 rows. I can work around it by reloading the Streamlit app back to page 1 and entering the number of the page I want to go to, but I have to refresh it every time. Can somebody help me with a fix? My DB is around 15 GB.

Large image dataset not loading; it throws the error below:

Connection error Failed to process a Websocket message (RangeError: Invalid string length)

Steps to reproduce

Code snippet:

import json
import logging
import datetime
import streamlit as st
import sqlite3
import requests
import pandas as pd
from st_aggrid import GridOptionsBuilder, AgGrid, JsCode
from st_aggrid.shared import ColumnsAutoSizeMode

st.set_page_config(
    page_title="Visual Search Log Dashboard",
    page_icon="🧊",
    layout="wide",
    initial_sidebar_state="expanded",
    menu_items={
        'Get Help': 'https://www.extremelycoolapp.com/help',
        'Report a bug': "https://www.extremelycoolapp.com/bug",
        'About': "Interact with Catalyst "
    }
)
st.markdown('<div style="text-align: center;font-size:30px;">Visual Search Log</div>', unsafe_allow_html=True)
st.markdown("""---""")
st.markdown('')
st.markdown('')
st.cache_data.clear()
start_date = st.sidebar.date_input(
    "start-date", datetime.datetime.today(),
    min_value=datetime.date(2023, 7, 26), max_value=datetime.datetime.today())
end_date = st.sidebar.date_input(
    "end-date", datetime.datetime.today(),
    min_value=datetime.date(2023, 7, 26), max_value=datetime.datetime.today())

if not start_date or not end_date:
    st.error("Please provide a date range....",icon="❌")

image_names =[]
input_image_bytes =[]
segmented_image_bytes =[]
skus =[]
scores =[]
pred_img_bytes =[]
dates =[]
input_coordinates_from_api = []
internal_coordinates =[]

db_path = 'visual_search_log.db'
conn = sqlite3.connect(db_path)
cursor = conn.cursor()

current_page = st.sidebar.number_input("Page", min_value=1, value=1)

page_size = 10 
start_offset = (current_page - 1) * page_size

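# Fetch only the rows for the current page using LIMIT/OFFSET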
query = f"""
    SELECT * 
    FROM logs 
    LIMIT {page_size} OFFSET {start_offset}
"""
data = cursor.execute(query).fetchall()

cursor.close()
conn.close()

sku_score_image =[]
prediction_image_1 =[]
prediction_image_2 =[]
prediction_image_3 =[]
prediction_image_4 =[]
prediction_image_5 =[]
prediction_image_6 =[]
prediction_image_7 =[]
prediction_image_8 =[]
prediction_image_9 =[]
prediction_image_10 =[]
prediction_sku_score_1 =[]
prediction_sku_score_2 =[]
prediction_sku_score_3 =[]
prediction_sku_score_4 =[]
prediction_sku_score_5 =[]
prediction_sku_score_6 =[]
prediction_sku_score_7 =[]
prediction_sku_score_8 =[]
prediction_sku_score_9 =[]
prediction_sku_score_10 =[]
full_skus = []
is_found = []
retrain =[]

if data:
    st.write(f"Displaying records {1 + start_offset} to {start_offset + len(data)}")
    if current_page > 1:
        # st.button("Previous", key="prev")  # commented out: this button does not do anything
        st.write(f"Page {current_page}")
    if len(data) == page_size:
        pass
        # st.button("Next", key="next")  # commented out: this button does not do anything
    # Unpack each fetched row into the per-column lists
    for row in data:
        try:
            dates.append(row[0])
            image_names.append(row[1])
            extn = row[1].split('.')[-1]
            input_image_bytes.append(f"data:image/{extn};base64,{row[2]}")
            segmented_image_bytes.append(f"data:image/{extn};base64,{row[3]}")
            input_coordinates_from_api.append(f"data:image/{extn};base64,{row[4]}")
            internal_coordinates.append(f"data:image/{extn};base64,{row[5]}")
            full_skus.append(row[6])
            is_found.append(row[7])
            retrain.append(row[8])
        except Exception as e:
            logging.error(f'error for the row {row}: {str(e)}')
    data_set = {
        "date": dates, "image_name": image_names,
        "input_image": input_image_bytes, "segmented_input_image_result": segmented_image_bytes,
        "bbox_from_api": input_coordinates_from_api, "bbox_from_internal": internal_coordinates,
        "full_sku": full_skus, "is_found": is_found, "want_to_retrain": retrain,
    }
    df = pd.DataFrame(data_set)
    ob = GridOptionsBuilder.from_dataframe(df)

    # One shared cell renderer that displays the base64 data URI as a 300x300 image
    image_renderer = JsCode("""
        class UrlCellRenderer {
          init(params) {
            this.eGui = document.createElement('img');
            this.eGui.setAttribute('src', params.value);
            this.eGui.setAttribute('width', '300');
            this.eGui.setAttribute('height', '300');
          }
          getGui() {
            return this.eGui;
          }
        }
    """)
    for image_col in ("input_image", "segmented_input_image_result",
                      "bbox_from_api", "bbox_from_internal"):
        ob.configure_column(image_col, image_col, cellRenderer=image_renderer)
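    # Style the is_found cell: red when the value is "not_found", green otherwise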
    low_supply = JsCode("""
        function(params) {
            if (params.value == "not_found") {
                return {
                    'color': 'red',
                    'font-size': '20px'
                };
            }
            else {
                return {
                    'color': 'green',
                    'font-size': '20px'
                };
            }
        }""")
    ob.configure_column("is_found", cellStyle=low_supply)
    ob.configure_grid_options(rowHeight=300)
    ob.configure_column(field="image_name", header_name="image_name", cellStyle={"white-space": 'normal'})
    # Only full_sku and want_to_retrain are editable; every other column is read-only
    for col in df.columns:
        if col not in ['full_sku', 'want_to_retrain']:
            ob.configure_column(col, editable=False)
    ob.configure_column("full_sku", editable=True, groupable=True)
    ob.configure_column("want_to_retrain", editable=True, groupable=True)
    ob.configure_selection(selection_mode='multiple')
    ob.configure_pagination(enabled=True)
    grid_options = ob.build()
    st.markdown('#### Data')
    grid_return = AgGrid(
        df,
        grid_options,
        allow_unsafe_jscode=True,
        key='products',
        custom_css={'.ag-row .ag-cell': {'display': 'flex',
                                         'justify-content': 'center',
                                         'align-items': 'center',
                                         },
                    '.ag-header-cell-label': {'justify-content': 'center'},
                    "#gridToolBar": {"padding-bottom": "0px !important"}}
    )
    st.markdown("""---""")
    sel_rows = grid_return["selected_rows"]
    if st.button('Update', key=2):
        conn = sqlite3.connect(db_path)
        cur = conn.cursor()
        # Write the edited want_to_retrain value back to the DB for each selected row
        for row in sel_rows:
            image_name = row['image_name']
            retrain_flag = row['want_to_retrain']
            cur.execute('UPDATE logs SET retrain=? WHERE image_name=?', (retrain_flag, image_name))
        conn.commit()
        cur.close()
        conn.close()
        st.success("Updated the selected rows")
        st.experimental_rerun()
    st.markdown("""---""")

If you already have a large image DB, you might need the command below to run Streamlit, because it does not work with the regular run command:

streamlit run --server.maxMessageSize=1200 dashboard.py
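
As far as I know, the same limit can also be set in Streamlit's .streamlit/config.toml instead of on the command line (a minimal sketch, reusing the value from the command above):

# .streamlit/config.toml
[server]
maxMessageSize = 1200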

Expected behavior:

The data frame that loads for the first 2 pages should load the same way for every page.

Actual behavior:

It loads only the first 2 pages unless I refresh back to page 1 and enter the page number in st.sidebar.number_input.

Debug info

  • Streamlit version: 1.25
  • Python version: 3.10.6
  • Using pip
  • OS: Ubuntu
  • Browser: Chrome