Null arrays printing on app while showing st.image in a loop

I am running this locally on Ubuntu. I have created a system that tracks my finger movement and, when the tracking line forms a closed shape, draws a circle over it. In the Streamlit app, as soon as the circle is drawn, null arrays start printing below the frame. I am not printing anything myself and cannot identify the issue. Sharing the code below:

import streamlit as st
import cv2
import numpy as np
import time
import mediapipe as mp
import ffmpeg
import librosa
import os  # needed explicitly for os.makedirs / os.path.join used below
from helper_functions import *
from classes import STT_and_class_detector

st.session_state.output_video_file = 'output_video.avi'
st.session_state.output_directory = 'frames_output'
st.session_state.output_audio_file = 'output_audio.wav'
st.session_state.audio_format = 'wav'

# Capture webcam frames, track the index fingertip with MediaPipe, draw the
# annotation path, and show the combined frame in the Streamlit placeholder
# while writing the video, per-frame images, and microphone audio to disk.
def run_recording(placeholder):
    annotations = [[]]
    annotationNumber = -1
    annotationStart = False

    mp_drawing = mp.solutions.drawing_utils
    mp_hands = mp.solutions.hands
    hands = mp_hands.Hands(max_num_hands=1, min_detection_confidence=0.5, min_tracking_confidence=0.5)

    frame_width = 640
    frame_height = 480

    cap = cv2.VideoCapture(0)
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, frame_width)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, frame_height)
    fps = int(cap.get(cv2.CAP_PROP_FPS))

    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter(st.session_state.output_video_file, fourcc, 20, (frame_width, frame_height))
    frames = []

    os.makedirs(st.session_state.output_directory, exist_ok=True)

    frame_delay = 0
    counter_list = []
    unique_circles = []
    final_circles = []
    previous_length = 0
    list_of_focus = []
    frame_count = 0
    circle_stay = 0
    last_circle = 0

    # Record microphone audio in the background with ffmpeg (ALSA 'default' input)
    ffmpeg_cmd = (
        ffmpeg
        .input('default', format='alsa', channels=1)
        .output(st.session_state.output_audio_file, format=st.session_state.audio_format)
        .overwrite_output()
        .run_async(pipe_stdout=True, pipe_stderr=True)
    )

    tracking_lost_counter = 0
    tracking_lost_threshold = 10

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        frame = cv2.flip(frame, 1)
        second_frame = frame.copy()
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = hands.process(rgb_frame)

        if results.multi_hand_landmarks:
            tracking_lost_counter = 0
            for hand_landmarks in results.multi_hand_landmarks:
                index_fingertip = hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP]
                x = int(index_fingertip.x * frame.shape[1])
                y = int(index_fingertip.y * frame.shape[0])

                is_highest = True
                for landmark_id, landmark in enumerate(hand_landmarks.landmark):
                    if landmark_id != 8 and landmark.y * frame.shape[0] < y:
                        is_highest = False
                        break

                for landmark_id, landmark in enumerate(hand_landmarks.landmark):
                    if landmark_id == 17 and landmark.y * frame.shape[0] < y:
                        is_highest = True
                        break

                if is_highest:
                    if not annotationStart:
                        annotationStart = True
                        annotationNumber += 1
                        annotations.append([])
                    annotations[annotationNumber].append((x, y))
                    cv2.circle(frame, (x, y), 5, (255, 0, 255), -1)
                else:
                    annotationStart = False
        else:
            tracking_lost_counter += 1
            if tracking_lost_counter > tracking_lost_threshold:
                annotations = [[]]
                annotationNumber = -1
                annotationStart = False
                frame_delay = 0
                counter_list = []
                tracking_lost_counter = 0

        for i, annotation in enumerate(annotations):
            for j in range(len(annotation)):
                if j != 0:
                    cv2.line(frame, annotation[j - 1], annotation[j], (0, 0, 200), 2)

        if len(annotations) > 1:
            list_of_focus = annotations[-2]

        if len(list_of_focus) > 1 and len(list_of_focus) != previous_length:
            num_points = 10
            x_values = np.linspace(list_of_focus[-2][0], list_of_focus[-1][0], num_points)
            y_values = np.linspace(list_of_focus[-2][1], list_of_focus[-1][1], num_points)

            for k in range(num_points):
                counter_list.append((int(x_values[k]), int(y_values[k])))
            previous_length = len(list_of_focus)

        extracted_portion = selected_region(counter_list)
        try:
            circle_values = make_circle(second_frame, extracted_portion)

            if circle_values is not None:
                final_circles.append(circle_values)

            if final_circles:
                [unique_circles.append(item) for item in final_circles if item not in unique_circles]

            filtered_circles = [circle for i, circle in enumerate(unique_circles)
                                if all(center_distance(circle, other_circle) >= 2 for other_circle in unique_circles[:i])]

            if filtered_circles and len(filtered_circles) > 0:
                if len(filtered_circles) > last_circle:
                    circle_stay = 30

                if circle_stay > 0:
                    cv2.ellipse(second_frame, filtered_circles[-1][3], (0, 255, 0), 3)
                    circle_stay -= 1

                last_circle = len(filtered_circles)

            if circle_values is not None:
                frame_delay += 1
                if frame_delay > 5:
                    annotations = [[]]
                    annotationNumber = -1
                    annotationStart = False
                    frame_delay = 0
                    counter_list = []

            frame = cv2.resize(frame, (frame_width, frame_height))
            second_frame = cv2.resize(second_frame, (frame_width, frame_height))
            combined_frame = cv2.addWeighted(frame, 0.5, second_frame, 0.5, 0)
            stframe = cv2.cvtColor(combined_frame, cv2.COLOR_BGR2RGB)
            placeholder.image(stframe, caption="Live Video Recording")
            frames.append(combined_frame)
            out.write(combined_frame)

            frame_filename = os.path.join(st.session_state.output_directory, f'frame_{frame_count}.png')
            cv2.imwrite(frame_filename, second_frame)
            frame_count += 1
            st.session_state.frame_count = frame_count

            if st.session_state.get("complete_recording", False):
                cap.release()
                out.release()
                ffmpeg_cmd.terminate()
                break
        except Exception as e:
            pass

st.title("Object Interaction and Voice Command Interface")

if "page" not in st.session_state:
    st.session_state.page = 0

if 'frame_count' not in st.session_state:
    st.session_state.frame_count = 0

if 'duration' not in st.session_state:
    st.session_state.duration = 0

if 'fps' not in st.session_state:
    st.session_state.fps = 0

if 'transcript' not in st.session_state:
    st.session_state.transcript = 'none'

if 'obj_list' not in st.session_state:
    st.session_state.obj_list = None

if 'obj_timestamp' not in st.session_state:
    st.session_state.obj_timestamp = None


def nextpage(): st.session_state.page += 1
def restart(): st.session_state.page = 0

placeholder = st.empty()

if st.session_state.page == 0:
    placeholder.title("Select Connectivity Type")
    col1, col2 = st.columns(2)
    with col1:
        st.button("WiFi", key="wifi")
    with col2:
        st.button("Ethernet", key="ethernet")
    
elif st.session_state.page == 1:
    placeholder.title("Select Use Case")
    col1, col2, col3 = st.columns(3)

    with col1:
        st.button("Defect Detection", key="defect_detection")
    with col2:
        st.button("Object Counting", key="obj_counting")
    with col3:
        st.button("Cycle Time Tracking", key="cyc_time_tracking")

elif st.session_state.page == 2:
    # Show the pointing instructions one at a time inside the placeholder:
    with placeholder.container():
        st.text("1. Point to the object")
        time.sleep(2)
        st.text("2. Speak while pointing and tell me what you want to do")
        time.sleep(2)
        st.text("3. Make sure to point your index to the object")
        time.sleep(2)

elif st.session_state.page == 3:
    st.title("Live Drawing")
    col1, col2 = st.columns(2)
    with col1:
        if st.button("Start Recording", key="start_recording"):
            st.text("Video recording started")
            placeholder = st.empty()
            st.session_state.complete_recording = False

            run_recording(placeholder)
    
    with col2:
        if st.button("Complete"):
            st.session_state.complete_recording = True
else:
    progress_bar = st.progress(0, "Processing Video: 0%")
    
    progress_bar.progress(10, "Processing Video: 10%\t\tGetting Audio Duration")
    duration = librosa.get_duration(path=st.session_state.output_audio_file)

    progress_bar.progress(20, "Processing Video: 20%\t\tGetting FPS")
    fps = st.session_state.frame_count / duration
    
    progress_bar.progress(40, "Processing Video: 40%\t\tInitializing Class Detector")
    obj1 = STT_and_class_detector(st.session_state.output_video_file, "sk-...")  # Define the class object (OpenAI API key redacted)
    
    progress_bar.progress(60, "Processing Video: 60%\t\tTranscribing the Audio")
    transcript = transcribe_audio(st.session_state.output_audio_file)
    
    progress_bar.progress(70, "Processing Video: 70%\t\tDetecting Class")
    obj1.class_detector_GPT('gpt-4', transcript[0])  # Detect class objects using GPT API

    progress_bar.progress(85, "Processing Video: 85%\t\tFinding Timestamps")
    obj1.find_timestamps(transcript[1])
    
    progress_bar.progress(100, "Processing Video: 100%\t\tMaking Class Objects Directories")
    obj1.object_directory(fps, st.session_state.output_directory)  # Make class objects directories

    print(str(st.session_state.frame_count))
    print(str(duration))
    print(str(fps))
    print(str(transcript))
    print(obj1.objects_list)
    print(obj1.phrase_timestamps)

    st.text("Frame Count: " + str(st.session_state.frame_count))
    st.text("Duration: " + str(duration))
    st.text("FPS: " + str(fps))
    st.text("Transcript: " + str(transcript))
    st.text("Objects List: " + str(obj1.objects_list))
    st.text("Phrase Timestamps: " + str(obj1.phrase_timestamps))

button_center_css = """
    <style>
    .center-button {
        display: flex;
        justify-content: center;
    }
    </style>
"""

# Inject the CSS into the Streamlit app
st.markdown(button_center_css, unsafe_allow_html=True)

# Use the class to center the button
st.markdown('<div class="center-button">', unsafe_allow_html=True)
st.button("Next", on_click=nextpage, disabled=(st.session_state.page > 3), key="next")
st.markdown('</div>', unsafe_allow_html=True)

My best guess is that one of the functions you are calling returns an array (e.g. cv2.line(…)) and that Streamlit magic is then writing that output to the app: https://docs.streamlit.io/develop/api-reference/write-magic/magic
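
Here's a minimal sketch of what I mean (a hypothetical standalone script, assuming magic is enabled, which is the default): OpenCV's drawing functions return the image they drew on, so a bare cv2.line(...) call in the main script is treated as a magic command and Streamlit writes the returned array to the page.

# magic_demo.py -- hypothetical repro; run with: streamlit run magic_demo.py
import cv2
import numpy as np
import streamlit as st

frame = np.zeros((480, 640, 3), dtype=np.uint8)

# Bare expression: cv2.line returns the modified image, so magic writes the array into the app
cv2.line(frame, (0, 0), (100, 100), (0, 0, 200), 2)

# Assigning (or discarding) the return value keeps magic quiet
_ = cv2.line(frame, (0, 100), (100, 0), (0, 200, 0), 2)

st.image(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB), caption="Both lines are drawn; only the first call produces stray output above")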

If that's correct, you can disable magic (see the Magic page in the Streamlit docs) and check whether that resolves the issue.
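
For reference, the relevant option is runner.magicEnabled; putting this in .streamlit/config.toml next to your app (or in ~/.streamlit/config.toml) should turn magic off for the whole script:

[runner]
magicEnabled = false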

It worked. Thank you so much
