Running it locally on ubuntu, I have created a system which tracks my finger movement and when a closed shape is created by the tracking line, it draws a circle over it. On streamlit app, when the circle is drawn, it starts printing null arrays below the frame. I am not printing anything and cannot identify the issue. Sharing the code below:
import streamlit as st
import cv2
import numpy as np
import time
import mediapipe as mp
import ffmpeg
import librosa
from helper_functions import *
from classes import STT_and_class_detector
st.session_state.output_video_file = 'output_video.avi'
st.session_state.output_directory = 'frames_output'
st.session_state.output_audio_file = 'output_audio.wav'
st.session_state.audio_format = 'wav'
def run_recording(placeholder):
    annotations = [[]]
    annotationNumber = -1
    annotationStart = False
    mp_drawing = mp.solutions.drawing_utils
    mp_hands = mp.solutions.hands
    hands = mp_hands.Hands(max_num_hands=1, min_detection_confidence=0.5, min_tracking_confidence=0.5)
    frame_width = 640
    frame_height = 480
    cap = cv2.VideoCapture(0)
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, frame_width)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, frame_height)
    fps = int(cap.get(cv2.CAP_PROP_FPS))
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter(st.session_state.output_video_file, fourcc, 20, (frame_width, frame_height))
    frames = []
    os.makedirs(st.session_state.output_directory, exist_ok=True)
    frame_delay = 0
    counter_list = []
    unique_circles = []
    final_circles = []
    previous_length = 0
    list_of_focus = []
    frame_count = 0
    circle_stay = 0
    last_circle = 0
    ffmpeg_cmd = (
        ffmpeg
        .input('default', format='alsa', channels=1)
        .output(st.session_state.output_audio_file, format=st.session_state.audio_format)
        .overwrite_output()
        .run_async(pipe_stdout=True, pipe_stderr=True)
    )
    tracking_lost_counter = 0
    tracking_lost_threshold = 10
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        frame = cv2.flip(frame, 1)
        second_frame = frame.copy()
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = hands.process(rgb_frame)
        if results.multi_hand_landmarks:
            tracking_lost_counter = 0
            for hand_landmarks in results.multi_hand_landmarks:
                index_fingertip = hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP]
                x = int(index_fingertip.x * frame.shape[1])
                y = int(index_fingertip.y * frame.shape[0])
                is_highest = True
                for landmark_id, landmark in enumerate(hand_landmarks.landmark):
                    if landmark_id != 8 and landmark.y * frame.shape[0] < y:
                        is_highest = False
                        break
                for landmark_id, landmark in enumerate(hand_landmarks.landmark):
                    if landmark_id == 17 and landmark.y * frame.shape[0] < y:
                        is_highest = True
                        break
                if is_highest:
                    if not annotationStart:
                        annotationStart = True
                        annotationNumber += 1
                        annotations.append([])
                    annotations[annotationNumber].append((x, y))
                    cv2.circle(frame, (x, y), 5, (255, 0, 255), -1)
                else:
                    annotationStart = False
        else:
            tracking_lost_counter += 1
            if tracking_lost_counter > tracking_lost_threshold:
                annotations = [[]]
                annotationNumber = -1
                annotationStart = False
                frame_delay = 0
                counter_list = []
                tracking_lost_counter = 0
        for i, annotation in enumerate(annotations):
            for j in range(len(annotation)):
                if j != 0:
                    cv2.line(frame, annotation[j - 1], annotation[j], (0, 0, 200), 2)
        if len(annotations) > 1:
            list_of_focus = annotations[-2]
        if len(list_of_focus) > 1 and len(list_of_focus) != previous_length:
            num_points = 10
            x_values = np.linspace(list_of_focus[-2][0], list_of_focus[-1][0], num_points)
            y_values = np.linspace(list_of_focus[-2][1], list_of_focus[-1][1], num_points)
            for k in range(num_points):
                counter_list.append((int(x_values[k]), int(y_values[k])))
            previous_length = len(list_of_focus)
        extracted_portion = selected_region(counter_list)
        try:
            circle_values = make_circle(second_frame, extracted_portion)
            if circle_values is not None:
                final_circles.append(circle_values)
            if final_circles:
                [unique_circles.append(item) for item in final_circles if item not in unique_circles]
            filtered_circles = [circle for i, circle in enumerate(unique_circles)
                                if all(center_distance(circle, other_circle) >= 2 for other_circle in unique_circles[:i])]
            if filtered_circles and len(filtered_circles) > 0:
                if len(filtered_circles) > last_circle:
                    circle_stay = 30
                if circle_stay > 0:
                    cv2.ellipse(second_frame, filtered_circles[-1][3], (0, 255, 0), 3)
                    circle_stay -= 1
                last_circle = len(filtered_circles)
            if circle_values is not None:
                frame_delay += 1
                if frame_delay > 5:
                    annotations = [[]]
                    annotationNumber = -1
                    annotationStart = False
                    frame_delay = 0
                    counter_list = []
            frame = cv2.resize(frame, (frame_width, frame_height))
            second_frame = cv2.resize(second_frame, (frame_width, frame_height))
            combined_frame = cv2.addWeighted(frame, 0.5, second_frame, 0.5, 0)
            stframe = cv2.cvtColor(combined_frame, cv2.COLOR_BGR2RGB)
            placeholder.image(stframe, caption="Live Video Recording")
            frames.append(combined_frame)
            out.write(combined_frame)
            frame_filename = os.path.join(st.session_state.output_directory, f'frame_{frame_count}.png')
            cv2.imwrite(frame_filename, second_frame)
            frame_count += 1
            st.session_state.frame_count = frame_count
            if st.session_state.get("complete_recording", False):
                cap.release()
                out.release()
                ffmpeg_cmd.terminate()
                break
        except Exception as e:
            pass
st.title("Object Interaction and Voice Command Interface")
if "page" not in st.session_state:
    st.session_state.page = 0
if 'frame_count' not in st.session_state:
    st.session_state.frame_count = 0
if 'duration' not in st.session_state:
    st.session_state.duration = 0
if 'fps' not in st.session_state:
    st.session_state.fps = 0
if 'transcript' not in st.session_state:
    st.session_state.transcript = 'none'
if 'obj_list' not in st.session_state:
    st.session_state.obj_list = None
if 'obj_timestamp' not in st.session_state:
    st.session_state.obj_timestamp = None
def nextpage(): st.session_state.page += 1
def restart(): st.session_state.page = 0
placeholder = st.empty()
if st.session_state.page == 0:
    placeholder.title("Select Connectivity Type")
    col1, col2 = st.columns(2)
    with col1:
        st.button("WiFi", key="wifi")
    with col2:
        st.button("Ethernet", key="ethernet")
    
elif st.session_state.page == 1:
    placeholder.title("Select Use Case")
    col1, col2, col3 = st.columns(3)
    with col1:
        st.button("Defect Detection", key="defect_detection")
    with col2:
        st.button("Object Counting", key="obj_counting")
    with col3:
        st.button("Cycle Time Tracking", key="cyc_time_tracking")
elif st.session_state.page == 2:
# Replace the chart with several elements:
    with placeholder.container():
        st.text("1. Point to the object")
        time.sleep(2)
        st.text("2. Speak while pointing and tell me what you want to do")
        time.sleep(2)
        st.text("3. Make sure to point your index to the object")
        time.sleep(2)
elif st.session_state.page == 3:
    st.title("Live Drawing")
    col1, col2 = st.columns(2)
    with col1:
        if st.button("Start Recording", key="start_recording"):
            st.text("Video recording started")
            placeholder = st.empty()
            st.session_state.complete_recording = False
            run_recording(placeholder)
    
    with col2:
        if st.button("Complete"):
            st.session_state.complete_recording = True
else:
    progress_bar = st.progress(0, "Processing Video: 0%")
    
    progress_bar.progress(10, "Processing Video: 10%\t\tGetting Audio Duration")
    duration = librosa.get_duration(path=st.session_state.output_audio_file)
    progress_bar.progress(20, "Processing Video: 20%\t\tGetting FPS")
    fps = st.session_state.frame_count / duration
    
    progress_bar.progress(40, "Processing Video: 40%\t\tInitializing Class Detector")
    obj1 = STT_and_class_detector(st.session_state.output_video_file, "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")  # Define the class object
    
    progress_bar.progress(60, "Processing Video: 60%\t\tTranscribing the Audio")
    transcript = transcribe_audio(st.session_state.output_audio_file)
    
    progress_bar.progress(70, "Processing Video: 70%\t\tDetecting Class")
    obj1.class_detector_GPT('gpt-4', transcript[0])  # Detect class objects using GPT API
    progress_bar.progress(85, "Processing Video: 85%\t\tFinding Timestampes")
    obj1.find_timestamps(transcript[1])
    
    progress_bar.progress(100, "Processing Video: 100%\t\tMaking Class Objects Directories")
    obj1.object_directory(fps, st.session_state.output_directory)  # Make class objects directories
    print(str(st.session_state.frame_count))
    print(str(duration))
    print(str(fps))
    print(str(transcript))
    print(obj1.objects_list)
    print(obj1.phrase_timestamps)
    st.text("Frame Count: " + str(st.session_state.frame_count))
    st.text("Duration: " + str(duration))
    st.text("FPS: " + str(fps))
    st.text("Transcript: " + str(transcript))
    st.text("Objects List: " + str(obj1.objects_list))
    st.text("Phrase Timestamps: " + str(obj1.phrase_timestamps))
button_center_css = """
    <style>
    .center-button {
        display: flex;
        justify-content: center;
    }
    </style>
"""
# Inject the CSS into the Streamlit app
st.markdown(button_center_css, unsafe_allow_html=True)
# Use the class to center the button
st.markdown('<div class="center-button">', unsafe_allow_html=True)
st.button("Next", on_click=nextpage, disabled=(st.session_state.page > 3), key="next")
st.markdown('</div>', unsafe_allow_html=True)
