from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import TextLoader
from langchain.vectorstores import DocArrayInMemorySearch
from IPython.display import display, Markdown
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, HuggingFaceInstructEmbeddings
from langchain.vectorstores import FAISS
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain.indexes import VectorstoreIndexCreator
from langchain_experimental.agents.agent_toolkits.csv.base import create_csv_agent
from langchain.agents.agent_types import AgentType
from io import BytesIO
import tiktoken
import time
import textwrap
import os
import pandas as pd
import requests
import streamlit as st
import json
from langchain.document_loaders import PyPDFLoader

B_INST, E_INST = "[INST]", "[/INST]"
B_SYS, E_SYS = "<<SYS>>\n", "\n<</SYS>>\n\n"
DEFAULT_SYSTEM_PROMPT = """
"""

def wrap_text_preserve_newlines(text, width=110):
    # Split the input text into lines based on newline characters
    lines = text.split('\n')
    # Wrap each line individually
    wrapped_lines = [textwrap.fill(line, width=width) for line in lines]
    # Join the wrapped lines back together using newline characters
    wrapped_text = '\n'.join(wrapped_lines)
    return wrapped_text
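
# Editor's note, a quick illustration of the helper above (hypothetical input):
# wrap_text_preserve_newlines("a b c\nd", width=3) re-wraps each original line
# separately and returns "a b\nc\nd", so the existing newlines survive.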

def process_llm_response(llm_response):
    return wrap_text_preserve_newlines(llm_response['result'])
    # print('\n\nSources:')
    # for source in llm_response["source_documents"]:
    #     print(source.metadata['source'])


def get_prompt(instruction, new_result_str_modified, new_system_prompt=DEFAULT_SYSTEM_PROMPT):
    SYSTEM_PROMPT = B_SYS + new_system_prompt + new_result_str_modified + E_SYS
    prompt_template = B_INST + SYSTEM_PROMPT + instruction + E_INST
    return prompt_template
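
# Editor's note, what the wrapper produces (hypothetical arguments): with the
# constants above, get_prompt("Hi {context}", "TEMPLATE") returns the
# Llama-2-style string
# "[INST]<<SYS>>\n\nTEMPLATE\n<</SYS>>\n\nHi {context}[/INST]".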

def extract_key_value_pairs(data, parent_key='', sep='_'):
    items = []
    if isinstance(data, dict):
        for key, value in data.items():
            new_key = f"{parent_key}{sep}{key}" if parent_key else key
            items.extend(extract_key_value_pairs(value, new_key, sep=sep))
    else:
        items.append((parent_key, data))
    return items
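
# Editor's note, a small example of the flattener (hypothetical data):
# extract_key_value_pairs({"shipper": {"name": "ACME", "city": "OSLO"}})
# -> [("shipper_name", "ACME"), ("shipper_city", "OSLO")]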

# Function to load the uploaded JSON file into a Python object
def extract_child_json(json_file):
    if json_file:
        # Read the content of the uploaded JSON file
        file_contents = json_file.read()
        try:
            # Decode the content as 'utf-8', falling back to 'latin-1'
            decoded_content = file_contents.decode("utf-8")
        except UnicodeDecodeError:
            decoded_content = file_contents.decode("latin-1")
        try:
            # Load the JSON content into a Python dictionary
            json_obj = json.loads(decoded_content)
            # Return the JSON content as-is, without any filtering
            child_json = json_obj
            print(type(child_json))
            return child_json
        except json.JSONDecodeError:
            st.error("Error: Invalid JSON file")
            return None

def process_pdf(pdf_file):
    if pdf_file:
        # Read the content of the uploaded PDF file
        pdf_contents = pdf_file.read()
        # Save the uploaded PDF file to a temporary location
        with open("temp.pdf", "wb") as temp_pdf:
            temp_pdf.write(pdf_contents)
        # Process the uploaded PDF using LangChain operations
        txt_file_path = "temp.pdf"  # Path to the temporary PDF file
        loader = PyPDFLoader(file_path=txt_file_path)
        data = loader.load()
        text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
        processed_data = text_splitter.split_documents(data)
        # Delete the temporary PDF file after processing
        os.remove("temp.pdf")
        return processed_data
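
# Editor's sketch (assumption, not part of the original app): a fixed
# "temp.pdf" name can collide when two Streamlit sessions upload at once;
# tempfile sidesteps that. Same splitting behaviour as process_pdf above.
import tempfile

def process_pdf_tempfile(pdf_file):
    with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
        tmp.write(pdf_file.read())
        tmp_path = tmp.name
    try:
        data = PyPDFLoader(file_path=tmp_path).load()
        splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
        return splitter.split_documents(data)
    finally:
        # Always clean up the per-upload temporary file
        os.remove(tmp_path)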

def process_with_nanonet(pdf_file):
    if pdf_file:
        pdf_contents = pdf_file.read()
        print("Here's your PDF Content:")
        print(pdf_contents)
        with open("temp.pdf", "wb") as temp_pdf:
            temp_pdf.write(pdf_contents)
        # url = ''
        url = ''
        data = {'file': open("temp.pdf", 'rb')}
        response = requests.post(url, auth=requests.auth.HTTPBasicAuth('', ''), files=data)
        response_json = response.json()
        for page in response_json['result']:
            if page['page'] == 0:
                required_json = page['request_metadata']
                print(required_json)
        os.remove("temp.pdf")
        return required_json
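
# Editor's note (assumption): requests does not raise on HTTP errors by itself,
# so a failed Nanonets call surfaces as a confusing JSONDecodeError; calling
# response.raise_for_status() right after requests.post makes failures explicit.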

def get_processing_type():
    return "openai"

def main():
    processing_type = get_processing_type()
    st.title("Jules BL Checker")
    available_json_files = [os.path.splitext(filename)[0] for filename in os.listdir('.') if filename.endswith('.json')]
    # Create a dropdown for the user to select a JSON file without displaying the '.json' extension
    selected_json_file_stem = st.selectbox("Select BL Instructions:", available_json_files)
    # Get the full filename by adding the '.json' extension back
    selected_json_filename = selected_json_file_stem + ".json"
    st.write("Upload BL Draft in PDF:")
    pdf_file = st.file_uploader("", accept_multiple_files=False, type='pdf')
    # Initialize new_result_str_modified before the PROCESS button
    new_result_str_modified = ""
    show_instructions = st.checkbox("Show BL Instructions", value=False)
    show_filled_template = st.checkbox("Show the Processed BL Draft", value=False)
    if 'show_feedback' not in st.session_state:
        st.session_state.show_feedback = False
    # Add the PROCESS button
    if st.button("PROCESS"):
        child_json = None
        processed_text = None
        # Attempt to read the JSON file safely
        try:
            with open(selected_json_filename, 'r') as json_file:
                child_json = json.load(json_file)
        except Exception as e:
            st.error(f"An error occurred while reading the JSON file: {e}")
        # Process the PDF if a file was uploaded
        if pdf_file:
            file_buffer = BytesIO(pdf_file.read())
            # Reset the pointer of the original pdf_file before re-reading it
            pdf_file.seek(0)
            processed_text = process_pdf(pdf_file)
        if child_json:
            # Display the BL instructions if the checkbox is checked
            if show_instructions:
                st.write("Here's the BL Instructions:")
                st.json(child_json)
            # Extract key-value pairs now that child_json has loaded successfully
            key_value_pairs = extract_key_value_pairs(child_json)
            result = {key: value for key, value in key_value_pairs}
            result_str = str(result)
            # Escape braces so the dict string survives PromptTemplate formatting
            new_result_str_modified = result_str.replace('{', '{{').replace('}', '}}')
        if processed_text:
            # Set the API key as an environment variable
            api_key = ""
            os.environ["OPENAI_API_KEY"] = api_key
            if processing_type == "openai":
                t1 = time.perf_counter()
                # Optionally display the processed text in Streamlit:
                # st.write("Processed text:")
                # st.write(processed_text)
                with st.spinner("Processing your request..."):
                    embeddings = OpenAIEmbeddings()
                    vectorstore = FAISS.from_documents(processed_text, embedding=embeddings)
                    sys_prompt = """You're a dictionary generator. Fill up the below details of the template from the document given.
And print the filled template. An example of a filled-up template is as shown - ."""
                    instruction = """CONTEXT:\n\n {context}\n
Response: Display the filled template """
                    from langchain.prompts import PromptTemplate
                    prompt_template = get_prompt(instruction, new_result_str_modified, sys_prompt)
                    prompt = PromptTemplate(
                        template=prompt_template, input_variables=["context"]
                    )
                    print(prompt)
                    chain_type_kwargs = {"prompt": prompt}
                    llm = ChatOpenAI(temperature=0.7, model_name='gpt-4-turbo-preview')
                    memory = ConversationBufferMemory(
                        memory_key='chat_history', return_messages=True)
                    conversation_chain = RetrievalQA.from_chain_type(
                        llm=llm,
                        chain_type="stuff",
                        chain_type_kwargs=chain_type_kwargs,
                        retriever=vectorstore.as_retriever(),
                        memory=memory
                    )
                    query = 'display the filled template'
                    final_result = conversation_chain({'query': query})
                    response_pdf = process_llm_response(final_result)
                t2 = time.perf_counter()
                print(f"processing with openai took {t2 - t1} seconds")
            elif processing_type == "nanonet":
                file_buffer.seek(0)
                with st.spinner("Processing your request..."):
                    t1 = time.perf_counter()
                    response_pdf = process_with_nanonet(file_buffer)
                    t2 = time.perf_counter()
                    print(f"processing with nanonets took {t2 - t1} seconds")
            print('\n\nFilled Template:', response_pdf)
            if response_pdf:
                st.success("Processing complete!")
                if show_filled_template:
                    st.write("Here's the Processed BL Draft:")
                    st.write(response_pdf)
                from langchain.chains import LLMChain
                from langchain_core.prompts import PromptTemplate
                prompt_template = "Compare the dictionary {result_pdf} with {result_app} and print the mismatched values in JSON format (example keys and values: 'PRE_CARRIAGE_BY': ['BARCELONA', ''], 'OCEAN_VESSEL_VOYAGE_NO': ['AFIF 020E', '348E'], etc.). Output SHOULD be a VALID JSON. REMEMBER not to display code in the answer; if you want, you can process the code internally and produce the mismatch by comparing each and every key."
                prompt = PromptTemplate(
                    input_variables=["result_pdf", "result_app"], template=prompt_template
                )
                llm = ChatOpenAI(temperature=0.7, model_name='gpt-4-turbo-preview')
                llm_chain = LLMChain(llm=llm, prompt=prompt)
                with st.spinner("Generating response..."):
                    # Generate the comparison between the PDF contents and the JSON instructions
                    t3 = time.perf_counter()
                    completion = llm_chain.predict(result_pdf=response_pdf, result_app=new_result_str_modified)
                    t4 = time.perf_counter()
                    print(f"comparison prompt took {t4 - t3} seconds")
                # Display the completion outcome
                print(completion)
                st.success("Response generated!")
                # Strip the code fences and the "json" language tag from the reply
                completion = completion.strip('`').replace('json\n', '').strip()
                # Parse the JSON data into a Python dictionary
                json_data = json.loads(completion)
                # Flatten the nested JSON and capture the mismatches
                def extract_mismatches(data, key=None):
                    mismatches = []
                    for k, v in data.items():
                        # Construct a key path to identify nested items
                        new_key = f"{key}.{k}" if key else k
                        # A list holding more than one unique value is a mismatch
                        if isinstance(v, list) and len(set(map(str, v))) > 1:
                            mismatches.append({
                                'Field': new_key,
                                'Expected Value': v[0],
                                'Actual Value': v[1]
                            })
                        # If the value is another dictionary, recurse into it
                        elif isinstance(v, dict):
                            mismatches.extend(extract_mismatches(v, new_key))
                    return mismatches
                def get_corrected_llm_response(completion, result_pdf, result_app, user_suggestions):
                    # Generate a response using the LLMChain with the user's suggestions
                    user_corrected_prompt_template = "You're a response corrector. We earlier had you compute the mismatches {completion} by comparing {result_pdf} and {result_app}. Now take the user's suggestions: {user_suggestions} and provide the mismatch response again, corrected in light of those suggestions. REMEMBER not to display code in the answer; if you want, you can process the code internally and produce the corrected mismatch."
                    prompt = PromptTemplate(
                        input_variables=["completion", "result_pdf", "result_app", "user_suggestions"],
                        template=user_corrected_prompt_template
                    )
                    llm = ChatOpenAI(temperature=0.7, model_name='gpt-4-turbo-preview')
                    llm_chain = LLMChain(llm=llm, prompt=prompt)
                    llm_response = llm_chain.predict(completion=str(completion), result_pdf=response_pdf, result_app=new_result_str_modified, user_suggestions=user_suggestions)
                    return llm_response
                # Use the helper above to extract the mismatches
                mismatches = extract_mismatches(json_data)
                # Convert the mismatches into a DataFrame
                df = pd.DataFrame(mismatches)
                if not df.empty:
                    st.write("Hey, these are the mismatches:")
                    st.dataframe(df)
                else:
                    # If the DataFrame is empty, print the message
                    st.write("Hey, no mismatch found!")
                # user_message = st.text_input("Type your message:", key="user_input")
                if 'corrected_llm_response' not in st.session_state:
                    st.session_state.corrected_llm_response = None
                with st.form(key='suggestions_form'):
                    user_suggestions = st.text_area("Any suggestions that can improve response?", key="user_suggestions")
                    submit_button = st.form_submit_button(label='Submit Suggestions')
                # When the user submits a message, call the LLM
                if submit_button:
                    st.session_state.corrected_llm_response = get_corrected_llm_response(str(completion), result_pdf, result_app, user_suggestions)
                if st.session_state.corrected_llm_response:
                    # Strip the code fences and the "json" language tag from the reply
                    st.session_state.corrected_llm_response = st.session_state.corrected_llm_response.strip('`').replace('json\n', '').strip()
                    # Parse the JSON data into a Python dictionary
                    corrected_json_data = json.loads(st.session_state.corrected_llm_response)
                    corrected_mismatches = extract_mismatches(corrected_json_data)
                    df = pd.DataFrame(corrected_mismatches)
                    if not df.empty:
                        st.write("Hey, these are the corrected mismatches:")
                        st.dataframe(df)
                    else:
                        # If the DataFrame is empty, print the message
                        st.write("Hey, no mismatch found on corrections!")
                    st.session_state.show_feedback = True
    from trubrics.integrations.streamlit import FeedbackCollector
    collector = FeedbackCollector(
        project="default",
        email="",
        password="",
    )
    # Display the feedback widget once the flag has been set in session state
    if st.session_state.show_feedback:
        collector.st_feedback(
            component="default",
            feedback_type="thumbs",
            model="gpt-3.5-turbo",
            prompt_id=None,  # see prompts to log prompts and model generations
            open_feedback_label='[Optional] Provide additional feedback'
        )

if __name__ == "__main__":
    main()

Here's the code of my Streamlit app. On hitting "Submit Suggestions", the app reruns completely and shows no response, even after involving session state. Can somebody help here?
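
A likely culprit worth checking (editor's note, not a verified fix): the suggestions form and everything that renders its result live inside the `if st.button("PROCESS"):` branch. `st.button` returns True only on the single script run triggered by its own click, so when "Submit Suggestions" fires a fresh rerun, the whole PROCESS branch is skipped and nothing renders; the value saved in st.session_state.corrected_llm_response survives, but the code that reads it never executes. (Separately, `result_pdf` and `result_app` look undefined at the `get_corrected_llm_response(...)` call site inside main.) The usual pattern is to persist the expensive results in session state and keep the form outside the button branch. A minimal sketch, with a hypothetical run_processing() standing in for the whole PROCESS pipeline:

import streamlit as st

def run_processing():
    # Stand-in for the pipeline above (vectorstore, RetrievalQA, comparison
    # LLMChain); returns whatever the form handler will need on later reruns.
    return {"completion": "{}", "response_pdf": "", "result_app": ""}

def main():
    if st.button("PROCESS"):
        # Persist the results so they survive the rerun caused by the form submit
        st.session_state["results"] = run_processing()

    # The form lives OUTSIDE the button branch, guarded by the stored results
    if "results" in st.session_state:
        res = st.session_state["results"]
        with st.form(key="suggestions_form"):
            user_suggestions = st.text_area("Any suggestions that can improve response?")
            submitted = st.form_submit_button("Submit Suggestions")
        if submitted:
            # Call get_corrected_llm_response(res["completion"], res["response_pdf"],
            # res["result_app"], user_suggestions) here; echoing is a stand-in.
            st.session_state["corrected"] = user_suggestions
        if "corrected" in st.session_state:
            st.write(st.session_state["corrected"])

if __name__ == "__main__":
    main()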