Using LCEL memory results in system messages being printed... why?

import streamlit as st
# (imports reconstructed from what the code uses; paths assume the classic langchain 0.0.x layout)
from langchain.callbacks.base import BaseCallbackHandler
from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationSummaryBufferMemory
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.storage import LocalFileStore
from langchain.vectorstores import FAISS
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.schema.runnable import RunnableLambda, RunnablePassthrough
class ChatCallbackHandler(BaseCallbackHandler):
    message = ""

    def on_llm_start(self, *args, **kwargs):
        self.message_box = st.empty()
        with st.sidebar:
            st.write("llm started!")

    def on_llm_end(self, *args, **kwargs):
        save_message(self.message, "ai")
        with st.sidebar:
            st.write("llm ended!")

    def on_llm_new_token(self, token, *args, **kwargs):
        self.message += token
        self.message_box.markdown(self.message)

llm = ChatOpenAI(
    temperature=0.1,
    streaming=True,
    callbacks=[
        ChatCallbackHandler(),
    ],
)
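# Note: the callbacks are registered on the llm instance itself, so they run
# for every call made through this llm, wherever it is used.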

# LCEL-based memory
@st.cache_resource
def init_memory(_llm):
    return ConversationSummaryBufferMemory(
        llm=_llm,
        max_token_limit=120,
        return_messages=True,
    )
  
memory = init_memory(llm)
  

def load_memory(_):
    return memory.load_memory_variables({})["history"]
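# (load_memory ignores whatever input the chain passes it and just returns the
# current history; return_messages=True above keeps that history as message
# objects, which is what MessagesPlaceholder expects.)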

@st.cache_data(show_spinner="Embedding file...")  
def embed_file(file):
    file_content = file.read()
    file_path = f"./.cache/files/{file.name}"

    with open(file_path, "wb") as f:
        f.write(file_content)

    cache_dir = LocalFileStore(f"./.cache/embeddings/{file.name}")

    splitter = CharacterTextSplitter.from_tiktoken_encoder(
        separator="\n",
        chunk_size=600,
        chunk_overlap=100,
    )

    loader = UnstructuredFileLoader(file_path)
    docs = loader.load_and_split(text_splitter=splitter)

    embeddings = OpenAIEmbeddings()
    cached_embeddings = CacheBackedEmbeddings.from_bytes_store(embeddings, cache_dir)

    vectorstore = FAISS.from_documents(docs, cached_embeddings)

    retriever = vectorstore.as_retriever()
    return retriever
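# (st.cache_data should make embed_file run once per uploaded file; reruns of
# the script reuse the cached retriever instead of re-embedding.)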


if "messages" not in st.session_state:
    st.session_state["messages"] = []

def save_message(message, role):
    st.session_state["messages"].append({"message": message, "role": role})

def send_message(message, role, save=True):
    with st.chat_message(role):
        st.markdown(message)
    if save:
        save_message(message, role)


def paint_history():
    for message in st.session_state["messages"]:
        send_message(
            message["message"],
            message["role"],
            save=False,
        )

def format_docs(docs):
		return "\n\n".join(document.page_content for document in docs)


prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """
						Answer the question using ONLY the following context.
						If you don't know the answer just say you dont't know.
						Don't make anything up.

						Context: {context}
						""",
        ),
        MessagesPlaceholder(variable_name="history"),
        ("human", "{question}"),
    ]
)
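# MessagesPlaceholder injects the "history" list between the system prompt and
# the new human question, so past turns (and any running summary) land in the
# middle of the prompt.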


st.title("Document GPT")
st.markdown(
    """
Welcome!!            
Use this chatbot to ask questions to an AI about your files!
            
"""
)

with st.sidebar:  
    file = st.file_uploader(
        "Upload a .txt, .pdf or .docx file",
        type=["txt", "pdf", "docx", "csv"],
    )

if file:
    # st.write(file)
    retriever = embed_file(file)
    send_message("I'm ready! Ask away!", "ai", save=False)
    paint_history()

    message = st.chat_input("Ask anything about your file...")
    if message:
        send_message(message, "human") 
        
        chain = (
            {
                "context": retriever | RunnableLambda(format_docs),
                "question": RunnablePassthrough(),
                "history": RunnableLambda(load_memory),
            }
            | prompt
            | llm
        )
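        # The string passed to invoke() fans out to all three keys: the
        # retriever uses it for similarity search, RunnablePassthrough forwards
        # it as {question}, and load_memory ignores it and returns the history.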
        
        with st.chat_message("ai"):
            response = chain.invoke(message)

        memory.save_context({"input":message}, {"output":response.content}) 
        print(memory.load_memory_variables({})["history"])
        
        
else:
    st.session_state["messages"] = [] 
    memory.clear()
    memory.save_context(
        {"input": ""},
        {"output": ""},
    )

I used LCEL memory to add conversation memory to the chain.
At first the messages printed out fine, but every time the memory gets summarized, a system message is printed into the chat as well.

It also looks like the callback is executed one extra time.
I've been looking at this for a while and I can't figure out the cause.
Help me… please…
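In case it helps anyone reproduce this, here's the memory setup stripped down to the smallest runnable piece. One assumption on my part: ConversationSummaryBufferMemory uses the llm it's given to write the summary, so a separate, non-streaming ChatOpenAI should keep the chat callbacks out of the summarization calls.

# Minimal sketch of the memory in isolation. Assumption: the memory calls its
# own llm to summarize, so a callback-free ChatOpenAI keeps the streaming
# handler out of the summary calls.
from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationSummaryBufferMemory

summary_llm = ChatOpenAI(temperature=0.1)  # no streaming, no callbacks

test_memory = ConversationSummaryBufferMemory(
    llm=summary_llm,
    max_token_limit=120,
    return_messages=True,
)

# Push enough turns through to exceed max_token_limit; the pruning that runs
# inside save_context is what triggers the summary call on summary_llm.
for i in range(5):
    test_memory.save_context(
        {"input": f"question {i} " + "lorem ipsum " * 20},
        {"output": f"answer {i} " + "lorem ipsum " * 20},
    )

print(test_memory.load_memory_variables({})["history"])

If wiring a separate summarizer llm like this makes the stray system messages disappear in the app, that would at least confirm the shared llm (and its callbacks) is the culprit.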
