Hello, I created a chatbot using Streamlit recommended tutorial, its all working nice and lovely, but for some reason the chatbot loses context in message history, what I mean by this is:
- Ask it a question who is the main Caracter of the book - Bot answers correctly
- Ask how old is he - Bot will view question as standalone questions and will not know whose age you are asking about, it loses context on question chain.
I am uncertain if Streamlit is somehow losing session data on each new question or not.
Here is the code:
def parse_pdf(file_path: str) -> List[str]:
loader = PyMuPDFLoader(file_path)
data = loader.load()
output = [doc.page_content for doc in data]
return output
def extract_text(file_path: str) -> List[str]:
try:
if file_path.endswith('.pdf'):
return parse_pdf(file_path)
else:
print(f"Unsupported file type: {file_path}")
return []
except Exception as e:
print(f"Error processing {file_path}: {e}")
return []
def text_to_docs(text: List[str]) -> List[str]:
combined_text = ' '.join(text)
text_splitter = RecursiveCharacterTextSplitter(
chunk_size=1000,
# separators=["\n\n", "\n", ".", "!", "?",","," "],
chunk_overlap=200,
)
return text_splitter.split_text(combined_text)
all_texts = []
for dirpath, dirnames, filenames in os.walk(data_path):
for filename in filenames:
file_path = os.path.join(dirpath, filename)
print(f"processing {file_path}")
try:
texts = extract_text(file_path)
all_texts.extend(texts)
except Exception as e:
print(f"Error processing {file_path}: {e}") # Error level log
documents = text_to_docs(all_texts)
vector_store = FAISS.from_texts(documents, embedding=embeddings)
vector_store.save_local(faiss_file_path)
if os.path.exists(faiss_file_path):
vector_store = FAISS.load_local(faiss_file_path, embeddings)
_template = """ given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.
Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:"""
CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)
template = """You are an helpful AI assistant, if someone says HI, hello or any other greeting, try to answer in polite and mannered way as a human would.
Once asked the question try to answer with as much details as possible.
Question: {question}
=========
{context}
=========
Answer in Markdown:"""
QA_PROMPT = PromptTemplate(template=template, input_variables=[
"question", "context"])
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
llm = ChatAnthropic(temperature=temperature, max_tokens=1000, model_name=model_name="claude-2")
qa = ConversationalRetrievalChain.from_llm(llm=llm, retriever=vector_store.as_retriever(),condense_question_prompt=CONDENSE_QUESTION_PROMPT,memory=memory,combine_docs_chain_kwargs={"prompt": QA_PROMPT})
if "messages" not in st.session_state:
st.session_state.messages = []
if not st.session_state.messages:
welcome_message = {"role": "assistant", "content": "Good day I am helpful chatbot please ask me anything"}
st.session_state.messages.append(welcome_message)
for message in st.session_state.messages:
with st.chat_message(message["role"]):
st.markdown(message["content"])
if prompt := st.chat_input("State your question"):
st.session_state.messages.append({"role": "user", "content": prompt})
with st.chat_message("user"):
st.markdown(prompt)
result = qa.invoke({"question": prompt, "chat_history": [(message["role"], message["content"]) for message in st.session_state.messages]})
with st.chat_message("assistant"):
message_placeholder = st.empty()
full_response = result["answer"]
message_placeholder.markdown(full_response + "|")
col1, col2, col3, col4 = st.columns([3, 3, 0.5, 0.5])
with col3:
with st.container():
st.button(":thumbsup:", on_click=save_likes_to_txt)
with col4:
with st.container():
st.button(":thumbsdown:", on_click=save_dislikes_to_txt)
col3.write(' ')
col4.write(' ')
message_placeholder.markdown(full_response)
st.session_state.messages.append({"role": "assistant", "content": full_response})