st.header(" Chat with PDF ")
Upload multiple PDF files
uploaded_files = st.file_uploader(“Upload your PDFs”, type=‘pdf’, accept_multiple_files=True)
if st.button(“Process”):
if uploaded_files is not None:
docs =
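        # Read each PDF, concatenate its page text, and split it into overlapping chunks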
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000, chunk_overlap=20, length_function=len
        )
        for pdf in uploaded_files:
            pdf_reader = PdfReader(pdf)
            text = ""
            for page in pdf_reader.pages:
                # extract_text() returns None for image-only pages, so guard against it
                text += page.extract_text() or ""
            chunks = text_splitter.split_text(text=text)
            docs.extend(chunks)
            metadatas.extend([{"source": pdf.name}] * len(chunks))
            st.write(f"{pdf.name}: {len(chunks)} chunks")
        # Build the Chroma vector store for similarity search over all chunks,
        # not just the last PDF's chunks
        vectordb = Chroma.from_texts(
            texts=docs,
            metadatas=metadatas,
            embedding=embeddings,
            persist_directory=persist_directory,
        )
        # persist the DB to disk so the Q&A section can reload it on later reruns
        vectordb.persist()
        st.success("PDFs processed and indexed.")
# Reload the persisted Chroma DB from disk; this also works on reruns where
# "Process" was not clicked, as long as the index has been built once
new_db = Chroma(persist_directory=persist_directory, embedding_function=embeddings)
# Instantiate a retriever that returns the top-k most similar chunks
retriever = new_db.as_retriever(search_kwargs={"k": 2})
# Instantiate the LLM (reads the HUGGINGFACEHUB_API_TOKEN environment variable)
llm = HuggingFaceHub(
    repo_id="declare-lab/flan-alpaca-large",
    model_kwargs={"temperature": 0, "max_length": 512},
)
# Chain that answers questions against the vector DB (ChromaDB) directly;
# chain_type="stuff" puts all retrieved chunks into a single prompt
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    input_key="question",
    return_source_documents=True,
)
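# Q&A UI: kept outside the "Process" branch so it still works after Streamlit
# reruns the script when "Get Response" is clicked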
st.header("Ask your data")
user_q = st.text_area("Enter your questions here")
if st.button("Get Response"):
    try:
        with st.spinner("Model is working on it..."):
            result = qa_chain({"question": user_q}, return_only_outputs=True)
        st.subheader('Your response:')
        # RetrievalQA returns the answer under 'result' and, with
        # return_source_documents=True, the retrieved chunks under 'source_documents'
        st.write(result['result'])
        st.subheader('Source pages:')
        st.write(result['source_documents'])
    except Exception as e:
        st.error(f"An error occurred: {e}")
        st.error('Oops, the model response resulted in an error :( Please try again with a different question.')
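
# To run this sketch locally (the filename app.py is illustrative), set your
# HuggingFace Hub token and launch Streamlit:
#
#   export HUGGINGFACEHUB_API_TOKEN=<your-token>
#   streamlit run app.py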