IndexError: list index out of range

Hi all, I have been using Streamlit to build a PDF summarisation tool, and I am running into this "list index out of range" error.

The problem is that once the user selects the index number, the system crashes and displays the error below:

File "/opt/homebrew/lib/python3.11/site-packages/streamlit/runtime/scriptrunner/script_runner.py", line 534, in _run_script
    exec(code, module.__dict__)
File "/Users/sandesh/Coding /Python/AI SUMM PBL/jupyter nb/app2.py", line 105, in <module>
    main()
File "/Users/sandesh/Coding /Python/AI SUMM PBL/jupyter nb/app2.py", line 82, in main
    handle_userinput(user_question)
File "/Users/sandesh/Coding /Python/AI SUMM PBL/jupyter nb/app2.py", line 56, in handle_userinput
    response = st.session_state.conversation({'question': user_question})
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/homebrew/lib/python3.11/site-packages/langchain/chains/base.py", line 166, in __call__
    raise e
File "/opt/homebrew/lib/python3.11/site-packages/langchain/chains/base.py", line 160, in __call__
    self._call(inputs, run_manager=run_manager)
File "/opt/homebrew/lib/python3.11/site-packages/langchain/chains/conversational_retrieval/base.py", line 114, in _call
    answer = self.combine_docs_chain.run(
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/homebrew/lib/python3.11/site-packages/langchain/chains/base.py", line 293, in run
    return self(kwargs, callbacks=callbacks, tags=tags)[_output_key]
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/homebrew/lib/python3.11/site-packages/langchain/chains/base.py", line 166, in __call__
    raise e
File "/opt/homebrew/lib/python3.11/site-packages/langchain/chains/base.py", line 160, in __call__
    self._call(inputs, run_manager=run_manager)
File "/opt/homebrew/lib/python3.11/site-packages/langchain/chains/combine_documents/base.py", line 84, in _call
    output, extra_return_dict = self.combine_docs(
                                ^^^^^^^^^^^^^^^^^^
File "/opt/homebrew/lib/python3.11/site-packages/langchain/chains/combine_documents/stuff.py", line 87, in combine_docs
    return self.llm_chain.predict(callbacks=callbacks, **inputs), {}
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/homebrew/lib/python3.11/site-packages/langchain/chains/llm.py", line 252, in predict
    return self(kwargs, callbacks=callbacks)[self.output_key]
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/homebrew/lib/python3.11/site-packages/langchain/chains/base.py", line 166, in __call__
    raise e
File "/opt/homebrew/lib/python3.11/site-packages/langchain/chains/base.py", line 160, in __call__
    self._call(inputs, run_manager=run_manager)
File "/opt/homebrew/lib/python3.11/site-packages/langchain/chains/llm.py", line 93, in _call
    return self.create_outputs(response)[0]
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/homebrew/lib/python3.11/site-packages/langchain/chains/llm.py", line 217, in create_outputs
    result = [
             ^
File "/opt/homebrew/lib/python3.11/site-packages/langchain/chains/llm.py", line 220, in <listcomp>
    self.output_key: self.output_parser.parse_result(generation),
                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/homebrew/lib/python3.11/site-packages/langchain/schema.py", line 355, in parse_result
    return self.parse(result[0].text)
                      ~~~~~~^^^

This is my code:


    memory = ConversationBufferMemory(
        memory_key='chat_history', return_messages=True)
    conversation_chain = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=vectorstore.as_retriever(),
        memory=memory
    )
    return conversation_chain


def handle_userinput(user_question):
    """Send the user's question to the conversation chain and render the chat.

    The chain stored in ``st.session_state.conversation`` is called with the
    question. The ``chat_history`` it returns is saved back into the session
    state and then written to the page, alternating between the user and bot
    HTML templates.

    Args:
        user_question: Text the user typed into the question box.
    """
    conversation = st.session_state.conversation
    if conversation is None:
        # main() leaves the chain as None until documents have been
        # processed; calling None would raise a TypeError.
        st.warning(
            "Please upload and process your documents before asking a question.")
        return

    response = conversation({'question': user_question})
    st.session_state.chat_history = response['chat_history']

    # Even positions are rendered with the user template, odd ones with the
    # bot template.
    for i, message in enumerate(st.session_state.chat_history):
        template = user_template if i % 2 == 0 else bot_template
        st.write(template.replace("{{MSG}}", message.content),
                 unsafe_allow_html=True)


def main():
    """Render the Streamlit page: question box plus PDF upload sidebar.

    Initialises the ``conversation`` and ``chat_history`` session-state slots,
    forwards any typed question to ``handle_userinput``, and, when the
    "Process" button is pressed, builds a new conversation chain from the
    uploaded PDFs.
    """
    load_dotenv()
    st.set_page_config(page_title="Chat with multiple PDFs",
                       page_icon=":books:")
    st.write(css, unsafe_allow_html=True)

    if "conversation" not in st.session_state:
        st.session_state.conversation = None
    if "chat_history" not in st.session_state:
        st.session_state.chat_history = None

    st.header("Chat with multiple PDFs :books:")
    user_question = st.text_input("Ask a question about your documents:")
    if user_question:
        handle_userinput(user_question)

    with st.sidebar:
        st.subheader("Your documents")
        pdf_docs = st.file_uploader(
            "Upload your PDFs here and click on 'Process'", accept_multiple_files=True)
        if st.button("Process"):
            if not pdf_docs:
                # Nothing was uploaded, so there is nothing to index.
                st.warning("Please upload at least one PDF before processing.")
                return

            with st.spinner("Processing"):
                # get pdf text
                raw_text = get_pdf_text(pdf_docs)

                # get the text chunks
                text_chunks = get_text_chunks(raw_text)
                if not text_chunks:
                    # No text came out of the PDFs (presumably scanned or
                    # image-only files); do not build an index over nothing.
                    st.error(
                        "No text could be extracted from the uploaded PDFs.")
                    return

                # create vector store
                vectorstore = get_vectorstore(text_chunks)

                # create conversation chain
                st.session_state.conversation = get_conversation_chain(
                    vectorstore)


# Run the app only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()

Hi @SANDESH_BUCHKUL

The provided code snippet seems to be incomplete. Could you share a minimal working example of the app so that the community can reproduce the error?

Also, you've mentioned selecting the index number. Could you elaborate on this by pointing out which specific code blocks you are referring to?

Thanks!

Yes, I have been getting the same error:
```
Traceback (most recent call last):
  File "J:\Projects\LLM\PDF.venv\Lib\site-packages\streamlit\runtime\scriptrunner\script_runner.py", line 534, in _run_script
    exec(code, module.__dict__)
  File "J:\Projects\LLM\PDF\index.py", line 106, in <module>
    main()
  File "J:\Projects\LLM\PDF\index.py", line 98, in main
    vectorstore = get_vectorstore(text_chunks)
    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "J:\Projects\LLM\PDF\index.py", line 37, in get_vectorstore
    vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "J:\Projects\LLM\PDF.venv\Lib\site-packages\langchain_community\vectorstores\faiss.py", line 915, in from_texts
    return cls.__from(
    ^^^^^^^^^^^
  File "J:\Projects\LLM\PDF.venv\Lib\site-packages\langchain_community\vectorstores\faiss.py", line 874, in __from
    index = faiss.IndexFlatL2(len(embeddings[0]))
    ~~~~~~~~~~^^^
IndexError: list index out of range
```

Here is the code:
import streamlit as st
from dotenv import load_dotenv
from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, HuggingFaceInstructEmbeddings
from langchain.vectorstores import FAISS
from langchain.chat_models import ChatOpenAI, ChatGooglePalm
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from HTMLtemplates import css, bot_template, user_template
from langchain.llms import HuggingFaceHub
from langchain.embeddings.huggingface import HuggingFaceEmbeddings

def get_pdf_text(pdf_docs):
    """Return the concatenated text of every page of every uploaded PDF.

    Args:
        pdf_docs: Iterable of PDF files (anything ``PdfReader`` accepts).
            ``None`` or an empty iterable yields an empty string.

    Returns:
        A single string with the extracted text of all pages, in order.
    """
    page_texts = []
    for pdf in pdf_docs or []:
        pdf_reader = PdfReader(pdf)
        for page in pdf_reader.pages:
            # Guard in case extraction yields nothing for a page, so the
            # join below only ever sees strings.
            page_texts.append(page.extract_text() or "")
    return "".join(page_texts)

def get_text_chunks(text):
    """Split raw text into overlapping chunks for embedding.

    Args:
        text: The full text extracted from the PDFs.

    Returns:
        A list of chunk strings produced by a newline-separated
        ``CharacterTextSplitter`` (chunk size 1000, overlap 200). Empty or
        whitespace-only input returns an empty list.
    """
    if not text or not text.strip():
        # Nothing to split; let the caller detect the empty result.
        return []

    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    return text_splitter.split_text(text)

def get_vectorstore(text_chunks):
    """Embed the text chunks and index them in a FAISS vector store.

    Args:
        text_chunks: Non-empty list of text chunks to embed.

    Returns:
        The FAISS vector store built from the chunks.

    Raises:
        ValueError: If ``text_chunks`` is empty. With no chunks there are no
            embeddings, and ``FAISS.from_texts`` fails with an opaque
            ``IndexError: list index out of range`` on ``embeddings[0]``.
    """
    if not text_chunks:
        raise ValueError(
            "No text chunks to embed: no text could be extracted from the "
            "uploaded PDFs.")

    # embeddings = OpenAIEmbeddings()
    embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
    return FAISS.from_texts(texts=text_chunks, embedding=embeddings)

def get_conversation_chain(vectorstore):
    """Build a conversational retrieval chain over the given vector store.

    Args:
        vectorstore: Vector store whose ``as_retriever()`` supplies the
            documents for each question.

    Returns:
        A ``ConversationalRetrievalChain`` using the ``google/flan-t5-xxl``
        model from the Hugging Face Hub and a buffer memory stored under
        the ``chat_history`` key.
    """
    llm = HuggingFaceHub(
        repo_id="google/flan-t5-xxl",
        model_kwargs={"temperature": 0.5, "max_length": 512},
    )

    # return_messages=True keeps the history as message objects, which is
    # what handle_userinput reads via ``message.content``.
    memory = ConversationBufferMemory(
        memory_key='chat_history', return_messages=True)
    conversation_chain = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=vectorstore.as_retriever(),
        memory=memory
    )
    return conversation_chain

def handle_userinput(user_question):
    """Send the user's question to the conversation chain and render the chat.

    The chain stored in ``st.session_state.conversation`` is called with the
    question. The ``chat_history`` it returns is saved back into the session
    state and then written to the page, alternating between the user and bot
    HTML templates.

    Args:
        user_question: Text the user typed into the question box.
    """
    conversation = st.session_state.conversation
    if conversation is None:
        # main() leaves the chain as None until the PDFs have been
        # processed; calling None would raise a TypeError.
        st.warning(
            "Please upload and process your PDFs before asking a question.")
        return

    response = conversation({'question': user_question})
    st.session_state.chat_history = response['chat_history']

    # Even positions are rendered with the user template, odd ones with the
    # bot template.
    for i, message in enumerate(st.session_state.chat_history):
        template = user_template if i % 2 == 0 else bot_template
        st.write(template.replace("{{MSG}}", message.content),
                 unsafe_allow_html=True)

def main():
    """Render the Streamlit page: question box plus PDF upload sidebar.

    Initialises the ``conversation`` and ``chat_history`` session-state slots,
    forwards any typed question to ``handle_userinput``, and, when the
    "Start Chat" button is pressed, builds a new conversation chain from the
    uploaded PDFs.
    """
    load_dotenv()
    st.set_page_config(page_title="Chat with multiple PDFs",
                       page_icon=":books:")
    st.write(css, unsafe_allow_html=True)

    if "conversation" not in st.session_state:
        st.session_state.conversation = None
    if "chat_history" not in st.session_state:
        st.session_state.chat_history = None

    st.header("Chat with multiple PDFs 🕮")
    st.caption("Which you can't do with girls when you have a girlfriend 💀")
    user_question = st.text_input("Ask any questions related to the PDF. PS: They don't get offended 😂")
    if user_question:
        handle_userinput(user_question)

    with st.sidebar:
        st.subheader("Your documents")
        pdf_docs = st.file_uploader(
            "Upload your PDFs here", accept_multiple_files=True)
        if st.button("Start Chat"):
            if not pdf_docs:
                # Nothing was uploaded, so there is nothing to index.
                st.warning("Please upload at least one PDF before starting the chat.")
                return

            with st.spinner("Processing"):
                # get pdf text
                raw_text = get_pdf_text(pdf_docs)

                # get the text chunks
                text_chunks = get_text_chunks(raw_text)
                if not text_chunks:
                    # With no chunks, FAISS.from_texts fails with
                    # "IndexError: list index out of range"; stop here with
                    # a readable message instead.
                    st.error(
                        "No text could be extracted from the uploaded PDFs.")
                    return

                # create vector store
                vectorstore = get_vectorstore(text_chunks)

                # create conversation chain
                st.session_state.conversation = get_conversation_chain(
                    vectorstore)

# Run the app only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()