Trying to implement a context chat engine for my chatbot using LlamaIndex – HELP!

I am trying to create a chatbot using LlamaIndex that answers only from the knowledge base I give it, using the Llama-2-70b model. The issue I am facing: when I try to use the context chat engine, I get this error:

NotImplementedError: Messages passed in must be of odd length.
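
From some digging, I think (not confirmed, just my reading of the traceback) the error comes from HuggingFaceInferenceAPI converting the chat history into the Hugging Face Inference API's conversational format, which expects alternating user/assistant turns ending on a user turn, i.e. an odd-length message list. Something like this simplified sketch (not the actual llama_index source) reproduces the situation:

from llama_index.core.llms import ChatMessage, MessageRole

def fake_conversational_kwargs(messages):
    # the conversational format wants alternating user/assistant turns ending
    # on a user turn, so the message list must have odd length
    if len(messages) % 2 != 1:
        raise NotImplementedError("Messages passed in must be of odd length.")

# ContextChatEngine prepends its system prompt to the history, so even the
# very first call sends [system, user] -> length 2 -> even -> error
fake_conversational_kwargs([
    ChatMessage(role=MessageRole.SYSTEM, content="Answer only from the e-books."),
    ChatMessage(role=MessageRole.USER, content="where is mumbai"),
])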

But when I use the condense question chat engine, my code works fine; it's just that the questions it rephrases sometimes don't make any sense, which is why it returns "I don't know."
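
For the rephrasing problem I have been experimenting with a stricter condense prompt, along these lines (the wording is my own, adjust as needed):

from llama_index.core import PromptTemplate
from llama_index.core.chat_engine import CondenseQuestionChatEngine

# my own rephrase prompt wording; {chat_history} and {question} are the
# variables the condense question prompt expects
custom_prompt = PromptTemplate(
    "Given the conversation below and a follow-up message, rewrite the "
    "follow-up as a single standalone question. If it is already standalone, "
    "return it unchanged - do not invent new details.\n"
    "<Chat History>\n{chat_history}\n"
    "<Follow Up Message>\n{question}\n"
    "<Standalone question>\n"
)

chat_engine = CondenseQuestionChatEngine.from_defaults(
    query_engine=index.as_query_engine(),  # index comes from load_data() below
    condense_question_prompt=custom_prompt,
    verbose=True,
)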

Here are the code snippets:

1. Loading / embedding the data
# imports added here for completeness (my guesses at the packages the snippet uses;
# assuming llama-index 0.10-style packages)
import os

import streamlit as st
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from llama_index.core import (
    ServiceContext,
    Settings,
    SimpleDirectoryReader,
    StorageContext,
    VectorStoreIndex,
    load_index_from_storage,
    set_global_service_context,
)
from llama_index.llms.huggingface import HuggingFaceInferenceAPI

PERSIST_DIR = "./storage"

@st.cache_resource(show_spinner=False)
def load_data():
    with st.spinner(text="Loading and indexing the e-books – hang tight! This should take 1-2 minutes."):
        llm = HuggingFaceInferenceAPI(
            generate_kwargs={"temperature": 0.0},
            model_name="meta-llama/Llama-2-7b-chat-hf",
        )
        embed_model = HuggingFaceBgeEmbeddings(
            model_name="BAAI/bge-large-en",
            model_kwargs={"device": "cpu"},
            encode_kwargs={"normalize_embeddings": False},
        )
        # note: I set both the (deprecated) ServiceContext and the newer
        # global Settings, since the examples I found use one or the other
        service_context = ServiceContext.from_defaults(
            chunk_size=1000,
            chunk_overlap=100,
            embed_model=embed_model,
            llm=llm,
        )
        set_global_service_context(service_context)
        Settings.llm = llm
        Settings.embed_model = embed_model

        if not os.path.exists(PERSIST_DIR):
            # first run: read the documents and build a fresh index
            reader = SimpleDirectoryReader(input_dir="./data", recursive=True)
            docs = reader.load_data()
            index = VectorStoreIndex.from_documents(
                documents=docs, service_context=service_context
            )
            index.storage_context.persist(persist_dir=PERSIST_DIR)
            st.write("LoadEmbedding>>>", index)
            return index
        else:
            # load the existing index from disk
            storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
            index = load_index_from_storage(storage_context)
            st.write("StoredEmbedding>>>", index)
            return index
 
 
index = load_data()
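
Side note: I know ServiceContext is deprecated in llama-index 0.10 in favour of the global Settings object; as far as I understand, the setup above can be expressed with Settings alone, something like:

from llama_index.core import Settings

# Settings-only equivalent of the ServiceContext block (my understanding of
# the 0.10 API; chunk_size/chunk_overlap configure the default node parser)
Settings.llm = llm
Settings.embed_model = embed_model
Settings.chunk_size = 1000
Settings.chunk_overlap = 100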
2. Initializing the chat engine
if "chat_engine" not in st.session_state.keys(): # Initialize the chat engine
       
        query_engine=(
        index.as_query_engine(text_qa_template=text_qa_template, refine_template=refine_template, llm=Settings.llm)
 
        )
        from llama_index.core.memory import ChatMemoryBuffer
 
        memory = ChatMemoryBuffer.from_defaults(token_limit=1500)
       # chat_engine = CondenseQuestionChatEngine.from_defaults(
      #      query_engine=query_engine,
       #     condense_question_prompt=custom_prompt,
       #     chat_history=custom_chat_history,
        #    verbose=True,
       # )
         retriever = index.as_retriever()

         chat_engine = ContextChatEngine.from_defaults(
             retriever=retriever,
             memory=memory,
             system_prompt=prompt_template,
             llm=Settings.llm,
             verbose= True
 
         )
        # response = chat_engine.chat("where is mumbai",)
        # st.session_state.messages = []
        st.session_state.chat_engine = chat_engine
 
if prompt := st.chat_input("Your question"):  # prompt for user input and save to chat history
    st.session_state.messages.append({"role": "user", "content": prompt})

for message in st.session_state.messages:  # display the prior chat messages
    with st.chat_message(message["role"]):
        st.write(message["content"])

# if the last message is not from the assistant, generate a new response
if st.session_state.messages and st.session_state.messages[-1]["role"] != "assistant":
    with st.chat_message("assistant"):
        with st.spinner("Thinking..."):
            print(st.session_state.messages)
            response = st.session_state.chat_engine.chat(prompt)
            # response = st.session_state.chat_engine.stream_chat(prompt, chat_engine.chat_history)
            st.write(response.response)
            message = {"role": "assistant", "content": response.response}
            st.session_state.messages.append(message)  # add response to message history
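
One workaround I am experimenting with, based on suggestions I have seen for this error (not verified by me yet): construct the LLM with the text-generation task, so the chat messages are flattened into a single prompt instead of going through the conversational path that enforces the odd-length check. I also call st.session_state.chat_engine.reset() when I want to clear any stale history from memory.

# assumption: with task="text-generation" the chat is formatted as one
# completion prompt, bypassing the conversational odd-length check
llm = HuggingFaceInferenceAPI(
    model_name="meta-llama/Llama-2-7b-chat-hf",
    generate_kwargs={"temperature": 0.0},
    task="text-generation",
)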

I tried the same code from the official LlamaIndex website for the context chat engine as well, but even that is not working as-is. Here is the link I referred to:
https://docs.llamaindex.ai/en/stable/examples/chat_engine/chat_engine_context/?h=context+chat
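
For reference, that page boils down to roughly the pattern below. It uses index.as_chat_engine with chat_mode="context" rather than constructing ContextChatEngine directly, and a different LLM, which I suspect is why it does not hit the odd-length check:

from llama_index.core.memory import ChatMemoryBuffer

memory = ChatMemoryBuffer.from_defaults(token_limit=1500)
chat_engine = index.as_chat_engine(
    chat_mode="context",
    memory=memory,
    system_prompt="Answer only from the indexed e-books.",  # my wording, not the docs'
)
response = chat_engine.chat("where is mumbai")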

I'm just stuck at this point in the project. Most of the code is working properly; only the context chat engine is giving me an issue. I am even fine working with the condense question chat engine if it just rephrases the questions properly.
Kindly help. I have asked this question on many different websites, but to no avail.
Thanks in advance.

Hi @Prathamesh_Sawant,

I am currently trying the same thing, and the documentation of both Streamlit and LlamaIndex is less than ideal. Have you found a solution to this issue?