I am creating a bot that writes stories using a LangChain agent. It uses text-to-speech to narrate the story and speech-to-text to take feedback from the user after it has framed each paragraph.
Code:

```python
import streamlit as st
import streamlit_mic_recorder
from langchain.agents import AgentExecutor
from langchain.agents.format_scratchpad.openai_tools import format_to_openai_tool_messages
from langchain.agents.output_parsers.openai_tools import OpenAIToolsAgentOutputParser
from langchain.memory import ConversationBufferWindowMemory
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder

# llm_with_tools, tools and text_to_speech are defined elsewhere in the app

memory = ConversationBufferWindowMemory(
    return_messages=True, memory_key='chat_history', input_key='Feedback', k=1
)

prompt_template = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a story writer bot. Take the user's feedback after every paragraph you frame."),
        MessagesPlaceholder(variable_name="chat_history"),
        ("user", "{Feedback}"),
        MessagesPlaceholder(variable_name="agent_scratchpad"),
    ]
)

# Create the agent
agent = (
    {
        "Feedback": lambda x: x["Feedback"],
        "agent_scratchpad": lambda x: format_to_openai_tool_messages(x["intermediate_steps"]),
        "chat_history": lambda x: x["chat_history"],
    }
    | prompt_template
    | llm_with_tools
    | OpenAIToolsAgentOutputParser()
)

# AgentExecutor
agent_executor = AgentExecutor(agent=agent, tools=tools, memory=memory, verbose=True)

Abstract = st.text_area("Enter an abstract for the story")

# Store the state for the first paragraph and the transcripts received so far
if "first_paragraph" not in st.session_state:
    st.session_state.first_paragraph = None
if "text_received" not in st.session_state:
    st.session_state.text_received = []

# Button to start framing the story
if st.button("Start Framing"):
    if Abstract and not st.session_state.first_paragraph:
        # Generate the first paragraph from the abstract
        para = agent_executor.invoke({"Feedback": Abstract})['output']
        st.session_state.first_paragraph = para
        st.write(f"AI Story teller: {para}")
        text_to_speech(para)
    elif not Abstract:
        st.write("Please provide an Abstract.")

# Record the user's spoken feedback once the first paragraph exists
if st.session_state.first_paragraph:
    # user_response = speech_to_text()
    text = streamlit_mic_recorder.speech_to_text(
        language='en', use_container_width=True, just_once=True, key='STT'
    )
    if text:
        st.session_state.text_received.append(text)
    for received in st.session_state.text_received:
        st.text(received)
    if text is not None:
        user_response = text
        st.write(f"You: {user_response}")
        # Generate the next paragraph based on the user's feedback
        next_paragraph = agent_executor.invoke({"Feedback": user_response})['output']
        st.write(f"AI Story teller: {next_paragraph}")
        text_to_speech(next_paragraph)
```
The problem is in the speech-to-text part, for which I am using streamlit_mic_recorder.speech_to_text. I want the session state to wait until the user has fully finished talking.
With the current code, however, streamlit_mic_recorder returns None initially.
I want the state to be driven by the recorder's button, so that until the user stops the recording, streamlit_mic_recorder waits and captures the full audio.
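For reference, here is a minimal sketch of just the recorder part, stripped of the agent code, that shows the same behaviour I described above (the speech_to_text call is the same one I use in my app; the surrounding script is only for illustration):

```python
import streamlit as st
import streamlit_mic_recorder

# Minimal illustration of the behaviour: on the first run of the script,
# before the user has pressed stop, the widget returns None and the
# transcript handling below is skipped.
text = streamlit_mic_recorder.speech_to_text(language='en', just_once=True, key='STT')

if text is None:
    st.write("No transcript yet - still waiting for the recording to finish.")
else:
    st.write(f"You: {text}")
```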