I am trying use the LIDA Python API with Streamlit. I have a form in the sidebar and that collections the initial information to create the LIDA object. Once I create it, I have a second form that asks the user to select the generated question. But in the second form, it can’t access the LIDA object.
I am confused about how to fix the scope of my LIDA object.
Here is my code:
import streamlit as st
from lida import Manager, TextGenerationConfig, llm
from lida.datamodel import Goal
import os
import pandas as pd
models = ["gpt-4", "gpt-3.5-turbo", "gpt-3.5-turbo-16k"]
datasets = [
{"label": "Cars", "url": "https://raw.githubusercontent.com/uwdata/draco/master/data/cars.csv"},
{"label": "Iris", "url": "https://raw.githubusercontent.com/mwaskom/seaborn-data/master/iris.csv"},
{"label": "Titanic", "url": "https://raw.githubusercontent.com/mwaskom/seaborn-data/master/titanic.csv"},
{"label": "Penguins", "url": "https://raw.githubusercontent.com/mwaskom/seaborn-data/master/penguins.csv"},
]
summarization_methods = [
{"label": "llm",
"description":
"Uses the LLM to generate annotate the default summary, adding details such as semantic types for columns and dataset description"},
{"label": "default",
"description": "Uses dataset column statistics and column names as the summary"},
{"label": "columns", "description": "Uses the dataset column names as the summary"}
]
st.write("## Exploratory Data Analysis with LIDA 📊 :bulb:")
if 'form_one_complete' not in st.session_state:
st.session_state['form_one_complete'] = False
with st.sidebar:
with st.form(key ='Form1'):
"""
initial form in sidebar to collect inputs to create LIDA object
"""
openai_key = st.text_input("Enter OpenAI API key:")
selected_model = st.selectbox(
'Choose a model',
options=models,
index=1
)
temperature = st.slider(
"Temperature",
min_value=0.0,
max_value=1.0,
value=0.0)
use_cache = st.checkbox("Use cache", value=True)
selected_dataset_label = st.selectbox(
'Choose a dataset',
options=[dataset["label"] for dataset in datasets],
index=1
)
num_goals = st.slider(
"Number of goals to generate",
min_value=1,
max_value=10,
value=4)
selected_method_label = st.selectbox(
'Choose a method',
options=[method["label"] for method in summarization_methods],
index=0
)
selected_method = summarization_methods[[
method["label"] for method in summarization_methods].index(selected_method_label)]["label"]
# add description of selected method in very small font to sidebar
selected_summary_method_description = summarization_methods[[
method["label"] for method in summarization_methods].index(selected_method_label)]["description"]
if selected_method:
st.markdown(
f"<span> {selected_summary_method_description} </span>",
unsafe_allow_html=True)
submit_button = st.form_submit_button(label='Submit')
""" create LIDA object after first submit and update session state """
if submit_button:
st.session_state['form_one_complete'] = True
lida = Manager(text_gen=llm("openai", api_key=openai_key))
textgen_config = TextGenerationConfig(
n=1,
temperature=temperature,
model=selected_model,
use_cache=use_cache)
# display lida summary of data
st.write("## Summary")
# **** lida.summarize *****
summary = lida.summarize(
selected_dataset,
summary_method=selected_method,
textgen_config=textgen_config)
if "dataset_description" in summary:
st.write(summary["dataset_description"])
if "fields" in summary:
fields = summary["fields"]
nfields = []
for field in fields:
flatted_fields = {}
flatted_fields["column"] = field["column"]
# flatted_fields["dtype"] = field["dtype"]
for row in field["properties"].keys():
if row != "samples":
flatted_fields[row] = field["properties"][row]
else:
flatted_fields[row] = str(field["properties"][row])
# flatted_fields = {**flatted_fields, **field["properties"]}
nfields.append(flatted_fields)
nfields_df = pd.DataFrame(nfields)
st.write(nfields_df)
else:
st.write(str(summary))```
""" create second form to select goals """
if st.session_state['form_one_complete']:
with st.form(key ='GoalForm'):
goals = lida.goals(summary, n=num_goals, textgen_config=textgen_config)
default_goal = goals[0].question
goal_questions = [goal.question for goal in goals]
selected_goal = st.selectbox('Choose a generated goal',
options=goal_questions, index=0)
if st.form_submit_button("Submit"):
st.write("## Goal")
I get the error:
NameError: name 'lida' is not defined
Traceback:
File "/Users/indapa/miniconda3/lib/python3.9/site-packages/streamlit/scriptrunner/script_runner.py", line 557, in _run_script
exec(code, module.__dict__)
File "/Users/indapa/lida-streamlit/main2.py", line 145, in <module>
goals = lida.goals(summary, n=num_goals, textgen_config=textgen_config)
```
I'm using Streamlit, version 1.29.0 with Python 3.11
data:image/s3,"s3://crabby-images/728cf/728cf84743b471aa6422f01e894a6bdcecf232dc" alt="Screen Shot 2024-01-25 at 5.22.44 AM|690x415"
data:image/s3,"s3://crabby-images/a1b64/a1b646888b139301102c92157207697917e4c1fe" alt="Screen Shot 2024-01-25 at 5.22.32 AM|690x435"