I discovered that you can actually pass a custom tqdm progress bar to ragas' `evaluate` (through its `_pbar` argument), and we can customize that tqdm bar so that it also updates a Streamlit `st.progress` bar.
Here's a complete example that worked for me:
import streamlit as st
from datasets import load_dataset
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from ragas import EvaluationDataset, SingleTurnSample, evaluate
from ragas.embeddings import LangchainEmbeddingsWrapper
from ragas.llms import LangchainLLMWrapper
from ragas.metrics import AspectCritic
from tqdm.auto import tqdm as std_tqdm
# Wrap the LangChain OpenAI chat model and embeddings so ragas can use them.
evaluator_llm = LangchainLLMWrapper(ChatOpenAI(model="gpt-4o"))
evaluator_embeddings = LangchainEmbeddingsWrapper(OpenAIEmbeddings())

# Metric scored by the evaluator LLM against the definition given here.
metric = AspectCritic(
    name="summary_accuracy",
    llm=evaluator_llm,
    definition="Verify if the summary is accurate.",
)

# One hand-written sample, scored once on its own before the dataset run.
test_data = SingleTurnSample(
    user_input="summarize given text\nThe company reported an 8% rise in Q3 2024, driven by strong performance in the Asian market. Sales in this region have significantly contributed to the overall growth. Analysts attribute this success to strategic marketing and product localization. The positive trend in the Asian market is expected to continue into the next quarter.",
    response="The company experienced an 8% increase in Q3 2024, largely due to effective marketing strategies and product adaptation, with expectations of continued growth in the coming quarter.",
)
# NOTE: the returned score is not stored or displayed.
metric.single_turn_score(test_data)

# Load the Hugging Face dataset and convert it into a ragas EvaluationDataset.
eval_dataset_raw = load_dataset("explodinggradients/earning_report_summary", split="train")
eval_dataset = EvaluationDataset.from_hf_dataset(eval_dataset_raw)
st.write("Features in dataset:", eval_dataset.features())

# The sample count doubles as the denominator for the progress display.
n_samples = len(eval_dataset)
st.write("Total samples in dataset:", n_samples)

# Streamlit progress widget that the custom tqdm subclass keeps in sync.
progress_bar = st.progress(0, text="Evaluation progress")
class TqdmExt(std_tqdm):
    """tqdm progress bar that mirrors its count onto a Streamlit progress widget.

    Relies on two module-level names being defined before the first update:
    ``progress_bar`` (the object returned by ``st.progress``) and
    ``n_samples`` (the number of samples used as the denominator).
    """

    def update(self, n=1):
        """Advance the bar by ``n`` and sync the Streamlit widget.

        Args:
            n: Increment to add to the internal counter.

        Returns:
            Whatever the parent ``update`` returns (truthy when tqdm actually
            redrew its own display on this call).
        """
        displayed = super().update(n)
        # tqdm rate-limits its redraws, so the last increment may not report a
        # redraw; force a sync once the counter reaches the sample count so the
        # Streamlit bar does not stop short of completion.
        finished = self.n >= n_samples
        if displayed or finished:
            # st.progress only accepts values within [0.0, 1.0]. Clamp in case
            # more updates arrive than n_samples, and avoid dividing by zero
            # for an empty dataset.
            if n_samples:
                fraction = min(max(self.n / n_samples, 0.0), 1.0)
            else:
                fraction = 1.0
            progress_bar.progress(
                fraction, text=f"Evaluating sample {self.n} of {n_samples}"
            )
        return displayed
# Run the evaluation with the Streamlit-aware bar. Using the bar as a context
# manager closes it when the run ends, including when evaluate() raises, so
# the manually created tqdm instance is not left open.
with TqdmExt() as custom_tqdm:
    results = evaluate(eval_dataset, metrics=[metric], _pbar=custom_tqdm)

# Show the per-sample results as a table in the app.
st.write(results.to_pandas())