Hello everybody, I am getting an error in my current app and would like some advice!
My app has been running fine for some time, but for some reason this week I started getting a LookupError with the following traceback:
Lookup error:
File "/home/adminuser/venv/lib/python3.9/site-packages/streamlit/runtime/scriptrunner/exec_code.py", line 88, in exec_func_with_error_handling
result = func()
File "/home/adminuser/venv/lib/python3.9/site-packages/streamlit/runtime/scriptrunner/script_runner.py", line 590, in code_to_exec
exec(code, module.__dict__)
File "/mount/src/spotchecks/SpotChecks.py", line 754, in <module>
run()
File "/mount/src/spotchecks/SpotChecks.py", line 751, in run
tab_SC()
File "/mount/src/spotchecks/SpotChecks.py", line 296, in tab_SC
checked_df = check_dataframe_similarity(df, similarity=True)
File "/mount/src/spotchecks/SpotChecks.py", line 143, in check_dataframe_similarity
df[section + '_processed'] = df[section].apply(process_text)
File "/home/adminuser/venv/lib/python3.9/site-packages/pandas/core/series.py", line 4917, in apply
return SeriesApply(
File "/home/adminuser/venv/lib/python3.9/site-packages/pandas/core/apply.py", line 1427, in apply
return self.apply_standard()
File "/home/adminuser/venv/lib/python3.9/site-packages/pandas/core/apply.py", line 1507, in apply_standard
mapped = obj._map_values(
File "/home/adminuser/venv/lib/python3.9/site-packages/pandas/core/base.py", line 921, in _map_values
return algorithms.map_array(arr, mapper, na_action=na_action, convert=convert)
File "/home/adminuser/venv/lib/python3.9/site-packages/pandas/core/algorithms.py", line 1743, in map_array
return lib.map_infer(values, mapper, convert=convert)
File "lib.pyx", line 2972, in pandas._libs.lib.map_infer
File "/mount/src/spotchecks/SpotChecks.py", line 102, in process_text
tokens = word_tokenize(text)
File "/home/adminuser/venv/lib/python3.9/site-packages/nltk/tokenize/__init__.py", line 142, in word_tokenize
sentences = [text] if preserve_line else sent_tokenize(text, language)
File "/home/adminuser/venv/lib/python3.9/site-packages/nltk/tokenize/__init__.py", line 119, in sent_tokenize
tokenizer = _get_punkt_tokenizer(language)
File "/home/adminuser/venv/lib/python3.9/site-packages/nltk/tokenize/__init__.py", line 105, in _get_punkt_tokenizer
return PunktTokenizer(language)
File "/home/adminuser/venv/lib/python3.9/site-packages/nltk/tokenize/punkt.py", line 1744, in __init__
self.load_lang(lang)
File "/home/adminuser/venv/lib/python3.9/site-packages/nltk/tokenize/punkt.py", line 1749, in load_lang
lang_dir = find(f"tokenizers/punkt_tab/{lang}/")
File "/home/adminuser/venv/lib/python3.9/site-packages/nltk/data.py", line 579, in find
raise LookupError(resource_not_found)
My requirements file has the following entries (note that no versions are pinned, so each redeploy installs the latest releases):
streamlit
openpyxl
pandas
spacy
numpy
en_core_web_lg @ https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.1.0/en_core_web_lg-3.1.0.tar.gz
scikit-learn
nltk
streamlit_option_menu
wordcloud
xlrd
and inside my app I am using the following libraries:
import streamlit as st
import pandas as pd
import numpy as np
import io
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import SnowballStemmer

# NOTE: the original lines used curly "smart quotes" (‘stopwords’), which is a
# SyntaxError in Python — they must be plain ASCII quotes.
#
# Root cause of the LookupError in the traceback: NLTK 3.8.2+ renamed the
# punkt sentence-tokenizer data from 'punkt' to 'punkt_tab'
# (find(f"tokenizers/punkt_tab/{lang}/") in the traceback). Because 'nltk' is
# unpinned in requirements, a redeploy pulled a newer NLTK that requires the
# new resource, which was never downloaded. Download both for compatibility.
nltk.download('stopwords')
nltk.download('punkt')      # still used by older NLTK releases
nltk.download('punkt_tab')  # required by word_tokenize on NLTK >= 3.8.2

from sklearn.feature_extraction.text import TfidfVectorizer
import pickle
from streamlit_option_menu import option_menu