If you’re creating a debugging post, please include the following info:
- Share the link to the public app (deployed on Community Cloud).
- Share the link to your app’s public GitHub repository (including a requirements file).
- Share the full text of the error message (not a screenshot).
- Share the Streamlit and Python versions.
1. Link of the app: Streamlit
2. Link of the app's GitHub repository (with requirements): GitHub - Stephaniette/NLP_app
3. full text of the error: ```
[nltk_data] Downloading package punkt to /home/appuser/nltk_data…
[nltk_data] Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data] /home/appuser/nltk_data…
[nltk_data] Package stopwords is already up-to-date!
/usr/local/bin/python: No module named spacy
────────────────────── Traceback (most recent call last) ───────────────────────
/mount/src/nlp_app/NLP.py:22 in
19
20 # Ensure the spaCy model is downloaded
21 try:
❱ 22 │ nlp = spacy.load(“en_core_web_sm”)
23 except OSError:
24 │ subprocess.run(["python", "-m", "spacy", "download", "en_core_web_
25 │ nlp = spacy.load("en_core_web_sm")
/home/adminuser/venv/lib/python3.12/site-packages/spacy/init.py:51 in
load
48 │ │ keyed by section values in dot notation.
49 │ RETURNS (Language): The loaded nlp object.
50 │ """
❱ 51 │ return util.load_model(
52 │ │ name,
53 │ │ vocab=vocab,
54 │ │ disable=disable,
/home/adminuser/venv/lib/python3.12/site-packages/spacy/util.py:472 in
load_model
469 │ │ return load_model_from_path(name, **kwargs) # type: ignore[a
470 │ if name in OLD_MODEL_SHORTCUTS:
471 │ │ raise IOError(Errors.E941.format(name=name, full=OLD_MODEL_SH
❱ 472 │ raise IOError(Errors.E050.format(name=name))
473
474
475 def load_model_from_package(
────────────────────────────────────────────────────────────────────────────────
OSError: [E050] Can’t find model ‘en_core_web_sm’. It doesn’t seem to be a
Python package or a valid path to a data directory.
During handling of the above exception, another exception occurred:
────────────────────── Traceback (most recent call last) ───────────────────────
/home/adminuser/venv/lib/python3.12/site-packages/streamlit/runtime/scriptru
nner/exec_code.py:121 in exec_func_with_error_handling
/home/adminuser/venv/lib/python3.12/site-packages/streamlit/runtime/scriptru
nner/script_runner.py:591 in code_to_exec
/mount/src/nlp_app/NLP.py:25 in
22 │ nlp = spacy.load("en_core_web_sm")
23 except OSError:
24 │ subprocess.run(["python", "-m", "spacy", "download", "en_core_web_
❱ 25 │ nlp = spacy.load(“en_core_web_sm”)
26
27
28 def tokenize_text(text):
/home/adminuser/venv/lib/python3.12/site-packages/spacy/init.py:51 in
load
48 │ │ keyed by section values in dot notation.
49 │ RETURNS (Language): The loaded nlp object.
50 │ """
❱ 51 │ return util.load_model(
52 │ │ name,
53 │ │ vocab=vocab,
54 │ │ disable=disable,
/home/adminuser/venv/lib/python3.12/site-packages/spacy/util.py:472 in
load_model
469 │ │ return load_model_from_path(name, **kwargs) # type: ignore[a
470 │ if name in OLD_MODEL_SHORTCUTS:
471 │ │ raise IOError(Errors.E941.format(name=name, full=OLD_MODEL_SH
❱ 472 │ raise IOError(Errors.E050.format(name=name))
473
474
475 def load_model_from_package(
────────────────────────────────────────────────────────────────────────────────
OSError: [E050] Can’t find model ‘en_core_web_sm’. It doesn’t seem to be a
Python package or a valid path to a data directory.
2025-02-20 20:14:01.361 503 GET /script-health-check (127.0.0.1) 437.31ms
[nltk_data] Downloading package punkt to /home/appuser/nltk_data…
[nltk_data] Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data] /home/appuser/nltk_data…
[nltk_data] Package stopwords is already up-to-date!
/usr/local/bin/python: No module named spacy
────────────────────── Traceback (most recent call last) ───────────────────────
/mount/src/nlp_app/NLP.py:22 in
19
20 # Ensure the spaCy model is downloaded
21 try:
❱ 22 │ nlp = spacy.load(“en_core_web_sm”)
23 except OSError:
24 │ subprocess.run(["python", "-m", "spacy", "download", "en_core_web_
25 │ nlp = spacy.load("en_core_web_sm")
/home/adminuser/venv/lib/python3.12/site-packages/spacy/init.py:51 in
load
48 │ │ keyed by section values in dot notation.
49 │ RETURNS (Language): The loaded nlp object.
50 │ """
❱ 51 │ return util.load_model(
52 │ │ name,
53 │ │ vocab=vocab,
54 │ │ disable=disable,
/home/adminuser/venv/lib/python3.12/site-packages/spacy/util.py:472 in
load_model
469 │ │ return load_model_from_path(name, **kwargs) # type: ignore[a
470 │ if name in OLD_MODEL_SHORTCUTS:
471 │ │ raise IOError(Errors.E941.format(name=name, full=OLD_MODEL_SH
❱ 472 │ raise IOError(Errors.E050.format(name=name))
473
474
475 def load_model_from_package(
────────────────────────────────────────────────────────────────────────────────
OSError: [E050] Can’t find model ‘en_core_web_sm’. It doesn’t seem to be a
Python package or a valid path to a data directory.
During handling of the above exception, another exception occurred:
────────────────────── Traceback (most recent call last) ───────────────────────
/home/adminuser/venv/lib/python3.12/site-packages/streamlit/runtime/scriptru
nner/exec_code.py:121 in exec_func_with_error_handling
/home/adminuser/venv/lib/python3.12/site-packages/streamlit/runtime/scriptru
nner/script_runner.py:591 in code_to_exec
/mount/src/nlp_app/NLP.py:25 in
22 │ nlp = spacy.load("en_core_web_sm")
23 except OSError:
24 │ subprocess.run(["python", "-m", "spacy", "download", "en_core_web_
❱ 25 │ nlp = spacy.load(“en_core_web_sm”)
26
27
28 def tokenize_text(text):
/home/adminuser/venv/lib/python3.12/site-packages/spacy/init.py:51 in
load
48 │ │ keyed by section values in dot notation.
49 │ RETURNS (Language): The loaded nlp object.
50 │ """
❱ 51 │ return util.load_model(
52 │ │ name,
53 │ │ vocab=vocab,
54 │ │ disable=disable,
/home/adminuser/venv/lib/python3.12/site-packages/spacy/util.py:472 in
load_model
469 │ │ return load_model_from_path(name, **kwargs) # type: ignore[a
470 │ if name in OLD_MODEL_SHORTCUTS:
471 │ │ raise IOError(Errors.E941.format(name=name, full=OLD_MODEL_SH
❱ 472 │ raise IOError(Errors.E050.format(name=name))
473
474
475 def load_model_from_package(
────────────────────────────────────────────────────────────────────────────────
OSError: [E050] Can’t find model ‘en_core_web_sm’. It doesn’t seem to be a
Python package or a valid path to a data directory.
2025-02-20 20:14:06.373 503 GET /script-health-check (127.0.0.1) 438.65ms
[nltk_data] Downloading package punkt to /home/appuser/nltk_data...
[nltk_data] Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data] /home/appuser/nltk_data...
[nltk_data] Package stopwords is already up-to-date!
/usr/local/bin/python: No module named spacy
────────────────────── Traceback (most recent call last) ───────────────────────
/mount/src/nlp_app/NLP.py:22 in <module>
19
20 # Ensure the spaCy model is downloaded
21 try:
❱ 22 │ nlp = spacy.load("en_core_web_sm")
23 except OSError:
24 │ subprocess.run(["python", "-m", "spacy", "download", "en_core_web_
25 │ nlp = spacy.load("en_core_web_sm")
/home/adminuser/venv/lib/python3.12/site-packages/spacy/__init__.py:51 in
load
48 │ │ keyed by section values in dot notation.
49 │ RETURNS (Language): The loaded nlp object.
50 │ """
❱ 51 │ return util.load_model(
52 │ │ name,
53 │ │ vocab=vocab,
54 │ │ disable=disable,
/home/adminuser/venv/lib/python3.12/site-packages/spacy/util.py:472 in
load_model
469 │ │ return load_model_from_path(name, **kwargs) # type: ignore[a
470 │ if name in OLD_MODEL_SHORTCUTS:
471 │ │ raise IOError(Errors.E941.format(name=name, full=OLD_MODEL_SH
❱ 472 │ raise IOError(Errors.E050.format(name=name))
473
474
475 def load_model_from_package(
────────────────────────────────────────────────────────────────────────────────
OSError: [E050] Can't find model 'en_core_web_sm'. It doesn't seem to be a
Python package or a valid path to a data directory.
During handling of the above exception, another exception occurred:
────────────────────── Traceback (most recent call last) ───────────────────────
/home/adminuser/venv/lib/python3.12/site-packages/streamlit/runtime/scriptru
nner/exec_code.py:121 in exec_func_with_error_handling
/home/adminuser/venv/lib/python3.12/site-packages/streamlit/runtime/scriptru
nner/script_runner.py:591 in code_to_exec
/mount/src/nlp_app/NLP.py:25 in <module>
22 │ nlp = spacy.load("en_core_web_sm")
23 except OSError:
24 │ subprocess.run(["python", "-m", "spacy", "download", "en_core_web_
❱ 25 │ nlp = spacy.load("en_core_web_sm")
26
27
28 def tokenize_text(text):
/home/adminuser/venv/lib/python3.12/site-packages/spacy/__init__.py:51 in
load
48 │ │ keyed by section values in dot notation.
49 │ RETURNS (Language): The loaded nlp object.
50 │ """
❱ 51 │ return util.load_model(
52 │ │ name,
53 │ │ vocab=vocab,
54 │ │ disable=disable,
/home/adminuser/venv/lib/python3.12/site-packages/spacy/util.py:472 in
load_model
469 │ │ return load_model_from_path(name, **kwargs) # type: ignore[a
470 │ if name in OLD_MODEL_SHORTCUTS:
471 │ │ raise IOError(Errors.E941.format(name=name, full=OLD_MODEL_SH
❱ 472 │ raise IOError(Errors.E050.format(name=name))
473
474
475 def load_model_from_package(
────────────────────────────────────────────────────────────────────────────────
OSError: [E050] Can't find model 'en_core_web_sm'. It doesn't seem to be a
Python package or a valid path to a data directory.
2025-02-20 20:14:01.361 503 GET /script-health-check (127.0.0.1) 437.31ms
[nltk_data] Downloading package punkt to /home/appuser/nltk_data...
[nltk_data] Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data] /home/appuser/nltk_data...
[nltk_data] Package stopwords is already up-to-date!
/usr/local/bin/python: No module named spacy
────────────────────── Traceback (most recent call last) ───────────────────────
/mount/src/nlp_app/NLP.py:22 in <module>
19
20 # Ensure the spaCy model is downloaded
21 try:
❱ 22 │ nlp = spacy.load("en_core_web_sm")
23 except OSError:
24 │ subprocess.run(["python", "-m", "spacy", "download", "en_core_web_
25 │ nlp = spacy.load("en_core_web_sm")
/home/adminuser/venv/lib/python3.12/site-packages/spacy/__init__.py:51 in
load
48 │ │ keyed by section values in dot notation.
49 │ RETURNS (Language): The loaded nlp object.
50 │ """
❱ 51 │ return util.load_model(
52 │ │ name,
53 │ │ vocab=vocab,
54 │ │ disable=disable,
/home/adminuser/venv/lib/python3.12/site-packages/spacy/util.py:472 in
load_model
469 │ │ return load_model_from_path(name, **kwargs) # type: ignore[a
470 │ if name in OLD_MODEL_SHORTCUTS:
471 │ │ raise IOError(Errors.E941.format(name=name, full=OLD_MODEL_SH
❱ 472 │ raise IOError(Errors.E050.format(name=name))
473
474
475 def load_model_from_package(
────────────────────────────────────────────────────────────────────────────────
OSError: [E050] Can't find model 'en_core_web_sm'. It doesn't seem to be a
Python package or a valid path to a data directory.
During handling of the above exception, another exception occurred:
────────────────────── Traceback (most recent call last) ───────────────────────
/home/adminuser/venv/lib/python3.12/site-packages/streamlit/runtime/scriptru
nner/exec_code.py:121 in exec_func_with_error_handling
/home/adminuser/venv/lib/python3.12/site-packages/streamlit/runtime/scriptru
nner/script_runner.py:591 in code_to_exec
/mount/src/nlp_app/NLP.py:25 in <module>
22 │ nlp = spacy.load("en_core_web_sm")
23 except OSError:
24 │ subprocess.run(["python", "-m", "spacy", "download", "en_core_web_
❱ 25 │ nlp = spacy.load("en_core_web_sm")
26
27
28 def tokenize_text(text):
/home/adminuser/venv/lib/python3.12/site-packages/spacy/__init__.py:51 in
load
48 │ │ keyed by section values in dot notation.
49 │ RETURNS (Language): The loaded nlp object.
50 │ """
❱ 51 │ return util.load_model(
52 │ │ name,
53 │ │ vocab=vocab,
54 │ │ disable=disable,
/home/adminuser/venv/lib/python3.12/site-packages/spacy/util.py:472 in
load_model
469 │ │ return load_model_from_path(name, **kwargs) # type: ignore[a
470 │ if name in OLD_MODEL_SHORTCUTS:
471 │ │ raise IOError(Errors.E941.format(name=name, full=OLD_MODEL_SH
❱ 472 │ raise IOError(Errors.E050.format(name=name))
473
474
475 def load_model_from_package(
────────────────────────────────────────────────────────────────────────────────
OSError: [E050] Can't find model 'en_core_web_sm'. It doesn't seem to be a
Python package or a valid path to a data directory.
2025-02-20 20:14:06.373 503 GET /script-health-check (127.0.0.1) 438.65ms
4. Streamlit version: (not stated — the full app source is pasted below instead)
import streamlit as st  # Create the web-based interactive UI
import nltk  # Provides tokenization of words and sentences

# Download the required NLTK data packages (no-op if already up to date).
nltk.download('punkt')
nltk.download('stopwords')

from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.corpus import stopwords
import spacy  # Handles part-of-speech tagging and named-entity recognition
from textblob import TextBlob  # Perform sentiment analysis
import pdfplumber  # Extract text from PDF documents
from gtts import gTTS  # Convert text into audio/speech
import os
import subprocess
import sys

# Ensure the spaCy English model is available, downloading it on first run.
# BUG FIX: the download must be run with sys.executable, not the bare name
# "python". On Streamlit Community Cloud, "python" resolves to
# /usr/local/bin/python — a different interpreter than the venv running this
# app — which is why the log shows "/usr/local/bin/python: No module named
# spacy" and the retry still fails with OSError [E050]. Using the currently
# running interpreter installs en_core_web_sm into the correct environment.
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    subprocess.run(
        [sys.executable, "-m", "spacy", "download", "en_core_web_sm"],
        check=True,  # fail loudly if the download itself errors out
    )
    nlp = spacy.load("en_core_web_sm")
def tokenize_text(text):
    """Split *text* into word and sentence tokens.

    Returns a tuple ``(words, sentences)`` as produced by NLTK's
    ``word_tokenize`` and ``sent_tokenize``.
    """
    return word_tokenize(text), sent_tokenize(text)
def remove_stopwords(text):
    """Return *text* with English stop words removed, rejoined by spaces.

    The stop-word comparison is case-insensitive; the surviving tokens keep
    their original casing.
    """
    stop_words = set(stopwords.words('english'))
    kept = (token for token in word_tokenize(text)
            if token.lower() not in stop_words)
    return " ".join(kept)
def pos_tagging(text):
    """Tag each token of *text* with its part of speech using spaCy.

    Returns a list of ``(token_text, pos_tag)`` tuples.
    """
    return [(tok.text, tok.pos_) for tok in nlp(text)]
def name_entity_recognition(text):
    """Extract named entities from *text* with the spaCy pipeline.

    Returns a list of ``(entity_text, entity_label)`` tuples.
    """
    document = nlp(text)
    entities = []
    for ent in document.ents:
        entities.append((ent.text, ent.label_))
    return entities
def sentiment_analysis(text):
    """Return the TextBlob sentiment polarity of *text*.

    Polarity ranges from -1.0 (most negative) to 1.0 (most positive).
    """
    return TextBlob(text).sentiment.polarity
def pdf_to_audio(pdf_file):
    """Convert a text-based PDF into an MP3 audiobook via gTTS.

    Returns the path of the saved audio file, or ``None`` when no text
    could be extracted (e.g. a scanned/image-only PDF).
    """
    page_texts = []
    with pdfplumber.open(pdf_file) as pdf:
        for page in pdf.pages:
            extracted = page.extract_text()
            if extracted:
                page_texts.append(extracted)
    # Same concatenation as before: each page's text followed by a space.
    pdf_text = "".join(chunk + " " for chunk in page_texts)
    if not pdf_text.strip():
        return None  # nothing to read aloud
    # NOTE(review): lang='fr' produces a French voice even though the rest of
    # the app processes English text — confirm this is intentional.
    speech = gTTS(text=pdf_text, lang='fr')
    audio_path = "audiobook.mp3"
    speech.save(audio_path)
    return audio_path
# --- Streamlit UI -----------------------------------------------------------
st.set_page_config(page_title="NLP Mini Project", layout="wide")  # Page title + wide layout
st.title("NLP Mini Project")
st.write("Explore various NLP tasks with this interactive app")

# The sidebar selection drives which branch of the UI renders below.
option = st.sidebar.selectbox(
    "Select an NLP task:",
    ['Tokenization', 'Stop Word Removal', 'POS Tagging',
     'Name Entity Recognition', 'Sentiment Analysis', 'PDF to Audio'],
)

if option == "PDF to Audio":
    st.header("PDF to Audio Conversion")
    uploaded_file = st.file_uploader(
        "Upload a PDF file",
        type=["pdf"],
        help="Only text-based PDFs are supported",
    )
    if uploaded_file is not None:
        audio_file = pdf_to_audio(uploaded_file)
        if audio_file:
            st.audio(audio_file, format='audio/mp3')
            st.success("Audio Generated Successfully!")
        else:
            # pdf_to_audio returns None when no text could be extracted.
            st.error("Could not extract text from the PDF. Ensure it is not a scanned document")
else:
    st.header(f'{option}')  # Display the selected NLP task
    text_input = st.text_area('Enter text here')  # User input
    if st.button('Run NLP task'):
        if not text_input.strip():
            st.warning('Please enter some text before running an NLP task')
        else:
            # Dispatch to the helper matching the chosen task.
            if option == 'Tokenization':
                words, sentences = tokenize_text(text_input)
                st.write('Words:', words)
                st.write('Sentences:', sentences)
            elif option == 'Stop Word Removal':
                st.write('Filtered Text:', remove_stopwords(text_input))
            elif option == 'POS Tagging':
                st.write('POS Tags:', pos_tagging(text_input))
            elif option == 'Name Entity Recognition':
                st.write('Named Entities:', name_entity_recognition(text_input))
            elif option == 'Sentiment Analysis':
                polarity = sentiment_analysis(text_input)
                st.write('Sentiment Score:', polarity)