Streamlit-Cloud will not download a spacy model during the building phase

If you’re creating a debugging post, please include the following info:

  1. Share the link to the public app (deployed on Community Cloud).
  2. Share the link to your app’s public GitHub repository (including a requirements file).
  3. Share the full text of the error message (not a screenshot).
  4. Share the Streamlit and Python versions.

1.link of the app: Streamlit
2.link of app Github repository with requirements: GitHub - Stephaniette/NLP_app
3. full text of the error: ```
[nltk_data] Downloading package punkt to /home/appuser/nltk_data…


[nltk_data] Package punkt is already up-to-date!


[nltk_data] Downloading package stopwords to


[nltk_data] /home/appuser/nltk_data…


[nltk_data] Package stopwords is already up-to-date!


/usr/local/bin/python: No module named spacy


────────────────────── Traceback (most recent call last) ───────────────────────


/mount/src/nlp_app/NLP.py:22 in



 19                                                                         

 20 # Ensure the spaCy model is downloaded                                  

 21 try:                                                                    

❱ 22 │ nlp = spacy.load(“en_core_web_sm”)


 23 except OSError:                                                         

 24 │   subprocess.run(["python", "-m", "spacy", "download", "en_core_web_  

 25 │   nlp = spacy.load("en_core_web_sm")                                  


/home/adminuser/venv/lib/python3.12/site-packages/spacy/init.py:51 in


load



48 │   │   keyed by section values in dot notation.                         

49 │   RETURNS (Language): The loaded nlp object.                           

50 │   """                                                                  

❱ 51 │ return util.load_model(


52 │   │   name,                                                            

53 │   │   vocab=vocab,                                                     

54 │   │   disable=disable,                                                 


/home/adminuser/venv/lib/python3.12/site-packages/spacy/util.py:472 in


load_model



 469 │   │   return load_model_from_path(name, **kwargs)  # type: ignore[a  

 470 │   if name in OLD_MODEL_SHORTCUTS:                                    

 471 │   │   raise IOError(Errors.E941.format(name=name, full=OLD_MODEL_SH  

❱ 472 │ raise IOError(Errors.E050.format(name=name))


 473                                                                        

 474                                                                        

 475 def load_model_from_package(                                           

────────────────────────────────────────────────────────────────────────────────


OSError: [E050] Can’t find model ‘en_core_web_sm’. It doesn’t seem to be a


Python package or a valid path to a data directory.



During handling of the above exception, another exception occurred:



────────────────────── Traceback (most recent call last) ───────────────────────


/home/adminuser/venv/lib/python3.12/site-packages/streamlit/runtime/scriptru


nner/exec_code.py:121 in exec_func_with_error_handling



/home/adminuser/venv/lib/python3.12/site-packages/streamlit/runtime/scriptru


nner/script_runner.py:591 in code_to_exec



/mount/src/nlp_app/NLP.py:25 in



 22 │   nlp = spacy.load("en_core_web_sm")                                  

 23 except OSError:                                                         

 24 │   subprocess.run(["python", "-m", "spacy", "download", "en_core_web_  

❱ 25 │ nlp = spacy.load(“en_core_web_sm”)


 26                                                                         

 27                                                                         

 28 def tokenize_text(text):                                                


/home/adminuser/venv/lib/python3.12/site-packages/spacy/init.py:51 in


load



48 │   │   keyed by section values in dot notation.                         

49 │   RETURNS (Language): The loaded nlp object.                           

50 │   """                                                                  

❱ 51 │ return util.load_model(


52 │   │   name,                                                            

53 │   │   vocab=vocab,                                                     

54 │   │   disable=disable,                                                 


/home/adminuser/venv/lib/python3.12/site-packages/spacy/util.py:472 in


load_model



 469 │   │   return load_model_from_path(name, **kwargs)  # type: ignore[a  

 470 │   if name in OLD_MODEL_SHORTCUTS:                                    

 471 │   │   raise IOError(Errors.E941.format(name=name, full=OLD_MODEL_SH  

❱ 472 │ raise IOError(Errors.E050.format(name=name))


 473                                                                        

 474                                                                        

 475 def load_model_from_package(                                           

────────────────────────────────────────────────────────────────────────────────


OSError: [E050] Can’t find model ‘en_core_web_sm’. It doesn’t seem to be a


Python package or a valid path to a data directory.


2025-02-20 20:14:01.361 503 GET /script-health-check (127.0.0.1) 437.31ms


[nltk_data] Downloading package punkt to /home/appuser/nltk_data…


[nltk_data] Package punkt is already up-to-date!


[nltk_data] Downloading package stopwords to


[nltk_data] /home/appuser/nltk_data…


[nltk_data] Package stopwords is already up-to-date!


/usr/local/bin/python: No module named spacy


────────────────────── Traceback (most recent call last) ───────────────────────


/mount/src/nlp_app/NLP.py:22 in



 19                                                                         

 20 # Ensure the spaCy model is downloaded                                  

 21 try:                                                                    

❱ 22 │ nlp = spacy.load(“en_core_web_sm”)


 23 except OSError:                                                         

 24 │   subprocess.run(["python", "-m", "spacy", "download", "en_core_web_  

 25 │   nlp = spacy.load("en_core_web_sm")                                  


/home/adminuser/venv/lib/python3.12/site-packages/spacy/init.py:51 in


load



48 │   │   keyed by section values in dot notation.                         

49 │   RETURNS (Language): The loaded nlp object.                           

50 │   """                                                                  

❱ 51 │ return util.load_model(


52 │   │   name,                                                            

53 │   │   vocab=vocab,                                                     

54 │   │   disable=disable,                                                 


/home/adminuser/venv/lib/python3.12/site-packages/spacy/util.py:472 in


load_model



 469 │   │   return load_model_from_path(name, **kwargs)  # type: ignore[a  

 470 │   if name in OLD_MODEL_SHORTCUTS:                                    

 471 │   │   raise IOError(Errors.E941.format(name=name, full=OLD_MODEL_SH  

❱ 472 │ raise IOError(Errors.E050.format(name=name))


 473                                                                        

 474                                                                        

 475 def load_model_from_package(                                           

────────────────────────────────────────────────────────────────────────────────


OSError: [E050] Can’t find model ‘en_core_web_sm’. It doesn’t seem to be a


Python package or a valid path to a data directory.



During handling of the above exception, another exception occurred:



────────────────────── Traceback (most recent call last) ───────────────────────


/home/adminuser/venv/lib/python3.12/site-packages/streamlit/runtime/scriptru


nner/exec_code.py:121 in exec_func_with_error_handling



/home/adminuser/venv/lib/python3.12/site-packages/streamlit/runtime/scriptru


nner/script_runner.py:591 in code_to_exec



/mount/src/nlp_app/NLP.py:25 in



 22 │   nlp = spacy.load("en_core_web_sm")                                  

 23 except OSError:                                                         

 24 │   subprocess.run(["python", "-m", "spacy", "download", "en_core_web_  

❱ 25 │ nlp = spacy.load(“en_core_web_sm”)


 26                                                                         

 27                                                                         

 28 def tokenize_text(text):                                                


/home/adminuser/venv/lib/python3.12/site-packages/spacy/init.py:51 in


load



48 │   │   keyed by section values in dot notation.                         

49 │   RETURNS (Language): The loaded nlp object.                           

50 │   """                                                                  

❱ 51 │ return util.load_model(


52 │   │   name,                                                            

53 │   │   vocab=vocab,                                                     

54 │   │   disable=disable,                                                 


/home/adminuser/venv/lib/python3.12/site-packages/spacy/util.py:472 in


load_model



 469 │   │   return load_model_from_path(name, **kwargs)  # type: ignore[a  

 470 │   if name in OLD_MODEL_SHORTCUTS:                                    

 471 │   │   raise IOError(Errors.E941.format(name=name, full=OLD_MODEL_SH  

❱ 472 │ raise IOError(Errors.E050.format(name=name))


 473                                                                        

 474                                                                        

 475 def load_model_from_package(                                           

────────────────────────────────────────────────────────────────────────────────


OSError: [E050] Can’t find model ‘en_core_web_sm’. It doesn’t seem to be a


Python package or a valid path to a data directory.


2025-02-20 20:14:06.373 503 GET /script-health-check (127.0.0.1) 438.65ms

[nltk_data] Downloading package punkt to /home/appuser/nltk_data...

[nltk_data]   Package punkt is already up-to-date!

[nltk_data] Downloading package stopwords to

[nltk_data]     /home/appuser/nltk_data...

[nltk_data]   Package stopwords is already up-to-date!

/usr/local/bin/python: No module named spacy

────────────────────── Traceback (most recent call last) ───────────────────────

  /mount/src/nlp_app/NLP.py:22 in <module>                                      

                                                                                

     19                                                                         

     20 # Ensure the spaCy model is downloaded                                  

     21 try:                                                                    

  ❱  22 │   nlp = spacy.load("en_core_web_sm")                                  

     23 except OSError:                                                         

     24 │   subprocess.run(["python", "-m", "spacy", "download", "en_core_web_  

     25 │   nlp = spacy.load("en_core_web_sm")                                  

                                                                                

  /home/adminuser/venv/lib/python3.12/site-packages/spacy/__init__.py:51 in     

  load                                                                          

                                                                                

    48 │   │   keyed by section values in dot notation.                         

    49 │   RETURNS (Language): The loaded nlp object.                           

    50 │   """                                                                  

  ❱ 51 │   return util.load_model(                                              

    52 │   │   name,                                                            

    53 │   │   vocab=vocab,                                                     

    54 │   │   disable=disable,                                                 

                                                                                

  /home/adminuser/venv/lib/python3.12/site-packages/spacy/util.py:472 in        

  load_model                                                                    

                                                                                

     469 │   │   return load_model_from_path(name, **kwargs)  # type: ignore[a  

     470 │   if name in OLD_MODEL_SHORTCUTS:                                    

     471 │   │   raise IOError(Errors.E941.format(name=name, full=OLD_MODEL_SH  

  ❱  472 │   raise IOError(Errors.E050.format(name=name))                       

     473                                                                        

     474                                                                        

     475 def load_model_from_package(                                           

────────────────────────────────────────────────────────────────────────────────

OSError: [E050] Can't find model 'en_core_web_sm'. It doesn't seem to be a 

Python package or a valid path to a data directory.


During handling of the above exception, another exception occurred:


────────────────────── Traceback (most recent call last) ───────────────────────

  /home/adminuser/venv/lib/python3.12/site-packages/streamlit/runtime/scriptru  

  nner/exec_code.py:121 in exec_func_with_error_handling                        

                                                                                

  /home/adminuser/venv/lib/python3.12/site-packages/streamlit/runtime/scriptru  

  nner/script_runner.py:591 in code_to_exec                                     

                                                                                

  /mount/src/nlp_app/NLP.py:25 in <module>                                      

                                                                                

     22 │   nlp = spacy.load("en_core_web_sm")                                  

     23 except OSError:                                                         

     24 │   subprocess.run(["python", "-m", "spacy", "download", "en_core_web_  

  ❱  25 │   nlp = spacy.load("en_core_web_sm")                                  

     26                                                                         

     27                                                                         

     28 def tokenize_text(text):                                                

                                                                                

  /home/adminuser/venv/lib/python3.12/site-packages/spacy/__init__.py:51 in     

  load                                                                          

                                                                                

    48 │   │   keyed by section values in dot notation.                         

    49 │   RETURNS (Language): The loaded nlp object.                           

    50 │   """                                                                  

  ❱ 51 │   return util.load_model(                                              

    52 │   │   name,                                                            

    53 │   │   vocab=vocab,                                                     

    54 │   │   disable=disable,                                                 

                                                                                

  /home/adminuser/venv/lib/python3.12/site-packages/spacy/util.py:472 in        

  load_model                                                                    

                                                                                

     469 │   │   return load_model_from_path(name, **kwargs)  # type: ignore[a  

     470 │   if name in OLD_MODEL_SHORTCUTS:                                    

     471 │   │   raise IOError(Errors.E941.format(name=name, full=OLD_MODEL_SH  

  ❱  472 │   raise IOError(Errors.E050.format(name=name))                       

     473                                                                        

     474                                                                        

     475 def load_model_from_package(                                           

────────────────────────────────────────────────────────────────────────────────

OSError: [E050] Can't find model 'en_core_web_sm'. It doesn't seem to be a 

Python package or a valid path to a data directory.

2025-02-20 20:14:01.361 503 GET /script-health-check (127.0.0.1) 437.31ms

[nltk_data] Downloading package punkt to /home/appuser/nltk_data...

[nltk_data]   Package punkt is already up-to-date!

[nltk_data] Downloading package stopwords to

[nltk_data]     /home/appuser/nltk_data...

[nltk_data]   Package stopwords is already up-to-date!

/usr/local/bin/python: No module named spacy

────────────────────── Traceback (most recent call last) ───────────────────────

  /mount/src/nlp_app/NLP.py:22 in <module>                                      

                                                                                

     19                                                                         

     20 # Ensure the spaCy model is downloaded                                  

     21 try:                                                                    

  ❱  22 │   nlp = spacy.load("en_core_web_sm")                                  

     23 except OSError:                                                         

     24 │   subprocess.run(["python", "-m", "spacy", "download", "en_core_web_  

     25 │   nlp = spacy.load("en_core_web_sm")                                  

                                                                                

  /home/adminuser/venv/lib/python3.12/site-packages/spacy/__init__.py:51 in     

  load                                                                          

                                                                                

    48 │   │   keyed by section values in dot notation.                         

    49 │   RETURNS (Language): The loaded nlp object.                           

    50 │   """                                                                  

  ❱ 51 │   return util.load_model(                                              

    52 │   │   name,                                                            

    53 │   │   vocab=vocab,                                                     

    54 │   │   disable=disable,                                                 

                                                                                

  /home/adminuser/venv/lib/python3.12/site-packages/spacy/util.py:472 in        

  load_model                                                                    

                                                                                

     469 │   │   return load_model_from_path(name, **kwargs)  # type: ignore[a  

     470 │   if name in OLD_MODEL_SHORTCUTS:                                    

     471 │   │   raise IOError(Errors.E941.format(name=name, full=OLD_MODEL_SH  

  ❱  472 │   raise IOError(Errors.E050.format(name=name))                       

     473                                                                        

     474                                                                        

     475 def load_model_from_package(                                           

────────────────────────────────────────────────────────────────────────────────

OSError: [E050] Can't find model 'en_core_web_sm'. It doesn't seem to be a 

Python package or a valid path to a data directory.


During handling of the above exception, another exception occurred:


────────────────────── Traceback (most recent call last) ───────────────────────

  /home/adminuser/venv/lib/python3.12/site-packages/streamlit/runtime/scriptru  

  nner/exec_code.py:121 in exec_func_with_error_handling                        

                                                                                

  /home/adminuser/venv/lib/python3.12/site-packages/streamlit/runtime/scriptru  

  nner/script_runner.py:591 in code_to_exec                                     

                                                                                

  /mount/src/nlp_app/NLP.py:25 in <module>                                      

                                                                                

     22 │   nlp = spacy.load("en_core_web_sm")                                  

     23 except OSError:                                                         

     24 │   subprocess.run(["python", "-m", "spacy", "download", "en_core_web_  

  ❱  25 │   nlp = spacy.load("en_core_web_sm")                                  

     26                                                                         

     27                                                                         

     28 def tokenize_text(text):                                                

                                                                                

  /home/adminuser/venv/lib/python3.12/site-packages/spacy/__init__.py:51 in     

  load                                                                          

                                                                                

    48 │   │   keyed by section values in dot notation.                         

    49 │   RETURNS (Language): The loaded nlp object.                           

    50 │   """                                                                  

  ❱ 51 │   return util.load_model(                                              

    52 │   │   name,                                                            

    53 │   │   vocab=vocab,                                                     

    54 │   │   disable=disable,                                                 

                                                                                

  /home/adminuser/venv/lib/python3.12/site-packages/spacy/util.py:472 in        

  load_model                                                                    

                                                                                

     469 │   │   return load_model_from_path(name, **kwargs)  # type: ignore[a  

     470 │   if name in OLD_MODEL_SHORTCUTS:                                    

     471 │   │   raise IOError(Errors.E941.format(name=name, full=OLD_MODEL_SH  

  ❱  472 │   raise IOError(Errors.E050.format(name=name))                       

     473                                                                        

     474                                                                        

     475 def load_model_from_package(                                           

────────────────────────────────────────────────────────────────────────────────

OSError: [E050] Can't find model 'en_core_web_sm'. It doesn't seem to be a 

Python package or a valid path to a data directory.

2025-02-20 20:14:06.373 503 GET /script-health-check (127.0.0.1) 438.65ms

4. streamlit version: import streamlit as st  # Create the web-based interactive UI
import nltk  # Provides tokenization of words and sentences
# Downloading required nltk files
nltk.download('punkt')
nltk.download('stopwords')
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.corpus import stopwords
import spacy  # Handles part-of-speech tagging
from textblob import TextBlob  # Perform sentiment analysis
import pdfplumber  # Extract text from PDF documents
from gtts import gTTS  # Convert text into audio/speech
import os



#nlp = spacy.load('en_core_web_sm')  # python -m spacy download en_core_web_sm
import subprocess
import spacy

# Ensure the spaCy model is downloaded
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    subprocess.run(["python", "-m", "spacy", "download", "en_core_web_sm"])
    nlp = spacy.load("en_core_web_sm")


def tokenize_text(text):
    words = word_tokenize(text)
    sentences = sent_tokenize(text)
    return words, sentences

def remove_stopwords(text):
    stop_words = set(stopwords.words('english'))
    words = word_tokenize(text)
    filtered_text = [word for word in words if word.lower() not in stop_words]
    return " ".join(filtered_text)

def pos_tagging(text):
    doc = nlp(text)
    return [(token.text, token.pos_) for token in doc]

def name_entity_recognition(text):
    doc = nlp(text)
    return [(ent.text, ent.label_) for ent in doc.ents]

def sentiment_analysis(text):
    analysis = TextBlob(text)  # Perform sentiment analysis
    return analysis.sentiment.polarity  # Polarity ranges from (-1 to 1)

def pdf_to_audio(pdf_file):
    pdf_text = ""  # Initialize empty string for text storage
    with pdfplumber.open(pdf_file) as pdf:
        for page in pdf.pages:
            text = page.extract_text()
            if text:
                pdf_text += text + " "  # Append text correctly
    if not pdf_text.strip():
        return None  # Handle empty PDF content
    
    tts = gTTS(text=pdf_text, lang='fr')
    audio_path = "audiobook.mp3"
    tts.save(audio_path)
    return audio_path

# Streamlit App
st.set_page_config(page_title="NLP Mini Project", layout="wide")  # Set the page title and layout
st.title("NLP Mini Project")  # Set the heading
st.write("Explore various NLP tasks with this interactive app")

option = st.sidebar.selectbox("Select an NLP task:", ['Tokenization', 'Stop Word Removal', 'POS Tagging', 'Name Entity Recognition', 'Sentiment Analysis', 'PDF to Audio'])

if option == "PDF to Audio":
    st.header("PDF to Audio Conversion")
    uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"], help="Only text-based PDFs are supported")
    if uploaded_file is not None:
        audio_file = pdf_to_audio(uploaded_file)
        if audio_file:
            st.audio(audio_file, format='audio/mp3')
            st.success("Audio Generated Successfully!")
        else:
            st.error("Could not extract text from the PDF. Ensure it is not a scanned document")
else:
    st.header(f'{option}')  # Display the selected NLP task
    text_input = st.text_area('Enter text here')  # User input
    
    if st.button('Run NLP task'):
        if not text_input.strip():
            st.warning('Please enter some text before running an NLP task')
        else:
            if option == 'Tokenization':
                words, sentences = tokenize_text(text_input)
                st.write('Words:', words)
                st.write('Sentences:', sentences)
            elif option == 'Stop Word Removal':
                st.write('Filtered Text:', remove_stopwords(text_input))
            elif option == 'POS Tagging':
                st.write('POS Tags:', pos_tagging(text_input))
            elif option == 'Name Entity Recognition':
                st.write('Named Entities:', name_entity_recognition(text_input))
            elif option == 'Sentiment Analysis':
                polarity = sentiment_analysis(text_input)
                st.write('Sentiment Score:', polarity)