NameError: name 'pydub' is not defined

Wassup Guys,

I try to create a document file uploader that also supports audio files (wav, mp3).
(Want to save uploaded file temporarily locally so I can use them in later in order to transcript them as text).
However, my code is below, everything needed is installed.
I get the follwing error from my application:

NameError: name ‘pydub’ is not defined

I checked my local system for installed packages (including virtualenv) and everything needed should be installed (using pip freeze)

a) local:

altair==4.1.0
appnope==0.1.2
argon2-cffi==21.3.0
argon2-cffi-bindings==21.2.0
astor==0.8.1
async-generator==1.10
attrs==21.4.0
backcall==0.2.0
backports.zoneinfo==0.2.1
base58==2.1.1
bleach==4.1.0
blinker==1.4
cachetools==4.2.4
certifi==2021.10.8
cffi==1.15.0
chardet==4.0.0
charset-normalizer==2.0.10
click==7.1.2
cryptography==36.0.1
dataclasses==0.8
decorator==5.1.1
defusedxml==0.7.1
distlib==0.3.4
docx2txt==0.8
entrypoints==0.3
filelock==3.4.1
gitdb==4.0.9
GitPython==3.1.18
google-api-core==2.3.2
google-auth==2.3.3
google-cloud-texttospeech==2.9.0
googleapis-common-protos==1.54.0
grpcio==1.43.0
grpcio-status==1.43.0
huggingface-hub==0.4.0
idna==3.3
importlib-metadata==4.8.3
importlib-resources==5.4.0
ipykernel==5.5.6
ipython==7.16.2
ipython-genutils==0.2.0
ipywidgets==7.6.5
jedi==0.17.2
Jinja2==3.0.3
joblib==1.1.0
jsonschema==3.2.0
jupyter-client==7.1.0
jupyter-core==4.9.1
jupyterlab-pygments==0.1.2
jupyterlab-widgets==1.0.2
MarkupSafe==2.0.1
mistune==0.8.4
nbclient==0.5.9
nbconvert==6.0.7
nbformat==5.1.3
nest-asyncio==1.5.4
notebook==6.4.6
numpy==1.19.5
packaging==21.3
pandas==1.1.5
pandocfilters==1.5.0
parso==0.7.1
pdfminer.six==20211012
pdfplumber==0.6.0
pexpect==4.8.0
pi==0.1.2
pickleshare==0.7.5
Pillow==8.4.0
platformdirs==2.4.0
prometheus-client==0.12.0
prompt-toolkit==3.0.24
proto-plus==1.19.8
protobuf==3.19.3
ptyprocess==0.7.0
pyarrow==6.0.1
pyasn1==0.4.8
pyasn1-modules==0.2.8
pycparser==2.21
pydeck==0.6.2
pydub==0.25.1
Pygments==2.11.2
Pympler==1.0.1
pyparsing==3.0.6
PyPDF2==1.26.0
pyrsistent==0.18.0
python-dateutil==2.8.2
pytz==2021.3
pytz-deprecation-shim==0.1.0.post0
PyYAML==6.0
pyzmq==22.3.0
regex==2021.11.10
requests==2.27.1
rsa==4.8
sacremoses==0.0.47
Send2Trash==1.8.0
simpleaudio==1.0.4
six==1.16.0
smmap==5.0.0
SpeechRecognition==3.8.1
streamlit==1.3.1
terminado==0.12.1
testpath==0.5.0
tokenizers==0.10.3
toml==0.10.2
toolz==0.11.2
torch==1.10.1
tornado==6.1
tqdm==4.62.3
traitlets==4.3.3
transformers==4.15.0
typing_extensions==4.0.1
tzdata==2021.5
tzlocal==4.1
urllib3==1.26.8
validators==0.18.2
virtualenv==20.13.0
Wand==0.6.7
wcwidth==0.2.5
webencodings==0.5.1
widgetsnbextension==3.5.2
zipp==3.6.0

b) virtualenv env:

pydub==0.25.1

What did I miss? Below is my code:

import speech_recognition as sr
import streamlit as st
import pandas as pd
import numpy as np
from pydub import AudioSegment
from io import StringIO
import streamlit.components.v1 as stc

import docx2txt
from PIL import Image
from PyPDF2 import PdfFileReader
import pdfplumber

# example sound
filename = "sound.wav"

# initialize the recognizer
r = sr.Recognizer()

def handle_uploaded_audio_file(uploaded_file):
    a = pydub.AudioSegment.from_wav(uploaded_file)

    st.write(a.sample_width)

    samples = a.get_array_of_samples()
    fp_arr = np.array(samples).T.astype(np.float32)
    fp_arr /= np.iinfo(samples.typecode).max
    st.write(fp_arr.shape)

    return fp_arr, 22050

def read_pdf(file):
    pdfReader = PdfFileReader(file)
    count = pdfReader.numPages
    all_page_text = ""
    for i in range(count):
        page = pdfReader.getPage(i)
        all_page_text += page.extractText()

    return all_page_text


def read_pdf_with_pdfplumber(file):
    with pdfplumber.open(file) as pdf:
        page = pdf.pages[0]
        return page.extract_text()

# import fitz  # this is pymupdf

# def read_pdf_with_fitz(file):
# 	with fitz.open(file) as doc:
# 		text = ""
# 		for page in doc:
# 			text += page.getText()
# 		return text

# Fxn


@st.cache
def load_image(image_file):
    img = Image.open(image_file)
    return img


def main():
    st.title("File Upload Tutorial")

    menu = ["Home", "Dataset", "DocumentFiles", "Audio", "About"]
    choice = st.sidebar.selectbox("Menu", menu)

    if choice == "Home":
        st.subheader("Home")
        image_file = st.file_uploader(
            "Upload Image", type=['png', 'jpeg', 'jpg'])
        if image_file is not None:

            # To See Details
            # st.write(type(image_file))
            # st.write(dir(image_file))
            file_details = {"Filename": image_file.name,
                            "FileType": image_file.type, "FileSize": image_file.size}
            st.write(file_details)

            img = load_image(image_file)
            st.image(img, width=250, height=250)

    elif choice == "Dataset":
        st.subheader("Dataset")
        data_file = st.file_uploader("Upload CSV", type=['csv'])
        if st.button("Process"):
            if data_file is not None:
                file_details = {"Filename": data_file.name,
                                "FileType": data_file.type, "FileSize": data_file.size}
                st.write(file_details)

                df = pd.read_csv(data_file)
                st.dataframe(df)

    elif choice == "DocumentFiles":
        st.subheader("DocumentFiles")
        docx_file = st.file_uploader(
            "Upload File", type=['txt', 'docx', 'pdf'])
        if st.button("Process"):
            if docx_file is not None:
                file_details = {"Filename": docx_file.name,
                                "FileType": docx_file.type, "FileSize": docx_file.size}
                st.write(file_details)
                # Check File Type
                if docx_file.type == "text/plain":
                    # raw_text = docx_file.read() # read as bytes
                    # st.write(raw_text)
                    # st.text(raw_text) # fails
                    st.text(str(docx_file.read(), "utf-8"))  # empty
                    # works with st.text and st.write,used for futher processing
                    raw_text = str(docx_file.read(), "utf-8")
                    # st.text(raw_text) # Works
                    st.write(raw_text)  # works
                elif docx_file.type == "application/pdf":
                    # raw_text = read_pdf(docx_file)
                    # st.write(raw_text)
                    try:
                        with pdfplumber.open(docx_file) as pdf:
                            page = pdf.pages[0]
                            st.write(page.extract_text())
                    except:
                        st.write("None")

                elif docx_file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
                    # Use the right file processor ( Docx,Docx2Text,etc)
                    # Parse in the uploadFile Class directory
                    raw_text = docx2txt.process(docx_file)
                    st.write(raw_text)
    elif choice == "Audio":
        st.subheader("Audio")
        fileObject = st.file_uploader(
            "Please upload your file ", type=['wav', 'mp3'])

        if fileObject is not None:
            id = fileObject.id
            name = fileObject.name
            type = fileObject.type
            size = fileObject.size
        
        handle_uploaded_audio_file(fileObject)
        if (type == 'mp3'):
            print('mp3')
    
        st.write(id, name, type, size)
        """ sound = AudioSegment.from_mp3("/path/to/file.mp3")
        sound.export("/output/path/file.wav", format="wav") """
    else:
        st.subheader("About")
        st.info("Built with Streamlit")
        st.info("Jesus Saves @JCharisTech")
        st.text("Jesse E.Agbe(JCharis)")


if __name__ == '__main__':
    main()


It looks like you’re using VSCode…in the bottom left of the screen, it will tell you which environment you are pointing to to execute the code. It doesn’t necessarily have to be the same as the Terminal where you are running Streamlit:

image

In this picture, VSCode will use environment st39 to look for packages/code linting. Running streamlit run file.py will execute in the base environment.

Best,
Randy

1 Like

Thanks, I switched to my installed pyenv version 3.6.5. and the error disappeared!

2 Likes