I would like to use selenium and chromedriver in streamlit. But it has always failed in streamlit Cloud whereas it works on my PC. Can you help me with this issue?
Many thanks for your help.
Here is the piece of code which does not work
async def convert_to_pdf_and_extract_text(url: str) → Optional[Tuple[str, str]]:
pdf_path = “temp_file.pdf”
options = webdriver.ChromeOptions()
# Removed --headless to run in non-headless mode
options.add_argument(“–no-sandbox”)
options.add_argument(“–disable-dev-shm-usage”)
options.add_argument(“–disable-gpu”)
options.add_argument(“window-size=1920x1080”) # Set a common desktop viewport size
options.add_argument(“user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36”)
try:
with webdriver.Chrome(options=options) as driver:
driver.get(url)
driver.implicitly_wait(30)
result = driver.execute_cdp_cmd("Page.printToPDF", {"landscape": False})
with open(pdf_path, "wb") as f:
f.write(base64.b64decode(result['data']))
text = extract_text_from_pdf(pdf_path)
if word_count(text) < 200:
st.caption(f"Failure: not enough words in pdf extraction of {url} - nb of words:{word_count(text)} ")
print(f"Failure: not enough words in pdf extraction of {url} - nb of words:{word_count(text)} ")
return None
st.caption(f"Success: pdf extraction of {url}")
print(f"Success: pdf extraction of {url}")
return text, url
except Exception as e:
print(f"Error: {e}")
st.caption(f"Failure: pdf extraction of {url} because {e}")
print(f"Failure: pdf extraction of {url} because {e}")
return None
Here is the error message whereas the code works perfectly on my machine
Message: session not created: Chrome failed to start: exited normally. (session not created: DevToolsActivePort file doesn’t exist) (The process started from chrome location /usr/bin/chromium is no longer running, so ChromeDriver is assuming that Chrome has crashed.)
requirements.txt:
selenium
seleniumbase
webdriver_manager
packages.txt
chromium
chromium-driver