My Streamlit app is currently deployed locally (i.e., it is still in the testing phase). The app first collects data from a web page by web scraping with BeautifulSoup. The data is then displayed as a dataframe in a table that lets the user select a list of items, as shown below:
import requests, smtplib
import pandas as pd, streamlit as st, datetime
from bs4 import BeautifulSoup as bs
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
def _tag_text(tag):
    """Return the stripped text of a BeautifulSoup tag, or "" when the tag is missing."""
    return tag.get_text(strip=True) if tag is not None else ""


def main():
    """Scrape the news list, show it as a selectable table, and render the selection.

    The page at ``URL`` is downloaded and parsed; for every news item the
    date, title, summary and link are collected into a DataFrame. The frame
    is handed to ``selected_rows_from_dataframe`` (defined elsewhere in the
    file; it must return a DataFrame because ``iterrows`` is called on the
    result), and each selected row is rendered as title / summary / link.

    Network failures and an unexpected page layout are reported in the UI
    with ``st.error`` instead of raising.
    """
    # Function-scope imports keep this block self-contained without touching
    # the file's import header.
    from html import escape
    from urllib.parse import urljoin

    st.set_page_config(
        page_title="Web Scraping Data",
        layout="wide",
        initial_sidebar_state="auto")
    st.header("Data")
    cur_date = datetime.datetime.now()
    st.write("Date", cur_date)

    # Download the page and build a soup object from its HTML.
    URL = "https://liphy.univ-grenoble-alpes.fr/fr/actualites"
    try:
        # A timeout prevents the app from hanging forever on a stalled server.
        response = requests.get(URL, timeout=10)
        response.raise_for_status()
    except requests.RequestException as err:
        st.error(f"Could not download {URL}: {err}")
        return
    soup = bs(response.content, 'html.parser')
    actualite = soup.find("ul", {"class":"liste__objets liste__actualites flex-oui flex-2-3"})
    if actualite is None:
        # The page layout changed (or an error page was served): nothing to parse.
        st.error(f"No news list found on {URL}.")
        return

    # Extract date, title, summary and link of every news item.
    pub_date = []
    titre = []
    contente = []
    lien = []
    for item in actualite.find_all("li", {"class":"liste__objets__style0004"}):
        # NOTE(review): the original appended an undefined name `date`
        # (NameError). The publication date is assumed to live in a <time>
        # tag -- confirm against the page markup and adjust the selector.
        pub_date.append(_tag_text(item.find("time")))
        titre.append(_tag_text(item.find("em")))
        contente.append(_tag_text(item.find("div", {"class":"liste__objets__resume"}, recursive=True)))
        anchor = item.find("a", href=True)
        # Resolve relative hrefs against the scraped page so the link still
        # works when opened from the Streamlit app; absolute hrefs are kept.
        lien.append(urljoin(URL, anchor["href"]) if anchor is not None else "")

    df = pd.DataFrame({'Date':pub_date, 'Titre':titre, 'Résumé':contente, 'Lien':lien})
    # Strip carriage returns, newlines and tabs left over from the HTML.
    df1 = df.replace(r'\r+|\n+|\t+','', regex=True)
    selected_rows = selected_rows_from_dataframe(df1)

    st.divider()
    st.subheader("Transform")
    container = st.container()
    with container:
        for _, row in selected_rows.iterrows():
            # Scraped text is untrusted: escape it before embedding it in
            # markup rendered with unsafe_allow_html.
            title_html = escape(str(row["Titre"]))
            link_html = escape(str(row["Lien"]))
            st.markdown(f'**<p style="font-size:20px;">{title_html}</p>**', unsafe_allow_html=True, help=None)
            st.markdown(f'{row["Résumé"]}')
            st.markdown(f'<a href="{link_html}" target="_blank" style="font-size:15px;">Lire la suite</a>', unsafe_allow_html=True)


if __name__ == "__main__":
    main()
In the code above, in the second section that begins with “for index”, I would like to convert the output of the st.markdown calls to a string, without losing the text formatting, and send it as an email.
Thank you for your input. If there is a better alternative to achieve this, I would welcome any suggestions.