Edited Dataframe - save data in Google BigQuery

Hi everyone! Firstly, I wanted to mention that I’m a beginner, but I hope my question won’t be too silly :slight_smile: I would like to create an application in which you can edit a table and then save those changes to Google BigQuery.

I managed to do it in a way that replaces the entire dataframe, but is there an option to replace only a specific row? What if I filter the dataframe?

I’ve checked the documentation for session state, but I don’t fully understand how it works because the generated JSON still shows me the row number.

I would be very grateful for your help. I’m also attaching my code:

import streamlit as st
from google.cloud import bigquery
from google.oauth2 import service_account
import datetime
import pandas as pd
import io

st.set_page_config(layout="wide")

hide_streamlit_style = """
    <style>
    #MainMenu {visibility: hidden;}
    footer {visibility: hidden;}
    </style>
    """
st.markdown(hide_streamlit_style, unsafe_allow_html=True)

# Ustaw ścieżkę do pliku JSON
credentials = service_account.Credentials.from_service_account_file(
    'bq3.json',
    scopes=["https://www.googleapis.com/auth/bigquery", "https://www.googleapis.com/auth/drive"]
)

# Tworzenie klienta BigQuery z uwierzytelnieniem
client = bigquery.Client(credentials=credentials)

st.title("Ewidencja pracowników")

# Lista miesięcy
months = ['Styczeń', 'Luty', 'Marzec', 'Kwiecień', 'Maj', 'Czerwiec',
          'Lipiec', 'Sierpień', 'Wrzesień', 'Październik', 'Listopad', 'Grudzień']
current_month = datetime.datetime.now().month
# Wybór miesiąca z listy rozwijanej
selected_month = st.sidebar.selectbox("Wybierz miesiąc:", months, index=current_month-1)

years = ['2023', '2024', '2025']
current_year = datetime.datetime.now().year
# Wybór roku z listy rozwijanej
selected_year = st.sidebar.selectbox("Wybierz rok:", years, index=years.index(str(current_year)))

query = f"""
SELECT
  Imie_nazwisko,
  Data_start,
  Data_koniec,
  TIMESTAMP_DIFF(Data_koniec,Data_start, MINUTE)/60 AS Czas_pracy,
  id_rekordu
FROM
  `beton-390614.Aktualizacje.Ewidencja`
  order by Data_start
"""

# Pobieranie danych z BigQuery
df = pd.read_gbq(query, project_id=credentials.project_id, credentials=credentials)

# Konwersja kolumn z datami do odpowiedniego formatu

query_xlsx = f"""
SELECT
  Imie_nazwisko,
  Data_start,
  Data_koniec,
  TIMESTAMP_DIFF(Data_koniec,Data_start, MINUTE)/60 AS Czas_pracy
FROM
  `beton-390614.Aktualizacje.Ewidencja`
WHERE EXTRACT(MONTH FROM Data_start) = {months.index(selected_month) + 1}
    AND EXTRACT(YEAR FROM Data_start) = {selected_year}
order by Data_start
"""


if df.empty:
    st.warning("Brak danych do wyświetlenia")
else:
    edited_df = st.data_editor(df.iloc[:, 0:5], key='dt',num_rows='dynamic',width=1000,column_config={
        "Imie_nazwisko":st.column_config.SelectboxColumn(
            "Imie_nazwisko",
            width="medium",
            options=list(df['Imie_nazwisko'].unique()),
            required=True
        )
    })

    
    dict2 = st.write( st.session_state['dt'] )
    dict3 = st.json(dict(enumerate(df["id_rekordu"])))


    st.write( dict2['edited_rows'] )




  # Saving changes to the "Zaliczki" table after clicking the "Save changes" button
    if st.button('Zapisz zmiany'):
          table_urlopy='beton-390614.Aktualizacje.Ewidencja'
          edited_df.to_gbq(table_urlopy, project_id=credentials.project_id, credentials=credentials, if_exists='replace', table_schema=[{'name': 'Imie_nazwisko', 'type': 'STRING'}, {'name': 'Data_start', 'type': 'TIMESTAMP'}, {'name': 'Data_koniec', 'type': 'TIMESTAMP'}])
          st.session_state.edited_df = edited_df.copy()
          st.success("Zmiany zostały zapisane pomyślnie.")

    with io.BytesIO() as output:
    # Zamiast df, użyj wyników zapytania query_xlsx
        query_xlsx_df = pd.read_gbq(query_xlsx, project_id=credentials.project_id, credentials=credentials)
        query_xlsx_df['Data_start'] = pd.to_datetime(query_xlsx_df['Data_start']).dt.strftime('%Y-%m-%d %H:%M')
        query_xlsx_df['Data_koniec'] = pd.to_datetime(query_xlsx_df['Data_koniec']).dt.strftime('%Y-%m-%d %H:%M')
        with pd.ExcelWriter(output, engine='xlsxwriter') as writer:
            # Zapisanie DataFrame do pliku Excel
            query_xlsx_df.to_excel(writer, sheet_name='ewidencja', index=False)

            # Uzyskanie obiektu xlsxwriter.Workbook i arkusza
            workbook = writer.book
            worksheet = writer.sheets['ewidencja']

            # Ustawienie szerokości kolumny na 110 pikseli
            worksheet.set_column('A:Z', 35)

        output.seek(0)

        # Generowanie nazwy pliku
        filename = f"ewidencja_{selected_month}_{selected_year}.xlsx"

        # Pobieranie pliku Excel
        st.download_button(
            label="💾 Pobierz do Excel",
            data=output,
            file_name=filename,
            mime='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
        )


temat_input = st.text_input("Wprowadź temat")
notatka_input = st.text_area("Wprowadź notatkę")
zapisz_button = st.button("Zapisz notatkę")
table_name = 'beton-390614.Aktualizacje.Notatki'

if zapisz_button:
    data_czas = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    full_temat = temat_input+' '+str(data_czas)

    # Dodawanie rekordu do tabeli w BigQuery
    insert_query = f"""
        INSERT INTO `{table_name}` (Temat, Notatka, Data_czas)
        VALUES ('{full_temat}', '{notatka_input}', '{data_czas}')
    """
    client.query(insert_query)
    st.success("Notatka została zapisana")

notatki_query = f"""
    SELECT *
    FROM `{table_name}`
    WHERE EXTRACT(MONTH FROM Data_czas) = {months.index(selected_month) + 1}
    AND EXTRACT(YEAR FROM Data_czas) = {selected_year}
"""
notatki_df = pd.read_gbq(notatki_query, project_id=credentials.project_id, credentials=credentials)

st.text(" ")
st.title("Notatki")

if notatki_df.empty:
    st.warning("Brak notatek do wyświetlenia")
else:
    for i, row in notatki_df.iterrows():
        with st.expander(row['Temat'], expanded=False):
            st.write("Data i czas:", row['Data_czas'])
            st.write("Notatka:", row['Notatka'])



It looks like you have a Streamlit application that allows you to edit a table and then save those changes to Google BigQuery. Your code is quite extensive, but I’ll focus on the specific part where you want to save changes to BigQuery while preserving only the edited rows, and not replacing the entire dataframe.

In your code, you’re using st.data_editor to allow users to edit the dataframe, and then you’re attempting to save the changes to BigQuery. To save only the edited rows, you can follow these steps:

  1. After the user edits the dataframe using st.data_editor, you can access the edited dataframe using st.session_state. You’ve already done this with dict2 = st.write( st.session_state['dt'] ).
  2. You can then identify the rows that have been edited by comparing the edited dataframe with the original dataframe.
  3. Once you have identified the edited rows, you can update those rows in your BigQuery table.

Here’s an example of how you can modify your code to achieve this:

# ... (previous code)

# Create a copy of the original dataframe
original_df = df.copy()

# Display the editable dataframe
edited_df = st.data_editor(df.iloc[:, 0:5], key='dt', num_rows='dynamic', width=1000, column_config={
    "Imie_nazwisko": st.column_config.SelectboxColumn(
        "Imie_nazwisko",
        width="medium",
        options=list(df['Imie_nazwisko'].unique()),
        required=True
    )
})

# Check if the user has edited the dataframe
if 'dt' in st.session_state and st.session_state.dt is not None:
    edited_rows = st.session_state.dt

    # Identify which rows have been edited
    edited_row_indices = [i for i, row in enumerate(original_df.itertuples(), 1)
                          if row not in edited_rows.itertuples()]

    # Get the edited rows from the original dataframe
    edited_rows_df = original_df.iloc[edited_row_indices]

    # Saving changes to BigQuery for the edited rows
    if not edited_rows_df.empty:
        table_urlopy = 'beton-390614.Aktualizacje.Ewidencja'
        edited_rows_df.to_gbq(table_urlopy, project_id=credentials.project_id, credentials=credentials, if_exists='append', table_schema=[
            {'name': 'Imie_nazwisko', 'type': 'STRING'},
            {'name': 'Data_start', 'type': 'TIMESTAMP'},
            {'name': 'Data_koniec', 'type': 'TIMESTAMP'},
            {'name': 'Czas_pracy', 'type': 'FLOAT'},
            {'name': 'id_rekordu', 'type': 'INTEGER'}
        ])
        st.success("Zmiany zostały zapisane pomyślnie.")

# ... (rest of your code)

This code checks for changes in the st.session_state.dt dataframe, identifies the edited rows, and saves only those edited rows back to BigQuery without replacing the entire dataframe.

Please make sure you adjust the BigQuery schema and other details as needed for your specific use case.

Hi, Thank you for help but now i have that error:

if df.empty:
    st.warning("Brak danych do wyświetlenia")
else:
    original_df = df.copy()

    # Display the editable dataframe
    edited_df = st.data_editor(df.iloc[:, 0:5], key='dt', num_rows='dynamic', width=1000, column_config={
        "Imie_nazwisko": st.column_config.SelectboxColumn(
            "Imie_nazwisko",
            width="medium",
            options=list(df['Imie_nazwisko'].unique()),
            required=True
        )
    })

    # Check if the user has edited the dataframe
    if 'dt' in st.session_state and st.session_state.dt is not None:
        edited_rows = st.session_state.dt

        # Identify which rows have been edited
        edited_row_indices = [i for i, row in enumerate(original_df.itertuples(), 1)
                            if row not in edited_rows.itertuples()]

        # Get the edited rows from the original dataframe
        edited_rows_df = original_df.iloc[edited_row_indices]

        # Saving changes to BigQuery for the edited rows
        if not edited_rows_df.empty:
            table_urlopy = 'beton-390614.Aktualizacje.Ewidencja'
            edited_rows_df.to_gbq(table_urlopy, project_id=credentials.project_id, credentials=credentials, if_exists='append', table_schema=[
                {'name': 'Imie_nazwisko', 'type': 'STRING'},
                {'name': 'Data_start', 'type': 'TIMESTAMP'},
                {'name': 'Data_koniec', 'type': 'TIMESTAMP'},
                {'name': 'Czas_pracy', 'type': 'FLOAT'},
                {'name': 'id_rekordu', 'type': 'INTEGER'}
            ])
            st.success("Zmiany zostały zapisane pomyślnie.")

AttributeError: 'dict' object has no attribute 'itertuples'
Traceback:
File "/Users/dawidzywica/infolinia2/venv/lib/python3.11/site-packages/streamlit/runtime/scriptrunner/script_runner.py", line 552, in _run_script
    exec(code, module.__dict__)
File "/Users/dawidzywica/infolinia2/pages/6_🙋🏻‍♂️_Ewidencja.py", line 94, in <module>
    edited_row_indices = [i for i, row in enumerate(original_df.itertuples(), 1)
                         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/dawidzywica/infolinia2/pages/6_🙋🏻‍♂️_Ewidencja.py", line 95, in <listcomp>
    if row not in edited_rows.itertuples()]
                  ^^^^^^^^^^^^^^^^^^^^^^

Can you help me solve it? I will be glad

if df.empty:
    st.warning("Brak danych do wyświetlenia")
else:
    original_df = df.copy()

    # Display the editable dataframe
    edited_df = st.data_editor(df.iloc[:, 0:5], key='dt', num_rows='dynamic', width=1000, column_config={
        "Imie_nazwisko": st.column_config.SelectboxColumn(
            "Imie_nazwisko",
            width="medium",
            options=list(df['Imie_nazwisko'].unique()),
            required=True
        )
    })

    # Check if the user has edited the dataframe
    if 'dt' in st.session_state and st.session_state.dt is not None:
        edited_rows = st.session_state.dt

        # Check if original_df and edited_rows are DataFrames
        if isinstance(original_df, pd.DataFrame) and isinstance(edited_rows, pd.DataFrame):
            # Identify which rows have been edited
            edited_row_indices = [i for i, row in enumerate(original_df.itertuples(), 1)
                                if row not in edited_rows.itertuples()]

            # Get the edited rows from the original dataframe
            edited_rows_df = original_df.iloc[edited_row_indices]

            # Saving changes to BigQuery for the edited rows
            if not edited_rows_df.empty:
                table_urlopy = 'beton-390614.Aktualizacje.Ewidencja'
                edited_rows_df.to_gbq(table_urlopy, project_id=credentials.project_id, credentials=credentials, if_exists='append', table_schema=[
                    {'name': 'Imie_nazwisko', 'type': 'STRING'},
                    {'name': 'Data_start', 'type': 'TIMESTAMP'},
                    {'name': 'Data_koniec', 'type': 'TIMESTAMP'},
                    {'name': 'Czas_pracy', 'type': 'FLOAT'},
                    {'name': 'id_rekordu', 'type': 'INTEGER'}
                ])
                st.success("Zmiany zostały zapisane pomyślnie.")
        else:
            st.warning("original_df and edited_rows are not valid DataFrames.")

Hello, Can you check this code. It first checks if original_df and edited_rows are DataFrames before attempting to use the itertuples method, avoiding the AttributeError you encountered. Please ensure that both original_df and edited_rows are valid DataFrames in your application logic. Thank you!

Hello,
I I’ve checked, and orginal_df is indeed a dataframe, but edited_rows is a dictionary. Is there any way we can improve this? I tried on my own, but I couldn’t make it work. I would greatly appreciate your assistance.