Google Drive CSV file link to Pandas DataFrame

How to read in a csv file from Google Drive to a Pandas DataFrame:

url = ' google drive sharing link'
path = 'https://drive.google.com/uc?export=download&id='+url.split('/')[-2]
df = pd.read_csv(path)

Show DataFrame output on Streamlit app:

 st.write(df)

If you run into any issues, try making the CSV file link public on Google Drive. This could be a potential workaround for uploading large files.

6 Likes

@Cervone23

Works great, any chance there is a way to upload files in a similar way (without google API), overwriting the file? I would like to read the file, append it and the save it back to google drive and still use the same URL.

Thanks in advance!

since the suggested method seems to be unreliable I ended up using a google service account like so

from pandas import read_csv
from pandas import DataFrame
from google.oauth2.service_account import Credentials
from googleapiclient.discovery import build
from googleapiclient.http import MediaIoBaseDownload
from io import BytesIO

def google_drive(id:str) -> DataFrame:
    SCOPES = ["https://www.googleapis.com/auth/drive.readonly"]
    creds = Credentials.from_service_account_file(
        "service-account.json",
        scopes=SCOPES,
    )
    service = build(
        "drive",
        "v3",
        credentials=creds,
        cache_discovery=False,
    )
    request = service.files().get_media(fileId=id)
    file = BytesIO()
    downloader = MediaIoBaseDownload(file,request)
    done = False
    while done is False: _,done = downloader.next_chunk()
    file.seek(0)
    return read_csv(file)
2 Likes

Great code, thanks for sharing!

@metasemantic skyAR app (mentioned in how-to-download-large-model-files-to-the-sharing-app) implements a download_file_from_google_drive function that may also be of interest as it doesn’t require googleapiclient or google:

import requests


def get_confirm_token(response):
    for key, value in response.cookies.items():
        if key.startswith('download_warning'):
            return value

    return None

def save_response_content(response, destination):
    CHUNK_SIZE = 32768

    with open(destination, "wb") as f:
        for chunk in response.iter_content(CHUNK_SIZE):
            if chunk: # filter out keep-alive new chunks
                f.write(chunk)

def download_file_from_google_drive(id, destination):
    URL = "https://docs.google.com/uc?export=download"

    session = requests.Session()

    response = session.get(URL, params = { 'id' : id }, stream = True)
    token = get_confirm_token(response)

    if token:
        params = { 'id' : id, 'confirm' : token }
        response = session.get(URL, params = params, stream = True)

    save_response_content(response, destination) 
2 Likes