-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #28 from TidierOrg/gsheet-prlim
Read Google Sheets
- Loading branch information
Showing
8 changed files
with
300 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,18 +1,21 @@ | ||
name = "TidierFiles" | ||
uuid = "8ae5e7a9-bdd3-4c93-9cc3-9df4d5d947db" | ||
authors = ["Daniel Rizk <[email protected]> and contributors"] | ||
version = "0.2.2" | ||
version = "0.3.0" | ||
|
||
[deps] | ||
Arrow = "69666777-d1a9-59fb-9406-91d4454c9d45" | ||
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b" | ||
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" | ||
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" | ||
HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3" | ||
JSON3 = "0f8b85d8-7281-11e9-16c2-39a750bddbf1" | ||
Parquet2 = "98572fba-bba0-415d-956f-fa77e587d26d" | ||
RData = "df47a6cb-8c03-5eed-afd8-b6050d6c41da" | ||
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" | ||
ReadStatTables = "52522f7a-9570-4e34-8ac6-c005c74d4b84" | ||
Reexport = "189a3867-3050-52da-a836-e630ba90ab69" | ||
Sockets = "6462fe0b-24de-5631-8697-dd941f90decc" | ||
XLSX = "fdbf4ff8-1666-58a4-91e7-1b58723a45e0" | ||
|
||
[compat] | ||
|
@@ -22,11 +25,14 @@ DataFrames = "1.5" | |
Dates = "1.9" | ||
Documenter = "0.27, 1" | ||
HTTP = "1.10" | ||
JSON3 = "1.1" | ||
Parquet2 = "0.2" | ||
RData = "0.8, 1.0" | ||
ReadStatTables = "0.3" | ||
Reexport = "0.2, 1" | ||
Sockets = "1.9 - 1.11" | ||
Random = "0.5 - 10" | ||
XLSX = "0.10" | ||
RData = "0.8, 1.0" | ||
julia = "1.10" | ||
|
||
[extras] | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
# The `read_gsheet` function enables TidierFiles to read Google Sheets in as DataFrames. | ||
# `read_gsheet` supports various arguments in line with other TidierFiles readers. | ||
|
||
# ## Secrets | ||
# At this time, to use `read_gsheet` the user will need to have a client ID and a client secret. | ||
# To get these, go to the Google Cloud Console -> APIs and Services -> Credentials (in sidebar) -> Create Credentials (at the top) -> OAuth Client ID (choose for desktop) -> save that client id and client secret. | ||
# In the future, we hope to remove this need, so that authorization can happen more simply. | ||
|
||
# ## Authorization | ||
# The browser will ask you to allow access. After you click allow, the browser will instruct you to close the browser window. The connection is stored globally for the functions to access. | ||
# ``` | ||
# client_id = "527478*******3dh26e.apps.googleusercontent.com" | ||
# client_secret = "GO******j9yG" | ||
# connect_gsheet(client_id, client_secret) | ||
# ``` | ||
# ## Reading | ||
# File paths can be either full links or spreadsheet ids. | ||
# The sheet name defaults to `Sheet1` but can be changed as shown below. | ||
# ``` | ||
# spreadsheet_id = "1QbFtedQs56oIO1bjQKSwdpcWsMpo3r_04ZIrdg9_fFM" | ||
# read_file(spreadsheet_id) | ||
# 4×4 DataFrame | ||
# Row │ this is test sheet | ||
# │ Float64? String? String? String? | ||
# ─────┼────────────────────────────────────── | ||
# 1 │ 3.0 missing missing missing | ||
# 2 │ missing COLUMNS missing missing | ||
# 3 │ missing missing ARE missing | ||
# 4 │ missing missing missing STRINGS | ||
# ``` | ||
# | ||
# ## Read a particular sheet | ||
# ``` | ||
# public_sheet = "https://docs.google.com/spreadsheets/d/1BxiMVs0XRA5nFMdKvBdBZjgmUUqptlbs74OgvE2upms/edit?gid=0#gid=0" | ||
# read_gsheet(public_sheet, sheet="Class Data", col_names=true, n_max=5) | ||
# 5×6 DataFrame | ||
# Row │ Student Name Gender Class Level Home State Major Extracurricular Activity | ||
# │ String String String String String String | ||
# ─────┼─────────────────────────────────────────────────────────────────────────────────── | ||
# 1 │ Alexandra Female 4. Senior CA English Drama Club | ||
# 2 │ Andrew Male 1. Freshman SD Math Lacrosse | ||
# 3 │ Anna Female 1. Freshman NC English Basketball | ||
# 4 │ Becky Female 2. Sophomore SD Art Baseball | ||
# 5 │ Benjamin Male 4. Senior WI English Basketball | ||
# ``` | ||
# |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,171 @@ | ||
|
||
"""
    GSheetAuth

Credentials for a Google Sheets session: the OAuth client id and secret, the
redirect URI used during authorization, and the access token that is sent as
the bearer token on API requests.
"""
mutable struct GSheetAuth
    client_id::String
    client_secret::String
    redirect_uri::String
    access_token::String
end

# Session-wide holder for the current credentials. The `Ref` starts out
# unassigned and is filled in by `connect_gsheet`.
const GSHEET_AUTH = Ref{GSheetAuth}()
# Helper function to open a URL in the default browser.
#
# Picks a platform-specific launcher command and runs it. When no launcher is
# known for the platform, or the launcher cannot be started, the URL is printed
# instead so the user can open it by hand rather than being left waiting.
# Always returns `nothing`.
function open_browser(url::String)
    launcher = if Sys.iswindows()
        # `start` is a cmd.exe builtin, not an executable, so it cannot be
        # spawned directly. rundll32 hands the URL to the registered protocol
        # handler without a shell that would split the URL at `&`.
        `rundll32 url.dll,FileProtocolHandler $url`
    elseif Sys.isapple()
        `open $url`
    elseif Sys.islinux()
        `xdg-open $url`
    else
        nothing
    end

    if launcher !== nothing
        try
            run(launcher)
            return nothing
        catch err
            # Best effort only: fall through to the manual instructions below.
            @warn "Could not launch a browser automatically" exception = err
        end
    end

    println("Please open the following URL in your browser:")
    println(url)
    return nothing
end
|
||
# Start a simple loopback server to capture the code.
#
# Listens on `port`, opens `auth_url` in the browser, accepts the first
# incoming connection and extracts the `code` query parameter from the raw
# request text. Returns the code as a `String` exactly as it appears in the
# request (no URL-decoding is applied). Throws an error when the request
# carries no `code` parameter. The socket and the listener are closed on every
# exit path so the port is not left bound after a failure.
function get_authorization_code(port::Int, auth_url::String)
    # Bind before launching the browser: the redirect can then never arrive
    # before the listener exists, and a busy port fails before any user action.
    server = Sockets.listen(port)
    try
        open_browser(auth_url)
        println("Waiting for redirect on http://localhost:$port ...")

        sock = Sockets.accept(server)
        try
            request = String(readavailable(sock))

            # The raw request is deliberately not printed: it contains the
            # authorization code, which should not end up in console logs.

            # Look for the 'code' parameter in the GET request
            m = match(r"code=([^&\s]+)", request)
            if m === nothing
                error("Authorization code not found in the request")
            end
            code = String(m.captures[1]) # Convert to String

            # Send a simple HTTP response
            response = """
            HTTP/1.1 200 OK\r
            Content-Type: text/html\r
            \r
            <html><body><h1>Authorization complete</h1>
            You can now read Google Sheets from TidierFiles. Feel free to close this window.</body></html>
            """
            write(sock, response)
            return code
        finally
            close(sock)
        end
    finally
        close(server)
    end
end
|
||
# Exchange the authorization code for an access token.
#
# POSTs a form-encoded request to Google's OAuth token endpoint and returns the
# `access_token` field of the JSON response. Throws an error containing the
# response body when the endpoint answers with any status other than 200.
# The parameters are interpolated into the form body verbatim.
function get_access_token(code::String, client_id::String, client_secret::String, redirect_uri::String)
    token_url = "https://oauth2.googleapis.com/token"
    form_body = "code=$code&client_id=$client_id&client_secret=$client_secret&redirect_uri=$redirect_uri&grant_type=authorization_code"

    # HTTP.jl throws on non-2xx responses by default, which would bypass the
    # status check below; disable that so the endpoint's error body is reported.
    response = HTTP.post(token_url,
                         ["Content-Type" => "application/x-www-form-urlencoded"],
                         form_body;
                         status_exception = false)
    if response.status != 200
        error("Failed to get access token: $(String(response.body))")
    end
    token_data = JSON3.read(String(response.body))
    return token_data["access_token"]
end
|
||
# Function to connect to Google Sheets and get the access token.
#
# Builds the Google OAuth consent URL, waits on a local loopback listener for
# the redirect carrying the authorization code, exchanges that code for an
# access token, stores the result in the global `GSHEET_AUTH` and returns it.
"""
$docstring_connect_gsheet
"""
function connect_gsheet(client_id::String, client_secret::String; redirect_uri::String = "http://localhost:8081")
    scope = "https://www.googleapis.com/auth/spreadsheets"
    # NOTE(review): the state value is constant and is not checked on the
    # redirect; ideally, generate a random string and verify it.
    state = "random_state_string"
    auth_url = "https://accounts.google.com/o/oauth2/v2/auth?" *
               "client_id=$client_id&" *
               "redirect_uri=$(HTTP.escapeuri(redirect_uri))&" *
               "response_type=code&" *
               "scope=$(HTTP.escapeuri(scope))&" *
               "state=$state"

    # Listen on the port the browser will actually be redirected to. A
    # redirect URI without an explicit port means the HTTP default, 80.
    port_match = match(r":(\d+)(?:[/?#]|$)", redirect_uri)
    port = port_match === nothing ? 80 : parse(Int, port_match.captures[1])

    code = get_authorization_code(port, auth_url)
    access_token = get_access_token(code, client_id, client_secret, redirect_uri)

    GSHEET_AUTH[] = GSheetAuth(client_id, client_secret, redirect_uri, access_token)
    return GSHEET_AUTH[]
end
|
||
# Convert one raw cell string: empty text becomes `missing`, text that parses
# as a Float64 becomes that number, and anything else is returned unchanged.
# `missing_value` is accepted for signature compatibility but is not consulted.
function parse_value(value::String, missing_value::String)
    isempty(value) && return missing
    number = tryparse(Float64, value)
    return number === nothing ? value : number
end
|
||
""" | ||
$docstring_read_gsheet | ||
""" | ||
# Read a range of a Google Sheet into a DataFrame.
#
# `spreadsheet_id` may be a bare id or a full Google Sheets URL. The range
# defaults to `A1:Z1000` of `sheet`. With `col_names=true` the first returned
# row supplies the column names; otherwise columns are named `Column1`, ….
# Data columns beyond the header are named `x1`, `x2`, …. `skip` and `n_max`
# select data rows, `col_select` is applied as a DataFrame column index, and a
# non-empty `missing_value` replaces every `missing` in the result.
# Requires a prior `connect_gsheet` call; throws an error otherwise.
function read_gsheet(spreadsheet_id::String;
                     sheet::String="Sheet1",
                     range::String="",
                     col_names::Bool=true,
                     skip::Int=0,
                     n_max::Int=10000,
                     col_select=nothing,
                     missing_value::String="")

    # Fail with an actionable message instead of an UndefRefError when no
    # credentials have been stored yet.
    if !isassigned(GSHEET_AUTH)
        error("No Google Sheets connection found. Call `connect_gsheet(client_id, client_secret)` before `read_gsheet`.")
    end

    # If a full Google Sheets URL is provided, extract the spreadsheet id.
    m = match(r"spreadsheets/d/([^/]+)", spreadsheet_id)
    if m !== nothing
        spreadsheet_id = m.captures[1]
    end

    a1_range = sheet * "!" * (isempty(range) ? "A1:Z1000" : range)

    url = "https://sheets.googleapis.com/v4/spreadsheets/$spreadsheet_id/values/$(HTTP.escapeuri(a1_range))"
    headers = ["Authorization" => "Bearer $(GSHEET_AUTH[].access_token)"]
    response = HTTP.get(url, headers)
    data = JSON3.read(String(response.body))

    # The response has no "values" key at all when the requested range holds
    # no data, so look it up defensively rather than indexing.
    values = get(data, :values, nothing)
    if values === nothing || isempty(values)
        return DataFrame()
    end

    if col_names
        header = values[1]
        rows = values[2:end]
    else
        header = ["Column$i" for i in 1:length(values[1])]
        rows = values
    end

    rows = rows[skip+1:min(skip+n_max, end)]

    # Rows may be shorter than the header (trailing empty cells are dropped)
    # and there may be no data rows at all, so the table width is the larger
    # of the header width and the widest row, with 0 for "no rows".
    ncols = max(length(header), maximum(length, rows; init=0))

    padded_rows = [vcat(row, fill(missing_value, ncols - length(row))) for row in rows]

    parsed_rows = [[parse_value(cell, missing_value) for cell in row] for row in padded_rows]

    df = DataFrame([Symbol(header[i]) => [row[i] for row in parsed_rows] for i in 1:length(header)])

    # Data columns that have no header cell get generated names x1, x2, ...
    for i in (length(header)+1):ncols
        df[!, Symbol("x$(i-length(header))")] = [row[i] for row in parsed_rows]
    end

    if col_select !== nothing
        df = df[:, col_select]
    end

    if missing_value != ""
        for col in names(df)
            df[!, col] = coalesce.(df[!, col], missing_value)
        end
    end

    return df
end
|