Skip to content

Commit

Permalink
Merge pull request #29 from TidierOrg/write_gsheet
Browse files Browse the repository at this point in the history
`write_gsheet`
  • Loading branch information
drizk1 authored Feb 11, 2025
2 parents 525a550 + 30b6d04 commit 6c34012
Show file tree
Hide file tree
Showing 4 changed files with 99 additions and 5 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "TidierFiles"
uuid = "8ae5e7a9-bdd3-4c93-9cc3-9df4d5d947db"
authors = ["Daniel Rizk <[email protected]> and contributors"]
version = "0.3.0"
version = "0.3.1"

[deps]
Arrow = "69666777-d1a9-59fb-9406-91d4454c9d45"
Expand Down
2 changes: 1 addition & 1 deletion src/TidierFiles.jl
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ using Sockets
export read_csv, write_csv, read_tsv, write_tsv, read_table, write_table, read_delim, read_xlsx, write_xlsx,
read_fwf, write_fwf, fwf_empty, fwf_positions, fwf_positions, read_sav, read_sas, read_dta, write_sav, write_sas,
write_dta, read_arrow, write_arrow, read_parquet, write_parquet, read_csv2, read_file, write_file, read_rdata, list_files,
read_gsheet, connect_gsheet
read_gsheet, connect_gsheet, write_gsheet


include("docstrings.jl")
Expand Down
27 changes: 25 additions & 2 deletions src/docstrings.jl
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,7 @@ Write a DataFrame to a CSV (comma-separated values) file.
- `missing_value`: = "": The string to represent missing values in the output file. Default is an empty string.
- `append`: Whether to append to the file if it already exists. Default is false.
- `col_names`: = true: Whether to write column names as the first line of the file. Default is true.
- `eol`: = "\n": The end-of-line character to use in the output file. Default is the newline character.
- `eol`: The end-of-line character to use in the output file. Default is the newline character.
- `num_threads` = Threads.nthreads(): The number of threads to use for writing the file. Default is the number of available threads.
# Examples
Expand All @@ -223,7 +223,7 @@ Write a DataFrame to a TSV (tab-separated values) file.
- `missing_value`: = "": The string to represent missing values in the output file. Default is an empty string.
- `append`: Whether to append to the file if it already exists. Default is false.
- `col_names`: = true: Whether to write column names as the first line of the file. Default is true.
- `eol`: = "\n": The end-of-line character to use in the output file. Default is the newline character.
- `eol`: The end-of-line character to use in the output file. Default is the newline character.
- `num_threads` = Threads.nthreads(): The number of threads to use for writing the file. Default is the number of available threads.
# Examples
Expand Down Expand Up @@ -416,6 +416,7 @@ julia> read_sas("test.xpt")
─────┼──────────────────
1 │ sav 10.1
2 │ por 10.2
```
"""

const docstring_read_sav =
Expand Down Expand Up @@ -766,4 +767,26 @@ julia> read_gsheet(public_sheet, sheet="Class Data", n_max=5)
4 │ Becky Female 2. Sophomore SD Art Baseball
5 │ Benjamin Male 4. Senior WI English Basketball
```
"""

const docstring_write_gsheet =
"""
    write_gsheet(data::DataFrame, spreadsheet_id::String; sheet::String="Sheet1", range::String="", missing_value::String = "", append::Bool = false)

Write the contents of a DataFrame to a specified Google Sheets spreadsheet.

# Arguments
- `data::DataFrame`: The DataFrame containing the data to be written to Google Sheets.
- `spreadsheet_id::String`: The ID of the Google Sheets spreadsheet or the full URL containing the ID.
- `sheet::String`: The name of the sheet within the spreadsheet where the data will be written. Defaults to "Sheet1".
- `range::String`: The range in the sheet where the data will be written. If empty, defaults to "A1". Ignored when `append = true`.
- `missing_value::String`: The value to replace missing entries in the DataFrame. Defaults to an empty string.
- `append::Bool`: If true, appends the rows (without the column names) to the existing data in the sheet. If false, writes the column names followed by the rows, overwriting the existing data, and clears leftover cells below the new data (through row 1000). Defaults to false.

# Examples
```
julia> df = DataFrame(A=1:5, B=["a", missing, "c", "d", "e"], C=[1.1, 2.2, 3.3, 4.4, 5.5]);

julia> full = "https://docs.google.com/spreadsheets/d/<spreadsheet_id>/edit";

julia> write_gsheet(df, full, sheet = "sheet2", append = false)
```
"""
73 changes: 72 additions & 1 deletion src/gsheets.jl
Original file line number Diff line number Diff line change
Expand Up @@ -168,4 +168,75 @@ function read_gsheet(spreadsheet_id::String;

return df
end


# Last row reached by the cleanup that follows an overwrite; rows below it are left untouched.
const _GSHEET_CLEAR_THROUGH_ROW = 1000

# Convert a 1-indexed column number to A1-notation letters (1 -> "A", 26 -> "Z", 27 -> "AA").
function _gsheet_column_letters(n::Integer)
    letters = Char[]
    while n > 0
        n, offset = divrem(n - 1, 26)
        pushfirst!(letters, 'A' + offset)
    end
    return String(letters)
end

# Convert A1-notation column letters to a 1-indexed column number ("A" -> 1, "AA" -> 27).
function _gsheet_column_number(letters::AbstractString)
    return foldl((acc, ch) -> 26 * acc + (ch - 'A' + 1), uppercase(letters); init = 0)
end

# Return `(column, row)` of the top-left cell of an A1-style range such as "C5", "B2:D9"
# or "A:C" (row defaults to 1), or `nothing` when the text does not start with a column
# reference (for example a named range).
function _gsheet_start_cell(a1_range::AbstractString)
    m = match(r"^([A-Za-z]{1,3})([0-9]*)(?::|$)", a1_range)
    m === nothing && return nothing
    row = isempty(m.captures[2]) ? 1 : parse(Int, m.captures[2])
    return (_gsheet_column_number(m.captures[1]), row)
end

"""
$docstring_write_gsheet
"""
function write_gsheet(data::DataFrame, spreadsheet_id::String; sheet::String="Sheet1", range::String="", missing_value::String = "", append::Bool = false)
    # Accept either a bare spreadsheet ID or a full URL containing "spreadsheets/d/<id>".
    id_match = match(r"spreadsheets/d/([^/]+)", spreadsheet_id)
    doc_id = id_match === nothing ? spreadsheet_id : String(id_match.captures[1])

    # Use a default range if none is provided.
    target_range = isempty(range) ? "A1" : range

    # Appending targets the sheet as a whole; overwriting targets "sheet!range".
    loc = HTTP.escapeuri(append ? sheet : sheet * "!" * target_range)
    base_url = "https://sheets.googleapis.com/v4/spreadsheets/$(doc_id)/values"

    headers = ["Authorization" => "Bearer $(GSHEET_AUTH[].access_token)", "Content-Type" => "application/json"]

    # Build the row-major payload, substituting `missing_value` for missing entries.
    col_names = [string(c) for c in names(data)]
    rows_data = [map(value -> coalesce(value, missing_value), collect(row)) for row in eachrow(data)]
    # The header row is only written when overwriting, never when appending.
    rows = append ? rows_data : vcat([col_names], rows_data)
    payload = JSON3.write(Dict("values" => rows))

    response = if append
        # Append endpoint (POST): inserts the rows after the existing data.
        HTTP.post("$(base_url)/$(loc):append?valueInputOption=USER_ENTERED&insertDataOption=INSERT_ROWS", headers, payload)
    else
        # Update endpoint (PUT): overwrites cells starting at the target range.
        HTTP.put("$(base_url)/$(loc)?valueInputOption=USER_ENTERED", headers, payload)
    end

    # NOTE(review): HTTP.jl appears to throw on error statuses by default, so this check
    # presumably only fires for unexpected non-200 success codes — confirm.
    if response.status != 200
        error("Failed to write to Google Sheets: $(String(response.body))")
    end

    # After an overwrite, clear leftover cells directly below the block that was written.
    # The cleared area is anchored at the same top-left cell as the written data, so a
    # non-"A1" `range` never wipes cells that were just written.
    if !append && !isempty(col_names)
        start = _gsheet_start_cell(target_range)
        if start === nothing
            @warn "write_gsheet: could not locate the first cell of range \"$(target_range)\"; leftover cells below the written data were not cleared."
        else
            first_col, first_row = start
            clear_from = first_row + length(rows)
            # Skip the request when the data already reaches the cleanup bound; otherwise
            # the range would be inverted (e.g. "A1502:C1000").
            if clear_from <= _GSHEET_CLEAR_THROUGH_ROW
                left_col = _gsheet_column_letters(first_col)
                right_col = _gsheet_column_letters(first_col + length(col_names) - 1)
                clear_range = HTTP.escapeuri("$(sheet)!$(left_col)$(clear_from):$(right_col)$(_GSHEET_CLEAR_THROUGH_ROW)")
                clear_response = HTTP.post("$(base_url)/$(clear_range):clear", headers, "{}")
                if clear_response.status != 200
                    error("Failed to clear remaining cells: $(String(clear_response.body))")
                end
            end
        end
    end

    return response
end

0 comments on commit 6c34012

Please sign in to comment.