Merge pull request #29 from TidierOrg/write_gsheet

`write_gsheet`
TidierOrg · Feb 11, 2025 · 6c34012 · 6c34012
2 parents 525a550 + 30b6d04
commit 6c34012
Show file tree

Hide file tree

Showing 4 changed files with 99 additions and 5 deletions.
diff --git a/Project.toml b/Project.toml
@@ -1,7 +1,7 @@
 name = "TidierFiles"
 uuid = "8ae5e7a9-bdd3-4c93-9cc3-9df4d5d947db"
 authors = ["Daniel Rizk <[email protected]> and contributors"]
-version = "0.3.0"
+version = "0.3.1"
 
 [deps]
 Arrow = "69666777-d1a9-59fb-9406-91d4454c9d45"

diff --git a/src/TidierFiles.jl b/src/TidierFiles.jl
@@ -19,7 +19,7 @@ using Sockets
 export read_csv, write_csv, read_tsv, write_tsv, read_table, write_table, read_delim, read_xlsx, write_xlsx, 
  read_fwf, write_fwf, fwf_empty, fwf_positions, fwf_positions, read_sav, read_sas, read_dta, write_sav, write_sas, 
  write_dta, read_arrow, write_arrow, read_parquet, write_parquet, read_csv2, read_file, write_file, read_rdata, list_files,
- read_gsheet, connect_gsheet
+ read_gsheet, connect_gsheet, write_gsheet
 
 
 include("docstrings.jl")

diff --git a/src/docstrings.jl b/src/docstrings.jl
@@ -201,7 +201,7 @@ Write a DataFrame to a CSV (comma-separated values) file.
 - `missing_value`: = "": The string to represent missing values in the output file. Default is an empty string.
 - `append`: Whether to append to the file if it already exists. Default is false.
 - `col_names`: = true: Whether to write column names as the first line of the file. Default is true.
-- `eol`: = "\n": The end-of-line character to use in the output file. Default is the newline character.
+- `eol`: The end-of-line character to use in the output file. Default is the newline character.
 - `num_threads` = Threads.nthreads(): The number of threads to use for writing the file. Default is the number of available threads.
 
 # Examples
@@ -223,7 +223,7 @@ Write a DataFrame to a TSV (tab-separated values) file.
 - `missing_value`: = "": The string to represent missing values in the output file. Default is an empty string.
 - `append`: Whether to append to the file if it already exists. Default is false.
 - `col_names`: = true: Whether to write column names as the first line of the file. Default is true.
-- `eol`: = "\n": The end-of-line character to use in the output file. Default is the newline character.
+- `eol`: The end-of-line character to use in the output file. Default is the newline character.
 - `num_threads` = Threads.nthreads(): The number of threads to use for writing the file. Default is the number of available threads.
 
 # Examples
@@ -416,6 +416,7 @@ julia> read_sas("test.xpt")
 ─────┼──────────────────
    1 │ sav         10.1
    2 │ por         10.2
+```
 """
 
 const docstring_read_sav =
@@ -766,4 +767,26 @@ julia> read_gsheet(public_sheet, sheet="Class Data", n_max=5)
    4 │ Becky         Female  2. Sophomore  SD          Art      Baseball
    5 │ Benjamin      Male    4. Senior     WI          English  Basketball
 ```
+"""
+
+const docstring_write_gsheet = 
+"""
+    write_gsheet(data::DataFrame, spreadsheet_id::String; sheet::String="Sheet1", range::String="", missing_value::String = "", append::Bool = false)
+
+Writes the contents of a DataFrame to a specified Google Sheets spreadsheet.
+
+# Arguments
+- `data::DataFrame`: The DataFrame containing the data to be written to Google Sheets.
+- `spreadsheet_id::String`: The ID of the Google Sheets spreadsheet or the full URL containing the ID.
+- `sheet::String`: The name of the sheet within the spreadsheet where the data will be written. Defaults to "Sheet1".
+- `range::String`: The range in the sheet where the data will be written. If empty, defaults to "A1". Not used when `append` is true.
+- `missing_value::String`: The value to replace missing entries in the DataFrame. Defaults to an empty string.
+- `append::Bool`: If true, appends the data (without a header row) to the existing data in the sheet. If false, overwrites the existing data, writing the column names as the first row. Defaults to false.
+
+# Examples
+```
+julia> df = DataFrame(A=1:5, B=["a", missing, "c", "d", "e"], C=[1.1, 2.2, 3.3, 4.4, 5.5]);
+
+julia> write_gsheet(df, full, sheet = "sheet2", append = false)
+```
 """
diff --git a/src/gsheets.jl b/src/gsheets.jl
@@ -168,4 +168,75 @@ function read_gsheet(spreadsheet_id::String;
 
     return df
   end
-
+
+"""
+$docstring_write_gsheet
+"""
+function write_gsheet(data::DataFrame, spreadsheet_id::String; sheet::String="Sheet1", range::String="", missing_value::String = "", append::Bool = false)
+    # Write `data` to a Google Sheets spreadsheet and return the HTTP response of the
+    # write request. With `append = true` the rows are appended without a header row;
+    # otherwise the column names plus the rows overwrite the sheet starting at `range`,
+    # and cells below the written block are cleared. Raises an error when either
+    # request answers with a status other than 200.
+
+    # Accept either a bare spreadsheet ID or a full URL: when the argument contains
+    # "spreadsheets/d/<id>", keep only the <id> path segment.
+    id_match = match(r"spreadsheets/d/([^/]+)", spreadsheet_id)
+    if id_match !== nothing
+        spreadsheet_id = id_match.captures[1]
+    end
+
+    # Use a default range if none is provided.
+    if isempty(range)
+        range = "A1"
+    end
+
+    # The append endpoint is addressed by the sheet name alone (so `range` is not used
+    # when appending); the update endpoint is addressed as "sheet!range".
+    loc = append ? sheet : sheet * "!" * range
+    loc = HTTP.escapeuri(loc)
+
+    # Authenticate with the access token currently held in `GSHEET_AUTH`.
+    headers = ["Authorization" => "Bearer  $(GSHEET_AUTH[].access_token)", "Content-Type" => "application/json"]
+
+    # Convert the DataFrame to a vector of rows, replacing missing values.
+    col_names = [string(c) for c in names(data)]
+    rows_data = [map(x -> ismissing(x) ? missing_value : x, collect(row)) for row in eachrow(data)]
+    # If appending, do not include the header; otherwise, prepend the header.
+    rows = append ? rows_data : vcat([col_names], rows_data)
+    body = Dict("values" => rows)
+
+    if append
+        # For appending data, use the append endpoint with POST.
+        url = "https://sheets.googleapis.com/v4/spreadsheets/$spreadsheet_id/values/$loc:append?valueInputOption=USER_ENTERED&insertDataOption=INSERT_ROWS"
+        response = HTTP.post(url, headers, JSON3.write(body))
+    else
+        # For updating (overwriting) data, use the update endpoint with PUT.
+        url = "https://sheets.googleapis.com/v4/spreadsheets/$spreadsheet_id/values/$loc?valueInputOption=USER_ENTERED"
+        response = HTTP.put(url, headers, JSON3.write(body))
+    end
+
+    if response.status != 200
+        error("Failed to write to Google Sheets: $(String(response.body))")
+    end
+
+    # If not appending, clear out any cells below the new data. The clear range always
+    # starts at column A and ends at row `last_clear_row`.
+    last_clear_row = 1000
+    # First row after the written block (header included in `rows`).
+    first_clear_row = length(rows) + 1
+    # Skip the clear when no valid range can be built: a DataFrame without columns has
+    # no last-column letter, and a start row beyond `last_clear_row` would produce an
+    # inverted range (e.g. "A1001:C1000") that is at best rejected and may overlap the
+    # rows that were just written.
+    if !append && !isempty(col_names) && first_clear_row <= last_clear_row
+        # Helper function: convert a 1-indexed column number to its corresponding
+        # spreadsheet letter(s) (1 -> "A", 26 -> "Z", 27 -> "AA").
+        function col_letter(n::Int)
+            s = ""
+            while n > 0
+                offset = (n - 1) % 26
+                s = Char(offset + 'A') * s
+                n = (n - 1) ÷ 26
+            end
+            return s
+        end
+        last_col = col_letter(length(col_names))
+        # `$(sheet)` needs the parentheses: `$sheet!` would interpolate a variable
+        # named `sheet!` instead of `sheet` followed by a literal "!".
+        clear_range = "$(sheet)!A$(first_clear_row):$(last_col)$(last_clear_row)"
+        clear_range = HTTP.escapeuri(clear_range)
+        clear_url = "https://sheets.googleapis.com/v4/spreadsheets/$spreadsheet_id/values/$clear_range:clear"
+        clear_response = HTTP.post(clear_url, headers, "{}")
+        if clear_response.status != 200
+            error("Failed to clear remaining cells: $(String(clear_response.body))")
+        end
+    end
+
+    return response
+end