Skip to content

Commit

Permalink
generating data.json. next see how to visualize
Browse files Browse the repository at this point in the history
  • Loading branch information
galabovaa committed Jan 29, 2025
1 parent af2aa02 commit 6781b80
Show file tree
Hide file tree
Showing 3 changed files with 4 additions and 106 deletions.
1 change: 1 addition & 0 deletions repositories/contributor_prs_over_time.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"dates":["2018-03","2018-04","2018-05","2018-09","2018-10","2018-11","2018-12","2019-01","2019-02","2019-03","2019-04","2019-05","2019-06","2019-07","2019-08","2019-09","2019-10","2019-11","2019-12","2020-01","2020-02","2020-03","2020-04","2020-05","2020-06","2020-07","2020-08","2020-09","2020-10","2020-12","2021-01","2021-02","2021-03","2021-04","2021-05","2021-06","2021-07","2021-08","2021-09","2021-10","2021-11","2021-12","2022-01","2022-02","2022-03","2022-04","2022-05","2022-06","2022-07","2022-08","2022-09","2022-10","2022-11","2022-12","2023-01","2023-02","2023-03","2023-04","2023-05","2023-06","2023-07","2023-08","2023-09","2023-10","2023-11","2023-12","2024-01","2024-02","2024-03","2024-04","2024-05","2024-06","2024-07","2024-08","2024-09","2024-10","2024-11","2024-12","2025-01"],"counts":[[4,4],[9,0],[3,0],[4,0],[8,1],[10,0],[17,2],[6,0],[32,8],[12,0],[6,0],[11,0],[17,0],[8,0],[4,0],[6,1],[13,1],[5,0],[7,0],[15,4],[11,0],[14,2],[35,4],[24,1],[9,0],[6,0],[4,0],[5,0],[10,1],[13,5],[2,0],[14,7],[12,6],[1,0],[6,0],[7,0],[21,9],[10,1],[8,1],[16,2],[10,0],[20,0],[35,2],[43,2],[11,1],[29,1],[26,6],[8,2],[13,2],[7,2],[3,0],[16,3],[16,1],[18,0],[28,3],[42,3],[26,0],[21,3],[18,2],[12,6],[15,0],[10,3],[25,2],[21,2],[28,0],[7,1],[26,3],[21,0],[30,8],[9,0],[28,3],[21,3],[20,3],[17,4],[26,3],[32,0],[22,1],[16,4],[31,2]]}
1 change: 1 addition & 0 deletions repositories/data.json

Large diffs are not rendered by default.

108 changes: 2 additions & 106 deletions scripts/repositories.jl
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ if isfile(joinpath(@__DIR__, "dev.env"))
println("Loaded GH secret IG")
end

const DATA_DIR = joinpath(dirname(@__DIR__), "docs", "repositories")
const DATA_DIR = joinpath(dirname(@__DIR__), "repositories")

function Repository(repo; since, until, my_auth)
println("Getting : ", repo)
Expand All @@ -45,50 +45,6 @@ function get_repos(since, until)
)
end

"""
    download_stats(file)

Download `<file>.csv.gz` from
`https://julialang-logs.s3.amazonaws.com/public_outputs/current/` into the
`data` directory that sits next to this script's directory, and return the
path of the downloaded file.

The `data` directory is created if it does not already exist.
"""
function download_stats(file)
    url = "https://julialang-logs.s3.amazonaws.com/public_outputs/current/$(file).csv.gz"
    data_dir = joinpath(dirname(@__DIR__), "data")
    # `Downloads.download` opens the output path for writing and does not
    # create missing parent directories, so make sure the target exists
    # (for example on a fresh clone where `data/` is absent).
    mkpath(data_dir)
    output = joinpath(data_dir, "$(file).csv.gz")
    Downloads.download(url, output)
    return output
end

"""
    load_stats(file, uuids)

Fetch the statistics file named `file` via `download_stats`, read it as a
`DataFrame`, and left-join it on `:package_uuid` against the `uuids` mapping
(keys are used as `package_uuid`, values as `name`).

Only rows that satisfy all of the following are kept:

  * `client_type` is present and equal to `"user"`;
  * the joined `name` is present and contains `"ERGO-Code/"`;
  * `status` is one of `200`, `301` or `302`.

Return a `DataFrame` with the columns `:name`, `:date` and `:request_count`.
"""
function load_stats(file, uuids)
    csv_path = download_stats(file)
    raw = CSV.read(csv_path, DataFrames.DataFrame)
    lookup = DataFrames.DataFrame(
        package_uuid = collect(keys(uuids)),
        name = collect(values(uuids)),
    )
    joined = DataFrames.leftjoin(raw, lookup; on = :package_uuid)
    # Row predicate written as guard clauses; the checks run in the same
    # order as a short-circuiting `&&` chain would evaluate them.
    function is_wanted(row)
        ismissing(row.client_type) && return false
        row.client_type == "user" || return false
        ismissing(row.name) && return false
        occursin("ERGO-Code/", row.name) || return false
        return row.status in (200, 301, 302)
    end
    filter!(is_wanted, joined)
    return DataFrames.select(joined, [:name, :date, :request_count])
end

"""
    get_historical_downloads(filename = joinpath(DATA_DIR, "download_stats.json"))

Parse the JSON file `filename` and flatten it into a `DataFrame` with the
columns `name`, `date` and `request_count_sum`.

Each top-level key of the JSON object is treated as a package name and is
prefixed with `"ERGO-Code/"`; its value must provide the entries `"dates"`
and `"requests"`, which supply the `date` and `request_count_sum` columns.
"""
function get_historical_downloads(
    filename::String = joinpath(DATA_DIR, "download_stats.json"),
)
    stored = JSON.parsefile(filename; use_mmap = false)
    names = String[]
    dates = Dates.Date[]
    totals = Int[]
    for (pkg, record) in stored
        counts = record["requests"]
        # One copy of the qualified package name per recorded request count.
        label = "ERGO-Code/$pkg"
        append!(names, fill(label, length(counts)))
        append!(dates, Dates.Date.(record["dates"]))
        append!(totals, counts)
    end
    return DataFrames.DataFrame(
        name = names,
        date = dates,
        request_count_sum = totals,
    )
end

function get_pkg_uuids()
pkg_uuids = Dict{String,String}()
r = first(Pkg.Registry.reachable_registries())
Expand All @@ -101,28 +57,6 @@ function get_pkg_uuids()
return pkg_uuids
end

"""
    update_download_statistics()

Refresh `download_stats.json` inside `DATA_DIR`.

Steps:

 1. Load the `"package_requests_by_region_by_date"` statistics for the
    packages returned by `get_pkg_uuids`.
 2. Sum `request_count` per `(name, date)` pair.
 3. Append the result to the table from `get_historical_downloads`, drop
    duplicate rows and sort by name and date.
 4. Write one JSON entry per package (name with the `"ERGO-Code/"` prefix
    removed) holding its `"dates"` and `"requests"` vectors.

Returns `nothing`.
"""
function update_download_statistics()
    uuids = get_pkg_uuids()
    raw = load_stats("package_requests_by_region_by_date", uuids)
    daily = combine(groupby(raw, [:name, :date]), :request_count => sum)
    sort!(daily)
    daily.name = String.(daily.name)
    history = get_historical_downloads()
    append!(history, daily)
    unique!(history)
    sort!(history, [:name, :date])
    payload = Dict{String,Dict{String,Any}}()
    for group in groupby(history, :name)
        # Every row in a group shares the same name, so the first row suffices.
        pkg = replace(group[1, :name], "ERGO-Code/" => "")
        payload[pkg] = Dict{String,Any}(
            "dates" => string.(collect(group.date)),
            "requests" => collect(group.request_count_sum),
        )
    end
    target = joinpath(DATA_DIR, "download_stats.json")
    open(target, "w") do io
        return write(io, JSON.json(payload))
    end
    return
end

function update_package_statistics()
since = "2013-01-01T00:00:00"
Expand Down Expand Up @@ -246,50 +180,12 @@ function prs_by_user(user)
return prs_by_user
end

# function state_of_jump_statistics()
# old_date = Dates.today() - Dates.Year(1)
# # Downloads
# df = get_historical_downloads()
# n_downloads = sum(df[df.date.>=old_date, :].request_count_sum)
# # PRs and issues
# data = JSON.parsefile(joinpath(DATA_DIR, "data.json"))
# prs_opened, issues_opened, contributors = 0, 0, Set{String}()
# for (pkg, items) in data, item in items
# if Dates.DateTime(item["date"]) >= old_date && item["type"] == "opened"
# if item["is_pr"]
# push!(contributors, item["user"])
# prs_opened += 1
# else
# issues_opened += 1
# end
# end
# end
# open(joinpath(DATA_DIR, "summary.json"), "w") do io
# summary = Dict(
# "n_downloads" => n_downloads,
# "prs_opened" => prs_opened,
# "issues_opened" => issues_opened,
# "num_contributors" => length(contributors),
# )
# write(io, JSON.json(summary))
# return
# end
# println("""
# Downloads : >$n_downloads
# Pull requests opened : $prs_opened
# Issues opened : $issues_opened
# Unique contributors : $(length(contributors))
# """)
# return
# end

"""
    has_arg(arg)

Return `true` if any element of the global `ARGS` vector is `isequal` to
`arg`, and `false` otherwise.
"""
function has_arg(arg)
    for candidate in ARGS
        isequal(candidate, arg) && return true
    end
    return false
end

# Script entry point: when `--update` is present on the command line, run
# `update_package_statistics` followed by `update_contributor_prs_over_time`.
# The commented-out calls below are currently disabled.
if has_arg("--update")
# update_download_statistics()
update_package_statistics()
# update_contributor_prs_over_time()
# # state_of_jump_statistics()
update_contributor_prs_over_time()
end


Expand Down

0 comments on commit 6781b80

Please sign in to comment.