-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #3 from PharmCat/dev_sm0.7
StatsModels 0.7
- Loading branch information
Showing
10 changed files
with
525 additions
and
469 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,34 +1,35 @@ | ||
name = "MetidaBase" | ||
uuid = "075456b7-4006-432f-9324-2f8453996c49" | ||
authors = ["PharmCat <[email protected]> and contributors"] | ||
version = "0.10.2" | ||
version = "0.11.0" | ||
|
||
[deps] | ||
CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597" | ||
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" | ||
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" | ||
CPUSummary = "2a0fbf3d-bb9c-48f3-b0a9-814d99fd7ab9" | ||
PrettyTables = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d" | ||
Requires = "ae029012-a4dd-5104-9daa-d747884805df" | ||
SnoopPrecompile = "66db9d55-30c0-4569-8b51-7e840670fc0c" | ||
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" | ||
StatsModels = "3eaba693-59b7-5ba5-a881-562e759f1c8d" | ||
Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" | ||
PrettyTables = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d" | ||
TypedTables = "9d95f2ec-7b3d-5a63-8d20-e2491e220bb9" | ||
CPUSummary = "2a0fbf3d-bb9c-48f3-b0a9-814d99fd7ab9" | ||
|
||
|
||
[compat] | ||
CategoricalArrays = "0.8, 0.9, 0.10" | ||
DataFrames = "1" | ||
Distributions = "0.20, 0.21, 0.22, 0.23, 0.24, 0.25" | ||
CategoricalArrays = "0.9, 0.10" | ||
CPUSummary = "0.1" | ||
PrettyTables = "2" | ||
Requires = "1" | ||
SnoopPrecompile = "1" | ||
StatsBase = "0.29, 0.30, 0.31, 0.32, 0.33" | ||
StatsModels = "0.6" | ||
StatsModels = "0.6, 0.7" | ||
Tables = "1" | ||
PrettyTables = "2" | ||
TypedTables = "1, 2" | ||
CPUSummary = "0.1" | ||
julia = "1" | ||
|
||
[extras] | ||
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" | ||
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" | ||
TypedTables = "9d95f2ec-7b3d-5a63-8d20-e2491e220bb9" | ||
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b" | ||
|
||
[targets] | ||
test = ["Test", "CSV"] | ||
test = ["DataFrames", "Test", "TypedTables", "CSV"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,22 +1,27 @@ | ||
# Metida | ||
# Copyright © 2019-2020 Vladimir Arnautov aka PharmCat <[email protected]> | ||
|
||
__precompile__(true) | ||
module MetidaBase | ||
|
||
using Tables, PrettyTables, StatsBase, StatsModels, Distributions, CategoricalArrays#, Reexport | ||
using Tables, PrettyTables, StatsModels, CategoricalArrays, Requires#, Reexport | ||
|
||
#@reexport using StatsModels | ||
import DataFrames, TypedTables | ||
import StatsBase | ||
import StatsModels: StatisticalModel, RegressionModel | ||
import Tables: istable, columnaccess, columns, getcolumn, columnnames, schema, rowaccess, rows | ||
import CPUSummary: num_cores | ||
|
||
import Base: getindex, length, ht_keyindex, show, pushfirst!, iterate, size, findfirst | ||
|
||
include("abstracttype.jl") | ||
include("m_tables.jl") | ||
include("dataset.jl") | ||
include("types.jl") | ||
include("utils.jl") | ||
include("iterators.jl") | ||
include("precompile.jl") | ||
|
||
# Module initialisation hook: registers optional glue code through Requires.jl. | ||
# Each `@require` line names a package (by UUID) and the file to include for it; | ||
# per Requires.jl semantics the include runs once that package is loaded. | ||
function __init__() | ||
@require DataFrames="a93c6f00-e57d-5684-b7b6-d8193f3e46c0" include("dataframes.jl") | ||
@require TypedTables="9d95f2ec-7b3d-5a63-8d20-e2491e220bb9" include("typedtables.jl") | ||
end | ||
|
||
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
# DataFrames.jl interface | ||
# Convert any AbstractDataSet to a DataFrame: the dataset is first turned into a
# table by `metida_table_`, and all keyword arguments are forwarded to it unchanged.
DataFrames.DataFrame(obj::AbstractDataSet; kwargs...) =
    DataFrames.DataFrame(metida_table_(obj; kwargs...))
|
||
# Convert a MetidaTable to a DataFrame built from its wrapped `table` field.
DataFrames.DataFrame(obj::MetidaTable) = DataFrames.DataFrame(obj.table)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,229 @@ | ||
################################################################################ | ||
# DATASET | ||
################################################################################ | ||
# Container holding a vector of items; `T` is the concrete item type and must
# be a subtype of AbstractData.
struct DataSet{T<:AbstractData} <: AbstractDataSet{AbstractData}
    ds::Vector{T}
end

# Return the underlying item vector (the `ds` field itself, not a copy).
getdata(d::DataSet) = d.ds
|
||
# Return the value stored under key `i` in `d`, or `missing` when the key is absent.
#
# Uses the public `get` API instead of the private `Base.ht_keyindex` helper and
# the internal `vals` field of `Dict`, which are implementation details of Base.
# The `i::K` constraint is kept: the key must have the dictionary's key type.
@inline function getindormiss(d::Dict{K}, i::K) where K
    return get(d, i, missing)
end
|
||
# Tables.jl traits for datasets: both `istable` and `rowaccess` report `false`
# for every AbstractDataSet.
function Tables.istable(::AbstractDataSet)
    return false
end

function Tables.rowaccess(::AbstractDataSet)
    return false
end
################################################################################ | ||
# BASE | ||
################################################################################ | ||
|
||
# Positional access: return the `ind`-th item of the dataset.
Base.getindex(d::DataSet, ind::Int) = getdata(d)[ind]
|
||
# Range access: return a new DataSet with the items selected by `inds`.
#
# Accepts any integer unit range rather than only `UnitRange{Int64}`, so that
# `d[1:3]` also dispatches here when `Int` is 32-bit, and `Base.OneTo` works too.
Base.getindex(d::DataSet, inds::AbstractUnitRange{<:Integer}) = subset(d, inds)
|
||
|
||
# Tolerant lookup of `ind` in the `result` field of `rd`, delegated to
# `getindormiss` (which supplies the value used for an absent key).
@inline getresultindex_safe(rd::AbstractResultData, ind::Symbol) =
    getindormiss(rd.result, ind)

# Plain indexing of `rd.result` by `ind`, with no fallback for an absent key.
@inline getresultindex_unsafe(rd::AbstractResultData, ind::Symbol) = rd.result[ind]
|
||
# Result `ind` of the `col`-th item, using the tolerant lookup.
Base.getindex(d::DataSet{<:AbstractResultData}, col::Int, ind) =
    getresultindex_safe(d[col], ind)
# Result `ind` of every item in the dataset, as a vector.
#
# When every item has the key, plain indexing is broadcast over the items;
# as soon as one item lacks it, the tolerant lookup is used for all of them.
# Key presence is tested with the public `haskey` instead of the private
# `Base.ht_keyindex` helper.
function Base.getindex(d::DataSet{T}, col::Colon, ind) where T <: AbstractResultData
    items = getdata(d)
    if all(x -> haskey(x.result, ind), items)
        return getresultindex_unsafe.(items, ind)
    end
    return getresultindex_safe.(items, ind)
end
|
||
# First item of the dataset (delegates to the underlying vector).
function Base.first(d::DataSet)
    return first(getdata(d))
end

# Number of items in the dataset.
Base.length(d::DataSet) = length(getdata(d))
|
||
# Iteration protocol: both methods forward to the underlying item vector, so
# iterating a DataSet visits its items in order.
Base.iterate(d::DataSet) = Base.iterate(getdata(d))

Base.iterate(d::DataSet, i::Int) = Base.iterate(getdata(d), i)
|
||
# Apply `f` to every item and wrap the mapped vector in a new DataSet.
function Base.map(f, d::DataSet)
    mapped = map(f, getdata(d))
    return DataSet(mapped)
end
|
||
################################################################################ | ||
# BASE | ||
################################################################################ | ||
# sort! | ||
################################################################################ | ||
# Compare two dictionaries on a collection of keys `k`.
#
# The values found under each key (or `missing` when a key is absent) are
# gathered into one vector per dictionary and the vectors are compared with
# `isless`. The keys are iterated rather than indexed: a `Set` is accepted by
# the signature but does not support `k[i]`.
function islessdict(a::Dict{A1,A2}, b::Dict{B1,B2}, k::Union{AbstractVector, Set}) where {A1, A2, B1, B2}
    l = length(k)
    av = Vector{Union{Missing, A2}}(undef, l)
    bv = Vector{Union{Missing, B2}}(undef, l)
    for (i, key) in enumerate(k)
        av[i] = get(a, key, missing)
        bv[i] = get(b, key, missing)
    end
    return isless(av, bv)
end

# Compare two dictionaries on a single key `k`; an absent key counts as `missing`.
function islessdict(a::Dict, b::Dict, k)
    return isless(get(a, k, missing), get(b, k, missing))
end
# Sort the dataset in place and return it.
#
# Unless overridden by the caller, items are keyed by their id dictionary
# (`getid`) and two keys are compared with `islessdict` on `k`. The remaining
# keyword arguments are passed straight to `sort!` on the item vector.
function Base.sort!(d::DataSet{T}, k; alg::Base.Algorithm = QuickSort, lt=nothing, by=nothing, rev::Bool=false, order::Base.Ordering = Base.Forward) where T <: Union{AbstractIdData, AbstractIDResult}
    keyfn = by === nothing ? getid : by
    lessfn = lt === nothing ? ((x, y) -> islessdict(x, y, k)) : lt
    sort!(getdata(d); alg = alg, lt = lessfn, by = keyfn, rev = rev, order = order)
    return d
end
|
||
################################################################################ | ||
# filter | ||
# filter! | ||
################################################################################ | ||
# Return a new DataSet containing the items for which `f` returns `true`;
# the original dataset is left untouched.
function Base.filter(f::Function, d::DataSet)
    kept = filter(f, getdata(d))
    return DataSet(kept)
end
# Remove, in place, the items for which `f` returns `false`; returns `d` itself.
Base.filter!(f::Function, d::DataSet) = (filter!(f, getdata(d)); d)
|
||
################################################################################ | ||
# Base.findfirst | ||
################################################################################ | ||
|
||
# Index of the first item whose id dictionary contains every pair of `sort`
# (the result of `findfirst` on the item vector).
function Base.findfirst(d::DataSet{<: AbstractIdData}, sort::Dict)
    return findfirst(getdata(d)) do item
        issubset(sort, getid(item))
    end
end
|
||
################################################################################ | ||
# SELF | ||
################################################################################ | ||
|
||
# Whole id dictionary of an item: an AbstractIdData carries it in its `id`
# field, an AbstractIDResult reaches it through `data.id`.
getid(idd::AbstractIdData) = idd.id

getid(asr::AbstractIDResult) = asr.data.id

# Tolerant lookup of one id key, delegated to `getindormiss`.
function getid_safe(idd::AbstractIdData, ind)
    return getindormiss(idd.id, ind)
end

function getid_safe(asr::AbstractIDResult, ind)
    return getindormiss(asr.data.id, ind)
end

# Plain indexing of one id key, with no fallback for an absent key.
function getid_unsafe(idd::AbstractIdData, ind)
    return idd.id[ind]
end

function getid_unsafe(asr::AbstractIDResult, ind)
    return asr.data.id[ind]
end

# Two-argument `getid` is the tolerant lookup.
getid(idd::AbstractIdData, ind) = getid_safe(idd, ind)

getid(asr::AbstractIDResult, ind) = getid_safe(asr, ind)
|
||
# Id value `ind` of the `col`-th item of the dataset.
getid(d::DataSet{<:Union{AbstractIdData, AbstractIDResult}}, col::Int, ind) =
    getid(d[col], ind)
# Id value `ind` of every item in the dataset, as a vector.
#
# When every item has the key, plain indexing is broadcast over the items;
# as soon as one item lacks it, the tolerant lookup is used for all of them.
# Key presence is tested with the public `haskey` instead of the private
# `Base.ht_keyindex` helper.
function getid(d::DataSet{T}, col::Colon, ind) where T <: AbstractIdData
    items = getdata(d)
    if all(x -> haskey(x.id, ind), items)
        return getid_unsafe.(items, ind)
    end
    return getid_safe.(items, ind)
end
# Id value `ind` of every result item in the dataset, as a vector.
#
# The id dictionary of a result item lives in `data.id`. When every item has
# the key, plain indexing is broadcast over the items; as soon as one item
# lacks it, the tolerant lookup is used for all of them. Key presence is tested
# with the public `haskey` instead of the private `Base.ht_keyindex` helper.
function getid(d::DataSet{T}, col::Colon, ind) where T <: AbstractIDResult
    items = getdata(d)
    if all(x -> haskey(x.data.id, ind), items)
        return getid_unsafe.(items, ind)
    end
    return getid_safe.(items, ind)
end
|
||
|
||
# Collect the distinct combinations of the id keys in `list` found in the dataset.
#
# Items whose id dictionary lacks any of the keys are skipped. Each remaining
# item contributes a dictionary restricted to `list`, which is stored only if
# an equal one has not been seen yet (first-seen order is kept).
function uniqueidlist(d::DataSet{T}, list::AbstractVector{Symbol}) where T <: AbstractIdData
    found = Vector{Dict}(undef, 0)
    for item in d
        id = getid(item)
        issubset(list, keys(id)) || continue
        candidate = Dict(k => id[k] for k in list)
        candidate in found || push!(found, candidate)
    end
    return found
end
|
||
# Collect the distinct values of the single id key `list` found in the dataset.
#
# Items without that key are skipped. Each remaining item contributes a
# one-entry dictionary `list => value`, stored only if an equal one has not
# been seen yet (first-seen order is kept).
function uniqueidlist(d::DataSet{T}, list::Symbol) where T <: AbstractIdData
    found = Vector{Dict}(undef, 0)
    for item in d
        id = getid(item)
        (list in keys(id)) || continue
        candidate = Dict(list => id[list])
        candidate in found || push!(found, candidate)
    end
    return found
end
#= | ||
function uniqueidlist(d::DataSet{T}) where T <: AbstractIdData | ||
dl = Vector{Dict}(undef, 0) | ||
for i in d | ||
id = getid(i) | ||
if id ∉ dl push!(dl, id) end | ||
end | ||
dl | ||
end | ||
=# | ||
# With no key list given (`nothing`) there is nothing to collect: return `nothing`.
uniqueidlist(::DataSet{<:AbstractIdData}, ::Nothing) = nothing
|
||
|
||
# New DataSet with the items whose id dictionary contains every pair of `sort`.
# When nothing matches, an empty DataSet with the same item type is returned.
function subset(d::DataSet{T}, sort::Dict) where T <: AbstractIdData
    items = getdata(d)
    matches = findall(x -> issubset(sort, getid(x)), items)
    isempty(matches) && return DataSet(T[])
    return DataSet(items[matches])
end
# New DataSet with the result items whose id dictionary contains every pair of
# `sort`. When nothing matches, an empty DataSet with the same item type is returned.
function subset(d::DataSet{T}, sort::Dict) where T <: AbstractIDResult
    items = getdata(d)
    matches = findall(x -> issubset(sort, getid(x)), items)
    isempty(matches) && return DataSet(T[])
    return DataSet(items[matches])
end
# New DataSet with the items selected by indexing the item vector with `inds`.
subset(d::DataSet, inds) = DataSet(getdata(d)[inds])
################################################################################ | ||
# metida_table from DataSet{AbstractIDResult} | ||
################################################################################ | ||
# Build a table from a dataset of result items: id columns first, then result columns.
#
# Keywords:
# - `results`: result names to keep (a single Symbol or String is accepted);
#   names not present in the dataset are dropped by the intersection.
# - `order`:   when given, the result names are passed through `sortbyvec!` with it.
# - `ids`:     id names to keep (a single Symbol or String is accepted); every
#   requested id must exist in the dataset, otherwise an error is raised.
function metida_table_(obj::DataSet{RD}; order = nothing, results = nothing, ids = nothing) where RD <: AbstractIDResult
    # Gather every id key and every result key that occurs anywhere in the dataset.
    head = first(obj)
    idset = Set(keys(head.data.id))
    resset = Set(keys(head.result))
    for i in 2:length(obj)
        item = obj[i]
        union!(idset, Set(keys(item.data.id)))
        union!(resset, Set(keys(item.result)))
    end

    # A single name is shorthand for a one-element list.
    if results isa Symbol
        results = [results]
    elseif results isa String
        results = [Symbol(results)]
    end
    rescols = isnothing(results) ? collect(resset) : collect(intersect(resset, results))
    ressetl = isnothing(order) ? rescols : sortbyvec!(rescols, order)

    if !isnothing(ids)
        if ids isa Symbol
            ids = [ids]
        elseif ids isa String
            ids = [Symbol(ids)]
        end
        issubset(ids, idset) || error("Some id not in dataset!")
        idset = intersect(idset, ids)
    end

    # One column per id key and one per result key, then join the two tables.
    idtable = metida_table_((getid(obj, :, c) for c in idset)...; names = idset)
    restable = metida_table_((obj[:, c] for c in ressetl)...; names = ressetl)
    return merge(idtable, restable)
end
################################################################################ |
Oops, something went wrong.
31de92f
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@JuliaRegistrator register
31de92f
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Registration pull request created: JuliaRegistries/General/79583
After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.
This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via: