Skip to content

Commit

Permalink
Merge pull request #3 from PharmCat/dev_sm0.7
Browse files Browse the repository at this point in the history
StatsModels 0.7
  • Loading branch information
PharmCat authored Mar 14, 2023
2 parents 7fcb7d7 + 3f0820b commit 31de92f
Show file tree
Hide file tree
Showing 10 changed files with 525 additions and 469 deletions.
29 changes: 15 additions & 14 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,34 +1,35 @@
name = "MetidaBase"
uuid = "075456b7-4006-432f-9324-2f8453996c49"
authors = ["PharmCat <[email protected]> and contributors"]
version = "0.10.2"
version = "0.11.0"

[deps]
CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597"
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
CPUSummary = "2a0fbf3d-bb9c-48f3-b0a9-814d99fd7ab9"
PrettyTables = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d"
Requires = "ae029012-a4dd-5104-9daa-d747884805df"
SnoopPrecompile = "66db9d55-30c0-4569-8b51-7e840670fc0c"
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
StatsModels = "3eaba693-59b7-5ba5-a881-562e759f1c8d"
Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
PrettyTables = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d"
TypedTables = "9d95f2ec-7b3d-5a63-8d20-e2491e220bb9"
CPUSummary = "2a0fbf3d-bb9c-48f3-b0a9-814d99fd7ab9"


[compat]
CategoricalArrays = "0.8, 0.9, 0.10"
DataFrames = "1"
Distributions = "0.20, 0.21, 0.22, 0.23, 0.24, 0.25"
CategoricalArrays = "0.9, 0.10"
CPUSummary = "0.1"
PrettyTables = "2"
Requires = "1"
SnoopPrecompile = "1"
StatsBase = "0.29, 0.30, 0.31, 0.32, 0.33"
StatsModels = "0.6"
StatsModels = "0.6, 0.7"
Tables = "1"
PrettyTables = "2"
TypedTables = "1, 2"
CPUSummary = "0.1"
julia = "1"

[extras]
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
TypedTables = "9d95f2ec-7b3d-5a63-8d20-e2491e220bb9"
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"

[targets]
test = ["Test", "CSV"]
test = ["DataFrames", "Test", "TypedTables", "CSV"]
15 changes: 10 additions & 5 deletions src/MetidaBase.jl
Original file line number Diff line number Diff line change
@@ -1,22 +1,27 @@
# Metida
# Copyright © 2019-2020 Vladimir Arnautov aka PharmCat <[email protected]>

__precompile__(true)
module MetidaBase

using Tables, PrettyTables, StatsBase, StatsModels, Distributions, CategoricalArrays#, Reexport
using Tables, PrettyTables, StatsModels, CategoricalArrays, Requires#, Reexport

#@reexport using StatsModels
import DataFrames, TypedTables
import StatsBase
import StatsModels: StatisticalModel, RegressionModel
import Tables: istable, columnaccess, columns, getcolumn, columnnames, schema, rowaccess, rows
import CPUSummary: num_cores

import Base: getindex, length, ht_keyindex, show, pushfirst!, iterate, size, findfirst

include("abstracttype.jl")
include("m_tables.jl")
include("dataset.jl")
include("types.jl")
include("utils.jl")
include("iterators.jl")
include("precompile.jl")

# Module initialization hook.
# Registers the optional integrations through Requires.jl: each `@require` line
# names a package together with its UUID and the source file that holds the glue
# code for it (`dataframes.jl` for DataFrames, `typedtables.jl` for TypedTables).
# NOTE(review): per Requires.jl the include is expected to run only once the user
# loads the named package — confirm against the Requires.jl documentation.
function __init__()
@require DataFrames="a93c6f00-e57d-5684-b7b6-d8193f3e46c0" include("dataframes.jl")
@require TypedTables="9d95f2ec-7b3d-5a63-8d20-e2491e220bb9" include("typedtables.jl")
end

end
8 changes: 8 additions & 0 deletions src/dataframes.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# DataFrames.jl interface
# Convert any `AbstractDataSet` to a `DataFrame`.
# The dataset is first turned into a table with `metida_table_` (all keyword
# arguments are forwarded to it), and that table is handed to the DataFrame
# constructor.
function DataFrames.DataFrame(obj::AbstractDataSet; kwargs...)
    tbl = metida_table_(obj; kwargs...)
    return DataFrames.DataFrame(tbl)
end

# Convert a `MetidaTable` to a `DataFrame` by passing its wrapped `table` field
# to the DataFrame constructor.
DataFrames.DataFrame(obj::MetidaTable) = DataFrames.DataFrame(obj.table)
229 changes: 229 additions & 0 deletions src/dataset.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,229 @@
################################################################################
# DATASET
################################################################################
# Container holding a vector of data items.
# `T` is the element type and must be a subtype of `AbstractData`.
# Note that the declared supertype is `AbstractDataSet{AbstractData}` for every
# `T`, not `AbstractDataSet{T}`.
struct DataSet{T <: AbstractData} <: AbstractDataSet{AbstractData}
# The stored items.
ds::Vector{T}
end

# Return the vector of items stored in the dataset (the `ds` field itself, not a copy).
getdata(d::DataSet) = d.ds

# Return the value stored in `d` under key `i`, or `missing` when the key is absent.
#
# Arguments:
# - `d`: dictionary with key type `K`.
# - `i`: key to look up; must be of type `K` (a key of another type is a MethodError).
#
# Uses the public `get` API rather than the private `Base.ht_keyindex` helper
# and the internal `vals` field of `Dict`, which are not part of Julia's stable
# interface.
@inline function getindormiss(d::Dict{K}, i::K) where K
    return get(d, i, missing)
end

# Tables.jl traits: an `AbstractDataSet` is declared NOT to be a table ...
Tables.istable(::AbstractDataSet) = false

# ... and NOT to support row access.
Tables.rowaccess(::AbstractDataSet) = false
################################################################################
# BASE
################################################################################

# `d[ind]`: return the single item stored at position `ind`.
Base.getindex(d::DataSet, ind::Int) = d.ds[ind]

# `d[a:b]`: return a new `DataSet` with the items at positions `inds`
# (delegates to `subset`).
function Base.getindex(d::DataSet, inds::UnitRange{Int64})
    return subset(d, inds)
end


# Look up `ind` in the `result` dictionary of `rd`; yields `missing` when the
# key is not present (see `getindormiss`).
@inline getresultindex_safe(rd::AbstractResultData, ind::Symbol) =
    getindormiss(rd.result, ind)
# Look up `ind` in the `result` dictionary of `rd` by plain indexing; unlike
# `getresultindex_safe` there is no fallback for an absent key.
@inline getresultindex_unsafe(rd::AbstractResultData, ind::Symbol) = rd.result[ind]

# `d[col, ind]`: result value `ind` of the item at position `col`;
# `missing` when that item has no such result key.
function Base.getindex(d::DataSet{<:AbstractResultData}, col::Int, ind)
    item = d[col]
    return getresultindex_safe(item, ind)
end
# `d[:, ind]`: collect result value `ind` from every item of the dataset.
# When every item has the key, plain indexing is broadcast over the items;
# as soon as one item lacks it, the `missing`-tolerant lookup is used for all
# items instead.
function Base.getindex(d::DataSet{<:AbstractResultData}, col::Colon, ind)
    items = d.ds
    everywhere = all(item -> haskey(item.result, ind), items)
    return everywhere ? getresultindex_unsafe.(items, ind) :
                        getresultindex_safe.(items, ind)
end

# First item of the dataset.
function Base.first(d::DataSet)
    return first(getdata(d))
end

# Number of items in the dataset.
Base.length(d::DataSet) = length(getdata(d))

# Start iteration: forwards to iteration over the underlying item vector.
Base.iterate(d::DataSet) = Base.iterate(getdata(d))

# Continue iteration from state `i`: forwards to the underlying item vector.
Base.iterate(d::DataSet, i::Int) = Base.iterate(getdata(d), i)

# Apply `f` to every item and wrap the mapped items in a new `DataSet`.
function Base.map(f, d::DataSet)
    mapped = map(f, getdata(d))
    return DataSet(mapped)
end

################################################################################
# BASE
################################################################################
# sort!
################################################################################
# Compare two dictionaries on a collection of keys.
#
# For every key in `k` the value is taken from `a` and from `b` (`missing` when
# the key is absent, see `getindormiss`); the two resulting vectors are then
# compared with `isless`.
#
# Arguments:
# - `a`, `b`: dictionaries to compare.
# - `k`: keys to compare on, an `AbstractVector` or a `Set`.
#
# The keys are visited by iteration rather than by `k[i]`: a `Set` does not
# support integer indexing although the signature accepts it, and an
# `AbstractVector` is not guaranteed to be 1-based.
function islessdict(a::Dict{A1,A2}, b::Dict{B1,B2}, k::Union{AbstractVector, Set}) where A1 where A2 where B1 where B2
    l = length(k)
    av = Vector{Union{Missing, A2}}(undef, l)
    bv = Vector{Union{Missing, B2}}(undef, l)
    for (i, key) in enumerate(k)
        av[i] = getindormiss(a, key)
        bv[i] = getindormiss(b, key)
    end
    return isless(av, bv)
end
# Compare two dictionaries on a single key `k`; a key absent from a dictionary
# contributes `missing` to the comparison.
islessdict(a::Dict, b::Dict, k) = isless(getindormiss(a, k), getindormiss(b, k))
# Sort the items of the dataset in place and return the dataset.
#
# Arguments:
# - `d`: dataset whose items carry an id (`AbstractIdData` or `AbstractIDResult`).
# - `k`: id key, or collection of id keys, used by the default comparison.
#
# Keywords (all forwarded to `sort!` on the item vector):
# - `by`: defaults to taking the id dictionary of an item via `getid`.
# - `lt`: defaults to `islessdict` evaluated on `k`.
# - `alg`, `rev`, `order`: passed through unchanged.
function Base.sort!(d::DataSet{T}, k; alg::Base.Algorithm = QuickSort, lt=nothing, by=nothing, rev::Bool=false, order::Base.Ordering = Base.Forward) where T <: Union{AbstractIdData, AbstractIDResult}
    keyfn = isnothing(by) ? (x -> getid(x)) : by
    lessfn = isnothing(lt) ? ((x, y) -> islessdict(x, y, k)) : lt
    sort!(d.ds; alg = alg, lt = lessfn, by = keyfn, rev = rev, order = order)
    return d
end

################################################################################
# filter
# filter!
################################################################################
# Return a new `DataSet` containing the items for which `f` is true;
# the original dataset is left untouched.
function Base.filter(f::Function, d::DataSet)
    items = getdata(d)
    return DataSet(items[findall(f, items)])
end
# Remove, in place, every item for which `f` is false and return the dataset.
function Base.filter!(f::Function, d::DataSet)
    items = getdata(d)
    filter!(f, items)
    return d
end

################################################################################
# Base.findfirst
################################################################################

# Return the position of the first item whose id dictionary contains every
# key => value pair of `sort`, or `nothing` when no item matches.
#
# Arguments:
# - `d`: dataset of id-carrying items.
# - `sort`: dictionary of id key => value pairs that must all be present.
#
# The subset operator between `sort` and `getid(x)` was missing from the
# predicate, which made the expression a syntax error; it is written with the
# ASCII `issubset` so that it cannot be lost to encoding problems again.
function Base.findfirst(d::DataSet{<: AbstractIdData}, sort::Dict)
    return findfirst(x -> issubset(sort, getid(x)), getdata(d))
end

################################################################################
# SELF
################################################################################

# Id accessors.
# `AbstractIdData` keeps its id dictionary in the `id` field; `AbstractIDResult`
# keeps it one level deeper, in `data.id`.

# Value of id key `ind`, or `missing` when the key is absent.
getid_safe(idd::AbstractIdData, ind) = getindormiss(idd.id, ind)

# Value of id key `ind` by plain indexing (no fallback for an absent key).
getid_unsafe(idd::AbstractIdData, ind) = idd.id[ind]

# Same as above, for results that wrap id-carrying data.
getid_safe(asr::AbstractIDResult, ind) = getindormiss(asr.data.id, ind)

getid_unsafe(asr::AbstractIDResult, ind) = asr.data.id[ind]

# `getid(x, ind)` uses the safe (missing-tolerant) lookup.
getid(idd::AbstractIdData, ind) = getid_safe(idd, ind)

getid(asr::AbstractIDResult, ind) = getid_safe(asr, ind)

# `getid(x)` returns the whole id dictionary.
getid(idd::AbstractIdData) = idd.id

getid(asr::AbstractIDResult) = asr.data.id

# Id value `ind` of the item at position `col` (`missing` when absent).
getid(d::DataSet{<:Union{AbstractIdData, AbstractIDResult}}, col::Int, ind) =
    getid(d[col], ind)
# Collect id value `ind` from every item of the dataset.
# Plain indexing is broadcast when every item has the key; if at least one
# item lacks it, the `missing`-tolerant lookup is broadcast instead.
function getid(d::DataSet{<:AbstractIdData}, col::Colon, ind)
    items = d.ds
    everywhere = all(item -> haskey(item.id, ind), items)
    return everywhere ? getid_unsafe.(items, ind) : getid_safe.(items, ind)
end
# Collect id value `ind` from every result item of the dataset (the id
# dictionary lives in `data.id`).
# Plain indexing is broadcast when every item has the key; if at least one
# item lacks it, the `missing`-tolerant lookup is broadcast instead.
function getid(d::DataSet{<:AbstractIDResult}, col::Colon, ind)
    items = d.ds
    everywhere = all(item -> haskey(item.data.id, ind), items)
    return everywhere ? getid_unsafe.(items, ind) : getid_safe.(items, ind)
end


# Return the distinct combinations of id values for the keys in `list`.
#
# Arguments:
# - `d`: dataset of id-carrying items.
# - `list`: id keys of interest.
#
# Returns a `Vector{Dict}`; each element maps every key of `list` to its value
# in one item. Items that do not have all keys of `list` are skipped, and each
# combination is recorded only once, in order of first appearance.
#
# The subset test (`list` within the id keys) and the "not already collected"
# test had lost their operators, leaving syntax errors; both are written in
# ASCII form here.
function uniqueidlist(d::DataSet{T}, list::AbstractVector{Symbol}) where T <: AbstractIdData
    dl = Vector{Dict}(undef, 0)
    for i in d
        id = getid(i)
        if issubset(list, keys(id))
            subd = Dict(k => id[k] for k in list)
            if !(subd in dl) push!(dl, subd) end
        end
    end
    return dl
end

# Return the distinct values of the single id key `list`, each wrapped in a
# one-entry dictionary `list => value`.
#
# Arguments:
# - `d`: dataset of id-carrying items.
# - `list`: the id key of interest.
#
# Returns a `Vector{Dict}`. Items without the key are skipped and each value is
# recorded only once, in order of first appearance.
#
# The "not already collected" test had lost its operator, leaving a syntax
# error; it is written in ASCII form here.
function uniqueidlist(d::DataSet{T}, list::Symbol) where T <: AbstractIdData
    dl = Vector{Dict}(undef, 0)
    for i in d
        id = getid(i)
        if list in keys(id)
            subd = Dict(list => id[list])
            if !(subd in dl) push!(dl, subd) end
        end
    end
    return dl
end
#=
function uniqueidlist(d::DataSet{T}) where T <: AbstractIdData
dl = Vector{Dict}(undef, 0)
for i in d
id = getid(i)
if id ∉ dl push!(dl, id) end
end
dl
end
=#
# With no key list given (`nothing`) there is nothing to enumerate: return `nothing`.
uniqueidlist(::DataSet{<:AbstractIdData}, ::Nothing) = nothing


# Return a new `DataSet` with the items whose id dictionary contains every
# key => value pair of `sort`; an empty `DataSet` of the same element type when
# nothing matches.
#
# Arguments:
# - `d`: dataset of id-carrying items.
# - `sort`: dictionary of id key => value pairs that must all be present.
#
# The subset operator between `sort` and `getid(x)` was missing from the
# predicate (syntax error); it is written with the ASCII `issubset`.
function subset(d::DataSet{T}, sort::Dict) where T <: AbstractIdData
    inds = findall(x -> issubset(sort, getid(x)), getdata(d))
    if length(inds) > 0 return DataSet(getdata(d)[inds]) end
    return DataSet(Vector{T}(undef, 0))
end
# Return a new `DataSet` with the result items whose id dictionary contains
# every key => value pair of `sort`; an empty `DataSet` of the same element type
# when nothing matches.
#
# Arguments:
# - `d`: dataset of results that wrap id-carrying data.
# - `sort`: dictionary of id key => value pairs that must all be present.
#
# The subset operator between `sort` and `getid(x)` was missing from the
# predicate (syntax error); it is written with the ASCII `issubset`.
function subset(d::DataSet{T}, sort::Dict) where T <: AbstractIDResult
    inds = findall(x -> issubset(sort, getid(x)), getdata(d))
    if length(inds) > 0 return DataSet(getdata(d)[inds]) end
    return DataSet(Vector{T}(undef, 0))
end
# Return a new `DataSet` with the items selected by indexing the item vector
# with `inds`.
subset(d::DataSet, inds) = DataSet(getdata(d)[inds])
################################################################################
# metida_table from DataSet{AbstractIDResult}
################################################################################
# Build a table from a dataset of id-tagged results: one column per id key
# followed by one column per result key, one row per item.
#
# Arguments:
# - `obj`: dataset of `AbstractIDResult` items; the first item is read
#   unconditionally, so the dataset must not be empty.
#
# Keywords:
# - `order`: when given, the result column names are arranged with
#   `sortbyvec!(names, order)`.
#   NOTE(review): `sortbyvec!` is defined elsewhere — presumably it orders the
#   names according to `order`; confirm.
# - `results`: a `Symbol`, a `String` or a collection of symbols; only result
#   keys that are both requested and present are kept.
# - `ids`: a `Symbol`, a `String` or a collection of symbols restricting the id
#   columns; every requested id must exist in the dataset, otherwise an error
#   is raised.
#
# The id and result tables are built separately with `metida_table_` and then
# combined with `merge`.
#
# The `ids` validation had lost its subset operator (syntax error); it is
# written with the ASCII `issubset`.
function metida_table_(obj::DataSet{RD}; order = nothing, results = nothing, ids = nothing) where RD <: AbstractIDResult
    # Union of the id keys and of the result keys over all items.
    idset = Set(keys(first(obj).data.id))
    resset = Set(keys(first(obj).result))
    for i = 2:length(obj)
        union!(idset, Set(keys(obj[i].data.id)))
        union!(resset, Set(keys(obj[i].result)))
    end
    # Result columns: optionally restricted to `results`, optionally ordered.
    if !isnothing(results)
        if isa(results, Symbol) results = [results] end
        if isa(results, String) results = [Symbol(results)] end
        ressetl = isnothing(order) ? collect(intersect(resset, results)) : sortbyvec!(collect(intersect(resset, results)), order)
    else
        ressetl = isnothing(order) ? collect(resset) : sortbyvec!(collect(resset), order)
    end
    # Id columns: optionally restricted to `ids`, which must all be known.
    if !isnothing(ids)
        if isa(ids, Symbol) ids = [ids] end
        if isa(ids, String) ids = [Symbol(ids)] end
        issubset(ids, idset) || error("Some id not in dataset!")
        idset = intersect(idset, ids)
    end
    mt1 = metida_table_((getid(obj, :, c) for c in idset)...; names = idset)
    mt2 = metida_table_((obj[:, c] for c in ressetl)...; names = ressetl)
    return merge(mt1, mt2)
end
################################################################################
Loading

2 comments on commit 31de92f

@PharmCat
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/79583

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v0.11.0 -m "<description of version>" 31de92f8d3752e41f5301a1ed5a7fa22a22e53b2
git push origin v0.11.0

Please sign in to comment.