diff --git a/Project.toml b/Project.toml index 52e8a97..0d157cc 100644 --- a/Project.toml +++ b/Project.toml @@ -1,34 +1,35 @@ name = "MetidaBase" uuid = "075456b7-4006-432f-9324-2f8453996c49" authors = ["PharmCat and contributors"] -version = "0.10.2" +version = "0.11.0" [deps] CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597" -DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" -Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" +CPUSummary = "2a0fbf3d-bb9c-48f3-b0a9-814d99fd7ab9" +PrettyTables = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d" +Requires = "ae029012-a4dd-5104-9daa-d747884805df" +SnoopPrecompile = "66db9d55-30c0-4569-8b51-7e840670fc0c" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" StatsModels = "3eaba693-59b7-5ba5-a881-562e759f1c8d" Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" -PrettyTables = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d" -TypedTables = "9d95f2ec-7b3d-5a63-8d20-e2491e220bb9" -CPUSummary = "2a0fbf3d-bb9c-48f3-b0a9-814d99fd7ab9" + [compat] -CategoricalArrays = "0.8, 0.9, 0.10" -DataFrames = "1" -Distributions = "0.20, 0.21, 0.22, 0.23, 0.24, 0.25" +CategoricalArrays = "0.9, 0.10" +CPUSummary = "0.1" +PrettyTables = "2" +Requires = "1" +SnoopPrecompile = "1" StatsBase = "0.29, 0.30, 0.31, 0.32, 0.33" -StatsModels = "0.6" +StatsModels = "0.6, 0.7" Tables = "1" -PrettyTables = "2" -TypedTables = "1, 2" -CPUSummary = "0.1" julia = "1" [extras] +DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" +TypedTables = "9d95f2ec-7b3d-5a63-8d20-e2491e220bb9" CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b" [targets] -test = ["Test", "CSV"] +test = ["DataFrames", "Test", "TypedTables", "CSV"] diff --git a/src/MetidaBase.jl b/src/MetidaBase.jl index f0aead7..726efda 100644 --- a/src/MetidaBase.jl +++ b/src/MetidaBase.jl @@ -1,13 +1,10 @@ # Metida # Copyright © 2019-2020 Vladimir Arnautov aka PharmCat - -__precompile__(true) module MetidaBase - using Tables, PrettyTables, StatsBase, StatsModels, Distributions, CategoricalArrays#, Reexport + using Tables, PrettyTables, StatsModels, CategoricalArrays, Requires#, Reexport - #@reexport using StatsModels - import DataFrames, TypedTables + import StatsBase import StatsModels: StatisticalModel, RegressionModel import Tables: istable, columnaccess, columns, getcolumn, columnnames, schema, rowaccess, rows import CPUSummary: num_cores @@ -15,8 +12,16 @@ module MetidaBase import Base: getindex, length, ht_keyindex, show, pushfirst!, iterate, size, findfirst include("abstracttype.jl") + include("m_tables.jl") + include("dataset.jl") include("types.jl") include("utils.jl") include("iterators.jl") + include("precompile.jl") + + function __init__() + @require DataFrames="a93c6f00-e57d-5684-b7b6-d8193f3e46c0" include("dataframes.jl") + @require TypedTables="9d95f2ec-7b3d-5a63-8d20-e2491e220bb9" include("typedtables.jl") + end end diff --git a/src/dataframes.jl b/src/dataframes.jl new file mode 100644 index 0000000..ebba617 --- /dev/null +++ b/src/dataframes.jl @@ -0,0 +1,8 @@ +# DataFrames.jl interface +function DataFrames.DataFrame(obj::AbstractDataSet; kwargs...) + DataFrames.DataFrame(metida_table_(obj; kwargs...)) +end + +function DataFrames.DataFrame(obj::MetidaTable) + DataFrames.DataFrame(obj.table) +end \ No newline at end of file diff --git a/src/dataset.jl b/src/dataset.jl new file mode 100644 index 0000000..81d6c9c --- /dev/null +++ b/src/dataset.jl @@ -0,0 +1,229 @@ +################################################################################ +# DATASET +################################################################################ +struct DataSet{T <: AbstractData} <: AbstractDataSet{AbstractData} + ds::Vector{T} +end + +function getdata(d::DataSet) + d.ds +end + +@inline function getindormiss(d::Dict{K}, i::K) where K + ind::Int = ht_keyindex(d, i) + if ind > 0 return d.vals[ind] end + missing +end + +Tables.istable(::AbstractDataSet) = false + +Tables.rowaccess(::AbstractDataSet) = false +################################################################################ +# BASE +################################################################################ + +function Base.getindex(d::DataSet, ind::Int) + d.ds[ind] +end + +Base.getindex(d::DataSet, inds::UnitRange{Int64}) = subset(d, inds) + + +@inline function getresultindex_safe(rd::T, ind::Symbol) where T <: AbstractResultData + getindormiss(rd.result, ind) +end +@inline function getresultindex_unsafe(rd::T, ind::Symbol) where T <: AbstractResultData + rd.result[ind] +end + +function Base.getindex(d::DataSet{T}, col::Int, ind) where T <: AbstractResultData + getresultindex_safe(d[col], ind) +end +function Base.getindex(d::DataSet{T}, col::Colon, ind) where T <: AbstractResultData + @inbounds for i in Base.OneTo(length(d)) + if Base.ht_keyindex(d.ds[i].result, ind) < 1 return getresultindex_safe.(d.ds, ind) end + end + getresultindex_unsafe.(d.ds, ind) +end + +Base.first(d::DataSet) = first(getdata(d)) + +function Base.length(d::DataSet) + length(getdata(d)) +end + +function Base.iterate(d::DataSet) + return Base.iterate(getdata(d)) +end + +function Base.iterate(d::DataSet, i::Int) + return Base.iterate(getdata(d), i) +end + +function Base.map(f, d::DataSet) + DataSet(map(f, getdata(d))) +end + +################################################################################ +# BASE +################################################################################ +# sort! +################################################################################ +function islessdict(a::Dict{A1,A2}, b::Dict{B1,B2}, k::Union{AbstractVector, Set}) where A1 where A2 where B1 where B2 + l = length(k) + av = Vector{Union{Missing, A2}}(undef, l) + bv = Vector{Union{Missing, B2}}(undef, l) + @inbounds for i = 1:l + av[i] = getindormiss(a, k[i]) + bv[i] = getindormiss(b, k[i]) + end + isless(av, bv) +end +function islessdict(a::Dict, b::Dict, k) + isless(getindormiss(a, k), getindormiss(b, k)) +end +function Base.sort!(d::DataSet{T}, k; alg::Base.Algorithm = QuickSort, lt=nothing, by=nothing, rev::Bool=false, order::Base.Ordering = Base.Forward) where T <: Union{AbstractIdData, AbstractIDResult} + if isnothing(by) by = x -> getid(x) end + if isnothing(lt) lt = (x, y) -> islessdict(x, y, k) end + sort!(d.ds; alg = alg, lt = lt, by = by, rev = rev, order = order) + d +end + +################################################################################ +# filter +# filter! +################################################################################ +function Base.filter(f::Function, d::DataSet) + ds = getdata(d) + inds = findall(f, ds) + DataSet(ds[inds]) +end +function Base.filter!(f::Function, d::DataSet) + filter!(f, getdata(d)) + d +end + +################################################################################ +# Base.findfirst +################################################################################ + +function Base.findfirst(d::DataSet{<: AbstractIdData}, sort::Dict) + findfirst(x-> sort ⊆ getid(x), getdata(d)) +end + +################################################################################ +# SELF +################################################################################ + +getid_safe(idd::AbstractIdData, ind) = getindormiss(idd.id, ind) + +getid_unsafe(idd::AbstractIdData, ind) = idd.id[ind] + +getid_safe(asr::AbstractIDResult, ind) = getindormiss(asr.data.id, ind) + +getid_unsafe(asr::AbstractIDResult, ind) = asr.data.id[ind] + +getid(idd::AbstractIdData, ind) = getid_safe(idd, ind) + +getid(asr::AbstractIDResult, ind) = getid_safe(asr, ind) + +getid(idd::AbstractIdData) = idd.id + +getid(asr::AbstractIDResult) = asr.data.id + +function getid(d::DataSet{T}, col::Int, ind) where T <: Union{AbstractIdData, AbstractIDResult} + getid(d[col], ind) +end +function getid(d::DataSet{T}, col::Colon, ind) where T <: AbstractIdData + @inbounds for i in Base.OneTo(length(d)) + if Base.ht_keyindex(d.ds[i].id, ind) < 1 return getid_safe.(d.ds, ind) end + end + getid_unsafe.(d.ds, ind) +end +function getid(d::DataSet{T}, col::Colon, ind) where T <: AbstractIDResult + @inbounds for i in Base.OneTo(length(d)) + if Base.ht_keyindex(d.ds[i].data.id, ind) < 1 return getid_safe.(d.ds, ind) end + end + getid_unsafe.(d.ds, ind) +end + + +function uniqueidlist(d::DataSet{T}, list::AbstractVector{Symbol}) where T <: AbstractIdData + dl = Vector{Dict}(undef, 0) + for i in d + if list ⊆ keys(getid(i)) + subd = Dict(k => getid(i)[k] for k in list) + if subd ∉ dl push!(dl, subd) end + end + end + dl +end + +function uniqueidlist(d::DataSet{T}, list::Symbol) where T <: AbstractIdData + dl = Vector{Dict}(undef, 0) + for i in d + if list in keys(getid(i)) + subd = Dict(list => getid(i)[list]) + if subd ∉ dl push!(dl, subd) end + end + end + dl +end +#= +function uniqueidlist(d::DataSet{T}) where T <: AbstractIdData + dl = Vector{Dict}(undef, 0) + for i in d + id = getid(i) + if id ∉ dl push!(dl, id) end + end + dl +end +=# +function uniqueidlist(::DataSet{T}, ::Nothing) where T <: AbstractIdData + nothing +end + + +function subset(d::DataSet{T}, sort::Dict) where T <: AbstractIdData + inds = findall(x-> sort ⊆ getid(x), getdata(d)) + if length(inds) > 0 return DataSet(getdata(d)[inds]) end + DataSet(Vector{T}(undef, 0)) +end +function subset(d::DataSet{T}, sort::Dict) where T <: AbstractIDResult + inds = findall(x-> sort ⊆ getid(x), getdata(d)) + if length(inds) > 0 return DataSet(getdata(d)[inds]) end + DataSet(Vector{T}(undef, 0)) +end +function subset(d::DataSet, inds) + DataSet(getdata(d)[inds]) +end +################################################################################ +# metida_table from DataSet{AbstractIDResult} +################################################################################ +function metida_table_(obj::DataSet{RD}; order = nothing, results = nothing, ids = nothing) where RD <: AbstractIDResult + idset = Set(keys(first(obj).data.id)) + resset = Set(keys(first(obj).result)) + if length(obj) > 1 + for i = 2:length(obj) + union!(idset, Set(keys(obj[i].data.id))) + union!(resset, Set(keys(obj[i].result))) + end + end + if !isnothing(results) + if isa(results, Symbol) results = [results] end + if isa(results, String) results = [Symbol(results)] end + ressetl = isnothing(order) ? collect(intersect(resset, results)) : sortbyvec!(collect(intersect(resset, results)), order) + else + ressetl = isnothing(order) ? collect(resset) : sortbyvec!(collect(resset), order) + end + if !isnothing(ids) + if isa(ids, Symbol) ids = [ids] end + if isa(ids, String) ids = [Symbol(ids)] end + ids ⊆ idset || error("Some id not in dataset!") + idset = intersect(idset, ids) + end + mt1 = metida_table_((getid(obj, :, c) for c in idset)...; names = idset) + mt2 = metida_table_((obj[:, c] for c in ressetl)...; names = ressetl) + merge(mt1, mt2) +end +################################################################################ diff --git a/src/m_tables.jl b/src/m_tables.jl new file mode 100644 index 0000000..29f5c62 --- /dev/null +++ b/src/m_tables.jl @@ -0,0 +1,179 @@ +# MetidaBase.jl +struct MetidaTable{T <: NamedTuple} + table::T +end + +""" + metida_table(table::NamedTuple) + +Make MetidaTable from NamedTuple. +""" +function metida_table(table::NamedTuple) + MetidaTable(table) +end + +""" + metida_table(args...; kwargs...) + +Make MetidaTable. + +For AbstractIDResult: + + metida_table(obj::DataSet{RD}; order = nothing, results = nothing, ids = nothing) + +Where obj <: DataSet{<:AbstractIDResult} +order - order of columns (Vector of column's names); +results - result columns; +ids - ID's columns; +""" +function metida_table(args...; kwargs...) + MetidaTable(metida_table_(args...; kwargs...)) +end +function metida_table_(args...; names = nothing) + if length(args) > 1 + e1 = length(args[1]) + i = 2 + @inbounds for i = 2:length(args) + length(args[i]) == e1 || error("Length not equal") + end + end + if isnothing(names) + names = Tuple(Symbol.(:x , Symbol.(collect(1:length(args))))) + else + if length(args) != length(names) error("Length args and names not equal") end + if !(typeof(names) <: Tuple) + if !(typeof(names) <: AbstractVector{Symbol}) + names = Tuple(Symbol.(names)) + else + names = Tuple(names) + end + end + end + NamedTuple{names}(args) +end + +table(t::MetidaTable) = getfield(t, :table) +################################################################################ +# TABLES +################################################################################ +Tables.istable(t::MetidaTable) = true + +Tables.columnaccess(t::MetidaTable) = true + +Tables.columns(t::MetidaTable) = t + +Tables.getcolumn(t::MetidaTable, i::Int) = getfield(t, :table)[i] + +Tables.getcolumn(t::MetidaTable, nm::Symbol) = getfield(t, :table)[nm] + +Tables.getcolumn(t::MetidaTable, ::Type{T}, col::Int, nm::Symbol) where {T} = t[:, col] + +Tables.columnnames(t::MetidaTable) = names(t) + +Tables.rowaccess(::Type{<:MetidaTable}) = true +# just return itself, which means MatrixTable must iterate `Tables.AbstractRow`-compatible objects +Tables.rows(t::MetidaTable) = t + +# a custom row type; acts as a "view" into a row of an AbstractMatrix +struct MetidaTableRow{T} <: Tables.AbstractRow + row::Int + source::MetidaTable{T} +end + +Base.iterate(t::MetidaTable, st=1) = st > length(t) ? nothing : (MetidaTableRow(st, t), st + 1) + +Tables.getcolumn(t::MetidaTableRow, ::Type, col::Int, nm::Symbol) = getfield(t, :source)[getfield(t, :row), col] + +Tables.getcolumn(t::MetidaTableRow, i::Int) = getfield(t, :source)[getfield(t, :row), i] + +Tables.getcolumn(t::MetidaTableRow, nm::Symbol) = getfield(t, :source)[getfield(t, :row), nm] + +Tables.columnnames(t::MetidaTableRow) = names(getfield(t, :source)) + +Tables.schema(t::MetidaTable) = Tables.Schema(names(t), eltype(y) for y in t.table) + +################################################################################ +# BASE +################################################################################ + +Base.names(t::MetidaTable) = collect(keys(t.table)) + +function Base.getindex(t::MetidaTable, col::Colon, ind::T) where T <: Union{Symbol, Int} + Tables.getcolumn(t, ind) +end +function Base.getindex(t::MetidaTable, col::Colon, inds::AbstractVector{T}) where T <: Union{Symbol, Int} + if T <: Int + names = columnnames(t)[inds] + else + names = inds + end + cols = map(c->Tables.getcolumn(t, c), inds) + MetidaTable(metida_table_(cols...; names = names)) +end + +function Base.getindex(t::MetidaTable, r::Int, ::Colon) + MetidaTableRow(r, t) + #NamedTuple{keys(t.table)}(tuple(Iterators.map(c -> getindex(t, r, c), keys(t.table))...)) +end + + +function Base.getindex(t::MetidaTable, row::Int, ind::T) where T <: Union{Symbol, Int} + Tables.getcolumn(t, ind)[row] +end + +function Base.setindex!(t::MetidaTable, val, row::Int, ind::T) where T <: Union{Symbol, Int} + Tables.getcolumn(t, ind)[row] = val +end + +function Base.append!(t::MetidaTable, t2::MetidaTable) + if !(names(t) ⊆ names(t2)) error("Names for t not in t2") end + for n in names(t) + append!(t[:, n], t2[:, n]) + end + t +end + +function Base.pushfirst!(t::MetidaTable, row::AbstractVector) + if length(row) != length(keys(t.table)) error("Size not equal") end + i = 1 + for i = 1:length(row) + pushfirst!(t.table[i], row[i]) + end + t +end +function Base.pushfirst!(t::MetidaTable, row::NamedTuple) + kt = keys(t.table) + kr = keys(row) + if !issetequal(kt, kr) error("Size not equal") end + for i in kt + pushfirst!(t.table[i], row[i]) + end + t +end + +Base.length(t::MetidaTable) = length(first(t.table)) + +function Base.size(t::MetidaTable, i::Int) + if i == 1 + return length(first(t.table)) + elseif i == 2 + return length(t.table) + else + error("Wrong dimention!") + end +end + +function Base.show(io::IO, table::MetidaTable) + pretty_table(io, table; tf = PrettyTables.tf_compact) +end +function Base.show(io::IO, row::MetidaTableRow) + print(io, "Row: (") + names = keys(table(getfield(row, :source))) + print(io, names[1], " = ", row[names[1]]) + if length(names) > 1 + for i = 2:length(names) + print(io, ", ", names[i], " = ", row[names[i]]) + end + end + print(io, ")") +end \ No newline at end of file diff --git a/src/precompile.jl b/src/precompile.jl new file mode 100644 index 0000000..6a0774c --- /dev/null +++ b/src/precompile.jl @@ -0,0 +1,46 @@ +import SnoopPrecompile + +SnoopPrecompile.@precompile_all_calls begin + sdfromcv(0.4) + varfromcv(0.4) + cvfromvar(0.4) + cvfromsd(0.4) + nonunique([1,2,3,3,4,5,6,6]) + sortbyvec!([1,2,3,4,5,6,7,8], [2,5,3,1,8,4,6,7]) + mt = metida_table([1,2,3], ["a", "b", "c"]) + Tables.istable(mt) + Tables.rowaccess(mt) + mt = mt[:, [:x1, :x2]] + mtr = mt[1, :] + names(mt) + mt = MetidaBase.metida_table([1,2,3], ["a", "b", "c"], names = (:a, :b)) + pushfirst!(mt, [0, " "]) + ntr = NamedTuple{(:b, :a)}(["d", 10]) + pushfirst!(mt, ntr) + size(mt, 1) == 5 + Tables.rows(mt) + for (i,j) in enumerate(mt) + mt[i, :a] + end + length(mt) + mt2 = MetidaBase.metida_table([1,2,3], ["a", "b", "c"], names = (:a, :b)) + append!(mt, mt2) + mtd = MetidaBase.indsdict!(Dict(), mt) + + v1 = [1, 2, -6, missing, NaN, 0] + itr1 = skipnanormissing(v1) + for i in itr1 + !isnanormissing(i) + end + collect(itr1) + collect(eachindex(itr1)) + eltype(itr1) + itr2 = skipnonpositive(v1) + for i in itr2 + ispositive(i) + end + collect(eachindex(itr2)) + eltype(itr2) + collect(keys(itr2)) + length(itr2) +end \ No newline at end of file diff --git a/src/typedtables.jl b/src/typedtables.jl new file mode 100644 index 0000000..7845b26 --- /dev/null +++ b/src/typedtables.jl @@ -0,0 +1,7 @@ +# TypedTables.jl interface +function TypedTables.Table(obj::AbstractDataSet; kwargs...) + TypedTables.Table(metida_table_(obj; kwargs...)) +end +function TypedTables.Table(obj::MetidaTable) + TypedTables.Table(obj.table) +end \ No newline at end of file diff --git a/src/types.jl b/src/types.jl index 1910df8..812d42c 100644 --- a/src/types.jl +++ b/src/types.jl @@ -1,433 +1,3 @@ - -# MetidaBase.jl - -struct MetidaTable{T <: NamedTuple} - table::T -end - - -""" - metida_table(table::NamedTuple) - -Make MetidaTable from NamedTuple. -""" -function metida_table(table::NamedTuple) - MetidaTable(table) -end - -""" - metida_table(args...; kwargs...) - -Make MetidaTable. - -For AbstractIDResult: - - metida_table(obj::DataSet{RD}; order = nothing, results = nothing, ids = nothing) - -Where obj <: DataSet{<:AbstractIDResult} -order - order of columns (Vector of column's names); -results - result columns; -ids - ID's columns; -""" -function metida_table(args...; kwargs...) - MetidaTable(metida_table_(args...; kwargs...)) -end -function metida_table_(args...; names = nothing) - if length(args) > 1 - e1 = length(args[1]) - i = 2 - @inbounds for i = 2:length(args) - length(args[i]) == e1 || error("Length not equal") - end - end - if isnothing(names) - names = Tuple(Symbol.(:x , Symbol.(collect(1:length(args))))) - else - if length(args) != length(names) error("Length args and names not equal") end - if !(typeof(names) <: Tuple) - if !(typeof(names) <: AbstractVector{Symbol}) - names = Tuple(Symbol.(names)) - else - names = Tuple(names) - end - end - end - NamedTuple{names}(args) -end - -table(t::MetidaTable) = getfield(t, :table) -################################################################################ -# TABLES -################################################################################ -Tables.istable(t::MetidaTable) = true - -Tables.columnaccess(t::MetidaTable) = true - -Tables.columns(t::MetidaTable) = t - -Tables.getcolumn(t::MetidaTable, i::Int) = getfield(t, :table)[i] - -Tables.getcolumn(t::MetidaTable, nm::Symbol) = getfield(t, :table)[nm] - -Tables.getcolumn(t::MetidaTable, ::Type{T}, col::Int, nm::Symbol) where {T} = t[:, col] - -Tables.columnnames(t::MetidaTable) = names(t) - -Tables.rowaccess(::Type{<:MetidaTable}) = true -# just return itself, which means MatrixTable must iterate `Tables.AbstractRow`-compatible objects -Tables.rows(t::MetidaTable) = t - -# a custom row type; acts as a "view" into a row of an AbstractMatrix -struct MetidaTableRow{T} <: Tables.AbstractRow - row::Int - source::MetidaTable{T} -end - -Base.iterate(t::MetidaTable, st=1) = st > length(t) ? nothing : (MetidaTableRow(st, t), st + 1) - -Tables.getcolumn(t::MetidaTableRow, ::Type, col::Int, nm::Symbol) = getfield(t, :source)[getfield(t, :row), col] - -Tables.getcolumn(t::MetidaTableRow, i::Int) = getfield(t, :source)[getfield(t, :row), i] - -Tables.getcolumn(t::MetidaTableRow, nm::Symbol) = getfield(t, :source)[getfield(t, :row), nm] - -Tables.columnnames(t::MetidaTableRow) = names(getfield(t, :source)) - -Tables.schema(t::MetidaTable) = Tables.Schema(names(t), eltype(y) for y in t.table) - -################################################################################ -# BASE -################################################################################ - -Base.names(t::MetidaTable) = collect(keys(t.table)) - -function Base.getindex(t::MetidaTable, col::Colon, ind::T) where T <: Union{Symbol, Int} - Tables.getcolumn(t, ind) -end -function Base.getindex(t::MetidaTable, col::Colon, inds::AbstractVector{T}) where T <: Union{Symbol, Int} - if T <: Int - names = columnnames(t)[inds] - else - names = inds - end - cols = map(c->Tables.getcolumn(t, c), inds) - MetidaTable(metida_table_(cols...; names = names)) -end - -function Base.getindex(t::MetidaTable, r::Int, ::Colon) - MetidaTableRow(r, t) - #NamedTuple{keys(t.table)}(tuple(Iterators.map(c -> getindex(t, r, c), keys(t.table))...)) -end - - -function Base.getindex(t::MetidaTable, row::Int, ind::T) where T <: Union{Symbol, Int} - Tables.getcolumn(t, ind)[row] -end - -function Base.setindex!(t::MetidaTable, val, row::Int, ind::T) where T <: Union{Symbol, Int} - Tables.getcolumn(t, ind)[row] = val -end - -function Base.append!(t::MetidaTable, t2::MetidaTable) - if !(names(t) ⊆ names(t2)) error("Names for t not in t2") end - for n in names(t) - append!(t[:, n], t2[:, n]) - end - t -end - -function Base.pushfirst!(t::MetidaTable, row::AbstractVector) - if length(row) != length(keys(t.table)) error("Size not equal") end - i = 1 - for i = 1:length(row) - pushfirst!(t.table[i], row[i]) - end - t -end -function Base.pushfirst!(t::MetidaTable, row::NamedTuple) - kt = keys(t.table) - kr = keys(row) - if !issetequal(kt, kr) error("Size not equal") end - for i in kt - pushfirst!(t.table[i], row[i]) - end - t -end - -Base.length(t::MetidaTable) = length(first(t.table)) - -function Base.size(t::MetidaTable, i::Int) - if i == 1 - return length(first(t.table)) - elseif i == 2 - return length(t.table) - else - error("Wrong dimention!") - end -end - -function Base.show(io::IO, table::MetidaTable) - pretty_table(io, table; tf = PrettyTables.tf_compact) -end -function Base.show(io::IO, row::MetidaTableRow) - print(io, "Row: (") - names = keys(table(getfield(row, :source))) - print(io, names[1], " = ", row[names[1]]) - if length(names) > 1 - for i = 2:length(names) - print(io, ", ", names[i], " = ", row[names[i]]) - end - end - print(io, ")") -end - -# All -################################################################################ -# DATASET -################################################################################ -struct DataSet{T <: AbstractData} <: AbstractDataSet{AbstractData} - ds::Vector{T} -end - -function getdata(d::DataSet) - d.ds -end - -@inline function getindormiss(d::Dict{K}, i::K) where K - ind::Int = ht_keyindex(d, i) - if ind > 0 return d.vals[ind] end - missing -end - -Tables.istable(::AbstractDataSet) = false - -Tables.rowaccess(::AbstractDataSet) = false -################################################################################ -# BASE -################################################################################ - -function Base.getindex(d::DataSet, ind::Int) - d.ds[ind] -end - -Base.getindex(d::DataSet, inds::UnitRange{Int64}) = subset(d, inds) - - -@inline function getresultindex_safe(rd::T, ind::Symbol) where T <: AbstractResultData - getindormiss(rd.result, ind) -end -@inline function getresultindex_unsafe(rd::T, ind::Symbol) where T <: AbstractResultData - rd.result[ind] -end - -function Base.getindex(d::DataSet{T}, col::Int, ind) where T <: AbstractResultData - getresultindex_safe(d[col], ind) -end -function Base.getindex(d::DataSet{T}, col::Colon, ind) where T <: AbstractResultData - @inbounds for i in Base.OneTo(length(d)) - if Base.ht_keyindex(d.ds[i].result, ind) < 1 return getresultindex_safe.(d.ds, ind) end - end - getresultindex_unsafe.(d.ds, ind) -end - -Base.first(d::DataSet) = first(getdata(d)) - -function Base.length(d::DataSet) - length(getdata(d)) -end - -function Base.iterate(d::DataSet) - return Base.iterate(getdata(d)) -end - -function Base.iterate(d::DataSet, i::Int) - return Base.iterate(getdata(d), i) -end - -function Base.map(f, d::DataSet) - DataSet(map(f, getdata(d))) -end - -################################################################################ -# BASE -################################################################################ -# sort! -################################################################################ -function islessdict(a::Dict{A1,A2}, b::Dict{B1,B2}, k::Union{AbstractVector, Set}) where A1 where A2 where B1 where B2 - l = length(k) - av = Vector{Union{Missing, A2}}(undef, l) - bv = Vector{Union{Missing, B2}}(undef, l) - @inbounds for i = 1:l - av[i] = getindormiss(a, k[i]) - bv[i] = getindormiss(b, k[i]) - end - isless(av, bv) -end -function islessdict(a::Dict, b::Dict, k) - isless(getindormiss(a, k), getindormiss(b, k)) -end -function Base.sort!(d::DataSet{T}, k; alg::Base.Algorithm = QuickSort, lt=nothing, by=nothing, rev::Bool=false, order::Base.Ordering = Base.Forward) where T <: Union{AbstractIdData, AbstractIDResult} - if isnothing(by) by = x -> getid(x) end - if isnothing(lt) lt = (x, y) -> islessdict(x, y, k) end - sort!(d.ds; alg = alg, lt = lt, by = by, rev = rev, order = order) - d -end - -################################################################################ -# filter -# filter! -################################################################################ -function Base.filter(f::Function, d::DataSet) - ds = getdata(d) - inds = findall(f, ds) - DataSet(ds[inds]) -end -function Base.filter!(f::Function, d::DataSet) - filter!(f, getdata(d)) - d -end - -################################################################################ -# Base.findfirst -################################################################################ - -function Base.findfirst(d::DataSet{<: AbstractIdData}, sort::Dict) - findfirst(x-> sort ⊆ getid(x), getdata(d)) -end - -################################################################################ -# SELF -################################################################################ - -getid_safe(idd::AbstractIdData, ind) = getindormiss(idd.id, ind) - -getid_unsafe(idd::AbstractIdData, ind) = idd.id[ind] - -getid_safe(asr::AbstractIDResult, ind) = getindormiss(asr.data.id, ind) - -getid_unsafe(asr::AbstractIDResult, ind) = asr.data.id[ind] - -getid(idd::AbstractIdData, ind) = getid_safe(idd, ind) - -getid(asr::AbstractIDResult, ind) = getid_safe(asr, ind) - -getid(idd::AbstractIdData) = idd.id - -getid(asr::AbstractIDResult) = asr.data.id - -function getid(d::DataSet{T}, col::Int, ind) where T <: Union{AbstractIdData, AbstractIDResult} - getid(d[col], ind) -end -function getid(d::DataSet{T}, col::Colon, ind) where T <: AbstractIdData - @inbounds for i in Base.OneTo(length(d)) - if Base.ht_keyindex(d.ds[i].id, ind) < 1 return getid_safe.(d.ds, ind) end - end - getid_unsafe.(d.ds, ind) -end -function getid(d::DataSet{T}, col::Colon, ind) where T <: AbstractIDResult - @inbounds for i in Base.OneTo(length(d)) - if Base.ht_keyindex(d.ds[i].data.id, ind) < 1 return getid_safe.(d.ds, ind) end - end - getid_unsafe.(d.ds, ind) -end - - -function uniqueidlist(d::DataSet{T}, list::AbstractVector{Symbol}) where T <: AbstractIdData - dl = Vector{Dict}(undef, 0) - for i in d - if list ⊆ keys(getid(i)) - subd = Dict(k => getid(i)[k] for k in list) - if subd ∉ dl push!(dl, subd) end - end - end - dl -end - -function uniqueidlist(d::DataSet{T}, list::Symbol) where T <: AbstractIdData - dl = Vector{Dict}(undef, 0) - for i in d - if list in keys(getid(i)) - subd = Dict(list => getid(i)[list]) - if subd ∉ dl push!(dl, subd) end - end - end - dl -end -#= -function uniqueidlist(d::DataSet{T}) where T <: AbstractIdData - dl = Vector{Dict}(undef, 0) - for i in d - id = getid(i) - if id ∉ dl push!(dl, id) end - end - dl -end -=# -function uniqueidlist(::DataSet{T}, ::Nothing) where T <: AbstractIdData - nothing -end - - -function subset(d::DataSet{T}, sort::Dict) where T <: AbstractIdData - inds = findall(x-> sort ⊆ getid(x), getdata(d)) - if length(inds) > 0 return DataSet(getdata(d)[inds]) end - DataSet(Vector{T}(undef, 0)) -end -function subset(d::DataSet{T}, sort::Dict) where T <: AbstractIDResult - inds = findall(x-> sort ⊆ getid(x), getdata(d)) - if length(inds) > 0 return DataSet(getdata(d)[inds]) end - DataSet(Vector{T}(undef, 0)) -end -function subset(d::DataSet, inds) - DataSet(getdata(d)[inds]) -end -################################################################################ -# metida_table from DataSet{AbstractIDResult} -################################################################################ -function metida_table_(obj::DataSet{RD}; order = nothing, results = nothing, ids = nothing) where RD <: AbstractIDResult - idset = Set(keys(first(obj).data.id)) - resset = Set(keys(first(obj).result)) - if length(obj) > 1 - for i = 2:length(obj) - union!(idset, Set(keys(obj[i].data.id))) - union!(resset, Set(keys(obj[i].result))) - end - end - if !isnothing(results) - if isa(results, Symbol) results = [results] end - if isa(results, String) results = [Symbol(results)] end - ressetl = isnothing(order) ? collect(intersect(resset, results)) : sortbyvec!(collect(intersect(resset, results)), order) - else - ressetl = isnothing(order) ? collect(resset) : sortbyvec!(collect(resset), order) - end - if !isnothing(ids) - if isa(ids, Symbol) ids = [ids] end - if isa(ids, String) ids = [Symbol(ids)] end - ids ⊆ idset || error("Some id not in dataset!") - idset = intersect(idset, ids) - end - mt1 = metida_table_((getid(obj, :, c) for c in idset)...; names = idset) - mt2 = metida_table_((obj[:, c] for c in ressetl)...; names = ressetl) - merge(mt1, mt2) -end -################################################################################ -# TypedTables.jl interface - -function TypedTables.Table(obj::AbstractDataSet; kwargs...) - TypedTables.Table(metida_table_(obj; kwargs...)) -end -function TypedTables.Table(obj::MetidaTable) - TypedTables.Table(obj.table) -end - -# DataFrames.jl interface -function DataFrames.DataFrame(obj::AbstractDataSet; kwargs...) - DataFrames.DataFrame(metida_table_(obj; kwargs...)) -end - -function DataFrames.DataFrame(obj::MetidaTable) - DataFrames.DataFrame(obj.table) -end # MetidaFreq.jl struct Proportion <: AbstractData x::Int diff --git a/src/utils.jl b/src/utils.jl index 0f4e6aa..343f5e8 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -1,4 +1,5 @@ # Заполняет словарь d индексами индивидуальных значений + function indsdict!(d::Dict, cdata::Union{Tuple, NamedTuple, AbstractVector{AbstractVector}}) @inbounds for (i, element) in enumerate(zip(cdata...)) ind = ht_keyindex(d, element) @@ -30,11 +31,16 @@ function indsdict!(d::Dict, mt::MetidaTable) indsdict!(d, table(mt)) end +function findfirstvec(x, vec) + l = length(vec) + 1 + res = findfirst(y -> x == y, vec) + if isnothing(res) return l else return res end +end """ Sort `a` by values of `vec`. """ function sortbyvec!(a, vec) - sort!(a, by = x -> findfirst(y -> x == y, vec)) + sort!(a, by = x -> findfirstvec(x, vec)) end """ @@ -66,7 +72,6 @@ ispositive(x) = x > zero(x) # STATISTICS - #CV2se """ sdfromcv(cv::Real)::AbstractFloat diff --git a/test/runtests.jl b/test/runtests.jl index 8c4512e..334135b 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,8 +1,23 @@ using MetidaBase using Test, Tables, TypedTables, DataFrames, CSV + @testset "MetidaBase.jl" begin + + struct ExampleIDStruct <: MetidaBase.AbstractSubject + #time + #obs + id::Dict + end + struct ExampleResultStruct{T} <: MetidaBase.AbstractSubjectResult{T} + data::T + result::Dict + end + io = IOBuffer(); +##################################################################### +# metida_table +##################################################################### # Metida table names - auto mt = MetidaBase.metida_table([1,2,3], ["a", "b", "c"]) @@ -66,19 +81,10 @@ using Test, Tables, TypedTables, DataFrames, CSV mtd = MetidaBase.indsdict!(Dict(), mt[:, 1]) @test mtd[2] == [4, 7, 10] - ############################################################################ - # Structures - ############################################################################ - struct ExampleIDStruct <: MetidaBase.AbstractSubject - #time - #obs - id::Dict - end - - struct ExampleResultStruct{T} <: MetidaBase.AbstractSubjectResult{T} - data::T - result::Dict - end +############################################################################ +# Structures +############################################################################ + exiddsv = Vector{ExampleIDStruct}(undef, 3) for i in 1:3 exiddsv[i] = ExampleIDStruct(Dict(:a => 1, :b => 1)) @@ -168,14 +174,14 @@ using Test, Tables, TypedTables, DataFrames, CSV @test collect(keys(itr2)) == [1, 2] @test length(itr2) == 2 - ############################################################################ - # OTHER +############################################################################ + # OTHER @test MetidaBase.nonunique([1,2,3,3,4,5,6,6]) == [6,3] @test MetidaBase.sortbyvec!([1,2,3,4,5,6,7,8], [2,5,3,1,8,4,6,7]) == [2,5,3,1,8,4,6,7] - - ############################################################################ - # Ststutils + @test MetidaBase.sortbyvec!([1,2,3,4,5,6,7,8], [2,5,3,8,4,6,7]) == [2,5,3,8,4,6,7,1] +############################################################################ +# Stat utils MetidaBase.sdfromcv(0.4) ≈ 0.38525317015992666 MetidaBase.varfromcv(0.4) ≈ 0.1484200051182734 MetidaBase.cvfromvar(0.4) ≈ 0.7013021443295824