Skip to content

Commit

Permalink
Merge pull request #18 from TidierOrg/add-cat_expand
Browse files Browse the repository at this point in the history
bump lts, add cat_expand
  • Loading branch information
drizk1 authored Jan 8, 2025
2 parents 63b9b36 + 4a538f4 commit e03d307
Show file tree
Hide file tree
Showing 3 changed files with 69 additions and 4 deletions.
4 changes: 2 additions & 2 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "TidierCats"
uuid = "79ddc9fe-4dbf-4a56-a832-df41fb326d23"
authors = ["Daniel Rizk"]
version = "0.1.2"
version = "0.2.0"

[deps]
CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597"
Expand All @@ -13,7 +13,7 @@ Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
CategoricalArrays = "0.10, 1.0"
DataFrames = "1.5"
Reexport = "0.2, 1"
julia = "1.9"
julia = "1.10"

[extras]
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
Expand Down
41 changes: 39 additions & 2 deletions src/TidierCats.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ using Reexport
@reexport using CategoricalArrays

export cat_rev, cat_relevel, cat_infreq, cat_lump, cat_reorder, cat_collapse, cat_lump_min, cat_lump_prop
export as_categorical, as_integer, cat_replace_missing, cat_other, cat_recode
export as_categorical, as_integer, cat_replace_missing, cat_other, cat_recode, cat_expand
export cat_ages
include("catsdocstrings.jl")

"""
Expand Down Expand Up @@ -130,9 +131,17 @@ end
$docstring_as_categorical
"""
function as_categorical(arr::AbstractArray)
    # Convert `arr` into a CategoricalArray.
    #
    # Numeric input keeps its numeric element type (widened to allow `missing`);
    # anything else has each non-missing element converted with `string`, while
    # `missing` entries are preserved as `missing`.

    # Strip `Missing` from the element type first: `Union{Missing, Int} <: Number`
    # is false, so testing `eltype(arr)` directly would stringify numeric data
    # that merely contains missing values.
    T = nonmissingtype(eltype(arr))

    # `nonmissingtype(Missing)` is `Union{}`, which is a subtype of every type;
    # exclude it so an all-missing array still takes the fallback branch.
    if T !== Union{} && T <: Number
        # keep numeric data as numeric categories
        return CategoricalArray{Union{Missing, T}}(arr)
    else
        # fallback: treat them as strings
        return CategoricalArray(map(x -> ismissing(x) ? missing : string(x), arr))
    end
end


"""
$docstring_cat_reorder
"""
Expand Down Expand Up @@ -329,5 +338,33 @@ function cat_recode(f::Union{CategoricalArray, AbstractVector}; kwargs...)
end


"""
$docstring_cat_expand
"""
function cat_expand(f::CategoricalArray, new_levels...; after=Inf)
    # Add `new_levels` to the levels of `f` (mutating `f` through `levels!`) and
    # return `f`.
    #
    # `after` selects the insertion point: `Inf` (default) appends, `0` prepends,
    # and an integer `k` inserts after the k-th existing level. Positions past the
    # last level append. Anything else raises an `ArgumentError`.

    # Get the current levels of the factor
    current_levels = levels(f)
    n_current = length(current_levels)

    # Keep only levels that are not present yet, each at most once: `levels!`
    # does not accept a level vector containing duplicates.
    unique_new_levels = unique([level for level in new_levels if level ∉ current_levels])

    # Nothing to add: leave the array untouched.
    isempty(unique_new_levels) && return f

    # Resolve `after` into an insertion index in 0:n_current.
    if after == Inf
        position = n_current
    elseif after isa Real && isinteger(after) && after >= 0
        position = after >= n_current ? n_current : Int(after)
    else
        throw(ArgumentError(
            "`after` must be a non-negative integer or Inf, got $(repr(after))"))
    end

    if position == n_current
        # Append the new levels at the end
        expanded_levels = vcat(current_levels, unique_new_levels)
    elseif position == 0
        # Prepend the new levels at the beginning
        expanded_levels = vcat(unique_new_levels, current_levels)
    else
        # Insert the new levels after the specified index
        expanded_levels = vcat(
            current_levels[1:position],
            unique_new_levels,
            current_levels[position+1:end],
        )
    end

    # Update the levels of the categorical array
    levels!(f, expanded_levels)

    return f
end


end
28 changes: 28 additions & 0 deletions src/catsdocstrings.jl
Original file line number Diff line number Diff line change
Expand Up @@ -408,3 +408,31 @@ julia> cat_other(cat_array, drop = ["A", "B"])
"E"
```
"""

const docstring_cat_expand =
"""
    cat_expand(cat_array::CategoricalArray, new_levels...; after=Inf)

Expands the levels in a categorical array by adding new levels at a specified position.
The levels of `cat_array` are updated in place and the same array is returned.

# Arguments
- `cat_array`: Categorical array to expand levels
- `new_levels`: New levels to be added to the categorical array. Levels that already exist are skipped.
- `after`: Position after which to insert the new levels. Default is `Inf`, which appends the new levels at the end; `0` inserts them at the beginning.

# Returns
Categorical array with the new levels added.

# Examples
```julia
julia> cats = CategoricalArray(["a", "b", "c", "a", "c", "b"]);

julia> println("Original levels: ", levels(cats))
Original levels: ["a", "b", "c"]

julia> cats = cat_expand(cats, "d", "e", "f");

julia> println("Expanded levels: ", levels(cats))
Expanded levels: ["a", "b", "c", "d", "e", "f"]
```
"""

2 comments on commit e03d307

@drizk1
Copy link
Member Author

@drizk1 drizk1 commented on e03d307 Jan 8, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator register

Release notes:
Breaking Changes

  • Bumps julia LTS to 1.10

Additions
Adds cat_expand

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/122561

Tagging

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v0.2.0 -m "<description of version>" e03d307ac1cee05fca94c019b1fcb8aa6031ca87
git push origin v0.2.0

Please sign in to comment.