Skip to content

Commit

Permalink
Add nanskewness and nankurtosis
Browse files Browse the repository at this point in the history
  • Loading branch information
brenhinkeller committed Jul 1, 2024
1 parent afb0e42 commit c6f0ddb
Show file tree
Hide file tree
Showing 8 changed files with 555 additions and 46 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "NaNStatistics"
uuid = "b946abbf-3ea7-4610-9019-9858bfdeaf2d"
authors = ["C. Brenhin Keller"]
version = "0.6.40"
version = "0.6.41"

[deps]
PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a"
Expand Down
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@ Summary statistics exported by NaNStatistics are generally named the same as the
* `nanpctile`   percentile
* `nanpctile!`   as `nanpctile` but quicksorts in-place for efficiency

##### Other summary statistics
* `nanskewness`   skewness
* `nankurtosis`   excess kurtosis

Note that, regardless of implementation, functions involving medians or percentiles are generally significantly slower than other summary statistics, since calculating a median or percentile requires a quicksort or quickselect of the input array; if not done in-place as in `nanmedian!` and `nanpctile!` then a copy of the entire array must also be made.

These functions will generally support the same `dims` keyword argument as their normal Julia counterparts (though are most efficient when operating on an entire collection).
Expand Down
243 changes: 243 additions & 0 deletions src/ArrayStats/nankurtosis.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,243 @@
"""
```julia
nankurtosis(A; dims=:, mean=nothing)
```
Compute the kurtosis of all non-`NaN` elements in `A`, optionally over dimensions specified by `dims`.
As `StatsBase.kurtosis`, but ignoring `NaN`s.
A precomputed `mean` may optionally be provided, which results in a somewhat faster
calculation. If `corrected` is `true`, then _Bessel's correction_ is applied, such
that the sum is divided by `n-1` rather than `n`.
As an alternative to `dims`, `nankurtosis` also supports the `dim` keyword, which
behaves identically to `dims`, but also drops any singleton dimensions that have
been reduced over (as is the convention in some other languages).
## Examples
```julia
julia> using NaNStatistics
julia> A = [1 2 3 ; 4 5 6; 7 8 9]
3×3 Matrix{Int64}:
1 2 3
4 5 6
7 8 9
julia> nankurtosis(A, dims=1)
1×3 Matrix{Float64}:
-1.5 -1.5 -1.5
julia> nankurtosis(A, dims=2)
3-element Vector{Float64}:
-1.5
-1.5
-1.5
```
"""
nankurtosis(A; dims=:, dim=:, mean=nothing, corrected=false) = __nankurtosis(mean, corrected, A, dims, dim)
__nankurtosis(mean, corrected, A, ::Colon, ::Colon) = _nankurtosis(mean, corrected, A, :)
__nankurtosis(mean, corrected, A, region, ::Colon) = _nankurtosis(mean, corrected, A, region)
__nankurtosis(mean, corrected, A, ::Colon, region) = reducedims(_nankurtosis(mean, corrected, A, region), region)
export nankurtosis

# If dims is an integer, wrap it in a tuple
_nankurtosis(μ, corrected::Bool, A, dims::Int) = _nankurtosis(μ, corrected, A, (dims,))

# If the mean isn't known, compute it
_nankurtosis(::Nothing, corrected::Bool, A, dims::Tuple) = _nankurtosis!(_nanmean(A, dims), corrected, A, dims)
# Reduce all the dims!
function _nankurtosis(::Nothing, corrected::Bool, A, ::Colon)
T = eltype(A)
Tₒ = Base.promote_op(/, T, Int)
n = 0
Σ == zero(T)
@inbounds @simd ivdep for i eachindex(A)
Aᵢ = A[i]
notnan = Aᵢ==Aᵢ
n += notnan
Σ += ifelse(notnan, Aᵢ, ∅)
end
μ = Σ / n
μ₄ = μ₂ = ∅ₒ = zero(typeof(μ))
@inbounds @simd ivdep for i eachindex(A)
δ = A[i] - μ
notnan = δ==δ
δ² = δ * δ
μ₂ += ifelse(notnan, δ², ∅ₒ)
μ₄ += ifelse(notnan, δ² * δ², ∅ₒ)
end
σ = sqrt(μ₂ / max(n-corrected,0))
return (μ₄/n)/σ^4 - 3
end
function _nankurtosis(::Nothing, corrected::Bool, A::AbstractArray{T}, ::Colon) where T<:Integer
Tₒ = Base.promote_op(/, T, Int)
n = length(A)
Σ = zero(Tₒ)
@inbounds @simd ivdep for i eachindex(A)
Σ += A[i]
end
μ = Σ / n
μ₄ = μ₂ = zero(typeof(μ))
@inbounds @simd ivdep for i eachindex(A)
δ = A[i] - μ
δ² = δ * δ
μ₂ += δ²
μ₄ += δ² * δ²
end
σ = sqrt(μ₂ / max(n-corrected,0))
return (μ₄/n)/σ^4 - 3
end


# If the mean is known, pass it on in the appropriate form
_nankurtosis(μ, corrected::Bool, A, dims::Tuple) = _nankurtosis!(collect(μ), corrected, A, dims)
_nankurtosis::Array, corrected::Bool, A, dims::Tuple) = _nankurtosis!(copy(μ), corrected, A, dims)
_nankurtosis::Number, corrected::Bool, A, dims::Tuple) = _nankurtosis!([μ], corrected, A, dims)
# Reduce all the dims!
function _nankurtosis::Number, corrected::Bool, A, ::Colon)
n = 0
μ₄ = μ₂ = ∅ₒ = zero(typeof(μ))
@inbounds @simd ivdep for i eachindex(A)
δ = A[i] - μ
notnan = δ==δ
n += notnan
δ² = δ * δ
μ₂ += ifelse(notnan, δ², ∅ₒ)
μ₄ += ifelse(notnan, δ² * δ², ∅ₒ)
end
σ = sqrt(μ₂ / max(n-corrected,0))
return (μ₄/n)/σ^4 - 3
end
function _nankurtosis::Number, corrected::Bool, A::AbstractArray{T}, ::Colon) where T<:Integer
μ₄ = μ₂ = zero(typeof(μ))
if μ==μ
@inbounds @simd ivdep for i eachindex(A)
δ = A[i] - μ
δ² = δ * δ
μ₂ += δ²
μ₄ += δ² * δ²
end
n = length(A)
else
n = 0
end
σ = sqrt(μ₂ / max(n-corrected,0))
return (μ₄/n)/σ^4 - 3
end

# Metaprogramming magic adapted from Chris Elrod example:
# Generate customized set of loops for a given ndims and a vector
# `static_dims` of dimensions to reduce over
function staticdim_nankurtosis_quote(static_dims::Vector{Int}, N::Int)
M = length(static_dims)
# `static_dims` now contains every dim we're taking the var over.
Bᵥ = Expr(:call, :view, :B)
reduct_inds = Int[]
nonreduct_inds = Int[]
# Firstly, build our expressions for indexing each array
Aind = :(A[])
Bind = :(Bᵥ[])
inds = Vector{Symbol}(undef, N)
for n 1:N
ind = Symbol(:i_,n)
inds[n] = ind
push!(Aind.args, ind)
if n static_dims
push!(reduct_inds, n)
push!(Bᵥ.args, :(firstindex(B,$n)))
else
push!(nonreduct_inds, n)
push!(Bᵥ.args, :)
push!(Bind.args, ind)
end
end
firstn = first(nonreduct_inds)
# Secondly, build up our set of loops
block = Expr(:block)
loops = Expr(:for, :($(inds[firstn]) = indices((A,B),$firstn)), block)
if length(nonreduct_inds) > 1
for n @view(nonreduct_inds[2:end])
newblock = Expr(:block)
push!(block.args, Expr(:for, :($(inds[n]) = indices((A,B),$n)), newblock))
block = newblock
end
end
rblock = block
# Push more things here if you want them at the beginning of the reduction loop
push!(rblock.args, :(μ = $Bind))
push!(rblock.args, :(n = 0))
push!(rblock.args, :(μ₄ = μ₂ = ∅))
# Build the reduction loop
for n reduct_inds
newblock = Expr(:block)
if n==last(reduct_inds)
push!(block.args, Expr(:macrocall, Symbol("@simd"), :ivdep, Expr(:for, :($(inds[n]) = axes(A,$n)), newblock)))
else
push!(block.args, Expr(:for, :($(inds[n]) = axes(A,$n)), newblock))
end
block = newblock
end
# Push more things here if you want them in the innermost loop
push!(block.args, :(δ = $Aind - μ))
push!(block.args, :(notnan = δ==δ))
push!(block.args, :(n += notnan))
push!(block.args, :(δ² = δ * δ))
push!(block.args, :(μ₂ += ifelse(notnan, δ², ∅)))
push!(block.args, :(μ₄ += ifelse(notnan, δ² * δ², ∅)))

# Push more things here if you want them at the end of the reduction loop
push!(rblock.args, :(σ = sqrt(μ₂ * inv(max(n-corrected,0))) ))
push!(rblock.args, :($Bind = (μ₄/n)/σ^4 - 3))

# Put it all together
quote
= zero(eltype(B))
Bᵥ = $Bᵥ
@inbounds $loops
return B
end
end

# Turn non-static integers in `dims` tuple into `StaticInt`s
# so we can construct `static_dims` vector within @generated code
function branches_nankurtosis_quote(N::Int, M::Int, D)
static_dims = Int[]
for m 1:M
param = D.parameters[m]
if param <: StaticInt
new_dim = _dim(param)::Int
@assert new_dim static_dims
push!(static_dims, new_dim)
else
t = Expr(:tuple)
for n static_dims
push!(t.args, :(StaticInt{$n}()))
end
q = Expr(:block, :(dimm = dims[$m]))
qold = q
ifsym = :if
for n 1:N
n static_dims && continue
tc = copy(t)
push!(tc.args, :(StaticInt{$n}()))
qnew = Expr(ifsym, :(dimm == $n), :(return _nankurtosis!(B, corrected, A, $tc)))
for r m+1:M
push!(tc.args, :(dims[$r]))
end
push!(qold.args, qnew)
qold = qnew
ifsym = :elseif
end
push!(qold.args, Expr(:block, :(throw("Dimension `$dimm` not found."))))
return q
end
end
staticdim_nankurtosis_quote(static_dims, N)
end

# Efficient @generated in-place var
@generated function _nankurtosis!(B::AbstractArray{Tₒ,N}, corrected::Bool, A::AbstractArray{T,N}, dims::D) where {Tₒ,T,N,M,D<:Tuple{Vararg{IntOrStaticInt,M}}}
N == M && return :(B[1] = _nankurtosis(B[1], corrected, A, :); B)
branches_nankurtosis_quote(N, M, D)
end
Loading

0 comments on commit c6f0ddb

Please sign in to comment.