From 1bbe51aff2866ec59a109f67d97b6db046bb46d9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9s=20C=2E=20Alejos?=
Date: Fri, 30 Dec 2022 09:55:20 -0500
Subject: [PATCH] Add Hamming Distance (#58)

---
 .formatter.exs                         |  1 +
 lib/scholar/metrics/distance.ex        | 73 ++++++++++++++++++++++++++
 test/scholar/metrics/distance_test.exs | 17 ++++++
 3 files changed, 91 insertions(+)

diff --git a/.formatter.exs b/.formatter.exs
index d2cda26e..d9f22bc6 100644
--- a/.formatter.exs
+++ b/.formatter.exs
@@ -1,4 +1,5 @@
 # Used by "mix format"
 [
+  import_deps: [:nx],
   inputs: ["{mix,.formatter}.exs", "{config,lib,test}/**/*.{ex,exs}"]
 ]
diff --git a/lib/scholar/metrics/distance.ex b/lib/scholar/metrics/distance.ex
index 526f064e..54fb032a 100644
--- a/lib/scholar/metrics/distance.ex
+++ b/lib/scholar/metrics/distance.ex
@@ -407,4 +407,77 @@ defmodule Scholar.Metrics.Distance do
     res = Nx.select(one_zero?, 0.0, res)
     1.0 - Nx.select(both_zero?, 1.0, res)
   end
+
+  @doc """
+  Hamming distance.
+
+  The fraction of positions along the aggregation axes at which `x` and `y` differ.
+  When weights `w` are given, the mismatches are averaged with `Nx.weighted_mean/3`.
+
+  $$
+  hamming(x, y) = \\frac{\\#\\{x_{i, j, ...} \\neq y_{i, j, ...}\\}}{\\#x_{i, j, ...}}
+  $$
+
+  where $i, j, ...$ are the aggregation axes.
+
+  ## Options
+
+  #{NimbleOptions.docs(@general_schema)}
+
+  ## Examples
+
+      iex> x = Nx.tensor([1, 0, 0])
+      iex> y = Nx.tensor([0, 1, 0])
+      iex> Scholar.Metrics.Distance.hamming(x, y)
+      #Nx.Tensor<
+        f32
+        0.6666666865348816
+      >
+      iex> weights = Nx.tensor([1, 0.5, 0.5])
+      iex> Scholar.Metrics.Distance.hamming(x, y, weights)
+      #Nx.Tensor<
+        f32
+        0.75
+      >
+
+      iex> x = Nx.tensor([1, 2])
+      iex> y = Nx.tensor([1, 2, 3])
+      iex> Scholar.Metrics.Distance.hamming(x, y)
+      ** (ArgumentError) expected tensor to have shape {2}, got tensor with shape {3}
+
+      iex> x = Nx.tensor([[1, 2, 3], [0, 0, 0], [5, 2, 4]])
+      iex> y = Nx.tensor([[1, 5, 2], [2, 4, 1], [0, 0, 0]])
+      iex> Scholar.Metrics.Distance.hamming(x, y, axes: [1])
+      #Nx.Tensor<
+        f32[3]
+        [0.6666666865348816, 1.0, 1.0]
+      >
+  """
+  deftransform hamming(x, y), do: hamming_unweighted(x, y)
+
+  deftransform hamming(x, y, opts) when is_list(opts) do
+    # Forward the validated options rather than the raw input.
+    opts = NimbleOptions.validate!(opts, @general_schema)
+    hamming_unweighted(x, y, opts)
+  end
+
+  deftransform hamming(x, y, w), do: hamming_weighted(x, y, w)
+
+  deftransform hamming(x, y, w, opts) when is_list(opts) do
+    # Forward the validated options rather than the raw input.
+    opts = NimbleOptions.validate!(opts, @general_schema)
+    hamming_weighted(x, y, w, opts)
+  end
+
+  # Mean of the element-wise mismatch mask over `opts[:axes]`.
+  defnp hamming_unweighted(x, y, opts \\ []) do
+    assert_same_shape!(x, y)
+    (x != y) |> Nx.mean(axes: opts[:axes])
+  end
+
+  # Weighted mean (weights `w`) of the element-wise mismatch mask over `opts[:axes]`.
+  defnp hamming_weighted(x, y, w, opts \\ []) do
+    assert_same_shape!(x, y)
+    (x != y) |> Nx.weighted_mean(w, axes: opts[:axes])
+  end
 end
diff --git a/test/scholar/metrics/distance_test.exs b/test/scholar/metrics/distance_test.exs
index 7d976de0..6597447d 100644
--- a/test/scholar/metrics/distance_test.exs
+++ b/test/scholar/metrics/distance_test.exs
@@ -52,4 +52,21 @@ defmodule Scholar.Metrics.DistanceTest do
   test "cosine matches scipy" do
     assert_all_close(Distance.cosine(@x, @y), Nx.tensor(0.7650632810164779))
   end
+
+  test "hamming matches scipy" do
+    x = Nx.tensor([1, 0, 0])
+
+    assert_all_close(Distance.hamming(x, Nx.tensor([0, 1, 0])), Nx.tensor(0.6666666865348816))
+    assert_all_close(Distance.hamming(x, Nx.tensor([1, 1, 0])), Nx.tensor(0.3333333432674408))
+    assert_all_close(Distance.hamming(x, Nx.tensor([2, 0, 0])), Nx.tensor(0.3333333432674408))
+    assert_all_close(Distance.hamming(x, Nx.tensor([3, 0, 0])), Nx.tensor(0.3333333432674408))
+  end
+
+  test "weighted hamming matches scipy" do
+    x = Nx.tensor([1, 0, 0])
+    y = Nx.tensor([0, 1, 0])
+    w = Nx.tensor([1, 0.5, 0.5])
+
+    assert_all_close(Distance.hamming(x, y, w), Nx.tensor(0.75))
+  end
 end