Upgrade Scholar docs (#61)
* Upgrade Scholar docs

* Apply suggestions from code review

Co-authored-by: Paulo Valente <[email protected]>

* Update lib/scholar/metrics/similarity.ex

Co-authored-by: Paulo Valente <[email protected]>

* Reformat

* Reformat

* Reformat

* Final Reformat

* Replace Returns with Return Values

* Add logo to docs

* Add simplified logo

* chore: format

Co-authored-by: Mateusz <[email protected]>
Co-authored-by: Paulo Valente <[email protected]>
3 people authored Jan 4, 2023
1 parent 7b60636 commit 74eefaf
Showing 20 changed files with 696 additions and 243 deletions.
File renamed without changes
Binary file added images/scholar_simplified.png
92 changes: 84 additions & 8 deletions lib/scholar/cluster/k_means.ex
@@ -1,6 +1,20 @@
defmodule Scholar.Cluster.KMeans do
@moduledoc """
K-Means algorithm.
K-Means Algorithm
K-Means is a simple clustering method that works iteratively [1]. In the first iteration,
centroids are chosen randomly from the input data. It turns out that some initializations
are especially effective: in 2007 David Arthur and Sergei Vassilvitskii proposed an
initialization called k-means++, which drastically speeds up convergence of the algorithm [2].
After initialization, each point is assigned to its closest centroid. Then each centroid is
replaced with the center of mass of its associated points. These two steps are repeated
until the solution converges. Since some initializations are unfortunate and converge
to sub-optimal results, we repeat the whole procedure a few times and take the best result.

References:
* [1] - [K-Means Algorithm](https://cs.nyu.edu/~roweis/csc2515-2006/readings/lloyd57.pdf)
* [2] - [K-Means++ Initialization](http://ilpubs.stanford.edu:8090/778/1/2006-13.pdf)
"""
import Nx.Defn
import Scholar.Shared
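The assignment and update steps described in the new moduledoc can be sketched in a few lines of Nx. A minimal, hypothetical helper (not Scholar's implementation; empty clusters and convergence checks are ignored):

```elixir
defmodule LloydSketch do
  import Nx.Defn

  # One Lloyd iteration: assign each point to its closest centroid,
  # then move each centroid to the mean of its assigned points.
  defn step(x, centroids) do
    {k, _num_features} = Nx.shape(centroids)

    # Assignment: squared Euclidean distances, shape {num_samples, k}.
    diff = Nx.new_axis(x, 1) - Nx.new_axis(centroids, 0)
    labels = (diff * diff) |> Nx.sum(axes: [-1]) |> Nx.argmin(axis: 1)

    # Update: one-hot membership mask {num_samples, k}, then per-cluster means.
    mask = Nx.equal(Nx.new_axis(labels, 1), Nx.iota({1, k}))
    sums = Nx.dot(Nx.transpose(mask), x)
    counts = mask |> Nx.sum(axes: [0]) |> Nx.new_axis(1)
    sums / counts
  end
end
```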
@@ -49,7 +63,7 @@ defmodule Scholar.Cluster.KMeans do
Method for centroid initialization, either of:
* `:k_means_plus_plus` - selects initial cluster centroids using sampling based
on an empirical probability distribution of the points contribution to
on an empirical probability distribution of the points' contribution to
the overall inertia. This technique speeds up convergence, and is
theoretically proven to be O(log(k))-optimal.
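For reviewers unfamiliar with the option above: a rough sketch of the k-means++ seeding idea ("D² sampling") on plain lists of points. The `KMeansPlusPlus` module and its helpers are hypothetical and for illustration only; Scholar's tensor-based implementation differs.

```elixir
defmodule KMeansPlusPlus do
  def init(points, k) do
    # The first centroid is chosen uniformly at random.
    first = Enum.random(points)

    # Each subsequent centroid is sampled with probability proportional to
    # the squared distance to the nearest already-chosen centroid.
    Enum.reduce(2..k//1, [first], fn _i, centroids ->
      weights =
        Enum.map(points, fn p ->
          centroids |> Enum.map(&dist2(&1, p)) |> Enum.min()
        end)

      [weighted_choice(points, weights) | centroids]
    end)
  end

  defp dist2(a, b) do
    Enum.zip(a, b) |> Enum.map(fn {x, y} -> (x - y) * (x - y) end) |> Enum.sum()
  end

  # Sample one point with probability proportional to its weight.
  defp weighted_choice(points, weights) do
    r = :rand.uniform() * Enum.sum(weights)

    {point, _} =
      points
      |> Enum.zip(weights)
      |> Enum.reduce_while({hd(points), r}, fn {p, w}, {_, acc} ->
        if acc <= w, do: {:halt, {p, acc}}, else: {:cont, {p, acc - w}}
      end)

    point
  end
end
```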
@@ -68,7 +82,7 @@ defmodule Scholar.Cluster.KMeans do
#{NimbleOptions.docs(@opts_schema)}
## Returns
## Return Values
The function returns a struct with the following parameters:
@@ -79,6 +93,33 @@ defmodule Scholar.Cluster.KMeans do
* `:inertia` - Sum of squared distances of samples to their closest cluster center.
* `:labels` - Labels of each point.
## Examples
iex> Scholar.Cluster.KMeans.fit(Nx.tensor([[1, 2], [2, 4], [1, 3], [2, 5]]),
...> num_clusters: 2
...> )
%Scholar.Cluster.KMeans{
num_iterations: #Nx.Tensor<
s64
2
>,
clusters: #Nx.Tensor<
f32[2][2]
[
[1.0, 2.5],
[2.0, 4.5]
]
>,
inertia: #Nx.Tensor<
f32
1.0
>,
labels: #Nx.Tensor<
s64[4]
[0, 1, 0, 1]
>
}
"""
deftransform fit(x, opts \\ []) do
if Nx.rank(x) != 2 do
@@ -239,9 +280,23 @@ defmodule Scholar.Cluster.KMeans do
end

@doc """
Makes predictions with the given model on inputs `x`.
Makes predictions with the given `model` on inputs `x`.
## Return Values
It returns a tensor with clusters corresponding to the input.
## Examples
iex> model =
...> Scholar.Cluster.KMeans.fit(Nx.tensor([[1, 2], [2, 4], [1, 3], [2, 5]]),
...> num_clusters: 2
...> )
iex> Scholar.Cluster.KMeans.predict(model, Nx.tensor([[1.9, 4.3], [1.1, 2.0]]))
#Nx.Tensor<
s64[2]
[1, 0]
>
"""
defn predict(%__MODULE__{clusters: clusters} = _model, x) do
assert_same_shape!(x[0], clusters[0])
@@ -266,12 +321,33 @@ defmodule Scholar.Cluster.KMeans do
end

@doc """
Calculate distances between each sample from `x` and and the model centroids.
Calculates distances between each sample from `x` and the calculated centroids.
## Return Values
It returns a tensor with corresponding distances.
## Examples
iex> model =
...> Scholar.Cluster.KMeans.fit(Nx.tensor([[1, 2], [2, 4], [1, 3], [2, 5]]),
...> num_clusters: 2
...> )
iex> Scholar.Cluster.KMeans.transform(model, Nx.tensor([[1.0, 2.5]]))
#Nx.Tensor<
f32[1][2]
[
[2.2360680103302, 0.0]
]
>
"""
defn transform(%__MODULE__{clusters: clusters} = _model, x) do
{num_clusters, num_features} = Nx.shape(clusters)
{num_samples, _} = Nx.shape(x)

Scholar.Metrics.Distance.euclidean(
Nx.new_axis(x, 1),
Nx.new_axis(clusters, 0),
Nx.new_axis(x, 1) |> Nx.broadcast({num_samples, num_clusters, num_features}),
Nx.new_axis(clusters, 0) |> Nx.broadcast({num_samples, num_clusters, num_features}),
axes: [-1]
)
end
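A note on the `transform/2` change above: both operands are now broadcast explicitly to a common `{num_samples, num_clusters, num_features}` shape before the distance is reduced over the last axis. A small shape walk-through (the dimensions are illustrative, not taken from the code):

```elixir
x = Nx.iota({4, 2}, type: :f32)         # {num_samples = 4, num_features = 2}
clusters = Nx.iota({3, 2}, type: :f32)  # {num_clusters = 3, num_features = 2}

lhs = x |> Nx.new_axis(1) |> Nx.broadcast({4, 3, 2})
rhs = clusters |> Nx.new_axis(0) |> Nx.broadcast({4, 3, 2})

# Pairwise distances, one per {sample, cluster} pair => shape {4, 3}.
Scholar.Metrics.Distance.euclidean(lhs, rhs, axes: [-1])
```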
80 changes: 53 additions & 27 deletions lib/scholar/covariance.ex
@@ -1,5 +1,5 @@
defmodule Scholar.Covariance do
@moduledoc """
@moduledoc ~S"""
Algorithms to estimate the covariance of features given a set of points.
"""
import Nx.Defn
@@ -29,39 +29,65 @@ defmodule Scholar.Covariance do
@doc """
Computes covariance matrix for sample inputs `x`.
The value at position $Cov_{ij}$ in the $Cov$ matrix is calculated using the formula:
#{~S'''
$$ Cov(X\_{i}, X\_{j}) = \frac{\sum\_{k}\left(x\_{k} -
\bar{x}\right)\left(y\_{k} - \bar{y}\right)}{N - 1}
$$
Where:
* $X_i$ is the $i$th column (feature) of the input
* $x_k$ is the $k$th value of $X_i$
* $y_k$ is the $k$th value of $X_j$
* $\bar{x}$ is the mean of $X_i$
* $\bar{y}$ is the mean of $X_j$
* $N$ is the number of samples
This is the unbiased version of the covariance.
The biased version has $N$ in the denominator instead of $N - 1$.
'''}
## Options
#{NimbleOptions.docs(@opts_schema)}
## Example
iex> Scholar.Covariance.covariance_matrix(Nx.tensor([[3, 6, 5], [26, 75, 3], [23, 4, 1]]))
#Nx.Tensor<
f32[3][3]
[
[104.22222137451172, 195.5555419921875, -13.333333015441895],
[195.5555419921875, 1089.5555419921875, 1.3333333730697632],
[-13.333333015441895, 1.3333333730697632, 2.6666667461395264]
]
>
iex> Scholar.Covariance.covariance_matrix(Nx.tensor([[3, 6], [2, 3], [7, 9], [5, 3]]))
#Nx.Tensor<
f32[2][2]
[
[3.6875, 3.1875],
[3.1875, 6.1875]
]
>
iex> Scholar.Covariance.covariance_matrix(Nx.tensor([[3, 6, 5], [26, 75, 3], [23, 4, 1]]), biased: false)
#Nx.Tensor<
f32[3][3]
[
[156.3333282470703, 293.33331298828125, -20.0],
[293.33331298828125, 1634.333251953125, 2.0],
[-20.0, 2.0, 4.0]
]
>
iex> Scholar.Covariance.covariance_matrix(Nx.tensor([[3, 6, 5], [26, 75, 3], [23, 4, 1]]),
...> biased: false
...> )
#Nx.Tensor<
f32[3][3]
[
[156.3333282470703, 293.33331298828125, -20.0],
[293.33331298828125, 1634.333251953125, 2.0],
[-20.0, 2.0, 4.0]
]
>
"""
deftransform covariance_matrix(x, opts \\ []) do
covariance_matrix_n(x, NimbleOptions.validate!(opts, @opts_schema))
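Judging from the example values, the default shown above uses the biased $N$ denominator, while `biased: false` switches to $N - 1$. A minimal Nx sketch of the $N - 1$ formula from the moduledoc (the `CovSketch` module is hypothetical, not Scholar's implementation):

```elixir
defmodule CovSketch do
  # Unbiased covariance of the feature columns of x: center each column,
  # then average the outer products over N - 1 samples.
  def covariance(x) do
    {n, _num_features} = Nx.shape(x)
    centered = Nx.subtract(x, Nx.mean(x, axes: [0]))
    Nx.divide(Nx.dot(Nx.transpose(centered), centered), n - 1)
  end
end
```

`CovSketch.covariance(Nx.tensor([[3, 6, 5], [26, 75, 3], [23, 4, 1]]))` should reproduce the `biased: false` example above.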
104 changes: 91 additions & 13 deletions lib/scholar/decomposition/pca.ex
@@ -1,6 +1,14 @@
defmodule Scholar.Decomposition.PCA do
@moduledoc """
PCA decomposition algorithm.
Principal Component Analysis (PCA).
The main concept of PCA is to find the components (i.e. the columns of a matrix) that explain the most variance
in the data set [1]. The sample data is then decomposed as a linear combination of
vectors that lie along the directions of those components.
Reference:
* [1] - [Principal Component Analysis](https://en.wikipedia.org/wiki/Principal_component_analysis)
"""
import Nx.Defn
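As a companion to the new moduledoc, a minimal sketch of the textbook route to the components: center the data and take its SVD. The `PCASketch` module is hypothetical; Scholar's implementation details and sign conventions may differ.

```elixir
defmodule PCASketch do
  # Center the data and take its SVD: the rows of vt are the principal
  # components (directions of maximal variance), and the squared singular
  # values over N - 1 give the explained variance.
  def components(x) do
    centered = Nx.subtract(x, Nx.mean(x, axes: [0]))
    {_u, s, vt} = Nx.LinAlg.svd(centered)
    n = Nx.axis_size(x, 0)
    {vt, Nx.divide(Nx.multiply(s, s), n - 1)}
  end
end
```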

@@ -31,9 +39,8 @@ defmodule Scholar.Decomposition.PCA do
type: {:or, [:pos_integer, {:in, [nil]}]},
default: nil,
doc: ~S"""
Number of components to keep. If `:num_components` is not set, all components are kept:
$num\\_components = min(num\\_samples, num\\_features)$
Number of components to keep. If `:num_components` is not set, all components are kept,
i.e. the number of components equals the minimum of the number of samples and the number of features.
"""
]
]
@@ -66,7 +73,7 @@ defmodule Scholar.Decomposition.PCA do
#{NimbleOptions.docs(@fit_opts_schema)}
## Returns
## Return Values
The function returns a struct with the following parameters:
@@ -92,6 +99,44 @@ defmodule Scholar.Decomposition.PCA do
* `:num_features` - Number of features in the training data.
* `:num_samples` - Number of samples in the training data.
## Examples
iex> x = Nx.tensor([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
iex> Scholar.Decomposition.PCA.fit(x)
%Scholar.Decomposition.PCA{
components: #Nx.Tensor<
f32[2][2]
[
[-0.8387274146080017, -0.544551432132721],
[0.544551432132721, -0.8387274146080017]
]
>,
explained_variance: #Nx.Tensor<
f32[2]
[7.939539909362793, 0.060457102954387665]
>,
explained_variance_ratio: #Nx.Tensor<
f32[2]
[0.9924428462982178, 0.007557140663266182]
>,
singular_values: #Nx.Tensor<
f32[2]
[6.3006110191345215, 0.5498049855232239]
>,
mean: #Nx.Tensor<
f32[2]
[0.0, 0.0]
>,
num_components: 2,
num_features: #Nx.Tensor<
s64
2
>,
num_samples: #Nx.Tensor<
s64
6
>
}
"""
deftransform fit(x, opts \\ []) do
fit_n(x, NimbleOptions.validate!(opts, @fit_opts_schema))
@@ -144,9 +189,25 @@ defmodule Scholar.Decomposition.PCA do
#{NimbleOptions.docs(@transform_opts_schema)}
## Returns
The function returns a decomposed data.
## Return Values
The function returns a tensor with decomposed data.
## Examples
iex> x = Nx.tensor([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
iex> model = Scholar.Decomposition.PCA.fit(x)
iex> Scholar.Decomposition.PCA.transform(model, x)
#Nx.Tensor<
f32[6][2]
[
[1.3832788467407227, 0.29417598247528076],
[2.222006320953369, -0.2503754496574402],
[3.605285167694092, 0.043800532817840576],
[-1.3832788467407227, -0.29417598247528076],
[-2.222006320953369, 0.2503754496574402],
[-3.605285167694092, -0.043800532817840576]
]
>
"""
deftransform transform(model, x, opts \\ []) do
transform_n(model, x, NimbleOptions.validate!(opts, @transform_opts_schema))
@@ -176,16 +237,33 @@ defmodule Scholar.Decomposition.PCA do

@doc """
Fit the model with `x` and apply the dimensionality reduction on `x`.
This function is analogical to calling fit and then transform, but it is calculated
more efficiently.
This function is analogous to calling `fit/2` and then
`transform/3`, but it is calculated more efficiently.
## Options
#{NimbleOptions.docs(@transform_opts_schema)}
## Returns
The function returns a decomposed data.
## Return Values
The function returns a tensor with decomposed data.
## Examples
iex> x = Nx.tensor([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
iex> Scholar.Decomposition.PCA.fit_transform(x)
#Nx.Tensor<
f32[6][2]
[
[1.3819527626037598, 0.29363134503364563],
[2.2231407165527344, -0.25125157833099365],
[3.605093240737915, 0.04237978905439377],
[-1.3819527626037598, -0.29363134503364563],
[-2.2231407165527344, 0.25125157833099365],
[-3.605093240737915, -0.04237978905439377]
]
>
"""

deftransform fit_transform(x, opts \\ []) do
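A quick sanity check for the claim that `fit_transform/2` is analogous to `fit/2` followed by `transform/3` (the tolerance is chosen loosely, since the two code paths may differ in floating-point rounding, as the doctest values above suggest):

```elixir
x = Nx.tensor([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
a = Scholar.Decomposition.PCA.fit_transform(x)
b = Scholar.Decomposition.PCA.transform(Scholar.Decomposition.PCA.fit(x), x)

# Expected to return a truthy u8 scalar (1) within this tolerance.
Nx.all_close(a, b, atol: 1.0e-2)
```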