Source code for skpns.sklearn

"""Scikit-learn wrappers for PNS."""

import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin

from .pns import embed, pns, proj, reconstruct

__all__ = [
    "ExtrinsicPNS",
    "PNS",
    "IntrinsicPNS",
]


[docs] class ExtrinsicPNS(TransformerMixin, BaseEstimator): """Principal nested spheres (PNS) analysis with extrinsic coordinates. Reduces the dimensionality of data on a high-dimensional hypersphere while preserving its spherical geometry. The resulting data are represented by extrinsic coordinates. For example, `n_components=2` transforms data onto a 2D unit circle, represented by x and y coordinates. Parameters ---------- n_components : int, default=2 Number of components to keep. Data are transformed onto a unit hypersphere embedded in this dimensional space. tol : float, default=1e-3 Optimization tolerance. maxiter : int, optional Maximum number of iterations for the optimization. If None, the number of iterations is not checked. Attributes ---------- embedding_ : ndarray of shape (n_samples, n_components) Stores the embedding vectors. v_ : list of (n_features - 1) arrays Principal directions of nested spheres. r_ : ndarray of shape (n_features - 1,) Principal radii of nested spheres. Examples -------- >>> from skpns import ExtrinsicPNS >>> from skpns.util import circular_data, unit_sphere >>> X = circular_data() >>> pns = ExtrinsicPNS(n_components=2) >>> X_reduced = pns.fit_transform(X) >>> X_inv = pns.inverse_transform(X_reduced) >>> import matplotlib.pyplot as plt # doctest: +SKIP ... fig = plt.figure() ... ax1 = fig.add_subplot(121, projection='3d', computed_zorder=False) ... ax1.plot_surface(*unit_sphere(), color='skyblue', alpha=0.6, edgecolor='gray') ... ax1.scatter(*X_inv.T, zorder=10) ... ax1.scatter(*X.T) ... ax2 = fig.add_subplot(122) ... ax2.scatter(*X_reduced.T) ... ax2.set_aspect('equal') """ def __init__(self, n_components=2, tol=1e-3, maxiter=None): self.n_components = n_components self.tol = tol self.maxiter = maxiter def _fit_transform(self, X): self._n_features = X.shape[1] self.v_ = [] self.r_ = [] D = X.shape[1] pns_ = pns(X, self.tol, maxiter=self.maxiter) for _ in range(D - self.n_components): v, r, X = next(pns_) self.v_.append(v) self.r_.append(r) self.embedding_ = X
[docs] def fit(self, X, y=None): """Find principal nested spheres for the data X. Parameters ---------- X : array-like of shape (n_samples, n_features) Data on (n_features - 1)-dimensional hypersphere. y : Ignored Not used, present for API consistency by convention. Returns ------- self : object Returns a fitted instance of self. """ self._fit_transform(X) return self
[docs] def fit_transform(self, X, y=None): """Fit the model with data in X and transform X. Parameters ---------- X : array-like of shape (n_samples, n_features) Data on (n_features - 1)-dimensional hypersphere. y : Ignored Not used, present for API consistency by convention. Returns ------- X_new : array-like, shape (n_samples, n_components) X transformed in the new space. """ self._fit_transform(X) return self.embedding_
[docs] def transform(self, X): """Transform X onto the fitted subsphere. Parameters ---------- X : array-like of shape (n_samples, n_features) Data on (n_features - 1)-dimensional hypersphere. Returns ------- X_new : array-like, shape (n_samples, n_components) X transformed in the new space. """ if X.shape[1] != self._n_features: raise ValueError( f"Input dimension {X.shape[1]} does not match " f"fitted dimension {self._n_features}." ) for v, r in zip(self.v_, self.r_): A, _ = proj(X, v, r) X = embed(A, v, r) return X
[docs] def inverse_transform(self, X): """Transform the low-dimensional data back to the original hypersphere. Parameters ---------- X : array-like of shape (n_samples, n_components) Returns ------- X_new : array-like of shape (n_samples, n_features) """ for v, r in zip(reversed(self.v_), reversed(self.r_)): X = reconstruct(X, v, r) return X
[docs] def to_hypersphere(self, X): """Alias for :meth:`inverse_transform`.""" return self.inverse_transform(X)
PNS = ExtrinsicPNS
[docs] class IntrinsicPNS(TransformerMixin, BaseEstimator): r"""Principal nested spheres (PNS) analysis with intrinsic coordinates. Reduces the dimensionality of data on a high-dimensional hypersphere while preserving its spherical geometry. The resulting data are intrinsic Euclidean coordinates, which are the scaled residuals in each dimension. For example, `n_components=2` represents data on the surface of a 3D sphere. Parameters ---------- n_components : int, default=None Number of components to keep. Data are transformed onto a Euclidean space in this dimension, representing the surface of a hypersphere with the same dimension. If None, all components are kept, i.e., extrinsic coordinates are converted to intrinsic coordinates without loosing dimenisonality. tol : float, default=1e-3 Optimization tolerance. maxiter : int, optional Maximum number of iterations for the optimization. If None, the number of iterations is not checked. Attributes ---------- embedding_ : ndarray of shape (n_samples, d) The embedding vectors, :math:`\Xi(0), \Xi(1), \ldots, \Xi(d-1)`, where the input data is on d-sphere. v_ : list of arrays Principal directions of nested spheres, :math:`\hat{v}_1, \hat{v}_2, \ldots, \hat{v}_d`. r_ : ndarray Principal radii of nested spheres, :math:`\hat{r}_1, \hat{r}_2, \ldots, \hat{r}_d`. Notes ----- The resulting data is the transposed matrix of .. math:: \hat{X}_\mathrm{PNS} = \begin{bmatrix} \Xi(0) \\ \Xi(1) \\ \vdots \\ \Xi(n) \end{bmatrix}, with notations in the original paper, where :math:`n` is *n_components*. The coordinates lie in :math:`[-\pi, \pi] \times [-\pi/2, \pi/2]^{n-1}`, i.e., the azimuthal angle is the first coordinate. Examples -------- >>> from skpns import IntrinsicPNS >>> from skpns.util import circular_data, unit_sphere >>> X = circular_data() >>> pns = IntrinsicPNS() >>> Xi = pns.fit_transform(X) >>> import matplotlib.pyplot as plt # doctest: +SKIP ... fig = plt.figure() ... ax1 = fig.add_subplot(121, projection='3d', computed_zorder=False) ... ax1.plot_surface(*unit_sphere(), color='skyblue', edgecolor='gray') ... ax1.scatter(*X.T, c=Xi[:, 0]) ... ax2 = fig.add_subplot(122) ... ax2.scatter(*Xi.T, c=Xi[:, 0]) ... ax2.set_xlim(-np.pi, np.pi) ... ax2.set_ylim(-np.pi/2, np.pi/2) """ def __init__(self, n_components=None, tol=1e-3, maxiter=None): self.n_components = n_components self.tol = tol self.maxiter = maxiter def _fit_transform(self, X): if self.n_components is None: self.n_components = X.shape[1] - 1 self._n_features = X.shape[1] # d+1 self.v_ = [] self.r_ = [] residuals = [] for v, r, _, Xi in pns(X, self.tol, residual="scaled", maxiter=self.maxiter): self.v_.append(v) self.r_.append(r) residuals.append(Xi) self.embedding_ = np.flip(np.concatenate(residuals, axis=-1), axis=-1)
[docs] def fit(self, X, y=None): """Find principal nested spheres for the data X. Parameters ---------- X : array-like of shape (n_samples, n_features) Data on (n_features - 1)-dimensional hypersphere. y : Ignored Not used, present for API consistency by convention. Returns ------- self : object Returns a fitted instance of self. """ self._fit_transform(X) return self
[docs] def fit_transform(self, X, y=None): """Fit the model with data in X and transform X. Parameters ---------- X : array-like of shape (n_samples, n_features) Data on (n_features - 1)-dimensional hypersphere. y : Ignored Not used, present for API consistency by convention. Returns ------- X_new : array-like, shape (n_samples, n_components) X transformed in the new space. """ self._fit_transform(X) return self.embedding_[:, : self.n_components]
[docs] def transform(self, X, y=None): """Transform X onto the fitted subsphere. Parameters ---------- X : array-like of shape (n_samples, n_features) Data on (n_features - 1)-dimensional hypersphere. Returns ------- X_new : array-like, shape (n_samples, n_components) X transformed in the new space. """ if X.shape[1] != self._n_features: raise ValueError( f"Input dimension {X.shape[1]} does not match " f"fitted dimension {self._n_features}." ) d = X.shape[1] - 1 residuals = [] sin_r = 1 for k in range(1, d): v, r = self.v_[k - 1], self.r_[k - 1] P, xi = proj(X, v, r) X = embed(P, v, r) Xi = sin_r * xi residuals.append(Xi) sin_r *= np.sin(r) v, r = self.v_[d - 1], self.r_[d - 1] _, xi = proj(X, v, r) Xi = sin_r * xi residuals.append(Xi) ret = np.flip(np.concatenate(residuals, axis=-1), axis=-1) return ret[:, : self.n_components]
[docs] def inverse_transform(self, Xi): """Transform the low-dimensional data back to the original hypersphere. Parameters ---------- X : array-like of shape (n_samples, n_components) Returns ------- X_new : array-like of shape (n_samples, n_features) Examples -------- >>> from skpns import IntrinsicPNS >>> from skpns.util import circular_data, unit_sphere >>> X = circular_data() >>> pns = IntrinsicPNS(1) >>> Xi = pns.fit_transform(X) >>> X_inv = pns.inverse_transform(Xi) >>> import matplotlib.pyplot as plt # doctest: +SKIP ... ax = plt.figure().add_subplot(projection='3d', computed_zorder=False) ... ax.plot_surface(*unit_sphere(), color='skyblue', edgecolor='gray') ... ax.scatter(*X.T) ... ax.scatter(*X_inv.T) """ d = self._n_features - 1 _, n = Xi.shape if n > d: raise ValueError( f"Input dimension {n} is larger than fitted dimension {d}." ) Xi = np.concatenate( [Xi, np.zeros((Xi.shape[0], self._n_features - n - 1))], axis=-1 ) # Now, each column in Xi is Xi(0), ..., Xi(d-1). # Un-scale Xi, i.e., xi(d-k) = Xi(d-k) / prod_{i=1}^{k-1}(sin(r_i)). sin_rs = np.sin(self.r_[:-1]) # sin(r_1), sin(r_2), ..., sin(r_{d-1}) xi = Xi.copy() # xi(0), ..., xi(d-1) prod_sin_r = np.prod(sin_rs) for i in range(d - 1): xi[:, i] /= prod_sin_r prod_sin_r /= sin_rs[-i - 1] xi[:, d - 1] /= prod_sin_r # Starting from the lowest dimension, # 1. Convert to cartesian coordinates. # 2. Reconstruct to one higher dimension, with north pole different from v. # 3. Rotate for v. # 4. Un-project with residuals. # 5. Go to 2. # Initialize: rotate xi(0) and convert to cartesian xi[:, 0] += _cartesian_to_hyperspherical(self.v_[-1][np.newaxis, ...])[0] x_dagger = _hyperspherical_to_cartesian(xi[:, :1]) # Step 2 to Step 5 for i in range(d - 1): k = i + 1 # 1, 2, ..., d - 1 A = reconstruct(x_dagger, self.v_[-1 - k], self.r_[-1 - k]) x_dagger = _inv_proj(A, np.sin(xi[:, k]), self.v_[-1 - k], self.r_[-1 - k]) return x_dagger
def _cartesian_to_hyperspherical(X): """ Convert (N, d+1) Cartesian coordinates on a d-sphere to hyperspherical coordinates (N, d). Convention: - xi[..., 0]: azimuthal angle in [-pi, pi] - xi[..., 1:]: centered elevation angles in [-pi/2, pi/2] """ N, D = X.shape d = D - 1 xi = np.zeros((N, d)) xi[:, 0] = np.arctan2(X[:, 1], X[:, 0]) for i in range(1, d - 1): denom = np.linalg.norm(X[:, i:], axis=1) xi[:, i] = np.arctan(X[:, i + 1] / denom) xi[:, -1] = np.arctan2(X[:, -1], np.linalg.norm(X[:, :-1], axis=1)) return xi def _hyperspherical_to_cartesian(xi): """ Convert (N, d) hyperspherical coordinates back to Cartesian coordinates (N, d+1) on a unit d-sphere. Convention: - xi[..., 0]: azimuthal angle in [-pi, pi] - xi[..., 1:]: centered elevation angles in [-pi/2, pi/2] """ N, d = xi.shape X = np.zeros((N, d + 1)) for n in range(N): angles = xi[n] cos_elev = np.cumprod(np.cos(angles[1:][::-1]))[::-1] X[n, 0] = np.cos(angles[0]) * (cos_elev[0] if d > 1 else 1) X[n, 1] = np.sin(angles[0]) * (cos_elev[0] if d > 1 else 1) for k in range(1, d): X[n, k + 1] = np.sin(angles[k]) * (cos_elev[k] if k < d - 1 else 1) return X def _inv_proj(P, xi, v, r): """Inverse of pns.proj(). Parameters ---------- P, xi : Results from pns.proj() v, r : Principal axis and geodesic distance. """ rho = (xi + r)[..., np.newaxis] return (P * np.sin(rho) - np.sin(rho - r) * v) / np.sin(r)