% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/densmap.R
\name{umap}
\alias{umap}
\alias{densmap}
\title{Density-preserving and other implementations of UMAP}
\usage{
umap(
  x,
  n_components = 2L,
  dens_frac = 0.3,
  dens_lambda = 0.1,
  dens_var_shift = 0.1,
  n_neighbors = 30L,
  metric = "euclidean",
  densmap = FALSE,
  n_epochs = 750L,
  learning_rate = 1,
  init = c("spectral", "random"),
  Y_init = NULL,
  min_dist = 0.1,
  spread = 1,
  low_memory = FALSE,
  set_op_mix_ratio = 1,
  local_connectivity = 1L,
  repulsion_strength = 1,
  negative_sample_rate = 5L,
  transform_queue_size = 4,
  random_state = NULL,
  angular_rp_forest = FALSE,
  target_n_neighbors = -1,
  target_weight = 0.5,
  disconnection_distance = NULL
)

densmap(...)
}
\arguments{
\item{x}{A numeric matrix or matrix-like object.}

\item{n_components}{The dimension of the space to embed
into. This defaults to 2 to provide easy visualization,
but can reasonably be set to any integer value in the
range 2 to 100.}

\item{dens_frac}{numeric; fraction of the iterations for
which the full objective function (including the
density-preserving term) is used. For the first
\code{1 - dens_frac} fraction of the iterations, only
the original UMAP objective function is used.
Only takes effect when \code{densmap=TRUE}.}

\item{dens_lambda}{numeric; the relative importance of the
density-preservation term compared to the original UMAP
objective function.
Only takes effect when \code{densmap=TRUE}.}

\item{dens_var_shift}{Regularization term added to the variance
of embedding local radius for stability (float,
non-negative); default 0.1.
Only takes effect when \code{densmap=TRUE}.}

\item{n_neighbors}{The size of local neighborhood
(in terms of number of neighboring sample points) used for
manifold approximation. Larger values result in more
global views of the manifold, while smaller values result
in more local data being preserved. In general values
should be in the range 2 to 100.}

\item{metric}{The metric to use to compute distances in
high dimensional space. If a string is passed it must match
one of:
\itemize{
\item "euclidean"
\item "manhattan"
\item "chebyshev"
\item "minkowski"
\item "canberra"
\item "braycurtis"
\item "mahalanobis"
\item "wminkowski"
\item "seuclidean"
\item "cosine"
\item "correlation"
\item "haversine"
\item "hamming"
\item "jaccard"
\item "dice"
\item "russellrao"
\item "kulsinski"
\item "rogerstanimoto"
\item "sokalmichener"
\item "sokalsneath"
\item "yule"
}}

\item{densmap}{For \code{umap}, controls whether the density-preserving
UMAP algorithm described by Narayan et al. is used.}

\item{n_epochs}{The number of training epochs to be used
in optimizing the low dimensional embedding. Larger values
result in more accurate embeddings. If None is specified a
value will be selected based on the size of the input
dataset (200 for large datasets, 500 for small).}

\item{learning_rate}{The initial learning rate for the embedding optimization.}

\item{init}{How to initialize the low dimensional
embedding. Valid options:
\itemize{
\item "spectral": use a spectral embedding of the fuzzy 1-skeleton
\item "random": assign initial embedding positions at random.
}}

\item{Y_init}{Numeric matrix specifying the initial
locations of the objects in the embedding. If NULL,
random or spectral initialization will be used,
controlled by the \code{init} argument.}

\item{min_dist}{The effective minimum distance between
embedded points. Smaller values will result in a more
clustered/clumped embedding where nearby points on the
manifold are drawn closer together, while larger values
will result in a more even dispersal of points. The value
should be set relative to the spread value, which
determines the scale at which embedded points will be
spread out.}

\item{spread}{The effective scale of embedded points. In combination with
min_dist this determines how clustered/clumped the embedded points are.}

\item{low_memory}{For some datasets the nearest neighbor computation can
consume a lot of memory. If you find that UMAP is failing due to memory
constraints consider setting this option to \code{TRUE}. This approach is more
computationally expensive, but avoids excessive memory use.}

\item{set_op_mix_ratio}{Interpolate between (fuzzy) union
and intersection as the set operation used to combine
local fuzzy simplicial sets to obtain a global fuzzy
simplicial set. Both fuzzy set operations use the product
t-norm. The value of this parameter should be between 0.0
and 1.0; a value of 1.0 will use a pure fuzzy union, while
0.0 will use a pure fuzzy intersection.}

\item{local_connectivity}{The local connectivity required
– i.e. the number of nearest neighbors that should be
assumed to be connected at a local level. The higher this
value the more connected the manifold becomes locally. In
practice this should be not more than the local intrinsic
dimension of the manifold.}

\item{repulsion_strength}{Weighting applied to negative
samples in low dimensional embedding optimization. Values
higher than one will result in greater weight being given
to negative samples.}

\item{negative_sample_rate}{The number of negative samples
to select per positive sample in the optimization process.
Increasing this value will result in greater repulsive
force being applied, greater optimization cost, but
slightly more accuracy.}

\item{transform_queue_size}{For transform operations
(embedding new points using a trained model) this will
control how aggressively to search for nearest neighbors.
Larger values will result in slower performance but more
accurate nearest neighbor evaluation.}

\item{random_state}{The seed used by the random number
generator.}

\item{angular_rp_forest}{Whether to use an angular random
projection forest to initialise the approximate nearest
neighbor search. This can be faster, but is mostly only
useful for metrics that use an angular style distance such
as cosine, correlation etc. In the case of those metrics
angular forests will be chosen automatically.}

\item{target_n_neighbors}{The number of nearest neighbors
to use to construct the target simplicial set. If set to -1
use the n_neighbors value.}

\item{target_weight}{Weighting factor between data
topology and target topology. A value of 0.0 weights
entirely on data, a value of 1.0 weights entirely on
target. The default of 0.5 balances the weighting equally
between data and target.}

\item{disconnection_distance}{Numeric scalar.
If specified, UMAP will disconnect any vertices of distance greater than or
equal to disconnection_distance when approximating the manifold via our k-nn
graph. This is particularly useful in the case that you have a bounded
metric. The UMAP assumption that we have a connected manifold can be
problematic when you have points that are maximally different from all the
rest of your data. The connected manifold assumption will make such points
have perfect similarity to a random set of other points. Too many such
points will artificially connect your space.}

\item{...}{Passed from \code{densmap} to \code{umap}.}
}
\value{
A numeric matrix
}
\description{
Density-preserving and other implementations of UMAP
}
\examples{
set.seed(42)
x <- matrix(rnorm(200), ncol=2)
densmap(x)
}
\references{
Density-Preserving Data Visualization Unveils Dynamic Patterns of Single-Cell
Transcriptomic Variability
Ashwin Narayan, Bonnie Berger, Hyunghoon Cho;
bioRxiv (2020)
\url{https://doi.org/10.1101/2020.05.12.077776}
}
