preparing for release 0.3 II

scverse · Nov 17, 2017 · e25ed83 · flying-sheep · Mar 3, 2018 · falexwolf
1 parent 5683e83
commit e25ed83
Show file tree

Hide file tree

Showing 7 changed files with 45 additions and 53 deletions.
diff --git a/docs/requires.txt b/docs/requires.txt
@@ -1,8 +1,8 @@
 # stuff necessary for docs generation
 numpydoc
 # same as ../requires.txt, but omitting the c++ packages
-anndata>=0.3.0.3
-matplotlib==2.0.0  # 2.0.2 seems buggy
+anndata>=0.3.1
+matplotlib  # 2.0.2 seems buggy
 pandas
 scipy
 seaborn

diff --git a/requires.txt b/requires.txt
@@ -1,5 +1,5 @@
-anndata>=0.3.0.3
-matplotlib==2.0.0  # 2.0.2 and higher seems buggy
+anndata>=0.3.1
+matplotlib==2.0.0  # >= 2.0.2 sometimes crashes in sc.pl.aga_graph, need to resolve
 pandas
 scipy
 # neat visualizations

diff --git a/scanpy/data_structs/data_graph.py b/scanpy/data_structs/data_graph.py
@@ -23,7 +23,7 @@ def add_or_update_graph_in_adata(
         adata,
         n_neighbors=30,
         n_pcs=50,
-        n_dcs=15,
+        n_dcs=None,
         knn=None,
         recompute_pca=False,
         recompute_distances=False,
@@ -209,7 +209,7 @@ def __init__(self,
                  knn=True,
                  n_jobs=None,
                  n_pcs=50,
-                 n_dcs=N_DCS,
+                 n_dcs=None,
                  recompute_pca=False,
                  recompute_distances=False,
                  recompute_graph=False,

diff --git a/scanpy/plotting/tools.py b/scanpy/plotting/tools.py
@@ -441,7 +441,7 @@ def aga(
         show=None,
         save=None,
         ext=None,
-        title_graph=None,
+        title_graph='abstracted graph',
         groups_graph=None,
         color_graph=None,
         **aga_graph_params):

diff --git a/scanpy/preprocessing/recipes.py b/scanpy/preprocessing/recipes.py
@@ -1,16 +1,15 @@
-# Author: F. Alex Wolf (http://falexwolf.de)
+# Author: Alex Wolf (http://falexwolf.de)
 """Preprocessing recipes from the literature
 """
 
-from .. import settings as sett
 from . import simple as pp
 
 
 def recipe_weinreb16(adata, mean_threshold=0.01, cv_threshold=2,
                      n_pcs=50, svd_solver='randomized', random_state=0, copy=False):
     """Normalization and filtering as of [Weinreb17]_.
 
-    This is deprecated but remains for backwards compatibility.
+    Expects logarithmized data.
 
     Parameters
     ----------
@@ -24,10 +23,6 @@ def recipe_weinreb16(adata, mean_threshold=0.01, cv_threshold=2,
         Change to use different initial states for the optimization.
     copy : bool (default: False)
         Return a copy if true.
-
-    Reference
-    ---------
-    Weinreb et al., bioRxiv doi:10.1101/090332 (2016).
     """
     from scipy.sparse import issparse
     if issparse(adata.X):
@@ -54,6 +49,8 @@ def recipe_weinreb16(adata, mean_threshold=0.01, cv_threshold=2,
 def recipe_zheng17(adata, n_top_genes=1000, zero_center=True, plot=False, copy=False):
     """Normalization and filtering as of [Zheng17]_.
 
+    Expects non-logarithmized data.
+
     This reproduces the preprocessing of [Zheng17]_, which at the time was the
     Cell Ranger R Kit preprocessing of 10X Genomics.
 
@@ -68,16 +65,6 @@ def recipe_zheng17(adata, n_top_genes=1000, zero_center=True, plot=False, copy=F
         Show a plot of the gene dispersion vs. mean relation.
     copy : bool, optional (default: False)
         Return a copy of adata instead of updating the passed object.
-
-    Returns
-    -------
-    Returns or updates adata depending on `copy` with
-         adata.X, storing the preprocessed data matrix
-
-    Reference
-    ---------
-    Zheng et al., Nature Communications 8, 14049 (2017)
-        https://doi.org/10.1038/ncomms14049.
     """
     if copy: adata = adata.copy()
     pp.filter_genes(adata, min_counts=1)  # only consider genes with more than 1 count

diff --git a/scanpy/tools/aga.py b/scanpy/tools/aga.py
@@ -27,20 +27,21 @@
 
         adata.add['iroot'] = np.flatnonzero(adata.smp['exp_groups'] == 'Stem')[0]
 
-    Approximate graph abstraction (AGA) quantifies the connectivity of partitions of a
-    neighborhood graph of single cells, thereby generating a much simpler
-    abstracted graph whose nodes label the partitions. Together with a random
-    walk-based distance measure, this generates a topology preserving map of
-    single cells --- a partial coordinatization of data useful for exploring and
-    explaining its variation. We use the abstracted graph to assess which
-    subsets of data are better explained by discrete clusters than by a
-    continuous variable, to trace gene expression changes along aggregated
-    single-cell paths through data and to infer abstracted trees that best
-    explain the global topology of data.
-
-    The Louvain partitioning has been suggested by [Levine15]_ for analyzing
-    single cells. The random-walk based distance measure within AGA is an
-    extension of DPT [Haghverdi16]_.
+    Approximate graph abstraction (AGA) quantifies the connectivity of
+    partitions of a neighborhood graph of single cells, thereby generating a
+    much simpler abstracted graph whose nodes label the partitions. Together
+    with a random walk-based distance measure, this generates a partial
+    coordinatization of data useful for exploring and explaining its
+    variation. The abstracted graph can be used to assess which subsets of data
+    are better explained by discrete clusters than by a continuous variable, to
+    trace gene expression changes along aggregated single-cell paths through
+    data and to infer abstracted trees that best explain the global topology of
+    data.
+
+    AGA starts off from two main computational approaches for analyzing
+    single-cell data: the Louvain algorithm, which [Levine15]_ suggested for
+    clustering single-cell data, and DPT [Haghverdi16]_, which the random-walk
+    based distance measure within AGA extends.
 
     Most of the following parameters appear similarly in other tools and are
     used to generate the graph.
@@ -61,12 +62,12 @@
     n_dcs : int, optional (default: 10)
         Number of diffusion components (very similar to eigen vectors of
         adjacency matrix) to use for distance computations.
-    node_groups : any categorical sample annotation or {{'louvain', 'segments'}}, optional (default: 'louvain')
-        Criterion to determine the resoluting partitions of the
-        graph/data. 'louvain' uses the louvain algorithm and optimizes
-        modularity of the graph, 'segments' uses a bipartioning
-        criterium that is loosely inspired by hierarchical clustering. You can
-        also pass your predefined groups by choosing any sample annotation.
+    groups : any categorical smp/cell annotation or {{'louvain_groups', 'segments'}}, optional (default: 'louvain_groups')
+        Criterion to determine the resulting partitions of the single-cell
+        graph. 'louvain_groups' uses the louvain algorithm and optimizes
+        modularity of the graph, 'segments' uses a bipartitioning criterion
+        that is loosely inspired by hierarchical clustering. You can also pass
+        your predefined groups by choosing any sample annotation.
     resolution : float, optional (default: 1.0)
         See tool `louvain`.
     random_state : int, optional (default: 0)
@@ -81,9 +82,9 @@
         How to measure connectedness between groups.
     n_nodes : int or None, optional (default: None)
         Number of nodes in the abstracted graph. Except when choosing
-        'segments' for `node_groups`, for which `n_nodes` defaults to
+        'segments' for `groups`, for which `n_nodes` defaults to
         `n_nodes=1`, `n_nodes` defaults to the number of groups implied by the
-        choice of `node_groups`.
+        choice of `groups`.
     recompute_graph : bool, optional (default: False)
         Recompute single-cell graph. If a cached `distance` or `X_diffmap` is
         already in adata, `n_neighbors` only has an effect when this is True.
@@ -126,8 +127,8 @@ def aga(adata,
         n_neighbors=None,
         n_pcs=50,
         n_dcs=10,
-        node_groups='louvain',
-        resolution=1,
+        groups='louvain_groups',
+        resolution=None,
         random_state=0,
         attachedness_measure='connectedness',
         tree_detection='min_span_tree',
@@ -145,9 +146,11 @@ def aga(adata,
         raise ValueError('`tree_detection` needs to be one of {}'
                          .format({'iterative_matching', 'min_span_tree'}))
     fresh_compute_louvain = False
-    if (node_groups == 'louvain'
+    if (groups == 'louvain_groups'
         and ('louvain_groups' not in adata.smp_keys()
+             # resolution does not match
              or ('louvain_params' in adata.uns
+                 and resolution is not None
                  and adata.uns['louvain_params']['resolution'] != resolution)
              or recompute_louvain
              or not data_graph.no_recompute_of_graph_necessary(
@@ -167,8 +170,7 @@ def aga(adata,
                 n_dcs=n_dcs,
                 random_state=random_state)
         fresh_compute_louvain = True
-    clusters = node_groups
-    if node_groups == 'louvain': clusters = 'louvain_groups'
+    clusters = groups
     logg.info('running Approximate Graph Abstraction (AGA)', reset=True)
     if ('iroot' not in adata.uns
         and 'xroot' not in adata.uns
@@ -271,7 +273,8 @@ def aga_expression_entropies(adata):
         Entropies of median expressions for each node.
     """
     from scipy.stats import entropy
-    groups_order, groups_masks = utils.select_groups(adata, smp='aga_groups')
+    groups_order, groups_masks = utils.select_groups(adata,
+                                                     key=adata.uns['aga_groups_key'])
     entropies = []
     for mask in groups_masks:
         X_mask = adata.X[mask]
@@ -473,6 +476,8 @@ def contract_nodes(adjacency_tree_confidence, node_groups):
 
 class AGA(data_graph.DataGraph):
     """Approximate Graph Abstraction
+
+    This needs to be rewritten in a cleaner way.
     """
 
     def __init__(self,

diff --git a/scanpy/tools/louvain.py b/scanpy/tools/louvain.py
@@ -24,7 +24,7 @@ def louvain(adata,
             recompute_pca=False,
             recompute_distances=False,
             recompute_graph=False,
-            n_dcs=15,
+            n_dcs=None,
             n_jobs=None,
             copy=False):
     """Cluster cells into subgroups [Blondel08]_ [Levine15]_ [Traag17]_.