Skip to content

Commit

Permalink
preparing for release 0.3 II
Browse files Browse the repository at this point in the history
  • Loading branch information
falexwolf committed Nov 17, 2017
1 parent 5683e83 commit e25ed83
Show file tree
Hide file tree
Showing 7 changed files with 45 additions and 53 deletions.
4 changes: 2 additions & 2 deletions docs/requires.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
# stuff necessary for docs generation
numpydoc
# same as ../requires.txt, but omitting the c++ packages
anndata>=0.3.0.3
matplotlib==2.0.0 # 2.0.2 seems buggy
anndata>=0.3.1
matplotlib # 2.0.2 seems buggy
pandas
scipy
seaborn
Expand Down
4 changes: 2 additions & 2 deletions requires.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
anndata>=0.3.0.3
matplotlib==2.0.0 # 2.0.2 and higher seems buggy
anndata>=0.3.1
matplotlib==2.0.0 # >= 2.0.2 sometimes crashes in sc.pl.aga_graph, need to resolve

This comment has been minimized.

Copy link
@flying-sheep

flying-sheep Mar 3, 2018

Member

mpl is now 2.1.x, does it still crash?

This comment has been minimized.

Copy link
@falexwolf

falexwolf Mar 3, 2018

Author Member

hm, I don't know; but sure, one needs to get rid of the '==2.0.0'

This comment has been minimized.

Copy link
@falexwolf

falexwolf Mar 3, 2018

Author Member

I'm preparing scanpy 1.0, the API and the code base will achieve higher consistency and will become slimmer. we should achieve the same with the requirements...

pandas
scipy
# neat visualizations
Expand Down
4 changes: 2 additions & 2 deletions scanpy/data_structs/data_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def add_or_update_graph_in_adata(
adata,
n_neighbors=30,
n_pcs=50,
n_dcs=15,
n_dcs=None,
knn=None,
recompute_pca=False,
recompute_distances=False,
Expand Down Expand Up @@ -209,7 +209,7 @@ def __init__(self,
knn=True,
n_jobs=None,
n_pcs=50,
n_dcs=N_DCS,
n_dcs=None,
recompute_pca=False,
recompute_distances=False,
recompute_graph=False,
Expand Down
2 changes: 1 addition & 1 deletion scanpy/plotting/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -441,7 +441,7 @@ def aga(
show=None,
save=None,
ext=None,
title_graph=None,
title_graph='abstracted graph',
groups_graph=None,
color_graph=None,
**aga_graph_params):
Expand Down
21 changes: 4 additions & 17 deletions scanpy/preprocessing/recipes.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,15 @@
# Author: F. Alex Wolf (http://falexwolf.de)
# Author: Alex Wolf (http://falexwolf.de)
"""Preprocessing recipes from the literature
"""

from .. import settings as sett
from . import simple as pp


def recipe_weinreb16(adata, mean_threshold=0.01, cv_threshold=2,
n_pcs=50, svd_solver='randomized', random_state=0, copy=False):
"""Normalization and filtering as of [Weinreb17]_.
This is deprecated but remains for backwards compatibility.
Expects logarithmized data.
Parameters
----------
Expand All @@ -24,10 +23,6 @@ def recipe_weinreb16(adata, mean_threshold=0.01, cv_threshold=2,
Change to use different initial states for the optimization.
copy : bool (default: False)
Return a copy if true.
Reference
---------
Weinreb et al., bioRxiv doi:10.1101/090332 (2016).
"""
from scipy.sparse import issparse
if issparse(adata.X):
Expand All @@ -54,6 +49,8 @@ def recipe_weinreb16(adata, mean_threshold=0.01, cv_threshold=2,
def recipe_zheng17(adata, n_top_genes=1000, zero_center=True, plot=False, copy=False):
"""Normalization and filtering as of [Zheng17]_.
Expects non-logarithmized data.
This reproduces the preprocessing of [Zheng17]_, which at the time was the
Cell Ranger R Kit preprocessing of 10X Genomics.
Expand All @@ -68,16 +65,6 @@ def recipe_zheng17(adata, n_top_genes=1000, zero_center=True, plot=False, copy=F
Show a plot of the gene dispersion vs. mean relation.
copy : bool, optional (default: False)
Return a copy of adata instead of updating the passed object.
Returns
-------
Returns or updates adata depending on `copy` with
adata.X, storing the preprocessed data matrix
Reference
---------
Zheng et al., Nature Communications 8, 14049 (2017)
https://doi.org/10.1038/ncomms14049.
"""
if copy: adata = adata.copy()
pp.filter_genes(adata, min_counts=1) # only consider genes with more than 1 count
Expand Down
61 changes: 33 additions & 28 deletions scanpy/tools/aga.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,20 +27,21 @@
adata.add['iroot'] = np.flatnonzero(adata.smp['exp_groups'] == 'Stem')[0]
Approximate graph abstraction (AGA) quantifies the connectivity of partitions of a
neighborhood graph of single cells, thereby generating a much simpler
abstracted graph whose nodes label the partitions. Together with a random
walk-based distance measure, this generates a topology preserving map of
single cells --- a partial coordinatization of data useful for exploring and
explaining its variation. We use the abstracted graph to assess which
subsets of data are better explained by discrete clusters than by a
continuous variable, to trace gene expression changes along aggregated
single-cell paths through data and to infer abstracted trees that best
explain the global topology of data.
The Louvain partitioning has been suggested by [Levine15]_ for analyzing
single cells. The random-walk based distance measure within AGA is an
extension of DPT [Haghverdi16]_.
Approximate graph abstraction (AGA) quantifies the connectivity of
partitions of a neighborhood graph of single cells, thereby generating a
much simpler abstracted graph whose nodes label the partitions. Together
with a random walk-based distance measure, this generates a partial
coordinatization of data useful for exploring and explaining its
variation. The abstracted graph can be used to assess which subsets of data
are better explained by discrete clusters than by a continuous variable, to
trace gene expression changes along aggregated single-cell paths through
data and to infer abstracted trees that best explain the global topology of
data.
AGA builds on two main computational approaches for analyzing single-cell
data: the Louvain algorithm, which has been suggested for clustering
single-cell data by [Levine15]_, and a random-walk based distance measure,
which within AGA is an extension of DPT [Haghverdi16]_.
Most of the following parameters appear similarly in other tools and are
used to generate the graph.
Expand All @@ -61,12 +62,12 @@
n_dcs : int, optional (default: 10)
Number of diffusion components (very similar to eigenvectors of the
adjacency matrix) to use for distance computations.
node_groups : any categorical sample annotation or {{'louvain', 'segments'}}, optional (default: 'louvain')
Criterion to determine the resoluting partitions of the
graph/data. 'louvain' uses the louvain algorithm and optimizes
modularity of the graph, 'segments' uses a bipartioning
criterium that is loosely inspired by hierarchical clustering. You can
also pass your predefined groups by choosing any sample annotation.
groups : any categorical smp/cell annotation or {{'louvain_groups', 'segments'}}, optional (default: 'louvain_groups')
Criterion to determine the resulting partitions of the single-cell
graph. 'louvain_groups' uses the Louvain algorithm and optimizes
modularity of the graph, 'segments' uses a bipartitioning criterion that
is loosely inspired by hierarchical clustering. You can also pass your
predefined groups by choosing any sample annotation.
resolution : float, optional (default: 1.0)
See tool `louvain`.
random_state : int, optional (default: 0)
Expand All @@ -81,9 +82,9 @@
How to measure connectedness between groups.
n_nodes : int or None, optional (default: None)
Number of nodes in the abstracted graph. Except when choosing
'segments' for `node_groups`, for which `n_nodes` defaults to
'segments' for `groups`, for which `n_nodes` defaults to
`n_nodes=1`, `n_nodes` defaults to the number of groups implied by the
choice of `node_groups`.
choice of `groups`.
recompute_graph : bool, optional (default: False)
Recompute single-cell graph. If there is already a cached `distance` or
`X_diffmap` in adata, `n_neighbors` only has an effect when this is set.
Expand Down Expand Up @@ -126,8 +127,8 @@ def aga(adata,
n_neighbors=None,
n_pcs=50,
n_dcs=10,
node_groups='louvain',
resolution=1,
groups='louvain_groups',
resolution=None,
random_state=0,
attachedness_measure='connectedness',
tree_detection='min_span_tree',
Expand All @@ -145,9 +146,11 @@ def aga(adata,
raise ValueError('`tree_detection` needs to be one of {}'
.format({'iterative_matching', 'min_span_tree'}))
fresh_compute_louvain = False
if (node_groups == 'louvain'
if (groups == 'louvain_groups'
and ('louvain_groups' not in adata.smp_keys()
# resolution does not match
or ('louvain_params' in adata.uns
and resolution is not None
and adata.uns['louvain_params']['resolution'] != resolution)
or recompute_louvain
or not data_graph.no_recompute_of_graph_necessary(
Expand All @@ -167,8 +170,7 @@ def aga(adata,
n_dcs=n_dcs,
random_state=random_state)
fresh_compute_louvain = True
clusters = node_groups
if node_groups == 'louvain': clusters = 'louvain_groups'
clusters = groups
logg.info('running Approximate Graph Abstraction (AGA)', reset=True)
if ('iroot' not in adata.uns
and 'xroot' not in adata.uns
Expand Down Expand Up @@ -271,7 +273,8 @@ def aga_expression_entropies(adata):
Entropies of median expressions for each node.
"""
from scipy.stats import entropy
groups_order, groups_masks = utils.select_groups(adata, smp='aga_groups')
groups_order, groups_masks = utils.select_groups(adata,
key=adata.uns['aga_groups_key'])
entropies = []
for mask in groups_masks:
X_mask = adata.X[mask]
Expand Down Expand Up @@ -473,6 +476,8 @@ def contract_nodes(adjacency_tree_confidence, node_groups):

class AGA(data_graph.DataGraph):
"""Approximate Graph Abstraction
This needs to be rewritten in a cleaner way.
"""

def __init__(self,
Expand Down
2 changes: 1 addition & 1 deletion scanpy/tools/louvain.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def louvain(adata,
recompute_pca=False,
recompute_distances=False,
recompute_graph=False,
n_dcs=15,
n_dcs=None,
n_jobs=None,
copy=False):
"""Cluster cells into subgroups [Blondel08]_ [Levine15]_ [Traag17]_.
Expand Down

0 comments on commit e25ed83

Please sign in to comment.