From 1fbbfcdbb53dda7f6ccab60098a9f9cd141a8025 Mon Sep 17 00:00:00 2001 From: MeeseeksMachine <39504233+meeseeksmachine@users.noreply.github.com> Date: Thu, 16 Feb 2023 16:25:34 +0100 Subject: [PATCH] Backport PR #2414 on branch 1.9.x (matplotlib 3.7 compat) (#2419) * Backport PR #2414: matplotlib 3.7 compat * fix scrublet * Update visium default plot for matplotlib 3.7 * Update hashsolo docstrings * skip plotting test that changed on mpl 3.7 if mpl < 3.7 is installed * Fix hashsolo docs (again) * update anndata-dev tests to install anndata test deps * Temporarily set warnings as errors to False for doc builds * Release notes * Fix using custom layer with highly_variable_genes (#2302) * Fix using custom layer with highly_variable_genes * Add tests * Add release note * Move release note to correct section * Format release notes * Add check for number of normalized dispersions (#2231) * Add check for number of normalized dispersions In sc.pp.highly_variable_genes() when flavor='cell_ranger' and n_top_genes is set check that enough normalized dispersions have been calculated and if not raise a warning and set n_top_genes to the number of calculated dispersions. Fixes #2230 * Use .size instead of len() * Add test for n_top_genes warning * Add release note * Remove blank line Co-authored-by: Isaac Virshup --------- Co-authored-by: Isaac Virshup Co-authored-by: adamgayoso Co-authored-by: Dries Schaumont <5946712+DriesSchaumont@users.noreply.github.com> Co-authored-by: Luke Zappia --- .azure-pipelines.yml | 2 +- docs/conf.py | 4 ++- docs/release-notes/1.9.2.md | 12 +++---- scanpy/external/pp/_hashsolo.py | 17 +++++----- scanpy/external/pp/_scrublet.py | 5 +++ scanpy/plotting/_utils.py | 2 +- .../preprocessing/_highly_variable_genes.py | 7 ++++ .../_images/master_spatial_visium_default.png | Bin 1791 -> 1789 bytes scanpy/tests/test_embedding_plots.py | 6 ++++ scanpy/tests/test_highly_variable_genes.py | 30 +++++++++++++++++- 10 files changed, 66 insertions(+), 19 deletions(-) diff --git a/.azure-pipelines.yml b/.azure-pipelines.yml index 6946bbf064..978dfd41e2 100644 --- a/.azure-pipelines.yml +++ b/.azure-pipelines.yml @@ -50,7 +50,7 @@ jobs: displayName: 'Install dependencies' - script: | - pip install -v git+https://github.com/scverse/anndata + 'pip install -v "anndata[dev,test] @ git+https://github.com/scverse/anndata"' displayName: 'Install development anndata' condition: eq(variables['ANNDATA_DEV'], 'yes') diff --git a/docs/conf.py b/docs/conf.py index 634a0489e3..3bf6489379 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -128,7 +128,9 @@ def setup(app): - app.warningiserror = on_rtd + app.warningiserror = ( + False # Temporarily disable warnings as errors to get 1.9.2 out + ) # -- Options for other output formats ------------------------------------------ diff --git a/docs/release-notes/1.9.2.md b/docs/release-notes/1.9.2.md index dfe3d89f56..c2f5b647da 100644 --- a/docs/release-notes/1.9.2.md +++ b/docs/release-notes/1.9.2.md @@ -1,11 +1,9 @@ -### 1.9.2 {small}`the future` - - -```{rubric} Documentation -``` +### 1.9.2 {small}`2023-02-16` ```{rubric} Bug fixes ``` -```{rubric} Performance -``` +* {func}`~scanpy.pp.highly_variable_genes` `layer` argument now works in tandem with `batches` {pr}`2302` {smaller}`D Schaumont` +* {func}`~scanpy.pp.highly_variable_genes` with `flavor='cell_ranger'` now handles the case in {issue}`2230` where the number of calculated dispersions is less than `n_top_genes` {pr}`2231` {smaller}`L Zappia` +* Fix compatibility with matplotlib 3.7 {pr}`2414` {smaller}`I Virshup` {smaller}`P Fisher` +* Fix scrublet numpy matrix compatibility issue {pr}`2395` {smaller}`A Gayoso` diff --git a/scanpy/external/pp/_hashsolo.py b/scanpy/external/pp/_hashsolo.py index 0942572f21..e370d3a000 100644 --- a/scanpy/external/pp/_hashsolo.py +++ b/scanpy/external/pp/_hashsolo.py @@ -25,7 +25,8 @@ def _calculate_log_likelihoods(data, number_of_noise_barcodes): - """Calculate log likelihoods for each hypothesis, negative, singlet, doublet + """\ + Calculate log likelihoods for each hypothesis, negative, singlet, doublet Parameters ---------- @@ -43,8 +44,8 @@ def _calculate_log_likelihoods(data, number_of_noise_barcodes): """ def gaussian_updates(data, mu_o, std_o): - """Update parameters of your gaussian - https://www.cs.ubc.ca/~murphyk/Papers/bayesGauss.pdf + """\ + Update parameters of your gaussian https://www.cs.ubc.ca/~murphyk/Papers/bayesGauss.pdf Parameters ---------- @@ -210,7 +211,7 @@ def gaussian_updates(data, mu_o, std_o): def _calculate_bayes_rule(data, priors, number_of_noise_barcodes): - """ + """\ Calculate bayes rule from log likelihoods Parameters @@ -263,7 +264,8 @@ def hashsolo( number_of_noise_barcodes: int = None, inplace: bool = True, ): - """Probabilistic demultiplexing of cell hashing data using HashSolo [Bernstein20]_. + """\ + Probabilistic demultiplexing of cell hashing data using HashSolo [Bernstein20]_. .. note:: More information and bug reports `here `__. @@ -294,9 +296,8 @@ def hashsolo( Returns ------- - adata - if inplace is False returns AnnData with demultiplexing results - in .obs attribute otherwise does is in place + if inplace is False returns AnnData with demultiplexing results + in .obs attribute otherwise does is in place Examples ------- diff --git a/scanpy/external/pp/_scrublet.py b/scanpy/external/pp/_scrublet.py index 8ac14ab563..58573b1bf1 100644 --- a/scanpy/external/pp/_scrublet.py +++ b/scanpy/external/pp/_scrublet.py @@ -431,6 +431,11 @@ def _scrublet_call_doublets( if mean_center: logg.info('Embedding transcriptomes using PCA...') + # Sklearn PCA doesn't like matrices, so convert to arrays + if isinstance(scrub._E_obs_norm, np.matrix): + scrub._E_obs_norm = np.asarray(scrub._E_obs_norm) + if isinstance(scrub._E_sim_norm, np.matrix): + scrub._E_sim_norm = np.asarray(scrub._E_sim_norm) sl.pipeline_pca( scrub, n_prin_comps=n_prin_comps, random_state=scrub.random_state ) diff --git a/scanpy/plotting/_utils.py b/scanpy/plotting/_utils.py index 5579d171b4..bd95e7bcff 100644 --- a/scanpy/plotting/_utils.py +++ b/scanpy/plotting/_utils.py @@ -32,7 +32,7 @@ VBound = Union[str, float, Callable[[Sequence[float]], float]] -class _AxesSubplot(Axes, axes.SubplotBase, ABC): +class _AxesSubplot(Axes, axes.SubplotBase): """Intersection between Axes and SubplotBase: Has methods of both""" diff --git a/scanpy/preprocessing/_highly_variable_genes.py b/scanpy/preprocessing/_highly_variable_genes.py index 7db4a098c3..c083c374a8 100644 --- a/scanpy/preprocessing/_highly_variable_genes.py +++ b/scanpy/preprocessing/_highly_variable_genes.py @@ -265,6 +265,12 @@ def _highly_variable_genes_single_batch( if n_top_genes > adata.n_vars: logg.info('`n_top_genes` > `adata.n_var`, returning all genes.') n_top_genes = adata.n_vars + if n_top_genes > dispersion_norm.size: + warnings.warn( + '`n_top_genes` > number of normalized dispersions, returning all genes with normalized dispersions.', + UserWarning, + ) + n_top_genes = dispersion_norm.size disp_cut_off = dispersion_norm[n_top_genes - 1] gene_subset = np.nan_to_num(df['dispersions_norm'].values) >= disp_cut_off logg.debug( @@ -458,6 +464,7 @@ def highly_variable_genes( hvg = _highly_variable_genes_single_batch( adata_subset, + layer=layer, min_disp=min_disp, max_disp=max_disp, min_mean=min_mean, diff --git a/scanpy/tests/_images/master_spatial_visium_default.png b/scanpy/tests/_images/master_spatial_visium_default.png index 239028c36c6e495f7877b1c249cce53c75eb25ca..bddbf5cf4f29dfca5249d26bfde333def8748053 100644 GIT binary patch literal 1789 zcmeAS@N?(olHy`uVBq!ia0y~yU<5K54sfsm$=7f9rU5C-64!{5;QX|b^2DN42H(Vz zf}H%4oXjMJvecsD%=|oKJ##$+9fgdNl7eC@ef?ax0=@jAbbTA~bpk*`I14-?iy0W$ z)IpfhH{`}~AS27u#WAE}&fA-NvoB{eAN#o4UutdHC6^;%OS`>Y-31+EbQsyq1r?nf zuk3daI3iv!NyoK7@XZ77*gGPBGnXr6=-%BjQSWF%&s2}zj4hr%Ma!P+JeNEdT(#%? z-PgY*rMbctetz3m|Mq3s?pVeRU*=z6W@vF{U`S?RFmR)cx+Yz9AZvAqmX=uetM~8u zZCTk~ecjd8-fn7OAW(IxYt2puk#5)Co}QL@jLMTIPrk^Oa5k3l$Laq2y3fwvnZ?b} zu)m*y;TQ))LeH?HbWSnssr?=1?d?5jNo#BCf$WBguWfQ4{(DVwF*Y`SRbq8(fBpWn zw6v0!mzG}G+@RrF&h+EU%gLX9?pYVJbJ6=20P`QWP?iWzdw;bYvSIP zMc3BWW|_@~y3xSMXi@zBx|d&GUIq$AZO@B+;ps4C5s$;tpumLz0cNwWet&lxblWG}tFy?yHW=Tpz7 z{rtqNE%KC8VfNWqudc41wB+vYa`rgpBig5g85*uSQkKgo%%oJ;o~_65waWGj%Z$#S z#@8>uE!(}&ea$kjrHhg_uK4%w--Y0W1##=MD=H$GnVA*ww{X+?z2zjv8G*aaDwuV#hv%h_DGa%IZ0Wy`+&`&+&BcCM=DqMJ7( zO{I9}+SkXekJ}q$Ham3t?W<3oEQ#Awq38)p{JZa7119e;Z*Fcr@SbgTCxe89wDjeB zd#g*ozl+VivB9y`$#HF%cl7qWt4EHsh>44Dt^Aypdw*YSznpCl(5Pj;v%fq!DZIDx zv)d$KQZN1R;NXGxEUSANBrFOawH!Ns+|=57b?xtOnzPTY3ekEAH2Brqx1l>@!qg^T zeDGj_%FD;c`*#&T?+aQ9OyNNfzWz`LdZoCy_}0ec_GizZmp(qm3)Jn~>*fq}#nvd_ zyLsU&L%f8Ag(odZ+q}|k@yF-c|4q%zjukhUw*6<4cu`{Y>*wd?T&+!dF*_ENSm}C3 z#l?li$KU_|XRh1gg~06e|J+QD1_p`B@9*ZewY52&Kd_&Jk)h`!4^d_LH})q{O=;Kc S`^tb-5re0zpUXO@geCy~!@zd{ literal 1791 zcmds&ZB)`%9LFy{l$o^5qqEk8lax~n3&oO+MNO=~shJX*=0i4>@QDNyDaFdlO?$%B zGYTA7`IIyVUIKLzXdiA7BEo#AUG|0x7?B!h>YU zcL#-i&7irLRHQdQ`}(fSdFtm!oa1WL?Ki~b`DkoO?&+)fD@Ewh7_`;EM-1LZGQ;-I z{)>%2xlYq7*3mZg$}VHuV+)ISpQp*7fb^TplCjK7hc7C#(qqc%M2w}p8l_P!461l! z3-)mHoB(>Q8E|w4xm*69)b**%sgVr_siNRuE|;ry-5gx=#0!TLx3;#bLgz0_mXk!m zDy^2LUA-BH$J?iX##h!YF*&oTTe!oGSO9XI5TMW&v^ain5>7#|=ceT)rKRhkf`WpR z&%kRvlGvZP9uoKT9FRXxG4$RIlS-wHjF&sL2S|^_K8O>h@f{5ZgG#Yc>JeqxL3DE5Sfl{BKZdsTo?82*y^DjVptBbUA+o>dRY0L-Dn@ zZdLXqpA}m8ZmPCxX+6DkJrsE-d7re0LJ+KIvdWM2La=8|b>1nO>oZehdw3iUhizpw zE2c?6PcpTZx~C#Ypl&btSbn(k!arHFr(Ovp(TeJmClN>+o6m~<&%ksKKCnR|bsgtF ziHL{@siIiF&oR3BMAhBQvFiHzuqX8WGj9#ciOcdMyp#Z=8|=K;HIz=5M~v$Aq2=9H ziwhK&-0tpubEel{x$G5E%DYv3eA3RnW@3l^plj&IH}9Ho-xW6J%vHOGN%H=_lzYp- zfcOGxyTmx6Ok%UcYpL!N7&LvK01hxcsV8Jl%N!gWC{+|MZ||Fi=k@1=mcFjlfZ}E^ z@L~Gd+1U|}4&-%objWh%rd85h8acyq8>2rrwXdn_Ahsf4Lt81zM22Z_Tx?A9Mo81Z zOzg*#X2k>q2#>M=oBaSd{DH{R3`FG zj|g;jc3!h~tqpMqrE3Jhlogd}3C4|9Ahr;Ycoer{3VC zu+A*@`;aPapW--QD116GA?s_-uA$c4z(8JGw^ncz5^17x6!uxy*SJv@fzRiw;Ry`n z>FEjkV|qeYB>W<4m8mxtZwt%^2M0BZqBw z+ZT{~8<@Kk3WZy-{~AX}l}dF9qnn$VYRu4=gj8Mmwmq(Y(zF#mYBU-(cT%px&SDuf z`0RIY$djC?hY6vu#RWmTswnpe&r&LVty)#yBdWW@gbGITmUw*=PB6>^HJ|k zM34L`2qg$gFpE$0_U*BS`5;B~GmSinI}%_)0)