import pandas as pd
import scanpy as sc
In [ ]:
import numpy as np
In [13]:
sc.settings.verbosity = 3 # verbosity: errors (0), warnings (1), info (2), hints (3)
sc.logging.print_header()
sc.settings.set_figure_params(dpi=80, facecolor='white')
scanpy==1.7.2 anndata==0.7.6 umap==0.5.1 numpy==1.20.3 scipy==1.6.3 pandas==1.2.4 scikit-learn==0.24.2 statsmodels==0.12.2
In [14]:
results_file = 'write/pbmc3k.h5ad' # the file that will store the analysis results
In [15]:
adata = sc.read_10x_mtx(
'data/filtered_gene_bc_matrices/hg19/', # the directory with the `.mtx` file
var_names='gene_symbols', # use gene symbols for the variable names (variables-axis index)
cache=True) # write a cache file for faster subsequent reading
... reading from cache file cache/data-filtered_gene_bc_matrices-hg19-matrix.h5ad
In [16]:
cd ~
/root
In [17]:
adata = sc.read_10x_mtx(
'data/filtered_gene_bc_matrices/hg19/', # the directory with the `.mtx` file
var_names='gene_symbols', # use gene symbols for the variable names (variables-axis index)
cache=True) # write a cache file for faster subsequent reading
... reading from cache file cache/data-filtered_gene_bc_matrices-hg19-matrix.h5ad
In [18]:
adata.var_names_make_unique() # this is unnecessary if using `var_names='gene_ids'` in `sc.read_10x_mtx`
In [19]:
adata
Out[19]:
AnnData object with n_obs × n_vars = 2700 × 32738
var: 'gene_ids'
In [20]:
sc.pl.highest_expr_genes(adata, n_top=20, )
normalizing counts per cell
finished (0:00:00)
In [ ]:


