Preprocessing and clustering of the 3k PBMCs dataset¶

v2023-11-15

To view all the HTML components of this Jupyter Notebook, please see Tutorial 1) Processing and analysis of the 3k PBMCs dataset using SC-Elephant.

This tutorial follows steps similar to those of the 3k PBMC tutorials of the popular in-memory single-cell analysis platforms, Seurat and SCANPY.

We use the same dataset (3k PBMCs from a healthy donor), which is freely available from 10x Genomics (download page).

In [1]:
""" 
Reset the tutorial
"""
# delete the intermediate output folders to reset the tutorials
!rm -rf "data/"
!rm -rf "output/"    
In [2]:
"""
Download the data and create output folders
"""
!mkdir data
!wget http://cf.10xgenomics.com/samples/cell-exp/1.1.0/pbmc3k/pbmc3k_filtered_gene_bc_matrices.tar.gz -O data/pbmc3k_filtered_gene_bc_matrices.tar.gz
!cd data && tar -xzf pbmc3k_filtered_gene_bc_matrices.tar.gz && gzip filtered_gene_bc_matrices/hg19/* && mv filtered_gene_bc_matrices/hg19/genes.tsv.gz filtered_gene_bc_matrices/hg19/features.tsv.gz
!mkdir output
--2023-11-18 20:47:07--  http://cf.10xgenomics.com/samples/cell-exp/1.1.0/pbmc3k/pbmc3k_filtered_gene_bc_matrices.tar.gz
Resolving cf.10xgenomics.com (cf.10xgenomics.com)... 104.18.0.173, 104.18.1.173, 2606:4700::6812:1ad, ...
Connecting to cf.10xgenomics.com (cf.10xgenomics.com)|104.18.0.173|:80... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: https://cf.10xgenomics.com/samples/cell-exp/1.1.0/pbmc3k/pbmc3k_filtered_gene_bc_matrices.tar.gz [following]
--2023-11-18 20:47:07--  https://cf.10xgenomics.com/samples/cell-exp/1.1.0/pbmc3k/pbmc3k_filtered_gene_bc_matrices.tar.gz
Connecting to cf.10xgenomics.com (cf.10xgenomics.com)|104.18.0.173|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 7621991 (7.3M) [application/x-tar]
Saving to: ‘data/pbmc3k_filtered_gene_bc_matrices.tar.gz’

data/pbmc3k_filtere 100%[===================>]   7.27M  6.91MB/s    in 1.1s    

2023-11-18 20:47:09 (6.91 MB/s) - ‘data/pbmc3k_filtered_gene_bc_matrices.tar.gz’ saved [7621991/7621991]

In [3]:
"""
Import necessary packages
"""
# do not use the GPU (only inference will be performed, and CPUs are sufficient)
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

# import scelephant
import scelephant as el
from scelephant import RamData
import scanpy as sc
# set figure parameters
sc.set_figure_params( dpi = 200, figsize = ( 6, 5 ), frameon = True )

# plotly export settings
import plotly.io as pio
pio.renderers.default = "notebook"

"""
configure the jupyter notebook environment
"""
el.bk.Wide( 100 ) # adjust the jupyter notebook cell width 

# set image resolution
import matplotlib as mpl
mpl.rcParams[ "figure.dpi" ] = 100

# load memory profiler 
# %load_ext memory_profiler

Convert 10x MEX format (Matrix Market format) to RamData¶

In [4]:
# initialize a pool of managed operators
fop = el.managers.FileSystemOperatorPool( 8 ) # this pool of managed operators will be used throughout the tutorials

# create RamData from 10X-formatted MTX
el.create_ramdata_from_mtx( 
    path_folder_mtx_10x_input = f'data/filtered_gene_bc_matrices/hg19/', 
    path_folder_ramdata_output = f'output/pbmc3k.ram/',
    file_system_operator_pool = fop,
)
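
If the conversion fails, it is worth confirming that the input folder matches the 10x MEX layout (the download cell above gzipped the files and renamed genes.tsv.gz to features.tsv.gz). A minimal sanity check, assuming the shell commands above completed successfully:

import os
# list the MEX input folder; the three gzipped files below are expected
print( sorted( os.listdir( 'data/filtered_gene_bc_matrices/hg19/' ) ) )
# expected output: ['barcodes.tsv.gz', 'features.tsv.gz', 'matrix.mtx.gz']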

Loading RamData¶

In [5]:
ram = RamData( 
    f'output/pbmc3k.ram/', 
    int_total_weight_for_each_batch = 100_000,
    int_num_cpus = 8,
    file_system_operator_pool = fop
)
2023-11-18 20:47:24,265 [SC-Elephant] <INFO> (layer) - 'raw' layer has been loaded

View RamData¶

  • JavaScript-based interactive visualization of RamData object in Jupyter Notebook
In [6]:
ram
Out[6]:

RamData

RamData object (2700 barcodes X 32738 features, 2286884 records in the currently active layer 'raw') stored at /home/merit_an/git/scelephant/doc/jn/tutorials/output/pbmc3k.ram/ with the following layers : {'raw'} current layer is 'raw'
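
The same summary can be read programmatically. A brief sketch using the axis attributes that appear later in this tutorial (ram.bc, the barcode axis, and ram.ft, the feature axis); note that len( ram.ft ) is an assumption modeled on the len( ram.bc ) call used in the subsampling cell below:

print( len( ram.bc ) ) # number of barcodes (cells): 2700
print( len( ram.ft ) ) # number of features: 32738 (assumes the feature axis supports len() like the barcode axis)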

Preprocessing of Raw count data¶

In [7]:
"""
Perform preprocessing of the raw count matrix
"""
# settings
name_col_filter_filtered_barcode = 'filtered_barcodes'
flag_use_fast_mode = False # False generates all intermediate output layers; set the flag to True to skip them for a faster analysis

''' preprocess outputs '''
ram.prepare_dimension_reduction_from_raw( 
    name_layer_raw = 'raw', # name of the input layer
    name_layer_capped = 'normalized_log1p_capped', # name of the output layer for dimension reduction
    name_layer_scaled = None, # does not perform scaling (capping is often sufficient)
    name_col_filter_filtered_barcode = name_col_filter_filtered_barcode, # the name of the 'barcode' axis filter that will contain filtered cells
    min_counts = 500, # cell filtering criteria
    min_features = 200, 
    max_counts = 15000,
    max_features = 2500,
    int_total_count_target = 10_000, # target count for count normalization
    int_num_highly_variable_features = 2_000, # the number of the highly-variable features for performing dimension reduction
    name_col_filter_highly_variable = 'filter_normalized_log1p_highly_variable_2000', # the name of the 'feature' axis filter that will contain detected highly variable features
    max_value = 10, 
    dict_kw_hv = { 'float_min_mean' : 0.01, 'float_min_variance' : 0.01 }, 
    flag_use_fast_mode = flag_use_fast_mode,
)
2023-11-18 20:47:26,984 [SC-Elephant] <INFO> (prepare_dimension_reduction_from_raw) - [SLOW MODE] converting dense to sparse formats ... 
2023-11-18 20:47:27,037 [aiobotocore.credentials] <INFO> (load) - Found credentials in shared credentials file: ~/.aws/credentials
raw/dense > raw/sparse_for_querying_features: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2286884.0/2286884 [00:01<00:00, 1886130.92it/s]
raw/dense > raw/sparse_for_querying_barcodes: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2286884.0/2286884 [00:01<00:00, 1286743.69it/s]
2023-11-18 20:47:29,729 [SC-Elephant] <INFO> (layer) - 'raw' layer has been loaded
2023-11-18 20:47:29,731 [SC-Elephant] <INFO> (apply) - apply operation raw > raw has been completed
2023-11-18 20:47:29,732 [SC-Elephant] <INFO> (prepare_dimension_reduction_from_raw) - summarizing total count for each barcode ... 
raw / barcodes: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2286884.0/2286884 [00:13<00:00, 175862.54it/s]
2023-11-18 20:47:42,932 [SC-Elephant] <INFO> (summarize) - summarize operation of raw in the 'barcode' axis was completed
2023-11-18 20:47:42,933 [SC-Elephant] <INFO> (prepare_dimension_reduction_from_raw) - filtering barcodes ... 
2023-11-18 20:47:43,557 [SC-Elephant] <INFO> (prepare_dimension_reduction_from_raw) - filtering completed.
raw/sparse_for_querying_barcodes > normalized_log1p_capped/sparse_for_querying_barcodes: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2286884.0/2286884 [00:03<00:00, 691451.47it/s]
raw/sparse_for_querying_features > normalized_log1p_capped/sparse_for_querying_features: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2286884.0/2286884 [00:17<00:00, 133115.54it/s]
2023-11-18 20:48:01,917 [SC-Elephant] <INFO> (apply) - apply operation raw > normalized_log1p_capped has been completed
2023-11-18 20:48:01,959 [SC-Elephant] <INFO> (layer) - 'normalized_log1p_capped' layer has been loaded
normalized_log1p_capped / features: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2286884.0/2286884 [00:20<00:00, 114067.20it/s]
2023-11-18 20:48:22,066 [SC-Elephant] <INFO> (summarize) - summarize operation of normalized_log1p_capped in the 'feature' axis was completed
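
Conceptually, the 'normalized_log1p_capped' layer applies total-count normalization, a log1p transform, and value capping, following the int_total_count_target and max_value arguments above. A minimal NumPy sketch of the transformation for a single cell's counts (an illustration of the math, not SC-Elephant's internal code):

import numpy as np

counts = np.array( [ 3.0, 0.0, 12.0, 1.0 ] ) # toy raw UMI counts of one cell
normalized = counts / counts.sum( ) * 10_000 # scale to int_total_count_target = 10,000
log1p = np.log1p( normalized ) # natural log of (1 + x)
capped = np.minimum( log1p, 10 ) # cap values at max_value = 10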

Perform dimension reduction and clustering¶

In [8]:
# perform UMAP embedding
ram.perform_dimension_reduction_and_clustering(
    name_layer_pca = 'normalized_log1p_capped', 
    name_filter_barcodes = name_col_filter_filtered_barcode,
    name_filter_features = 'filter_normalized_log1p_highly_variable_2000',
    str_embedding_method = 'scanpy-umap', # use scanpy functions to generate umap and leiden clustering results
    float_prop_subsampling_pca = 1, # use all cells without subsampling (TIP: for a large dataset, it is recommended to use a number below 0.5 to reduce PCA fitting time)
    dict_kw_for_run_scanpy_using_pca = {
        'int_neighbors_n_neighbors': 30, 
        'int_neighbors_n_pcs': 30, 
        'set_method': {'umap', 'leiden'}, # run umap and leiden 
        'dict_kw_leiden': {'resolution': 0.75}, 
    },
)
2023-11-18 20:48:24,798 [SC-Elephant] <WARNING> (train_pca) - iPCA model 'ipca' does not exist in the RamData models database, initializing the model.
30 PCs from 1999 features: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2695/2695 [00:18<00:00, 145.35it/s]
2023-11-18 20:48:44,614 [SC-Elephant] <INFO> (post_process_batch) - fit completed for 1-2695 barcodes
2023-11-18 20:48:44,617 [SC-Elephant] <INFO> (train_pca) - fit completed
2023-11-18 20:48:44,720 [SC-Elephant] <INFO> (load_str) - completed loading of 1999 number of strings
2023-11-18 20:48:44,742 [SC-Elephant] <INFO> (load_str) - completed loading of 1999 number of strings
2023-11-18 20:48:45,038 [SC-Elephant] <INFO> (save_model) - ipca|ipca model saved.
30 PCs from 1999 features: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2271928.0/2271928 [00:06<00:00, 358512.44it/s]
2023-11-18 20:48:51,952 [SC-Elephant] <WARNING> (select_ramtx) - currently queried view is (barcode x features) 2695 x 0. please change the filter or queries in order to retrieve a valid count data. For operations that do not require count data, ignore this warning.
2023-11-18 20:48:51,954 [SC-Elephant] <INFO> (run_scanpy_using_pca) - anndata retrieved.
2023-11-18 20:48:56.833888: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-11-18 20:48:57.010284: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-11-18 20:48:57.047936: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-11-18 20:48:57.613450: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /opt/mpi/gcc-9.3.0/openmpi-4.1.2/lib:/usr/local/cuda-11.6/lib64:
2023-11-18 20:48:57.613550: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /opt/mpi/gcc-9.3.0/openmpi-4.1.2/lib:/usr/local/cuda-11.6/lib64:
2023-11-18 20:48:57.613558: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.
2023-11-18 20:49:00,976 [SC-Elephant] <INFO> (run_scanpy_using_pca) - K-nearest neighbor graphs calculation completed.
2023-11-18 20:49:07,429 [SC-Elephant] <INFO> (run_scanpy_using_pca) - UMAP calculation completed, and the resulting UMAP-embedding was saved to the 'X_umap_scanpy' column of the RamData.
2023-11-18 20:49:08,135 [SC-Elephant] <INFO> (run_scanpy_using_pca) - leiden clustering completed, and the resulting cluster membership information was saved to the 'leiden_scanpy' column of the RamData.
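
Because str_embedding_method = 'scanpy-umap' delegates to SCANPY, the settings above roughly correspond to the following standard SCANPY calls on an AnnData object that already carries the iPCA result (a sketch for orientation, not the exact internal code path):

# assuming `adata` holds the PCA coordinates in .obsm['X_pca']
sc.pp.neighbors( adata, n_neighbors = 30, n_pcs = 30 ) # int_neighbors_n_neighbors / int_neighbors_n_pcs
sc.tl.umap( adata ) # 'umap' in set_method
sc.tl.leiden( adata, resolution = 0.75 ) # 'leiden' in set_method, with dict_kw_leiden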

Visualize clustering results¶

In [9]:
"""
Display UMAP clustering results with marker genes
"""
# settings
name_col_label = 'leiden_scanpy'
name_col_umap = 'X_umap_scanpy'
dict_marker_genes = {
    'T-cell' : [ 'CD3D' ],
    'CD4 T-cell' : [ 'CD4' ],
    'CD8 T-cell' : [ 'CD8A', 'CD8B' ],
    'Naive T / TCM' : [ 'CCR7', 'SELL' ],
    'NK' : [ 'GNLY', 'GZMB' ],
    'B cell' : [ 'MS4A1', 'CD79A' ], 
    'Monocyte' : [ 'CD86' ],
    'Classical Monocyte' : [ 'CD14' ], 
    'Non-classical Monocyte' : [ 'FCGR3A' ],
    'Dendritic' : [ 'FCER1A' ],
    'Platelet' : [ 'ITGA2B' ],
} # previously known cell type markers

def Get_AnnData_from_RamData( l_name_gene : list, l_name_obs : list = [ ] ) :
    ''' # 2023-11-18 16:57:29 
    Load data from RamData in the AnnData format.
    '''
    # retrieve data in SCANPY's AnnData format
    ram.ft.filter = None # reset the filter
    adata = ram[
        'normalized_log1p_capped', # [layer selection] name of the layer
        :, # [barcode selection] all active cells (barcodes) in the filter
        [ # [barcode metadata selection] e.g., annotations, barcode names, and PCA/UMAP coordinates
            'str', # 'str' meaning string representations of the cells (barcodes)
            'raw_sum', # total number of UMIs per cell (commonly called 'n_counts')
            name_col_label, # clustering results
            { 'X_pca', name_col_umap } # 2D data (values commonly stored in '.obsm') should be given as a set, or as a dictionary in order to specify the number of dimensions to retrieve (by default, all dimensions are retrieved)
        ] + l_name_obs, 
        l_name_gene, # [feature selection] specific features
        [ 'str' ] # [feature metadata] name of the features
    ]
    adata.obsm[ 'X_umap' ] = adata.obsm[ name_col_umap ] # copy UMAP coordinates for plotting
    return adata

# retrieve AnnData containing normalized gene expression counts of a given list of marker genes
l_marker_genes = [ ] # collect the marker genes
for e in dict_marker_genes :
    l_marker_genes += dict_marker_genes[ e ]
adata = Get_AnnData_from_RamData( l_marker_genes )

# plot leiden clustering result
sc.pl.umap( adata, color = ['leiden_scanpy', 'raw_sum'], vmax = 10_000 )

# show dotplot
sc.pl.dotplot( adata, dict_marker_genes, groupby = name_col_label, figsize = ( 9, 5 ) )
2023-11-18 20:49:08,435 [SC-Elephant] <INFO> (load_str) - completed loading of 2695 number of strings
2023-11-18 20:49:08,508 [SC-Elephant] <INFO> (load_str) - completed loading of 32738 number of strings
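
As the in-cell comment notes, 2D columns such as 'X_pca' can also be requested as a dictionary to limit how many dimensions are loaded. A hedged sketch, assuming the dictionary maps a column name to the number of leading dimensions to retrieve (as the comment describes):

adata_2d = ram[
    'normalized_log1p_capped', # layer selection
    :, # all active barcodes
    [ 'str', { 'X_pca' : 2 } ], # load only the first two PCA dimensions
    l_marker_genes, # feature selection
    [ 'str' ] # feature names
]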
In [10]:
'''plot gene expression values'''
sc.pl.umap( adata, color = l_marker_genes[ : 6 ], cmap = 'Reds', ncols = 2 )

Find marker genes and annotate clusters¶

SC-Elephant enables memory-efficient marker gene search by loading the expression values of each gene separately and performing several tests (see the AUROC sketch after this list). Currently, SC-Elephant supports the following tests:

  • Log2 Fold Change difference (cell type vs. rest)
  • AUROC (area under receiver operating characteristic curve) - measures how the current gene can classify the current cell type from the rest of the cells in the dataset
  • T-test (or Wilcoxon rank-sum test)
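For intuition, the AUROC statistic for a single gene can be reproduced with scikit-learn. A minimal sketch of the metric itself (an illustration, not SC-Elephant's internal implementation):

import numpy as np
from sklearn.metrics import roc_auc_score

# toy data: expression of one gene across 8 cells; 1 = cell belongs to the cluster of interest
expression = np.array( [ 5.1, 4.8, 0.2, 6.0, 0.0, 0.1, 3.9, 0.3 ] )
is_in_cluster = np.array( [ 1, 1, 0, 1, 0, 0, 1, 0 ] )
print( roc_auc_score( is_in_cluster, expression ) ) # 1.0 : the gene perfectly separates the cluster from the rest
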
In [11]:
"""
Find marker genes of each cluster
"""
# settings
flag_perform_subsampling = False # it is highly recommended to turn on cluster-based subsampling for an extremely large dataset!
name_layer = 'normalized_log1p_capped'

if flag_perform_subsampling :
    ''' subsample for each cluster using the label '''
    ram.subsample_for_each_clus( 
        name_col_label = name_col_label, 
        int_num_entries_to_subsample = int( len( ram.bc ) / 3 ),  # subsample 1/3 of cells
        index_col_of_name_col_label = -1,
        name_col_filter = name_col_filter_filtered_barcode, # input column name containing the filter to subsample
        name_col_filter_subsampled = None, # output column name to store the filter. If None is given, output to current filter object
    )

''' search marker feature for each cluster '''
ram.find_markers( 
    name_layer = name_layer, # normalized using log1p-normalized values
    name_col_label = name_col_label, # name of the annotation column to perform marker gene search
    index_name_col_label = -1, # for a typical annotation column containing a 1D data array, set this value to -1 (indicating 1D data)
    l_name_cluster = None, # search marker genes for all cluster labels
    name_col_log2fc = f'{name_col_label}__marker_log2fc', 
    name_col_auroc = f'{name_col_label}__marker_auroc', 
    name_col_pval = f'{name_col_label}__marker_pval', 
    method_pval = 'wilcoxon', # use wilcoxon test 
)

''' retrieve filtered marker calculation results '''
df_res = ram.get_marker_table( 
    max_pval = 1e-3, 
    min_auroc = 0.6, 
    min_log2fc = 1, 
    name_col_log2fc = f'{name_layer}_{name_col_label}__marker_log2fc', 
    name_col_auroc = f'{name_layer}_{name_col_label}__marker_auroc', 
    name_col_pval = f'{name_layer}_{name_col_label}__marker_pval', 
)
df_res # display marker gene search results
2023-11-18 20:49:12,900 [SC-Elephant] <INFO> (find_markers) - [RamData.find_markers] finding markers for 9 number of clusters started
normalized_log1p_capped / features: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2286884.0/2286884 [01:07<00:00, 33730.04it/s]
2023-11-18 20:50:21,297 [SC-Elephant] <INFO> (summarize) - summarize operation of normalized_log1p_capped in the 'feature' axis was completed
2023-11-18 20:50:21,300 [SC-Elephant] <INFO> (find_markers) - finding markers for 9 number of clusters completed
Out[11]:
name_feature name_cluster value_auroc value_log2fc value_pval
0 HES4 5 0.784821 5.012916 1.198525e-34
1 ISG15 2 0.693412 1.053537 9.208979e-42
2 ISG15 5 0.752974 1.144557 1.096021e-27
3 SDF4 6 0.608837 1.689237 4.881225e-06
4 RBP7 2 0.649043 4.956746 1.750609e-25
... ... ... ... ... ...
876 CSTB 5 0.774832 1.447370 2.236602e-32
877 CSTB 7 0.734083 1.110420 1.523886e-04
878 SUMO3 7 0.718719 1.445970 4.022935e-04
879 ITGB2 6 0.803003 1.276745 4.437262e-37
880 PRMT2 6 0.619456 1.245867 5.284158e-07

881 rows × 5 columns
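
The thresholds passed to get_marker_table are conceptually equivalent to the following pandas filter applied to an unfiltered results table (a sketch; `df_all` is a hypothetical DataFrame with the same columns as df_res above):

df_filtered = df_all[
    ( df_all[ 'value_pval' ] <= 1e-3 ) # max_pval
    & ( df_all[ 'value_auroc' ] >= 0.6 ) # min_auroc
    & ( df_all[ 'value_log2fc' ] >= 1 ) # min_log2fc
]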

In [12]:
''' select the best marker genes for each cluster '''
import pandas as pd
# settings 
int_num_marker_genes_to_retrieve_for_each_cluster = 5 # show 5 best marker genes for each cluster
df_res_best = pd.concat( list(
    _df.sort_values( 'value_auroc', ascending = False ).iloc[ : int_num_marker_genes_to_retrieve_for_each_cluster ]
    for name_clus, _df in df_res.groupby( 'name_cluster' )
) )
df_res_best # display results
Out[12]:
name_feature name_cluster value_auroc value_log2fc value_pval
523 CD3D 0 0.718488 1.030397 8.593731e-61
685 CCR7 0 0.697147 2.349322 7.846861e-50
817 NOSIP 0 0.682633 1.055273 5.145078e-43
711 CD7 0 0.652515 1.010316 1.627199e-30
846 PIK3IP1 0 0.646340 1.428182 3.163886e-28
629 IL32 1 0.782837 1.167961 5.834442e-90
199 IL7R 1 0.760255 1.476689 1.849589e-76
524 CD3D 1 0.734960 1.065029 1.160025e-62
522 CD3E 1 0.711379 1.045499 4.636382e-51
36 CD2 1 0.698410 1.421356 3.374307e-45
553 LYZ 2 0.995589 2.195948 1.057025e-263
44 S100A9 2 0.991493 3.438859 2.137711e-259
46 S100A8 2 0.976530 4.182569 5.687890e-244
792 TYROBP 2 0.964748 2.505483 3.685421e-232
723 CST3 2 0.955975 2.597404 1.475804e-223
678 CCL5 3 0.964777 2.918793 3.938972e-176
822 NKG7 3 0.892556 2.525167 3.165132e-126
726 CST7 3 0.809526 2.729516 3.304554e-79
630 IL32 3 0.808673 1.165310 8.807138e-79
201 GZMA 3 0.806762 2.663258 7.832528e-78
219 CD74 4 0.984010 1.160216 1.119588e-187
801 CD79A 4 0.964204 5.356722 8.496395e-173
248 HLA-DRA 4 0.961973 1.724637 3.688188e-171
699 CD79B 4 0.940991 3.401221 3.799975e-156
275 HLA-DPB1 4 0.932506 1.705475 2.839742e-150
242 LST1 5 0.979469 2.277495 6.755443e-95
59 FCER1G 5 0.965243 2.136779 1.843033e-89
64 FCGR3A 5 0.961666 3.362263 4.040408e-88
246 AIF1 5 0.959130 2.109487 3.555060e-87
654 COTL1 5 0.958910 1.379087 4.292273e-87
823 NKG7 6 0.985672 2.808156 1.954639e-92
112 GNLY 6 0.980701 4.059274 1.363382e-90
591 GZMB 6 0.970438 4.586086 7.601851e-87
434 PRF1 6 0.962470 3.834529 5.420209e-84
495 CTSW 6 0.958750 2.512251 1.120704e-82
274 HLA-DPA1 7 0.968566 1.677491 3.435529e-14
220 CD74 7 0.962708 1.047246 7.101765e-14
276 HLA-DPB1 7 0.955651 1.648288 1.683378e-13
257 HLA-DRB1 7 0.954562 1.696712 1.920830e-13
249 HLA-DRA 7 0.947743 1.488289 4.360556e-13
184 PPBP 8 1.000000 7.022588 9.939164e-09
183 PF4 8 1.000000 7.779612 9.939164e-09
222 SPARC 8 1.000000 7.781240 9.939164e-09
319 GNG11 8 1.000000 8.102293 9.939164e-09
530 CD9 8 0.999966 6.545656 9.961947e-09
In [13]:
''' annotate each cluster (rename categories) '''
ram.bc.meta.set_categories( 
    name_col_label,
    {
        '0' : 'CD4+ T cells', # can be classified as 'Naive T / TCM', but to reproduce the SCANPY tutorial's annotations, 'CD4+ T cells' will be used.
        '1' : 'CD4+ T cells',
        '2' : 'Classical Monocytes',
        '3' : 'CD8+ T cells',
        '4' : 'B cells',
        '5' : 'Non-classical Monocytes',
        '6' : 'NK cells',
        '7' : 'Dendritic cells',
        '8' : 'Platelets (Possibly Megakaryocytes)', # based on the relatively low number of UMIs in this cluster, the cluster was named 'Platelets'
    },
)

''' redraw graphs using the new annotations '''
adata = Get_AnnData_from_RamData( l_marker_genes )

# plot leiden clustering result
sc.pl.umap( adata, color = ['leiden_scanpy'] ) # vmax is unnecessary when coloring by categorical cluster labels

# show dotplot
sc.pl.dotplot( adata, dict_marker_genes, groupby = name_col_label, figsize = ( 9, 5 ) )
# done!
2023-11-18 20:50:21,793 [SC-Elephant] <INFO> (load_str) - completed loading of 2695 number of strings
2023-11-18 20:50:21,863 [SC-Elephant] <INFO> (load_str) - completed loading of 32738 number of strings
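
The set_categories call above relabels the Leiden cluster IDs in place. In plain pandas, the analogous operation on a categorical Series would look like the following (an analogy only, not the RamData code path):

import pandas as pd

s = pd.Series( [ '0', '1', '0' ], dtype = 'category' )
s = s.cat.rename_categories( { '0' : 'T cells', '1' : 'Monocytes' } ) # map cluster IDs to cell type names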

View RamData (after the analysis)¶

  • JavaScript-based interactive visualization of RamData object in Jupyter Notebook
In [14]:
ram
Out[14]:

RamData

RamData object (2695/2700 barcodes X 32738 features, 2286884 records in the currently active layer 'normalized_log1p_capped') stored at /home/merit_an/git/scelephant/doc/jn/tutorials/output/pbmc3k.ram/ with the following layers : {'raw', 'normalized_log1p_capped'} current layer is 'normalized_log1p_capped'

To continue to the next tutorial, please view Tutorial 2) Alignment of PBMC3k to the ELDB (320,000 cells subset) and cell type prediction using SC-Elephant