In this demo, we show how to integrate 10X scRNA-seq datasets from multiple tissues in Tabula Muris using FIRM.
The scRNA-seq datasets can be downloaded from https://figshare.com/projects/Tabula_Muris_Transcriptomic_characterization_of_20_organs_and_tissues_from_Mus_musculus_at_single_cell_resolution/27733 and saved in ‘./TabulaMuris/rawdata’.
# Load and prepare metadata
library(Seurat)
library(FIRM)
library(RANN)
system("mkdir -p ./TabulaMuris/atlas/data")
system("mkdir -p ./TabulaMuris/atlas/data/hvg")
meta_tenx <- read.csv("./TabulaMuris/rawdata/annotations_droplet.csv")
rownames(meta_tenx) <- meta_tenx$cell
save(meta_tenx, file = "./TabulaMuris/atlas/data/meta_tenx.RData")
# Load and prepare 10X data
tissues <- as.character(unique(meta_tenx$tissue))
for (i in 1:length(tissues)){
channels <- unique(meta_tenx$channel[which(meta_tenx$tissue == tissues[i])])
for (j in 1:length(channels)){
if (j == 1){
umi <- Read10X(paste0("./TabulaMuris/rawdata/droplet/", tissues[i], "-", channels[j]))
colnames(umi) <- paste0(channels[j], "_", gsub('.{2}$', '', colnames(umi)))
cell.index <- which(colnames(umi) %in% rownames(meta_tenx)[which(meta_tenx$cell_ontology_class != 0 & meta_tenx$cell_ontology_class != "")])
umi <- as.matrix(umi[, cell.index])
} else {
tmp <- Read10X(paste0("./TabulaMuris/rawdata/droplet/", tissues[i], "-", channels[j]))
colnames(tmp) <- paste0(channels[j], "_", gsub('.{2}$', '', colnames(tmp)))
cell.index <- which(colnames(tmp) %in% rownames(meta_tenx)[which(meta_tenx$cell_ontology_class != 0 & meta_tenx$cell_ontology_class != "")])
umi <- cbind(umi, as.matrix(tmp[, cell.index]))
}
}
prep_tenx <- prep_data(umi)
Dataset <- prep_tenx$Dataset
hvg <- prep_tenx$hvg
save(Dataset, file = paste0("./TabulaMuris/atlas/data/", tissues[i], ".RData"))
save(hvg, file = paste0("./TabulaMuris/atlas/data/hvg/", tissues[i], "_hvg.RData"))
tenx_obj <- CreateSeuratObject(umi)
if (i == 1){
umi_old <- as.matrix(tenx_obj[['RNA']]@counts)
} else {
umi_current <- as.matrix(tenx_obj[['RNA']]@counts)
umi_new <- matrix(0, length(union(rownames(umi_old), rownames(umi_current))), ncol(umi_old) + ncol(umi_current))
rownames(umi_new) <- union(rownames(umi_old), rownames(umi_current))
colnames(umi_new) <- c(colnames(umi_old), colnames(umi_current))
umi_new[rownames(umi_old), colnames(umi_old)] <- umi_old
umi_new[rownames(umi_current), colnames(umi_current)] <- umi_current
umi_old <- umi_new
}
}
umi_all <- umi_new
save(umi_all, file = "./TabulaMuris/atlas/data/umi_all.RData")
We suggest to integrate the datasets with more overlapped hvgs first.
num_hvg <- matrix(0, length(tissues), length(tissues))
rownames(num_hvg) <- tissues
colnames(num_hvg) <- tissues
for (i in 1:length(tissues)){
for (j in 1:length(tissues)){
load(paste0("./TabulaMuris/atlas/data/hvg/", tissues[i], "_hvg.RData"))
hvg1 <- hvg
load(paste0("./TabulaMuris/atlas/data/hvg/", tissues[j], "_hvg.RData"))
hvg2 <- hvg
num_hvg[i, j] <- length(intersect(hvg1, hvg2))
}
}
print(num_hvg)
## Lung Marrow Tongue Spleen Heart_and_Aorta Trachea Bladder
## Lung 4000 1669 1761 1766 1760 2433 2127
## Marrow 1669 4000 1413 2242 1332 1499 1512
## Tongue 1761 1413 4000 1490 1378 1957 1934
## Spleen 1766 2242 1490 4000 1237 1597 1570
## Heart_and_Aorta 1760 1332 1378 1237 4000 1775 1827
## Trachea 2433 1499 1957 1597 1775 4000 2337
## Bladder 2127 1512 1934 1570 1827 2337 4000
## Mammary_Gland 2334 1663 1887 1777 1877 2496 2246
## Kidney 2239 1539 1769 1609 1886 2273 2172
## Thymus 1547 2096 1410 2113 1183 1388 1424
## Liver 1458 1362 1261 1440 1231 1367 1446
## Limb_Muscle 2343 1646 1849 1713 1927 2432 2208
## Mammary_Gland Kidney Thymus Liver Limb_Muscle
## Lung 2334 2239 1547 1458 2343
## Marrow 1663 1539 2096 1362 1646
## Tongue 1887 1769 1410 1261 1849
## Spleen 1777 1609 2113 1440 1713
## Heart_and_Aorta 1877 1886 1183 1231 1927
## Trachea 2496 2273 1388 1367 2432
## Bladder 2246 2172 1424 1446 2208
## Mammary_Gland 4000 2254 1517 1394 2489
## Kidney 2254 4000 1433 1560 2242
## Thymus 1517 1433 4000 1406 1540
## Liver 1394 1560 1406 4000 1407
## Limb_Muscle 2489 2242 1540 1407 4000
So for 10X datasets from Tabula Muris, we will integrate datasets in the following order: (Mammary_Gland + Trachea + Limb_Muscle + Lung + Bladder + Kidney + Tongue + Heart_and_Aorta) + (Marrow + Spleen + Thymus) + Liver.
tissue1 <- "Mammary_Gland"
tissue2 <- "Trachea"
dims <- 16
coreNum <- 30
load(paste0("./TabulaMuris/atlas/data/", tissue1, ".RData"))
Dataset1 <- Dataset
load(paste0("./TabulaMuris/atlas/data/hvg/", tissue1, "_hvg.RData"))
hvg1 <- hvg
load(paste0("./TabulaMuris/atlas/data/", tissue2, ".RData"))
Dataset2 <- Dataset
load(paste0("./TabulaMuris/atlas/data/hvg/", tissue2, "_hvg.RData"))
hvg2 <- hvg
Dataset <- FIRM(Dataset1, Dataset2, hvg1, hvg2, dims = dims, coreNum = coreNum)
save(Dataset, file = paste0("./TabulaMuris/atlas/data/", tissue1, "_", tissue2, ".RData"))
hvg <- union(hvg1, hvg2)
save(hvg, file = paste0("./TabulaMuris/atlas/data/hvg/", tissue1, "_", tissue2, "_hvg.RData"))
tissue1 <- "Mammary_Gland_Trachea"
tissue2 <- "Limb_Muscle"
dims <- 16
coreNum <- 30
load(paste0("./TabulaMuris/atlas/data/", tissue1, ".RData"))
Dataset1 <- Dataset
load(paste0("./TabulaMuris/atlas/data/hvg/", tissue1, "_hvg.RData"))
hvg1 <- hvg
load(paste0("./TabulaMuris/atlas/data/", tissue2, ".RData"))
Dataset2 <- Dataset
load(paste0("./TabulaMuris/atlas/data/hvg/", tissue2, "_hvg.RData"))
hvg2 <- hvg
Dataset <- FIRM(Dataset1, Dataset2, hvg1, hvg2, dims = dims, coreNum = coreNum)
save(Dataset, file = paste0("./TabulaMuris/atlas/data/", tissue1, "_", tissue2, ".RData"))
hvg <- union(hvg1, hvg2)
save(hvg, file = paste0("./TabulaMuris/atlas/data/hvg/", tissue1, "_", tissue2, "_hvg.RData"))
tissue1 <- "Mammary_Gland_Trachea_Limb_Muscle"
tissue2 <- "Lung"
dims <- 20
coreNum <- 30
load(paste0("./TabulaMuris/atlas/data/", tissue1, ".RData"))
Dataset1 <- Dataset
load(paste0("./TabulaMuris/atlas/data/hvg/", tissue1, "_hvg.RData"))
hvg1 <- hvg
load(paste0("./TabulaMuris/atlas/data/", tissue2, ".RData"))
Dataset2 <- Dataset
load(paste0("./TabulaMuris/atlas/data/hvg/", tissue2, "_hvg.RData"))
hvg2 <- hvg
Dataset <- FIRM(Dataset1, Dataset2, hvg1, hvg2, dims = dims, coreNum = coreNum)
save(Dataset, file = paste0("./TabulaMuris/atlas/data/", tissue1, "_", tissue2, ".RData"))
hvg <- union(hvg1, hvg2)
save(hvg, file = paste0("./TabulaMuris/atlas/data/hvg/", tissue1, "_", tissue2, "_hvg.RData"))
tissue1 <- "Mammary_Gland_Trachea_Limb_Muscle_Lung"
tissue2 <- "Bladder"
dims <- 20
coreNum <- 30
load(paste0("./TabulaMuris/atlas/data/", tissue1, ".RData"))
Dataset1 <- Dataset
load(paste0("./TabulaMuris/atlas/data/hvg/", tissue1, "_hvg.RData"))
hvg1 <- hvg
load(paste0("./TabulaMuris/atlas/data/", tissue2, ".RData"))
Dataset2 <- Dataset
load(paste0("./TabulaMuris/atlas/data/hvg/", tissue2, "_hvg.RData"))
hvg2 <- hvg
Dataset <- FIRM(Dataset1, Dataset2, hvg1, hvg2, dims = dims, coreNum = coreNum)
save(Dataset, file = paste0("./TabulaMuris/atlas/data/", tissue1, "_", tissue2, ".RData"))
hvg <- union(hvg1, hvg2)
save(hvg, file = paste0("./TabulaMuris/atlas/data/hvg/", tissue1, "_", tissue2, "_hvg.RData"))
tissue1 <- "Mammary_Gland_Trachea_Limb_Muscle_Lung_Bladder"
tissue2 <- "Kidney"
dims <- 20
coreNum <- 30
load(paste0("./TabulaMuris/atlas/data/", tissue1, ".RData"))
Dataset1 <- Dataset
load(paste0("./TabulaMuris/atlas/data/hvg/", tissue1, "_hvg.RData"))
hvg1 <- hvg
load(paste0("./TabulaMuris/atlas/data/", tissue2, ".RData"))
Dataset2 <- Dataset
load(paste0("./TabulaMuris/atlas/data/hvg/", tissue2, "_hvg.RData"))
hvg2 <- hvg
Dataset <- FIRM(Dataset1, Dataset2, hvg1, hvg2, dims = dims, coreNum = coreNum)
save(Dataset, file = paste0("./TabulaMuris/atlas/data/", tissue1, "_", tissue2, ".RData"))
hvg <- union(hvg1, hvg2)
save(hvg, file = paste0("./TabulaMuris/atlas/data/hvg/", tissue1, "_", tissue2, "_hvg.RData"))
tissue1 <- "Mammary_Gland_Trachea_Limb_Muscle_Lung_Bladder_Kidney"
tissue2 <- "Tongue"
dims <- 20
coreNum <- 30
load(paste0("./TabulaMuris/atlas/data/", tissue1, ".RData"))
Dataset1 <- Dataset
load(paste0("./TabulaMuris/atlas/data/hvg/", tissue1, "_hvg.RData"))
hvg1 <- hvg
load(paste0("./TabulaMuris/atlas/data/", tissue2, ".RData"))
Dataset2 <- Dataset
load(paste0("./TabulaMuris/atlas/data/hvg/", tissue2, "_hvg.RData"))
hvg2 <- hvg
Dataset <- FIRM(Dataset1, Dataset2, hvg1, hvg2, dims = dims, coreNum = coreNum)
save(Dataset, file = paste0("./TabulaMuris/atlas/data/", tissue1, "_", tissue2, ".RData"))
hvg <- union(hvg1, hvg2)
save(hvg, file = paste0("./TabulaMuris/atlas/data/hvg/", tissue1, "_", tissue2, "_hvg.RData"))
tissue1 <- "Mammary_Gland_Trachea_Limb_Muscle_Lung_Bladder_Kidney_Tongue"
tissue2 <- "Heart_and_Aorta"
dims <- 20
coreNum <- 30
load(paste0("./TabulaMuris/atlas/data/", tissue1, ".RData"))
Dataset1 <- Dataset
load(paste0("./TabulaMuris/atlas/data/hvg/", tissue1, "_hvg.RData"))
hvg1 <- hvg
load(paste0("./TabulaMuris/atlas/data/", tissue2, ".RData"))
Dataset2 <- Dataset
load(paste0("./TabulaMuris/atlas/data/hvg/", tissue2, "_hvg.RData"))
hvg2 <- hvg
Dataset <- FIRM(Dataset1, Dataset2, hvg1, hvg2, dims = dims, coreNum = coreNum)
save(Dataset, file = paste0("./TabulaMuris/atlas/data/", tissue1, "_", tissue2, ".RData"))
hvg <- union(hvg1, hvg2)
save(hvg, file = paste0("./TabulaMuris/atlas/data/hvg/", tissue1, "_", tissue2, "_hvg.RData"))
tissue1 <- "Marrow"
tissue2 <- "Spleen"
dims <- 18
coreNum <- 30
load(paste0("./TabulaMuris/atlas/data/", tissue1, ".RData"))
Dataset1 <- Dataset
load(paste0("./TabulaMuris/atlas/data/hvg/", tissue1, "_hvg.RData"))
hvg1 <- hvg
load(paste0("./TabulaMuris/atlas/data/", tissue2, ".RData"))
Dataset2 <- Dataset
load(paste0("./TabulaMuris/atlas/data/hvg/", tissue2, "_hvg.RData"))
hvg2 <- hvg
Dataset <- FIRM(Dataset1, Dataset2, hvg1, hvg2, dims = dims, coreNum = coreNum)
save(Dataset, file = paste0("./TabulaMuris/atlas/data/", tissue1, "_", tissue2, ".RData"))
hvg <- union(hvg1, hvg2)
save(hvg, file = paste0("./TabulaMuris/atlas/data/hvg/", tissue1, "_", tissue2, "_hvg.RData"))
tissue1 <- "Marrow_Spleen"
tissue2 <- "Thymus"
dims <- 18
coreNum <- 30
load(paste0("./TabulaMuris/atlas/data/", tissue1, ".RData"))
Dataset1 <- Dataset
load(paste0("./TabulaMuris/atlas/data/hvg/", tissue1, "_hvg.RData"))
hvg1 <- hvg
load(paste0("./TabulaMuris/atlas/data/", tissue2, ".RData"))
Dataset2 <- Dataset
load(paste0("./TabulaMuris/atlas/data/hvg/", tissue2, "_hvg.RData"))
hvg2 <- hvg
Dataset <- FIRM(Dataset1, Dataset2, hvg1, hvg2, dims = dims, coreNum = coreNum)
save(Dataset, file = paste0("./TabulaMuris/atlas/data/", tissue1, "_", tissue2, ".RData"))
hvg <- union(hvg1, hvg2)
save(hvg, file = paste0("./TabulaMuris/atlas/data/hvg/", tissue1, "_", tissue2, "_hvg.RData"))
tissue1 <- "Mammary_Gland_Trachea_Limb_Muscle_Lung_Bladder_Kidney_Tongue_Heart_and_Aorta"
tissue2 <- "Marrow_Spleen_Thymus"
dims <- 20
coreNum <- 30
load(paste0("./TabulaMuris/atlas/data/", tissue1, ".RData"))
Dataset1 <- Dataset
load(paste0("./TabulaMuris/atlas/data/hvg/", tissue1, "_hvg.RData"))
hvg1 <- hvg
load(paste0("./TabulaMuris/atlas/data/", tissue2, ".RData"))
Dataset2 <- Dataset
load(paste0("./TabulaMuris/atlas/data/hvg/", tissue2, "_hvg.RData"))
hvg2 <- hvg
Dataset <- FIRM(Dataset1, Dataset2, hvg1, hvg2, dims = dims, coreNum = coreNum)
save(Dataset, file = paste0("./TabulaMuris/atlas/data/", tissue1, "_", tissue2, ".RData"))
hvg <- union(hvg1, hvg2)
save(hvg, file = paste0("./TabulaMuris/atlas/data/hvg/", tissue1, "_", tissue2, "_hvg.RData"))
tissue1 <- "Mammary_Gland_Trachea_Limb_Muscle_Lung_Bladder_Kidney_Tongue_Heart_and_Aorta_Marrow_Spleen_Thymus"
tissue2 <- "Liver"
dims <- 20
coreNum <- 30
load(paste0("./TabulaMuris/atlas/data/", tissue1, ".RData"))
Dataset1 <- Dataset
load(paste0("./TabulaMuris/atlas/data/hvg/", tissue1, "_hvg.RData"))
hvg1 <- hvg
load(paste0("./TabulaMuris/atlas/data/", tissue2, ".RData"))
Dataset2 <- Dataset
load(paste0("./TabulaMuris/atlas/data/hvg/", tissue2, "_hvg.RData"))
hvg2 <- hvg
Dataset <- FIRM(Dataset1, Dataset2, hvg1, hvg2, dims = dims, coreNum = coreNum)
save(Dataset, file = "./TabulaMuris/atlas/data/All.RData")
# create Seurat object
integrated_FIRM <- CreateSeuratObject(umi_all[rownames(Dataset), colnames(Dataset)])
# normalization
integrated_FIRM <- NormalizeData(integrated_FIRM, verbose = FALSE)
# hvg
integrated_FIRM <- FindVariableFeatures(integrated_FIRM, nfeatures = 4000, verbose = FALSE)
# put the centered integrated data in scaled data
integrated_FIRM[["RNA"]]@scale.data <- Dataset - rowMeans(Dataset)
# add meta data
integrated_FIRM <- AddMetaData(integrated_FIRM, metadata = as.character(meta_tenx[colnames(integrated_FIRM), ]$cell_ontology_class), col.name = "annotation")
integrated_FIRM <- AddMetaData(integrated_FIRM, metadata = as.character(meta_tenx[colnames(integrated_FIRM), ]$tissue), col.name = "tissue")
# PCA
integrated_FIRM <- RunPCA(integrated_FIRM, npcs = 20, verbose = FALSE)
# UMAP
integrated_FIRM <- RunUMAP(integrated_FIRM, reduction = "pca", dims = 1:20, umap.method = 'umap-learn', metric = "correlation", verbose = FALSE)
save(integrated_FIRM, file = "./TabulaMuris/atlas/integrated_FIRM.RData")
DimPlot(integrated_FIRM, reduction = "umap", group.by = "tissue", label = T, repel = T)
library(ggplot2)
DimPlot(integrated_FIRM, reduction = "umap", group.by = "annotation", label = T, repel = T, label.size = 3) +
guides(col = guide_legend(ncol = 1, override.aes = list(size=1)))