Overview

In this demo, we show how to integrate 10X scRNA-seq datasets from multiple tissues in Tabula Muris using FIRM.

Download data

The scRNA-seq datasets can be downloaded from https://figshare.com/projects/Tabula_Muris_Transcriptomic_characterization_of_20_organs_and_tissues_from_Mus_musculus_at_single_cell_resolution/27733 and saved in ‘./TabulaMuris/rawdata’.

Data preparation

# Load and prepare metadata
library(Seurat)
library(FIRM)
library(RANN)

system("mkdir -p ./TabulaMuris/atlas/data")
system("mkdir -p ./TabulaMuris/atlas/data/hvg")

meta_tenx <- read.csv("./TabulaMuris/rawdata/annotations_droplet.csv")
rownames(meta_tenx) <- meta_tenx$cell
save(meta_tenx, file = "./TabulaMuris/atlas/data/meta_tenx.RData")

# Load and prepare 10X data
tissues <- as.character(unique(meta_tenx$tissue))

for (i in 1:length(tissues)){
  channels <- unique(meta_tenx$channel[which(meta_tenx$tissue == tissues[i])])
  
  for (j in 1:length(channels)){
    if (j == 1){
      umi <- Read10X(paste0("./TabulaMuris/rawdata/droplet/", tissues[i], "-", channels[j]))
      colnames(umi) <- paste0(channels[j], "_", gsub('.{2}$', '', colnames(umi)))
      cell.index <- which(colnames(umi) %in% rownames(meta_tenx)[which(meta_tenx$cell_ontology_class != 0 & meta_tenx$cell_ontology_class != "")])
      umi <- as.matrix(umi[, cell.index])
    } else {
      tmp <- Read10X(paste0("./TabulaMuris/rawdata/droplet/", tissues[i], "-", channels[j]))
      colnames(tmp) <- paste0(channels[j], "_", gsub('.{2}$', '', colnames(tmp)))
      cell.index <- which(colnames(tmp) %in% rownames(meta_tenx)[which(meta_tenx$cell_ontology_class != 0 & meta_tenx$cell_ontology_class != "")])
      umi <- cbind(umi, as.matrix(tmp[, cell.index]))
    }
  }
  
  prep_tenx <- prep_data(umi)
  Dataset <- prep_tenx$Dataset
  hvg <- prep_tenx$hvg

  save(Dataset, file = paste0("./TabulaMuris/atlas/data/", tissues[i], ".RData"))
  save(hvg, file = paste0("./TabulaMuris/atlas/data/hvg/", tissues[i], "_hvg.RData"))
  
  tenx_obj <- CreateSeuratObject(umi)
  if (i == 1){
    umi_old <- as.matrix(tenx_obj[['RNA']]@counts)
  } else {
    umi_current <- as.matrix(tenx_obj[['RNA']]@counts)
    
    umi_new <- matrix(0, length(union(rownames(umi_old), rownames(umi_current))), ncol(umi_old) + ncol(umi_current))
    rownames(umi_new) <- union(rownames(umi_old), rownames(umi_current))
    colnames(umi_new) <- c(colnames(umi_old), colnames(umi_current))
    umi_new[rownames(umi_old), colnames(umi_old)] <- umi_old
    umi_new[rownames(umi_current), colnames(umi_current)] <- umi_current

    umi_old <- umi_new
  }
}

umi_all <- umi_new
save(umi_all, file = "./TabulaMuris/atlas/data/umi_all.RData")

Integration order

We suggest to integrate the datasets with more overlapped hvgs first.

num_hvg <- matrix(0, length(tissues), length(tissues))
rownames(num_hvg) <- tissues
colnames(num_hvg) <- tissues

for (i in 1:length(tissues)){
  for (j in 1:length(tissues)){
    load(paste0("./TabulaMuris/atlas/data/hvg/", tissues[i], "_hvg.RData"))
    hvg1 <- hvg
    load(paste0("./TabulaMuris/atlas/data/hvg/", tissues[j], "_hvg.RData"))
    hvg2 <- hvg
    
    num_hvg[i, j] <- length(intersect(hvg1, hvg2))
  }
}
print(num_hvg)
##                 Lung Marrow Tongue Spleen Heart_and_Aorta Trachea Bladder
## Lung            4000   1669   1761   1766            1760    2433    2127
## Marrow          1669   4000   1413   2242            1332    1499    1512
## Tongue          1761   1413   4000   1490            1378    1957    1934
## Spleen          1766   2242   1490   4000            1237    1597    1570
## Heart_and_Aorta 1760   1332   1378   1237            4000    1775    1827
## Trachea         2433   1499   1957   1597            1775    4000    2337
## Bladder         2127   1512   1934   1570            1827    2337    4000
## Mammary_Gland   2334   1663   1887   1777            1877    2496    2246
## Kidney          2239   1539   1769   1609            1886    2273    2172
## Thymus          1547   2096   1410   2113            1183    1388    1424
## Liver           1458   1362   1261   1440            1231    1367    1446
## Limb_Muscle     2343   1646   1849   1713            1927    2432    2208
##                 Mammary_Gland Kidney Thymus Liver Limb_Muscle
## Lung                     2334   2239   1547  1458        2343
## Marrow                   1663   1539   2096  1362        1646
## Tongue                   1887   1769   1410  1261        1849
## Spleen                   1777   1609   2113  1440        1713
## Heart_and_Aorta          1877   1886   1183  1231        1927
## Trachea                  2496   2273   1388  1367        2432
## Bladder                  2246   2172   1424  1446        2208
## Mammary_Gland            4000   2254   1517  1394        2489
## Kidney                   2254   4000   1433  1560        2242
## Thymus                   1517   1433   4000  1406        1540
## Liver                    1394   1560   1406  4000        1407
## Limb_Muscle              2489   2242   1540  1407        4000

So for 10X datasets from Tabula Muris, we will integrate datasets in the following order: (Mammary_Gland + Trachea + Limb_Muscle + Lung + Bladder + Kidney + Tongue + Heart_and_Aorta) + (Marrow + Spleen + Thymus) + Liver.

FIRM integration

Mammary_Gland + Trachea

tissue1 <- "Mammary_Gland"
tissue2 <- "Trachea"

dims <- 16
coreNum <- 30

load(paste0("./TabulaMuris/atlas/data/", tissue1, ".RData"))
Dataset1 <- Dataset
load(paste0("./TabulaMuris/atlas/data/hvg/", tissue1, "_hvg.RData"))
hvg1 <- hvg

load(paste0("./TabulaMuris/atlas/data/", tissue2, ".RData"))
Dataset2 <- Dataset
load(paste0("./TabulaMuris/atlas/data/hvg/", tissue2, "_hvg.RData"))
hvg2 <- hvg

Dataset <- FIRM(Dataset1, Dataset2, hvg1, hvg2, dims = dims, coreNum = coreNum)

save(Dataset, file = paste0("./TabulaMuris/atlas/data/", tissue1, "_", tissue2, ".RData"))

hvg <- union(hvg1, hvg2)
save(hvg, file = paste0("./TabulaMuris/atlas/data/hvg/", tissue1, "_", tissue2, "_hvg.RData"))

+ Limb_Muscle

tissue1 <- "Mammary_Gland_Trachea"
tissue2 <- "Limb_Muscle"

dims <- 16
coreNum <- 30

load(paste0("./TabulaMuris/atlas/data/", tissue1, ".RData"))
Dataset1 <- Dataset
load(paste0("./TabulaMuris/atlas/data/hvg/", tissue1, "_hvg.RData"))
hvg1 <- hvg

load(paste0("./TabulaMuris/atlas/data/", tissue2, ".RData"))
Dataset2 <- Dataset
load(paste0("./TabulaMuris/atlas/data/hvg/", tissue2, "_hvg.RData"))
hvg2 <- hvg

Dataset <- FIRM(Dataset1, Dataset2, hvg1, hvg2, dims = dims, coreNum = coreNum)

save(Dataset, file = paste0("./TabulaMuris/atlas/data/", tissue1, "_", tissue2, ".RData"))

hvg <- union(hvg1, hvg2)
save(hvg, file = paste0("./TabulaMuris/atlas/data/hvg/", tissue1, "_", tissue2, "_hvg.RData"))

+ Lung

tissue1 <- "Mammary_Gland_Trachea_Limb_Muscle"
tissue2 <- "Lung"

dims <- 20
coreNum <- 30

load(paste0("./TabulaMuris/atlas/data/", tissue1, ".RData"))
Dataset1 <- Dataset
load(paste0("./TabulaMuris/atlas/data/hvg/", tissue1, "_hvg.RData"))
hvg1 <- hvg

load(paste0("./TabulaMuris/atlas/data/", tissue2, ".RData"))
Dataset2 <- Dataset
load(paste0("./TabulaMuris/atlas/data/hvg/", tissue2, "_hvg.RData"))
hvg2 <- hvg

Dataset <- FIRM(Dataset1, Dataset2, hvg1, hvg2, dims = dims, coreNum = coreNum)

save(Dataset, file = paste0("./TabulaMuris/atlas/data/", tissue1, "_", tissue2, ".RData"))

hvg <- union(hvg1, hvg2)
save(hvg, file = paste0("./TabulaMuris/atlas/data/hvg/", tissue1, "_", tissue2, "_hvg.RData"))

+ Bladder

tissue1 <- "Mammary_Gland_Trachea_Limb_Muscle_Lung"
tissue2 <- "Bladder"

dims <- 20
coreNum <- 30

load(paste0("./TabulaMuris/atlas/data/", tissue1, ".RData"))
Dataset1 <- Dataset
load(paste0("./TabulaMuris/atlas/data/hvg/", tissue1, "_hvg.RData"))
hvg1 <- hvg

load(paste0("./TabulaMuris/atlas/data/", tissue2, ".RData"))
Dataset2 <- Dataset
load(paste0("./TabulaMuris/atlas/data/hvg/", tissue2, "_hvg.RData"))
hvg2 <- hvg

Dataset <- FIRM(Dataset1, Dataset2, hvg1, hvg2, dims = dims, coreNum = coreNum)

save(Dataset, file = paste0("./TabulaMuris/atlas/data/", tissue1, "_", tissue2, ".RData"))

hvg <- union(hvg1, hvg2)
save(hvg, file = paste0("./TabulaMuris/atlas/data/hvg/", tissue1, "_", tissue2, "_hvg.RData"))

+ Kidney

tissue1 <- "Mammary_Gland_Trachea_Limb_Muscle_Lung_Bladder"
tissue2 <- "Kidney"

dims <- 20
coreNum <- 30

load(paste0("./TabulaMuris/atlas/data/", tissue1, ".RData"))
Dataset1 <- Dataset
load(paste0("./TabulaMuris/atlas/data/hvg/", tissue1, "_hvg.RData"))
hvg1 <- hvg

load(paste0("./TabulaMuris/atlas/data/", tissue2, ".RData"))
Dataset2 <- Dataset
load(paste0("./TabulaMuris/atlas/data/hvg/", tissue2, "_hvg.RData"))
hvg2 <- hvg

Dataset <- FIRM(Dataset1, Dataset2, hvg1, hvg2, dims = dims, coreNum = coreNum)

save(Dataset, file = paste0("./TabulaMuris/atlas/data/", tissue1, "_", tissue2, ".RData"))

hvg <- union(hvg1, hvg2)
save(hvg, file = paste0("./TabulaMuris/atlas/data/hvg/", tissue1, "_", tissue2, "_hvg.RData"))

+ Tongue

tissue1 <- "Mammary_Gland_Trachea_Limb_Muscle_Lung_Bladder_Kidney"
tissue2 <- "Tongue"

dims <- 20
coreNum <- 30

load(paste0("./TabulaMuris/atlas/data/", tissue1, ".RData"))
Dataset1 <- Dataset
load(paste0("./TabulaMuris/atlas/data/hvg/", tissue1, "_hvg.RData"))
hvg1 <- hvg

load(paste0("./TabulaMuris/atlas/data/", tissue2, ".RData"))
Dataset2 <- Dataset
load(paste0("./TabulaMuris/atlas/data/hvg/", tissue2, "_hvg.RData"))
hvg2 <- hvg

Dataset <- FIRM(Dataset1, Dataset2, hvg1, hvg2, dims = dims, coreNum = coreNum)

save(Dataset, file = paste0("./TabulaMuris/atlas/data/", tissue1, "_", tissue2, ".RData"))

hvg <- union(hvg1, hvg2)
save(hvg, file = paste0("./TabulaMuris/atlas/data/hvg/", tissue1, "_", tissue2, "_hvg.RData"))

+ Heart_and_Aorta

tissue1 <- "Mammary_Gland_Trachea_Limb_Muscle_Lung_Bladder_Kidney_Tongue"
tissue2 <- "Heart_and_Aorta"

dims <- 20
coreNum <- 30

load(paste0("./TabulaMuris/atlas/data/", tissue1, ".RData"))
Dataset1 <- Dataset
load(paste0("./TabulaMuris/atlas/data/hvg/", tissue1, "_hvg.RData"))
hvg1 <- hvg

load(paste0("./TabulaMuris/atlas/data/", tissue2, ".RData"))
Dataset2 <- Dataset
load(paste0("./TabulaMuris/atlas/data/hvg/", tissue2, "_hvg.RData"))
hvg2 <- hvg

Dataset <- FIRM(Dataset1, Dataset2, hvg1, hvg2, dims = dims, coreNum = coreNum)

save(Dataset, file = paste0("./TabulaMuris/atlas/data/", tissue1, "_", tissue2, ".RData"))

hvg <- union(hvg1, hvg2)
save(hvg, file = paste0("./TabulaMuris/atlas/data/hvg/", tissue1, "_", tissue2, "_hvg.RData"))

Marrow + Spleen

tissue1 <- "Marrow"
tissue2 <- "Spleen"

dims <- 18
coreNum <- 30

load(paste0("./TabulaMuris/atlas/data/", tissue1, ".RData"))
Dataset1 <- Dataset
load(paste0("./TabulaMuris/atlas/data/hvg/", tissue1, "_hvg.RData"))
hvg1 <- hvg

load(paste0("./TabulaMuris/atlas/data/", tissue2, ".RData"))
Dataset2 <- Dataset
load(paste0("./TabulaMuris/atlas/data/hvg/", tissue2, "_hvg.RData"))
hvg2 <- hvg

Dataset <- FIRM(Dataset1, Dataset2, hvg1, hvg2, dims = dims, coreNum = coreNum)

save(Dataset, file = paste0("./TabulaMuris/atlas/data/", tissue1, "_", tissue2, ".RData"))

hvg <- union(hvg1, hvg2)
save(hvg, file = paste0("./TabulaMuris/atlas/data/hvg/", tissue1, "_", tissue2, "_hvg.RData"))

+ Thymus

tissue1 <- "Marrow_Spleen"
tissue2 <- "Thymus"

dims <- 18
coreNum <- 30

load(paste0("./TabulaMuris/atlas/data/", tissue1, ".RData"))
Dataset1 <- Dataset
load(paste0("./TabulaMuris/atlas/data/hvg/", tissue1, "_hvg.RData"))
hvg1 <- hvg

load(paste0("./TabulaMuris/atlas/data/", tissue2, ".RData"))
Dataset2 <- Dataset
load(paste0("./TabulaMuris/atlas/data/hvg/", tissue2, "_hvg.RData"))
hvg2 <- hvg

Dataset <- FIRM(Dataset1, Dataset2, hvg1, hvg2, dims = dims, coreNum = coreNum)

save(Dataset, file = paste0("./TabulaMuris/atlas/data/", tissue1, "_", tissue2, ".RData"))

hvg <- union(hvg1, hvg2)
save(hvg, file = paste0("./TabulaMuris/atlas/data/hvg/", tissue1, "_", tissue2, "_hvg.RData"))

Integrate previous two integrated datasets

tissue1 <- "Mammary_Gland_Trachea_Limb_Muscle_Lung_Bladder_Kidney_Tongue_Heart_and_Aorta"
tissue2 <- "Marrow_Spleen_Thymus"

dims <- 20
coreNum <- 30

load(paste0("./TabulaMuris/atlas/data/", tissue1, ".RData"))
Dataset1 <- Dataset
load(paste0("./TabulaMuris/atlas/data/hvg/", tissue1, "_hvg.RData"))
hvg1 <- hvg

load(paste0("./TabulaMuris/atlas/data/", tissue2, ".RData"))
Dataset2 <- Dataset
load(paste0("./TabulaMuris/atlas/data/hvg/", tissue2, "_hvg.RData"))
hvg2 <- hvg

Dataset <- FIRM(Dataset1, Dataset2, hvg1, hvg2, dims = dims, coreNum = coreNum)

save(Dataset, file = paste0("./TabulaMuris/atlas/data/", tissue1, "_", tissue2, ".RData"))

hvg <- union(hvg1, hvg2)
save(hvg, file = paste0("./TabulaMuris/atlas/data/hvg/", tissue1, "_", tissue2, "_hvg.RData"))

+ Liver

tissue1 <- "Mammary_Gland_Trachea_Limb_Muscle_Lung_Bladder_Kidney_Tongue_Heart_and_Aorta_Marrow_Spleen_Thymus"
tissue2 <- "Liver"

dims <- 20
coreNum <- 30

load(paste0("./TabulaMuris/atlas/data/", tissue1, ".RData"))
Dataset1 <- Dataset
load(paste0("./TabulaMuris/atlas/data/hvg/", tissue1, "_hvg.RData"))
hvg1 <- hvg

load(paste0("./TabulaMuris/atlas/data/", tissue2, ".RData"))
Dataset2 <- Dataset
load(paste0("./TabulaMuris/atlas/data/hvg/", tissue2, "_hvg.RData"))
hvg2 <- hvg

Dataset <- FIRM(Dataset1, Dataset2, hvg1, hvg2, dims = dims, coreNum = coreNum)

save(Dataset, file = "./TabulaMuris/atlas/data/All.RData")

Seurat object for downstream analysis

# create Seurat object
integrated_FIRM <- CreateSeuratObject(umi_all[rownames(Dataset), colnames(Dataset)])

# normalization
integrated_FIRM <- NormalizeData(integrated_FIRM, verbose = FALSE)

# hvg 
integrated_FIRM <- FindVariableFeatures(integrated_FIRM, nfeatures = 4000, verbose = FALSE)

# put the centered integrated data in scaled data
integrated_FIRM[["RNA"]]@scale.data <- Dataset - rowMeans(Dataset)

# add meta data
integrated_FIRM <- AddMetaData(integrated_FIRM, metadata = as.character(meta_tenx[colnames(integrated_FIRM), ]$cell_ontology_class), col.name = "annotation")
integrated_FIRM <- AddMetaData(integrated_FIRM, metadata = as.character(meta_tenx[colnames(integrated_FIRM), ]$tissue), col.name = "tissue")

# PCA
integrated_FIRM <- RunPCA(integrated_FIRM, npcs = 20, verbose = FALSE)

# UMAP
integrated_FIRM <- RunUMAP(integrated_FIRM, reduction = "pca", dims = 1:20, umap.method = 'umap-learn', metric = "correlation", verbose = FALSE)

save(integrated_FIRM, file = "./TabulaMuris/atlas/integrated_FIRM.RData")

Visualization

DimPlot(integrated_FIRM, reduction = "umap", group.by = "tissue", label = T, repel = T)

library(ggplot2)
DimPlot(integrated_FIRM, reduction = "umap", group.by = "annotation", label = T, repel = T, label.size = 3) +
  guides(col = guide_legend(ncol = 1, override.aes = list(size=1)))