In this demo, we show how to integrate SS2 and 10X scRNA-seq datasets from Tabula Muris. We integrate the mammary gland scRNA-seq datasets profiled using SS2 and 10X as an example. We apply FIRM and existing methods, including Seurat, LIGER, Scanorama, to integrate the datasets.
The scRNA-seq datasets can be downloaded from https://figshare.com/projects/Tabula_Muris_Transcriptomic_characterization_of_20_organs_and_tissues_from_Mus_musculus_at_single_cell_resolution/27733 and saved in ‘./TabulaMuris/rawdata’.
# Load and prepare metadata
library(Seurat)
system("mkdir -p ./TabulaMuris/data")
meta_SS2 <- read.csv("./TabulaMuris/rawdata/annotations_facs.csv")
rownames(meta_SS2) <- meta_SS2$cell
save(meta_SS2, file = "./TabulaMuris/data/meta_SS2.RData")
meta_tenx <- read.csv("./TabulaMuris/rawdata/annotations_droplet.csv")
rownames(meta_tenx) <- meta_tenx$cell
save(meta_tenx, file = "./TabulaMuris/data/meta_tenx.RData")
meta_SS2 <- meta_SS2[, c("cell_ontology_class", "cell_ontology_id", "cluster.ids", "free_annotation", "mouse.id", "mouse.sex", "subtissue", "tissue")]
meta_SS2$method <- "SS2"
meta_tenx <- meta_tenx[, c("cell_ontology_class", "cell_ontology_id", "cluster.ids", "free_annotation", "mouse.id", "mouse.sex", "subtissue", "tissue")]
meta_tenx$method <- "10X"
meta <- rbind(meta_SS2, meta_tenx)
save(meta, file = "./TabulaMuris/data/meta.RData")
# Load and prepare SS2 data
tissue_name <- "Mammary_Gland" # For other tissues, just change the tissue_name.
counts <- read.csv(paste0("./TabulaMuris/rawdata/FACS/", tissue_name, "-counts.csv"),
row.names = 1)
counts <- as.matrix(counts)
ercc.index <- grep(pattern = "^ERCC-", x = rownames(counts), value = FALSE)
cell.index <- which(colnames(counts) %in% rownames(meta)[which(meta$cell_ontology_class != 0 & meta$cell_ontology_class != "")])
counts <- counts[-ercc.index, cell.index]
SS2_obj <- CreateSeuratObject(counts)
save(SS2_obj, file = paste0("./TabulaMuris/data/", tissue_name, "_SS2_obj.RData"))
# Load and prepare 10X data
load("./TabulaMuris/data/meta_tenx.RData")
channels <- unique(meta_tenx$channel[which(meta_tenx$tissue == tissue_name)])
for (j in 1:length(channels)){
if (j == 1){
umi <- Read10X(paste0("./TabulaMuris/rawdata/droplet/", tissue_name, "-", channels[j]))
colnames(umi) <- paste0(channels[j], "_", gsub('.{2}$', '', colnames(umi)))
cell.index <- which(colnames(umi) %in% rownames(meta_tenx)[which(meta_tenx$cell_ontology_class != 0 & meta_tenx$cell_ontology_class != "")])
umi <- as.matrix(umi[, cell.index])
} else {
tmp <- Read10X(paste0("./TabulaMuris/rawdata/droplet/", tissue_name, "-", channels[j]))
colnames(tmp) <- paste0(channels[j], "_", gsub('.{2}$', '', colnames(tmp)))
cell.index <- which(colnames(tmp) %in% rownames(meta_tenx)[which(meta_tenx$cell_ontology_class != 0 & meta_tenx$cell_ontology_class != "")])
umi <- cbind(umi, as.matrix(tmp[, cell.index]))
}
}
tenx_obj <- CreateSeuratObject(umi)
save(tenx_obj, file = paste0("./TabulaMuris/data/", tissue_name, "_tenx_obj.RData"))
library(FIRM)
library(RANN)
prep_SS2 <- prep_data(counts)
SS2 <- prep_SS2$Dataset
hvg_SS2 <- prep_SS2$hvg
prep_tenx <- prep_data(umi)
tenx <- prep_tenx$Dataset
hvg_tenx <- prep_tenx$hvg
dims <- 15
coreNum <- 30
integrated_data <- FIRM(SS2, tenx, hvg_SS2, hvg_tenx, dims = dims, coreNum = coreNum)
system("mkdir -p ./TabulaMuris/integrated_data")
save(integrated_data, file = paste0("./TabulaMuris/integrated_data/", tissue_name, "_FIRM.RData"))
# integrated counts
integrated_counts <- matrix(0, length(union(rownames(SS2_obj), rownames(tenx_obj))), ncol(SS2_obj) + ncol(tenx_obj))
rownames(integrated_counts) <- union(rownames(SS2_obj), rownames(tenx_obj))
colnames(integrated_counts) <- c(colnames(SS2_obj), colnames(tenx_obj))
integrated_counts[rownames(SS2_obj), colnames(SS2_obj)] <- as.matrix(SS2_obj[['RNA']]@counts)
integrated_counts[rownames(tenx_obj), colnames(tenx_obj)] <- as.matrix(tenx_obj[['RNA']]@counts)
# create Seurat object
integrated_FIRM <- CreateSeuratObject(integrated_counts)
integrated_FIRM <- integrated_FIRM[rownames(integrated_data), colnames(integrated_data)]
# normalization
integrated_FIRM <- NormalizeData(integrated_FIRM, verbose = FALSE)
# put the centered integrated data in scaled data
integrated_FIRM[["RNA"]]@scale.data <- integrated_data - rowMeans(integrated_data)
# add meta data
integrated_FIRM <- AddMetaData(integrated_FIRM, metadata = as.character(meta[colnames(integrated_FIRM), ]$method), col.name = "dataset")
integrated_FIRM <- AddMetaData(integrated_FIRM, metadata = as.character(meta[colnames(integrated_FIRM), ]$cell_ontology_class), col.name = "annotation")
# hvg for PCA and visualization
hvg <- intersect(hvg_SS2, hvg_tenx)
# PCA
integrated_FIRM <- RunPCA(integrated_FIRM, features = hvg, npcs = dims, verbose = FALSE)
# UMAP
integrated_FIRM <- RunUMAP(integrated_FIRM, reduction = "pca", dims = 1:dims, umap.method = 'umap-learn', metric = "correlation", verbose = FALSE)
system("mkdir -p ./TabulaMuris/integrated_seurat_obj")
save(integrated_FIRM, file = paste0("./TabulaMuris/integrated_seurat_obj/", tissue_name, "_FIRM.RData"))
DimPlot(integrated_FIRM, reduction = "umap", group.by = "dataset")
DimPlot(integrated_FIRM, reduction = "umap", group.by = "annotation", label = T, repel = T)
SS2_seurat_obj <- SS2_obj
SS2_seurat_obj <- NormalizeData(SS2_seurat_obj, verbose = FALSE)
SS2_seurat_obj <- FindVariableFeatures(SS2_seurat_obj, verbose = FALSE)
SS2_seurat_obj <- ScaleData(SS2_seurat_obj, features = hvg, verbose = FALSE)
SS2_seurat_obj <- RunPCA(SS2_seurat_obj, npcs = dims, verbose = FALSE)
tenx_seurat_obj <- tenx_obj
tenx_seurat_obj <- NormalizeData(tenx_seurat_obj, verbose = FALSE)
tenx_seurat_obj <- FindVariableFeatures(tenx_seurat_obj, verbose = FALSE)
tenx_seurat_obj <- ScaleData(tenx_seurat_obj, features = hvg, verbose = FALSE)
tenx_seurat_obj <- RunPCA(tenx_seurat_obj, npcs = dims, verbose = FALSE)
SS2_tenx <- list(SS2 = SS2_seurat_obj, tenx = tenx_seurat_obj)
anchors <- FindIntegrationAnchors(SS2_tenx, dims = 1:dims)
integrated_Seurat <- IntegrateData(anchorset = anchors, dims = 1:dims)
DefaultAssay(integrated_Seurat) <- "integrated"
integrated_Seurat <- ScaleData(integrated_Seurat, verbose = FALSE)
integrated_Seurat <- RunPCA(integrated_Seurat, npcs = dims, verbose = FALSE)
integrated_Seurat <- RunUMAP(integrated_Seurat, reduction = "pca", dims = 1:dims, umap.method = 'umap-learn', metric = "correlation", verbose = FALSE)
integrated_Seurat <- AddMetaData(integrated_Seurat, metadata = as.character(meta[colnames(integrated_Seurat), ]$method), col.name = "dataset")
integrated_Seurat <- AddMetaData(integrated_Seurat, metadata = as.character(meta[colnames(integrated_Seurat), ]$cell_ontology_class), col.name = "annotation")
save(integrated_Seurat, file = paste0("./TabulaMuris/integrated_seurat_obj/", tissue_name, "_Seurat.RData"))
DimPlot(integrated_Seurat, reduction = "umap", group.by = "dataset")
DimPlot(integrated_Seurat, reduction = "umap", group.by = "annotation", label = T, repel = T)
# integrated counts
integrated_counts <- matrix(0, length(union(rownames(SS2_obj), rownames(tenx_obj))), ncol(SS2_obj) + ncol(tenx_obj))
rownames(integrated_counts) <- union(rownames(SS2_obj), rownames(tenx_obj))
colnames(integrated_counts) <- c(colnames(SS2_obj), colnames(tenx_obj))
integrated_counts[rownames(SS2_obj), colnames(SS2_obj)] <- as.matrix(SS2_obj[['RNA']]@counts)
integrated_counts[rownames(tenx_obj), colnames(tenx_obj)] <- as.matrix(tenx_obj[['RNA']]@counts)
# create Seurat object
integrated_Harmony <- CreateSeuratObject(integrated_counts)
integrated_Harmony <- integrated_Harmony[rownames(integrated_data), colnames(integrated_data)]
# add meta data
integrated_Harmony <- AddMetaData(integrated_Harmony, metadata = as.character(meta[colnames(integrated_Harmony), ]$method), col.name = "dataset")
integrated_Harmony <- AddMetaData(integrated_Harmony, metadata = as.character(meta[colnames(integrated_Harmony), ]$cell_ontology_class), col.name = "annotation")
# normalization
integrated_Harmony <- NormalizeData(integrated_Harmony, verbose = FALSE)
# hvg
integrated_Harmony <- FindVariableFeatures(integrated_Harmony, verbose = FALSE)
# scaling
integrated_Harmony <- ScaleData(integrated_Harmony, verbose = FALSE)
# PCA
integrated_Harmony <- RunPCA(integrated_Harmony, npcs = dims, verbose = FALSE)
# Harmony
integrated_Harmony <- RunHarmony(integrated_Harmony, "dataset")
# UMAP
integrated_Harmony <- RunUMAP(integrated_Harmony, reduction = "pca", dims = 1:dims, umap.method = 'umap-learn', metric = "correlation", verbose = FALSE)
system("mkdir -p ./TabulaMuris/integrated_seurat_obj")
save(integrated_Harmony, file = paste0("./TabulaMuris/integrated_seurat_obj/", tissue_name, "_Harmony.RData"))
DimPlot(integrated_Harmony, reduction = "umap", group.by = "dataset")
DimPlot(integrated_Harmony, reduction = "umap", group.by = "annotation", label = T, repel = T)
# integrated counts
integrated_counts <- matrix(0, length(union(rownames(SS2_obj), rownames(tenx_obj))), ncol(SS2_obj) + ncol(tenx_obj))
rownames(integrated_counts) <- union(rownames(SS2_obj), rownames(tenx_obj))
colnames(integrated_counts) <- c(colnames(SS2_obj), colnames(tenx_obj))
integrated_counts[rownames(SS2_obj), colnames(SS2_obj)] <- as.matrix(SS2_obj[['RNA']]@counts)
integrated_counts[rownames(tenx_obj), colnames(tenx_obj)] <- as.matrix(tenx_obj[['RNA']]@counts)
# create Seurat object
integrated_BBKNN <- CreateSeuratObject(integrated_counts)
# add meta data
integrated_BBKNN <- AddMetaData(integrated_BBKNN, metadata = as.character(meta[colnames(integrated_BBKNN), ]$method), col.name = "dataset")
integrated_BBKNN <- AddMetaData(integrated_BBKNN, metadata = as.character(meta[colnames(integrated_BBKNN), ]$cell_ontology_class), col.name = "annotation")
# normalization
integrated_BBKNN <- NormalizeData(integrated_BBKNN, verbose = FALSE)
# hvg
integrated_BBKNN <- FindVariableFeatures(integrated_BBKNN, verbose = FALSE)
# scaling
integrated_BBKNN <- ScaleData(integrated_BBKNN, verbose = FALSE)
meta_bbknn <- cbind(as.numeric(integrated_BBKNN$dataset == "SS2"), integrated_BBKNN$annotation)
colnames(meta_bbknn) <- c("batch", "annotation")
write.csv(t(integrated_BBKNN@assays$RNA@scale.data), file = paste0("./TabulaMuris/integrated_data/", tissue_name, "_BBKNN_data.csv"))
write.csv(meta_bbknn, file = paste0("./TabulaMuris/integrated_data/", tissue_name, "_meta_bbknn.csv"))
import scanpy as sc
import pandas as pd
import numpy as np
import anndata
import bbknn
tissue_name = "Mammary_Gland"
dims = 15
data_file = ['./TabulaMuris/integrated_data/', tissue_name, '_BBKNN_data.csv']
data = pd.read_csv("".join(data_file), index_col = 0)
meta_file = ['./TabulaMuris/integrated_data/', tissue_name, '_meta_bbknn.csv']
meta = pd.read_csv("".join(meta_file), index_col = 0)
adata = anndata.AnnData(X = data, obs = meta)
sc.tl.pca(adata, n_comps=dims[i])
bbknn.bbknn(adata)
sc.tl.umap(adata)
umap = adata.obsm['X_umap']
umap_file = ['./TabulaMuris/integrated_data/', tissue_name, '_BBKNN_umap_data.txt']
np.savetxt("".join(umap_file), umap)
integrated_BBKNN <- RunUMAP(integrated_BBKNN, reduction = "pca", dims = 1:dims, umap.method = 'umap-learn', metric = "correlation", verbose = FALSE)
umap <- as.matrix(read.table(paste0("./TabulaMuris/integrated_data/", tissue_name, "_BBKNN_umap_data.txt")))
rownames(umap) <- rownames(integrated_BBKNN@reductions$umap@cell.embeddings)
colnames(umap) <- colnames(integrated_BBKNN@reductions$umap@cell.embeddings)
integrated_BBKNN@reductions$umap@cell.embeddings <- umap
save(integrated_BBKNN, file = paste0("./TabulaMuris/integrated_seurat_obj/", tissue_name, "_BBKNN.RData"))
DimPlot(integrated_BBKNN, reduction = "umap", group.by = "dataset")
DimPlot(integrated_BBKNN, reduction = "umap", group.by = "annotation", label = T, repel = T)
raw_data_SS2 <- SS2_obj[['RNA']]@counts[hvg, ]
raw_data_tenX <- tenx_obj[['RNA']]@counts[hvg, ]
raw_data_SS2 <- as.numeric(unlist(raw_data_SS2))
raw_data_tenX <- as.numeric(unlist(raw_data_tenX))
n_hvg <- length(hvg)
CountData <- list()
CountData[[1]] <- matrix(raw_data_tenX, nrow = n_hvg)
CountData[[2]] <- matrix(raw_data_SS2, nrow = n_hvg)
adjusted_values = function(Truereads, .B, .nb, .N, .G, .K,
.alpha,.beta,.nu,.delta,.phi,.w){
.w = .w + 1
.b = unlist(sapply(seq_len(.B), function(b) rep(b, .nb[b])))
global_u = array(runif(.N*.G),c(.G,.N))
get_corrected_read = function(i){
b = .b[i]
p_x <- pnbinom(Truereads[,i], size = .phi[,b], mu = exp(.alpha + .beta[,.w[i]] + .nu[,b] + .delta[i]))
p_xminus1 <- pnbinom(Truereads[,i] - 1, size = .phi[,b], mu = exp(.alpha + .beta[,.w[i]] + .nu[,b] + .delta[i]))
local_u = mapply(function(u) min(u, .999), global_u[,i]*(p_x - p_xminus1) + p_xminus1)
return(qnbinom(local_u, size = .phi[,1], mu = exp(.alpha + .beta[,.w[i]])))
}
return(mapply(get_corrected_read, seq_len(.N)))
}
for (K in c(5, 10, 15)){
proj = paste0(tissue_name, "_K", K)
system(paste0("mkdir -p ./TabulaMuris/integrated_data/BUSseq-1.0-master/", proj))
system(paste0("mkdir -p ./TabulaMuris/integrated_data/BUSseq-1.0-master/", proj, "/RawCountData"))
readcount <- cbind(CountData[[1]], CountData[[2]])
write.table(readcount, file = paste0("./TabulaMuris/integrated_data/BUSseq-1.0-master/", proj, "/RawCountData/count_data_", proj, "_v1.txt"),row.names = FALSE,col.names = FALSE)
nb <- c(dim(CountData[[1]])[2], dim(CountData[[2]])[2])
N <- sum(nb)
B <- 2
G <- n_hvg
dim <- c(N,G,B,nb)
write.table(dim, file = paste0("./TabulaMuris/integrated_data/BUSseq-1.0-master/", proj, "/RawCountData/dim_", proj, "_v1.txt"), row.names = FALSE, col.names = FALSE)
metadata <- data.frame(batch = paste0("Batch_",rep(1:B,nb)))
write.table(metadata, file = paste0("./TabulaMuris/integrated_data/BUSseq-1.0-master/", proj, "/RawCountData/metadata_", proj, "_v1.txt"), quote = FALSE, row.names = FALSE)
system(paste0("./TabulaMuris/integrated_data/BUSseq-1.0-master/BUSseq -d./ -r./TabulaMuris/integrated_data/BUSseq-1.0-master/", proj, "/RawCountData/ -p ", proj, " -v 1 -K ", K, " -i 8000 -o 2000 -s 1234 -c 30"))
system(paste0("./TabulaMuris/integrated_data/BUSseq-1.0-master/BUSseq_inference -d./ -r./TabulaMuris/integrated_data/BUSseq-1.0-master/", proj, "/RawCountData/ -p ", proj, " -v 1 -K ", K, " -i 8000 -b 4000 -c 30"))
filename_inference <- paste0("./TabulaMuris/integrated_data/BUSseq-1.0-master/", proj, "/Inference_K", K)
w.est <- read.table(paste0(filename_inference, "/w_est.txt"))
w_BUSseq <- unlist(w.est)
alpha.est <- read.table(paste0(filename_inference, "/alpha_est.txt"))
alpha.est <- unlist(alpha.est)
beta.est <- read.table(paste0(filename_inference, "/beta_est.txt"))
beta.est <- matrix(unlist(beta.est),G,K)
logmu.est<-beta.est+alpha.est
nu.est <- read.table(paste0(filename_inference, "/nu_est.txt"))
nu.est <- matrix(unlist(nu.est),G,B)
delta.est <- read.table(paste0(filename_inference, "/delta_est.txt"))
delta.est <- unlist(delta.est)
gamma.est <- read.table(paste0(filename_inference, "/gamma_est.txt"))
gamma.est <- matrix(unlist(gamma.est),B,2)
phi.est <- read.table(paste0(filename_inference, "/phi_est.txt"))
phi.est <- matrix(unlist(phi.est),G,B)
pi.est <- read.table(paste0(filename_inference, "/pi_est.txt"))
pi.est <- matrix(unlist(pi.est),B,K)
order.est<-order(pi.est[1,],decreasing = T)
p.est <- read.table(paste0(filename_inference, "/p_est.txt"))
tau0.est <- read.table(paste0(filename_inference, "/tau0_est.txt"))
PPI.est <- read.table(paste0(filename_inference, "/PPI_est.txt"))
D.est <- unlist(read.table(paste0(filename_inference, "/IG_est.txt")))
x_imputed <- read.table(paste0(filename_inference, "/x_imputed.txt"))
x_corrected<-adjusted_values(x_imputed, B, nb, N, G, K,
alpha.est,beta.est,nu.est,delta.est,phi.est,w_BUSseq)
write.table(x_corrected, file = paste0("./TabulaMuris/integrated_data/BUSseq-1.0-master/", proj, "/x_corrected.txt"), row.names = FALSE, col.names = FALSE)
}
raw_data_SS2 <- SS2_obj[['RNA']]@counts[hvg, ]
raw_data_tenX <- tenx_obj[['RNA']]@counts[hvg, ]
cells_tenx <- colnames(raw_data_tenX)
cells_ss2 <- colnames(raw_data_SS2)
cells <- c(cells_tenx, cells_ss2)
n_hvg <- length(hvg)
BIC_dirs = c(paste0("./TabulaMuris/integrated_data/BUSseq-1.0-master/", tissue_name, "_K5/Inference_K5/BIC.txt"),
paste0("./TabulaMuris/integrated_data/BUSseq-1.0-master/", tissue_name, "_K10/Inference_K10/BIC.txt"),
paste0("./TabulaMuris/integrated_data/BUSseq-1.0-master/", tissue_name, "_K15/Inference_K15/BIC.txt"))
correctx_dirs = c(paste0("./TabulaMuris/integrated_data/BUSseq-1.0-master/", tissue_name, "_K5/x_corrected.txt"),
paste0("./TabulaMuris/integrated_data/BUSseq-1.0-master/", tissue_name, "_K10/x_corrected.txt"),
paste0("./TabulaMuris/integrated_data/BUSseq-1.0-master/", tissue_name, "_K15/x_corrected.txt"))
BIC = rep(NA, 3)
for (j in 1:3){
BIC[j] = drop(read.table(BIC_dirs[j])[1,1])
}
j = which(BIC == min(BIC))
corrected_counts = read.table(correctx_dirs[j])
corrected_counts = as.matrix(corrected_counts, nrow = n_hvg)
rownames(corrected_counts) <- hvg
colnames(corrected_counts) <- cells
integrated_BUSseq <- CreateSeuratObject(counts = corrected_counts)
integrated_BUSseq <- NormalizeData(integrated_BUSseq, verbose = FALSE)
integrated_BUSseq <- ScaleData(integrated_BUSseq, features = hvg, verbose = FALSE)
integrated_BUSseq <- AddMetaData(integrated_BUSseq, metadata = as.character(meta[colnames(integrated_BUSseq), ]$method), col.name = "dataset")
integrated_BUSseq <- AddMetaData(integrated_BUSseq, metadata = as.character(meta[colnames(integrated_BUSseq), ]$cell_ontology_class), col.name = "annotation")
integrated_BUSseq <- RunPCA(integrated_BUSseq, features = hvg, npcs = dims, verbose = FALSE)
integrated_BUSseq <- RunUMAP(integrated_BUSseq, reduction = "pca", dims = 1:dims, umap.method = 'umap-learn', metric = "correlation", verbose = FALSE)
save(integrated_BUSseq, file = paste0("./TabulaMuris/integrated_seurat_obj/", tissue_name, "_BUSseq.RData"))
DimPlot(integrated_BUSseq, reduction = "umap", group.by = "dataset")
DimPlot(integrated_BUSseq, reduction = "umap", group.by = "annotation", label = T, repel = T)
library(liger)
liger_result = createLiger(list(SS2 = as.matrix(SS2_obj[['RNA']]@counts), tenx = tenx_obj[['RNA']]@counts))
liger_result = liger::normalize(liger_result)
liger_result = selectGenes(liger_result)
liger_result@var.genes <- hvg
liger_result = scaleNotCenter(liger_result)
liger_result = optimizeALS(liger_result, k = dims)
liger_result = quantileAlignSNF(liger_result)
save(liger_result, file = paste0("./TabulaMuris/integrated_data/", tissue_name, "_LIGER.RData"))
integrated_counts <- matrix(0, length(union(rownames(SS2_obj), rownames(tenx_obj))), ncol(SS2_obj) + ncol(tenx_obj))
rownames(integrated_counts) <- union(rownames(SS2_obj), rownames(tenx_obj))
colnames(integrated_counts) <- c(colnames(SS2_obj), colnames(tenx_obj))
integrated_counts[rownames(SS2_obj), colnames(SS2_obj)] <- as.matrix(SS2_obj[['RNA']]@counts)
integrated_counts[rownames(tenx_obj), colnames(tenx_obj)] <- as.matrix(tenx_obj[['RNA']]@counts)
H.norm <- liger_result@H.norm
cell <- rownames(H.norm)
integrated_counts <- integrated_counts[, cell]
integrated_LIGER <- CreateSeuratObject(integrated_counts)
integrated_LIGER <- NormalizeData(integrated_LIGER, verbose = FALSE)
integrated_LIGER <- ScaleData(integrated_LIGER, features = hvg, verbose = FALSE)
integrated_LIGER <- AddMetaData(integrated_LIGER, metadata = as.character(meta[colnames(integrated_LIGER), ]$method), col.name = "dataset")
integrated_LIGER <- AddMetaData(integrated_LIGER, metadata = as.character(meta[colnames(integrated_LIGER), ]$cell_ontology_class), col.name = "annotation")
integrated_LIGER <- RunPCA(integrated_LIGER, features = hvg, npcs = dims, verbose = FALSE)
colnames(H.norm) <- colnames(integrated_LIGER@reductions$pca@cell.embeddings)
integrated_LIGER@reductions$pca@cell.embeddings <- H.norm
integrated_LIGER <- RunUMAP(integrated_LIGER, reduction = "pca", dims = 1:dims, umap.method = 'umap-learn', metric = "correlation", verbose = FALSE)
save(integrated_LIGER, file = paste0("./TabulaMuris/integrated_seurat_obj/", tissue_name, "_LIGER.RData"))
DimPlot(integrated_LIGER, reduction = "umap", group.by = "dataset")
DimPlot(integrated_LIGER, reduction = "umap", group.by = "annotation", label = T, repel = T)
library(reticulate)
scanorama <- import('scanorama')
datasets <- list(t(as.matrix(SS2_obj[['RNA']]@counts[hvg, ])), t(as.matrix(tenx_obj[['RNA']]@counts[hvg, ])))
genes_list <- list(hvg, hvg)
corrected.data <- scanorama$correct(datasets, genes_list, dimred = as.integer(100), return_dense=TRUE)
corrected.data <- t(rbind(corrected.data[[1]][[1]], corrected.data[[1]][[2]]))
colnames(corrected.data) <- c(colnames(SS2_obj), colnames(tenx_obj))
rownames(corrected.data) <- hvg
save(corrected.data, file = paste0("./TabulaMuris/integrated_data/", tissue_name, "_Scanorama.RData"))
integrated_counts <- matrix(0, length(union(rownames(SS2_obj), rownames(tenx_obj))), ncol(SS2_obj) + ncol(tenx_obj))
rownames(integrated_counts) <- union(rownames(SS2_obj), rownames(tenx_obj))
colnames(integrated_counts) <- c(colnames(SS2_obj), colnames(tenx_obj))
integrated_counts[rownames(SS2_obj), colnames(SS2_obj)] <- as.matrix(SS2_obj[['RNA']]@counts)
integrated_counts[rownames(tenx_obj), colnames(tenx_obj)] <- as.matrix(tenx_obj[['RNA']]@counts)
integrated_Scanorama <- CreateSeuratObject(integrated_counts[, colnames(corrected.data)])
integrated_Scanorama[['RNA']]@data <- corrected.data
integrated_Scanorama <- ScaleData(integrated_Scanorama, features = hvg, verbose = FALSE)
integrated_Scanorama <- AddMetaData(integrated_Scanorama, metadata = as.character(meta[colnames(integrated_Scanorama), ]$method), col.name = "dataset")
integrated_Scanorama <- AddMetaData(integrated_Scanorama, metadata = as.character(meta[colnames(integrated_Scanorama), ]$cell_ontology_class), col.name = "annotation")
integrated_Scanorama <- RunPCA(integrated_Scanorama, features = hvg, npcs = dims, verbose = FALSE)
integrated_Scanorama <- RunUMAP(integrated_Scanorama, reduction = "pca", dims = 1:dims, umap.method = 'umap-learn', metric = "correlation", verbose = FALSE)
save(integrated_Scanorama, file = paste0("./TabulaMuris/integrated_seurat_obj/", tissue_name, "_Scanorama.RData"))
DimPlot(integrated_Scanorama, reduction = "umap", group.by = "dataset")
DimPlot(integrated_Scanorama, reduction = "umap", group.by = "annotation", label = T, repel = T)
library(batchelor)
SS2_obj <- NormalizeData(SS2_obj, verbose = FALSE)
tenx_obj <- NormalizeData(tenx_obj, verbose = FALSE)
mnnc.results <- mnnCorrect(as.matrix(SS2_obj@assays$RNA@data[hvg, ]), as.matrix(tenx_obj@assays$RNA@data[hvg, ]))
mnnc.combined <- assay(mnnc.results)
save(mnnc.combined, file = paste0("./TabulaMuris/integrated_data/", tissue_name, "_MNN.RData"))
integrated_counts <- matrix(0, length(union(rownames(SS2_obj), rownames(tenx_obj))), ncol(SS2_obj) + ncol(tenx_obj))
rownames(integrated_counts) <- union(rownames(SS2_obj), rownames(tenx_obj))
colnames(integrated_counts) <- c(colnames(SS2_obj), colnames(tenx_obj))
integrated_counts[rownames(SS2_obj), colnames(SS2_obj)] <- as.matrix(SS2_obj[['RNA']]@counts)
integrated_counts[rownames(tenx_obj), colnames(tenx_obj)] <- as.matrix(tenx_obj[['RNA']]@counts)
integrated_MNN <- CreateSeuratObject(integrated_counts[, colnames(mnnc.combined)])
integrated_MNN[['RNA']]@data <- mnnc.combined
integrated_MNN <- ScaleData(integrated_MNN, features = hvg, verbose = FALSE)
integrated_MNN <- AddMetaData(integrated_MNN, metadata = as.character(meta[colnames(integrated_MNN), ]$method), col.name = "dataset")
integrated_MNN <- AddMetaData(integrated_MNN, metadata = as.character(meta[colnames(integrated_MNN), ]$cell_ontology_class), col.name = "annotation")
integrated_MNN <- RunPCA(integrated_MNN, features = hvg, npcs = dims, verbose = FALSE)
integrated_MNN <- RunUMAP(integrated_MNN, reduction = "pca", dims = 1:dims, umap.method = 'umap-learn', metric = "correlation", verbose = FALSE)
save(integrated_MNN, file = paste0("./TabulaMuris/integrated_seurat_obj/", tissue_name, "_MNN.RData"))
DimPlot(integrated_MNN, reduction = "umap", group.by = "dataset")
DimPlot(integrated_MNN, reduction = "umap", group.by = "annotation", label = T, repel = T)
gene_all <- intersect(rownames(SS2_obj), rownames(tenx_obj))
counts_all <- as.matrix(cbind(SS2_obj[['RNA']]@counts[gene_all, ], tenx_obj[['RNA']]@counts[gene_all, ]))
ind <- which(colMeans(counts_all) == 0)
if (length(ind) != 0){
counts_all <- counts[, -ind]
}
counts_all <- as.matrix(counts_all)
meta_tmp = meta[colnames(counts_all), c("cell_ontology_class", "method")]
write.csv(counts_all, file = paste0("./TabulaMuris/integrated_data/", tissue_name, "_scVI_counts_all.csv"))
write.csv(hvg, file = paste0("./TabulaMuris/integrated_data/", tissue_name, "_hvg.csv"), row.names = FALSE)
write.csv(meta_tmp, file = paste0("./TabulaMuris/integrated_data/", tissue_name, "_meta.csv"))
import scvi
import scanpy as sc
import pandas as pd
import numpy as np
tissue_name = "Mammary_Gland"
dims = 15
counts_file = ['./TabulaMuris/integrated_data/', tissue_name, '_scVI_counts_all.csv']
counts = pd.read_csv("".join(counts_file), index_col = 0)
meta_file = ['./TabulaMuris/integrated_data/', tissue_name, '_meta.csv']
meta = pd.read_csv("".join(meta_file), index_col = 0)
hvg_file = ['./TabulaMuris/integrated_data/', tissue_name, '_hvg.csv']
hvg = pd.read_csv("".join(hvg_file))
SS2_10X = sc.AnnData(X = counts.T, obs = meta)
SS2_10X.layers["counts"] = SS2_10X.X.copy()
sc.pp.normalize_total(SS2_10X, target_sum = 1e4)
sc.pp.log1p(SS2_10X)
SS2_10X.raw = SS2_10X
SS2_10X = SS2_10X[:, hvg['x']]
SS2_10X = SS2_10X.copy()
scvi.data.setup_anndata(SS2_10X, layer = "counts", batch_key = "method")
vae = scvi.model.SCVI(SS2_10X, n_latent = dims)
vae.train()
latent = vae.get_latent_representation()
latent_file = ['./TabulaMuris/integrated_data/', tissue_name, '_latent_scVI.txt']
np.savetxt("".join(latent_file), latent)
latent <- read.table(paste0("./TabulaMuris/integrated_data/", tissue_name, "_latent_scVI.txt"))
integrated_counts <- matrix(0, length(union(rownames(SS2_obj), rownames(tenx_obj))), ncol(SS2_obj) + ncol(tenx_obj))
rownames(integrated_counts) <- union(rownames(SS2_obj), rownames(tenx_obj))
colnames(integrated_counts) <- c(colnames(SS2_obj), colnames(tenx_obj))
integrated_counts[rownames(SS2_obj), colnames(SS2_obj)] <- as.matrix(SS2_obj[['RNA']]@counts)
integrated_counts[rownames(tenx_obj), colnames(tenx_obj)] <- as.matrix(tenx_obj[['RNA']]@counts)
integrated_scVI <- CreateSeuratObject(integrated_counts[, colnames(counts_all)])
integrated_scVI <- NormalizeData(integrated_scVI, verbose = FALSE)
integrated_scVI <- ScaleData(integrated_scVI, features = hvg, verbose = FALSE)
integrated_scVI <- AddMetaData(integrated_scVI, metadata = as.character(meta[colnames(integrated_scVI), ]$method), col.name = "dataset")
integrated_scVI <- AddMetaData(integrated_scVI, metadata = as.character(meta[colnames(integrated_scVI), ]$cell_ontology_class), col.name = "annotation")
integrated_scVI <- RunPCA(integrated_scVI, features = hvg, npcs = dims, verbose = FALSE)
colnames(latent) <- colnames(integrated_scVI@reductions$pca@cell.embeddings)
rownames(latent) <- rownames(integrated_scVI@reductions$pca@cell.embeddings)
integrated_scVI@reductions$pca@cell.embeddings <- as.matrix(latent)
integrated_scVI <- RunUMAP(integrated_scVI, reduction = "pca", dims = 1:dims, umap.method = 'umap-learn', metric = "correlation", verbose = FALSE)
save(integrated_scVI, file = paste0("./TabulaMuris/integrated_seurat_obj/", tissue_name, "_scVI.RData"))
DimPlot(integrated_scVI, reduction = "umap", group.by = "dataset")
DimPlot(integrated_scVI, reduction = "umap", group.by = "annotation", label = T, repel = T)
library(zinbwave)
SS2_counts <- as.matrix(SS2_obj[['RNA']]@counts[hvg, ])
tenx_counts <- as.matrix(tenx_obj[['RNA']]@counts[hvg, ])
ind <- which(colMeans(SS2_counts) == 0)
if (length(ind) != 0){
SS2_counts <- SS2_counts[, -ind]
tenx_counts <- tenx_counts[, -ind]
}
ind <- which(colMeans(tenx_counts) == 0)
if (length(ind) != 0){
SS2_counts <- SS2_counts[, -ind]
tenx_counts <- tenx_counts[, -ind]
}
datasets <- cbind(SS2_counts[hvg, ], tenx_counts[hvg, ])
batch <- factor(c(rep(1, ncol(SS2_counts)), rep(2, ncol(tenx_counts))))
zinb_batch <- zinbFit(datasets, K = dims, X = model.matrix(~batch))
W <- zinb_batch@W
rownames(W) <- c(colnames(SS2_obj), colnames(tenx_obj))
save(W, file = paste0("./TabulaMuris/integrated_data/", tissue_name, "_zinbwave.RData"))
integrated_counts <- matrix(0, length(union(rownames(SS2_obj), rownames(tenx_obj))), ncol(SS2_obj) + ncol(tenx_obj))
rownames(integrated_counts) <- union(rownames(SS2_obj), rownames(tenx_obj))
colnames(integrated_counts) <- c(colnames(SS2_obj), colnames(tenx_obj))
integrated_counts[rownames(SS2_obj), colnames(SS2_obj)] <- as.matrix(SS2_obj[['RNA']]@counts)
integrated_counts[rownames(tenx_obj), colnames(tenx_obj)] <- as.matrix(tenx_obj[['RNA']]@counts)
integrated_zinbwave <- CreateSeuratObject(integrated_counts[, rownames(W)])
integrated_zinbwave <- NormalizeData(integrated_zinbwave, verbose = FALSE)
integrated_zinbwave <- ScaleData(integrated_zinbwave, features = hvg, verbose = FALSE)
integrated_zinbwave <- AddMetaData(integrated_zinbwave, metadata = as.character(meta[colnames(integrated_zinbwave), ]$method), col.name = "dataset")
integrated_zinbwave <- AddMetaData(integrated_zinbwave, metadata = as.character(meta[colnames(integrated_zinbwave), ]$cell_ontology_class), col.name = "annotation")
integrated_zinbwave <- RunPCA(integrated_zinbwave, features = hvg, npcs = dims, verbose = FALSE)
colnames(W) <- colnames(integrated_zinbwave@reductions$pca@cell.embeddings)
integrated_zinbwave@reductions$pca@cell.embeddings <- W
integrated_zinbwave <- RunUMAP(integrated_zinbwave, reduction = "pca", dims = 1:dims, umap.method = 'umap-learn', metric = "correlation", verbose = FALSE)
save(integrated_zinbwave, file = paste0("./TabulaMuris/integrated_seurat_obj/", tissue_name, "_zinbwave.RData"))
DimPlot(integrated_zinbwave, reduction = "umap", group.by = "dataset")
DimPlot(integrated_zinbwave, reduction = "umap", group.by = "annotation", label = T, repel = T)
library(Seurat)
library(ggplot2)
library(FIRM)
library(RANN)
library(cluster)
library(mclust)
# Mixing metric
MixingMetric_all <- numeric(10)
names(MixingMetric_all) <- c("FIRM", "Seurat", "Harmony", "BBKNN", "BUSseq", "LIGER", "Scanorama", "MNN", "scVI", "ZINB-WaVE")
MixingMetric_all[1] <- mean(Mixing_Metric(integrated_FIRM@reductions$pca@cell.embeddings, integrated_FIRM$dataset))
MixingMetric_all[2] <- mean(Mixing_Metric(integrated_Seurat@reductions$pca@cell.embeddings, integrated_Seurat$dataset))
MixingMetric_all[3] <- mean(Mixing_Metric(integrated_Harmony@reductions$harmony@cell.embeddings, integrated_Harmony$dataset))
MixingMetric_all[4] <- mean(Mixing_Metric(integrated_BBKNN@reductions$umap@cell.embeddings, integrated_BBKNN$dataset))
MixingMetric_all[5] <- mean(Mixing_Metric(integrated_BUSseq@reductions$pca@cell.embeddings, integrated_BUSseq$dataset))
MixingMetric_all[6] <- mean(Mixing_Metric(integrated_LIGER@reductions$pca@cell.embeddings, integrated_LIGER$dataset))
MixingMetric_all[7] <- mean(Mixing_Metric(integrated_Scanorama@reductions$pca@cell.embeddings, integrated_Scanorama$dataset))
MixingMetric_all[8] <- mean(Mixing_Metric(integrated_MNN@reductions$pca@cell.embeddings, integrated_MNN$dataset))
MixingMetric_all[9] <- mean(Mixing_Metric(integrated_scVI@reductions$pca@cell.embeddings, integrated_scVI$dataset))
MixingMetric_all[10] <- mean(Mixing_Metric(integrated_zinbwave@reductions$pca@cell.embeddings, integrated_zinbwave$dataset))
save(MixingMetric_all, file = paste0("./TabulaMuris/", tissue_name, "_MixingMetric.RData"))
# Local structure metric
LocalStructure_all <- numeric(10)
names(LocalStructure_all) <- c("FIRM", "Seurat", "Harmony", "BBKNN", "BUSseq", "LIGER", "Scanorama", "MNN", "scVI", "ZINB-WaVE")
Local_Struct_Harmony <- function(SS2, tenx, integrated, dims = 30, neighbors = 20) {
nn.SS2 <- nn2(SS2@reductions$pca@cell.embeddings[, 1:dims], k = neighbors)$nn.idx
nn.tenx <- nn2(tenx@reductions$pca@cell.embeddings[, 1:dims], k = neighbors)$nn.idx
nn.corrected.SS2 <- nn2(integrated@reductions$harmony@cell.embeddings[which(integrated$dataset == unique(SS2$dataset)), 1:dims], k = neighbors)$nn.idx
nn.corrected.tenx <- nn2(integrated@reductions$harmony@cell.embeddings[which(integrated$dataset == unique(tenx$dataset)), 1:dims], k = neighbors)$nn.idx
local.struct.SS2 <- sapply(X = 1:nrow(x = nn.SS2), FUN = function(x) {
length(x = intersect(x = nn.SS2[x, ], y = nn.corrected.SS2[x, ])) / neighbors})
local.struct.tenx <- sapply(X = 1:nrow(x = nn.tenx), FUN = function(x) {
length(x = intersect(x = nn.tenx[x, ], y = nn.corrected.tenx[x, ])) / neighbors})
local.struct <- list(SS2 = local.struct.SS2, tenx = local.struct.tenx)
return(local.struct)
}
Local_Struct_BBKNN <- function(SS2, tenx, integrated, dims = 30, neighbors = 20) {
nn.SS2 <- nn2(SS2@reductions$pca@cell.embeddings[, 1:dims], k = neighbors)$nn.idx
nn.tenx <- nn2(tenx@reductions$pca@cell.embeddings[, 1:dims], k = neighbors)$nn.idx
nn.corrected.SS2 <- nn2(integrated@reductions$umap@cell.embeddings[which(integrated$dataset == unique(SS2$dataset)), ], k = neighbors)$nn.idx
nn.corrected.tenx <- nn2(integrated@reductions$umap@cell.embeddings[which(integrated$dataset == unique(tenx$dataset)), ], k = neighbors)$nn.idx
local.struct.SS2 <- sapply(X = 1:nrow(x = nn.SS2), FUN = function(x) {
length(x = intersect(x = nn.SS2[x, ], y = nn.corrected.SS2[x, ])) / neighbors})
local.struct.tenx <- sapply(X = 1:nrow(x = nn.tenx), FUN = function(x) {
length(x = intersect(x = nn.tenx[x, ], y = nn.corrected.tenx[x, ])) / neighbors})
local.struct <- list(SS2 = local.struct.SS2, tenx = local.struct.tenx)
return(local.struct)
}
SS2_seurat_obj$dataset <- "SS2"
tenx_seurat_obj$dataset <- "10X"
SS2_FIRM <- SS2_seurat_obj[, colnames(integrated_FIRM)[which(integrated_FIRM$dataset == "SS2")]]
tenx_FIRM <- tenx_seurat_obj[, colnames(integrated_FIRM)[which(integrated_FIRM$dataset == "10X")]]
LocalStruct_FIRM <- Local_Struct(SS2_FIRM, tenx_FIRM, integrated_FIRM, dims = dims)
SS2_Seurat <- SS2_seurat_obj[, colnames(integrated_Seurat)[which(integrated_Seurat$dataset == "SS2")]]
tenx_Seurat <- tenx_seurat_obj[, colnames(integrated_Seurat)[which(integrated_Seurat$dataset == "10X")]]
LocalStruct_Seurat <- Local_Struct(SS2_Seurat, tenx_Seurat, integrated_Seurat, dims = dims)
SS2_Harmony <- SS2_seurat_obj[, colnames(integrated_Harmony)[which(integrated_Harmony$dataset == "SS2")]]
tenx_Harmony <- tenx_seurat_obj[, colnames(integrated_Harmony)[which(integrated_Harmony$dataset == "10X")]]
LocalStruct_Harmony <- Local_Struct_Harmony(SS2_Harmony, tenx_Harmony, integrated_Harmony, dims = dims)
SS2_BBKNN <- SS2_seurat_obj[, colnames(integrated_BBKNN)[which(integrated_BBKNN$dataset == "SS2")]]
tenx_BBKNN <- tenx_seurat_obj[, colnames(integrated_BBKNN)[which(integrated_BBKNN$dataset == "10X")]]
LocalStruct_BBKNN <- Local_Struct_BBKNN(SS2_BBKNN, tenx_BBKNN, integrated_BBKNN, dims = dims)
SS2_BUSseq <- SS2_seurat_obj[, colnames(integrated_BUSseq)[which(integrated_BUSseq$dataset == "SS2")]]
tenx_BUSseq <- tenx_seurat_obj[, colnames(integrated_BUSseq)[which(integrated_BUSseq$dataset == "10X")]]
LocalStruct_BUSseq <- Local_Struct(SS2_BUSseq, tenx_BUSseq, integrated_BUSseq, dims = dims)
SS2_LIGER <- SS2_seurat_obj[, colnames(integrated_LIGER)[which(integrated_LIGER$dataset == "SS2")]]
tenx_LIGER <- tenx_seurat_obj[, colnames(integrated_LIGER)[which(integrated_LIGER$dataset == "10X")]]
LocalStruct_LIGER <- Local_Struct(SS2_LIGER, tenx_LIGER, integrated_LIGER, dims = dims)
SS2_Scanorama <- SS2_seurat_obj[, colnames(integrated_Scanorama)[which(integrated_Scanorama$dataset == "SS2")]]
tenx_Scanorama <- tenx_seurat_obj[, colnames(integrated_Scanorama)[which(integrated_Scanorama$dataset == "10X")]]
LocalStruct_Scanorama <- Local_Struct(SS2_Scanorama, tenx_Scanorama, integrated_Scanorama, dims = dims)
SS2_MNN <- SS2_seurat_obj[, colnames(integrated_MNN)[which(integrated_MNN$dataset == "SS2")]]
tenx_MNN <- tenx_seurat_obj[, colnames(integrated_MNN)[which(integrated_MNN$dataset == "10X")]]
LocalStruct_MNN <- Local_Struct(SS2_MNN, tenx_MNN, integrated_MNN, dims = dims)
SS2_scVI <- SS2_seurat_obj[, colnames(integrated_scVI)[which(integrated_scVI$dataset == "SS2")]]
tenx_scVI <- tenx_seurat_obj[, colnames(integrated_scVI)[which(integrated_scVI$dataset == "10X")]]
LocalStruct_scVI <- Local_Struct(SS2_scVI, tenx_scVI, integrated_scVI, dims = dims)
SS2_zinbwave <- SS2_seurat_obj[, colnames(integrated_zinbwave)[which(integrated_zinbwave$dataset == "SS2")]]
tenx_zinbwave <- tenx_seurat_obj[, colnames(integrated_zinbwave)[which(integrated_zinbwave$dataset == "10X")]]
LocalStruct_zinbwave <- Local_Struct(SS2_zinbwave, tenx_zinbwave, integrated_zinbwave, dims = dims)
LocalStructure_all[1] <- mean(c(mean(LocalStruct_FIRM$SS2), mean(LocalStruct_FIRM$tenx)))
LocalStructure_all[2] <- mean(c(mean(LocalStruct_Seurat$SS2), mean(LocalStruct_Seurat$tenx)))
LocalStructure_all[3] <- mean(c(mean(LocalStruct_Harmony$SS2), mean(LocalStruct_Harmony$tenx)))
LocalStructure_all[4] <- mean(c(mean(LocalStruct_BBKNN$SS2), mean(LocalStruct_BBKNN$tenx)))
LocalStructure_all[5] <- mean(c(mean(LocalStruct_BUSseq$SS2), mean(LocalStruct_BUSseq$tenx)))
LocalStructure_all[6] <- mean(c(mean(LocalStruct_LIGER$SS2), mean(LocalStruct_LIGER$tenx)))
LocalStructure_all[7] <- mean(c(mean(LocalStruct_Scanorama$SS2), mean(LocalStruct_Scanorama$tenx)))
LocalStructure_all[8] <- mean(c(mean(LocalStruct_MNN$SS2), mean(LocalStruct_MNN$tenx)))
LocalStructure_all[9] <- mean(c(mean(LocalStruct_scVI$SS2), mean(LocalStruct_scVI$tenx)))
LocalStructure_all[10] <- mean(c(mean(LocalStruct_zinbwave$SS2), mean(LocalStruct_zinbwave$tenx)))
save(LocalStructure_all, file = paste0("./TabulaMuris/", tissue_name, "_LocalStructure.RData"))
# ASW
ASW_all <- numeric(10)
names(ASW_all) <- c("FIRM", "Seurat", "Harmony", "BBKNN", "BUSseq", "LIGER", "Scanorama", "MNN", "scVI", "ZINB-WaVE")
ASW_all[1] <- summary(silhouette(as.numeric(as.factor(integrated_FIRM$annotation)), dist(integrated_FIRM@reductions$pca@cell.embeddings)))[["avg.width"]]
ASW_all[2] <- summary(silhouette(as.numeric(as.factor(integrated_Seurat$annotation)), dist(integrated_Seurat@reductions$pca@cell.embeddings)))[["avg.width"]]
ASW_all[3] <- summary(silhouette(as.numeric(as.factor(integrated_Harmony$annotation)), dist(integrated_Harmony@reductions$harmony@cell.embeddings)))[["avg.width"]]
ASW_all[4] <- summary(silhouette(as.numeric(as.factor(integrated_BBKNN$annotation)), dist(integrated_BBKNN@reductions$umap@cell.embeddings)))[["avg.width"]]
ASW_all[5] <- summary(silhouette(as.numeric(as.factor(integrated_BUSseq$annotation)), dist(integrated_BUSseq@reductions$pca@cell.embeddings)))[["avg.width"]]
ASW_all[6] <- summary(silhouette(as.numeric(as.factor(integrated_LIGER$annotation)), dist(integrated_LIGER@reductions$pca@cell.embeddings)))[["avg.width"]]
ASW_all[7] <- summary(silhouette(as.numeric(as.factor(integrated_Scanorama$annotation)), dist(integrated_Scanorama@reductions$pca@cell.embeddings)))[["avg.width"]]
ASW_all[8] <- summary(silhouette(as.numeric(as.factor(integrated_MNN$annotation)), dist(integrated_MNN@reductions$pca@cell.embeddings)))[["avg.width"]]
ASW_all[9] <- summary(silhouette(as.numeric(as.factor(integrated_scVI$annotation)), dist(integrated_scVI@reductions$pca@cell.embeddings)))[["avg.width"]]
ASW_all[10] <- summary(silhouette(as.numeric(as.factor(integrated_zinbwave$annotation)), dist(integrated_zinbwave@reductions$pca@cell.embeddings)))[["avg.width"]]
save(ASW_all, file = paste0("./TabulaMuris/", tissue_name, "_ASW.RData"))
# ARI
ARI_all <- numeric(10)
names(ARI_all) <- c("FIRM", "Seurat", "Harmony", "BBKNN", "BUSseq", "LIGER", "Scanorama", "MNN", "scVI", "ZINB-WaVE")
integrated_FIRM <- FindNeighbors(integrated_FIRM, dims = 1:dims, force.recalc = TRUE, verbose = FALSE)
integrated_FIRM <- FindClusters(integrated_FIRM, verbose = FALSE)
integrated_Seurat <- FindNeighbors(integrated_Seurat, dims = 1:dims, force.recalc = TRUE, verbose = FALSE)
integrated_Seurat <- FindClusters(integrated_Seurat, verbose = FALSE)
integrated_Harmony <- FindNeighbors(integrated_Harmony, reduction = "harmony", dims = 1:dims, force.recalc = TRUE, verbose = FALSE)
integrated_Harmony <- FindClusters(integrated_Harmony, verbose = FALSE)
integrated_BBKNN <- FindNeighbors(integrated_BBKNN, reduction = "umap", dims = 1:2, force.recalc = TRUE, verbose = FALSE)
integrated_BBKNN <- FindClusters(integrated_BBKNN, verbose = FALSE)
integrated_BUSseq <- FindNeighbors(integrated_BUSseq, dims = 1:dims, force.recalc = TRUE, verbose = FALSE)
integrated_BUSseq <- FindClusters(integrated_BUSseq, verbose = FALSE)
integrated_LIGER <- FindNeighbors(integrated_LIGER, dims = 1:dims, force.recalc = TRUE, verbose = FALSE)
integrated_LIGER <- FindClusters(integrated_LIGER, verbose = FALSE)
integrated_Scanorama <- FindNeighbors(integrated_Scanorama, dims = 1:dims, force.recalc = TRUE, verbose = FALSE)
integrated_Scanorama <- FindClusters(integrated_Scanorama, verbose = FALSE)
integrated_MNN <- FindNeighbors(integrated_MNN, dims = 1:dims, force.recalc = TRUE, verbose = FALSE)
integrated_MNN <- FindClusters(integrated_MNN, verbose = FALSE)
integrated_scVI <- FindNeighbors(integrated_scVI, dims = 1:dims, force.recalc = TRUE, verbose = FALSE)
integrated_scVI <- FindClusters(integrated_scVI, verbose = FALSE)
integrated_zinbwave <- FindNeighbors(integrated_zinbwave, dims = 1:dims, force.recalc = TRUE, verbose = FALSE)
integrated_zinbwave <- FindClusters(integrated_zinbwave, verbose = FALSE)
ARI_all[1] <- adjustedRandIndex(integrated_FIRM$annotation, integrated_FIRM$seurat_clusters)
ARI_all[2] <- adjustedRandIndex(integrated_Seurat$annotation, integrated_Seurat$seurat_clusters)
ARI_all[3] <- adjustedRandIndex(integrated_Harmony$annotation, integrated_Harmony$seurat_clusters)
ARI_all[4] <- adjustedRandIndex(integrated_BBKNN$annotation, integrated_BBKNN$seurat_clusters)
ARI_all[5] <- adjustedRandIndex(integrated_BUSseq$annotation, integrated_BUSseq$seurat_clusters)
ARI_all[6] <- adjustedRandIndex(integrated_LIGER$annotation, integrated_LIGER$seurat_clusters)
ARI_all[7] <- adjustedRandIndex(integrated_Scanorama$annotation, integrated_Scanorama$seurat_clusters)
ARI_all[8] <- adjustedRandIndex(integrated_MNN$annotation, integrated_MNN$seurat_clusters)
ARI_all[9] <- adjustedRandIndex(integrated_scVI$annotation, integrated_scVI$seurat_clusters)
ARI_all[10] <- adjustedRandIndex(integrated_zinbwave$annotation, integrated_zinbwave$seurat_clusters)
save(ARI_all, file = paste0("./TabulaMuris/", tissue_name, "_ARI.RData"))
library(ggplot2)
library(gridExtra)
Mixing_Metric <- data.frame(value = MixingMetric_all, method = as.factor(names(MixingMetric_all)))
Mixing_Metric$method <- factor(Mixing_Metric$method, levels = levels(Mixing_Metric$method)[c(10, 8, 6, 7, 5, 2, 1, 4, 9, 3)])
P_MixingMetric <- ggplot(Mixing_Metric, aes(fill = -value, y = value, x = method)) +
geom_bar(stat = "identity") +
ylab("Mixing metric") +
coord_flip() +
theme(axis.title.y = element_blank(),
legend.position = "none",
panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_line(colour = "black"),
axis.text.y = element_text(colour = "black")) +
geom_hline(yintercept = Mixing_Metric$value[which(Mixing_Metric$method == "FIRM")], linetype = "dashed")
Local_Struct <- data.frame(value = LocalStructure_all, method = as.factor(names(LocalStructure_all)))
Local_Struct$method <- factor(Local_Struct$method, levels = levels(Local_Struct$method)[c(10, 8, 6, 7, 5, 2, 1, 4, 9, 3)])
P_LocalStruct <- ggplot(Local_Struct, aes(fill = value, y = value, x = method)) +
geom_bar(stat = "identity") +
ylab("Local structure metric") +
coord_flip() +
theme(axis.title.y = element_blank(),
legend.position = "none",
panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_line(colour = "black"),
axis.text.y = element_text(colour = "black")) +
geom_hline(yintercept = Local_Struct$value[which(Local_Struct$method == "FIRM")], linetype = "dashed")
ASW <- data.frame(value = ASW_all, method = as.factor(names(ASW_all)))
ASW$method <- factor(ASW$method, levels = levels(ASW$method)[c(10, 8, 6, 7, 5, 2, 1, 4, 9, 3)])
P_ASW <- ggplot(ASW, aes(fill = value, y = value, x = method)) +
geom_bar(stat = "identity") +
ylab("ASW") +
coord_flip() +
theme(axis.title.y = element_blank(),
legend.position = "none",
panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_line(colour = "black"),
axis.text.y = element_text(colour = "black")) +
geom_hline(yintercept = ASW$value[which(ASW$method == "FIRM")], linetype = "dashed")
ARI <- data.frame(value = ARI_all, method = as.factor(names(ARI_all)))
ARI$method <- factor(ARI$method, levels = levels(ARI$method)[c(10, 8, 6, 7, 5, 2, 1, 4, 9, 3)])
P_ARI <- ggplot(ARI, aes(fill = value, y = value, x = method)) +
geom_bar(stat = "identity") +
ylab("ARI") +
coord_flip() +
theme(axis.title.y = element_blank(),
legend.position = "none",
panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_line(colour = "black"),
axis.text.y = element_text(colour = "black")) +
geom_hline(yintercept = ARI$value[which(ARI$method == "FIRM")], linetype = "dashed")
grid.arrange(P_MixingMetric, P_LocalStruct, P_ASW, P_ARI,
layout_matrix = matrix(c(1, 3, 2, 4), nrow = 2, ncol = 2),
widths = c(1, 1))