单细胞学习（五）：通过聚类角度判断哪些cluster可能属于同一细胞类型-吃了吃了

1.总体代码

##清空环境
rm(list=ls())
#设置工作路径
###加载所需要的包
library(Seurat)
library(tidyverse)
library(dplyr)
library(patchwork)

x=list.files()

dir = c('BC2/', "BC21/")
names(dir) = c('BC2',  'BC21')      
?Read10X

counts <- Read10X(data.dir =dir)
scRNA1 = CreateSeuratObject(counts,min.cells = 3, min.features = 200)
table(scRNA1@meta.data$orig.ident)
dir[1]

scRNAlist <- list()
for(i in 1:length(dir)){
  counts <- Read10X(data.dir = dir[i])
  scRNAlist[[i]] <- CreateSeuratObject(counts, min.cells = 3, min.features =300)
}

counts <- Read10X(data.dir = "BC2/")
scRNAlist[[1]] <- CreateSeuratObject(counts, min.cells = 3, min.features =300)

counts <- Read10X(data.dir = "BC3/")
scRNAlist[[2]] <- CreateSeuratObject(counts, min.cells = 3, min.features =300)

counts <- Read10X(data.dir = "BC5/")
scRNAlist[[3]] <- CreateSeuratObject(counts, min.cells = 3, min.features =300)



save(scRNAlist,file = "scRNAlist.Rdata")
load("F:/gl/huada/scRNAlist.Rdata")

for (i in 1:length(scRNAlist)) {
  scRNAlist[[i]] <- NormalizeData(scRNAlist[[i]])
  scRNAlist[[i]] <- FindVariableFeatures(scRNAlist[[i]], selection.method = "vst",nfeatures = 3000)
}


seur.obj <- scRNAlist[[1]]

seur.obj <- seur.obj %>%
  Seurat::NormalizeData(verbose = F) %>%
  FindVariableFeatures(selection.method = 'vst') %>%
  ScaleData(verbose = F) %>%
  RunPCA(pc.genes = seur.obj@var.genes,npcs = 100,verbose= F) %>%
  FindNeighbors(dims= 1:10) %>%
  FindClusters(resolution= 0.5) %>%
  RunUMAP(dims= 1:10)

seur.obj2 <- scRNAlist[[2]]
seur.obj2 <- seur.obj2 %>%
  Seurat::NormalizeData(verbose = F) %>%
  FindVariableFeatures(selection.method = 'vst') %>%
  ScaleData(verbose = F) %>%
  RunPCA(pc.genes = seur.obj2@var.genes,npcs = 100,verbose= F) %>%
  FindNeighbors(dims= 1:10) %>%
  FindClusters(resolution= 0.5) %>%
  RunUMAP(dims= 1:10)

#确保两个数据集的特征数相等
shared_genes <- intersect(rownames(seur.obj),row.names(seur.obj2))
?intersect

#计算cluster之和并归一化

seur.obj.matrix <- AggregateExpression(seur.obj,assays = 'RNA',features = shared_genes,slot = 'count')$RNA
#除每个cluster的重复
seur.obj.matrix <- seur.obj.matrix / rep(colSums(seur.obj.matrix),each = nrow(seur.obj.matrix))
seur.obj.matrix <- log10(seur.obj.matrix *100000 +1)

seur.obj.matrix2 <- AggregateExpression(seur.obj2,assays = 'RNA',features = shared_genes,slot = 'count')$RNA
seur.obj.matrix2 <- seur.obj.matrix2 / rep(colSums(seur.obj.matrix2),each = nrow(seur.obj.matrix2))
seur.obj.matrix2 <- log10(seur.obj.matrix2 *100000 +1)

?AggregateExpression
?rep
##为了提高准确性，先根据目标数据集中的给定细胞类型和整体细胞类型中位数的倍数变化，选择前200个；然后根据目标数据集中给定的细胞类型和其他细胞类型最大值的倍数变化，选择前200个，对两个结果合并。
cluster <- 3
seur.obj.gene <- seur.obj.matrix[,cluster + 1]
gene_fc <- seur.obj.gene / apply(seur.obj.matrix,1,median)
gene_list1 <- names(sort(gene_fc,decreasing = T)[1:200])

gene_fc <- seur.obj.gene / apply(seur.obj.matrix[,-(cluster +1)],1,max)
gene_list2 <- names(sort(gene_fc,decreasing = T)[1:200])
gene_list <- unique(c(gene_list1,gene_list2))

#3，在相关系数不为负的限制下，求Ta

Ta <- seur.obj.matrix[gene_list,cluster + 1]
Mb <- seur.obj.matrix2[gene_list,]
install.packages('lsei')
library(lsei)
solv <- nnls(Mb,Ta)
corr <- solv$x
corr

#调换预测数据集和目标数据集，重新进行上述计算
# 批量计算各个类群的回归系数
cluster <- 0
ncol(seur.obj.matrix)
list1 <- list()
for (cluster in seq(1,ncol(seur.obj.matrix))){
  seur.obj.gene <- seur.obj.matrix[,cluster]
  gene_fc <- seur.obj.gene / apply(seur.obj.matrix,1,median)
  gene_list1 <- names(sort(gene_fc,decreasing = T)[1:200])
  
  gene_fc <- seur.obj.gene / apply(seur.obj.matrix[,-(cluster)],1,max)
  gene_list2 <- names(sort(gene_fc,decreasing = T)[1:200])
  gene_list <- unique(c(gene_list1,gene_list2))
  Ta <- seur.obj.matrix[gene_list,cluster]
  Mb <- seur.obj.matrix2[gene_list,]
  solv <- nnls(Mb,Ta)
  corr <- solv$x
  list1[[cluster]] <- corr
}
cluster <- c()
list2 <- list()
for (cluster in seq(1,ncol(seur.obj.matrix2))){
  seur.obj.gene <- seur.obj.matrix2[,cluster]
  gene_fc <- seur.obj.gene / apply(seur.obj.matrix2,1,median)
  gene_list1 <- names(sort(gene_fc,decreasing = T)[1:200])
  
  gene_fc <- seur.obj.gene / apply(seur.obj.matrix2[,-(cluster)],1,max)
  gene_list2 <- names(sort(gene_fc,decreasing = T)[1:200])
  gene_list <- unique(c(gene_list1,gene_list2))
  Ta <- seur.obj.matrix2[gene_list,cluster]
  Mb <- seur.obj.matrix[gene_list,]
  solv <- nnls(Mb,Ta)
  corr <- solv$x
  list2[[cluster]] <- corr
}
mat1 <- do.call(rbind,list1)
mat2 <- do.call(rbind,list2)
?do.call
b <- 2*(mat1 + 0.01) * t(mat2 + 0.01)
?t
row.names(b) <- paste0('c',1:nrow(b)-1)
colnames(b) <- paste0('c',1:ncol(b)-1)

pheatmap::pheatmap(b,cluster_rows = F,cluster_cols = FALSE)

library(pheatmap)
install.packages('psych')
library(psych)
colnames(seur.obj@meta.data)
Idents(seur.obj) ='Seurat_clusters'
exp=AverageExpression(seur.obj)
exp
?Idents
coorda <- corr.test(exp$RNA,exp$RNA,method='spearman')
pheatmap(coorda$r)

对于不同发育时间，如果强行整合可能导致明显的批次效应，掩盖最初的结果。

2.代码说明

（1）for循环构建scRNA列表

scRNAlist <- list()
for(i in 1:length(dir)){
counts <- Read10X(data.dir = dir[i])
scRNAlist[[i]] <- CreateSeuratObject(counts, min.cells = 3, min.features =300)
}

工作环境文文件夹中有i组数据，将所有数据合并至一个list，即scRNAlist。

(2) intersect()

在两个向量上执行集并、交、(非对称!)差、相等和隶属。union(x, y)
intersect(x, y)
setdiff(x, y)
setequal(x, y)
is.element(el, set)
#is.element(x, y) is identical to x %in% y.

(3) AggregateExpression

返回每个标识类的聚合(求和)表达式值AggregateExpression(
object,
assays = NULL,
features = NULL,
return.seurat = FALSE,
group.by = “ident”,
add.ident = NULL,
slot = “data”,
verbose = TRUE,
…
)

`object`	Seurat object
`assays`	Which assays to use. Default is all assays
`features`	Features to analyze. Default is all features in the assay
`return.seurat`	Whether to return the data as a Seurat object. Default is FALSE
`group.by`	Categories for grouping (e.g, ident, replicate, celltype); ‘ident’ by default
`add.ident`	(Deprecated) Place an additional label on each cell prior to pseudobulking (very useful if you want to observe cluster pseudobulk values, separated by replicate, for example)
`slot`	Slot(s) to use; if multiple slots are given, assumed to follow the order of ‘assays’ (if specified) or object’s assays
`verbose`	Print messages and show progress bar