单细胞学习(五):通过聚类角度判断哪些cluster可能属于同一细胞类型

1.总体代码

##清空环境
rm(list=ls())
#设置工作路径
###加载所需要的包
library(Seurat)
library(tidyverse)
library(dplyr)
library(patchwork)

x=list.files()

dir = c('BC2/', "BC21/")
names(dir) = c('BC2',  'BC21')      
?Read10X

counts <- Read10X(data.dir =dir)
scRNA1 = CreateSeuratObject(counts,min.cells = 3, min.features = 200)
table(scRNA1@meta.data$orig.ident)
dir[1]

scRNAlist <- list()
for(i in 1:length(dir)){
  counts <- Read10X(data.dir = dir[i])
  scRNAlist[[i]] <- CreateSeuratObject(counts, min.cells = 3, min.features =300)
}

counts <- Read10X(data.dir = "BC2/")
scRNAlist[[1]] <- CreateSeuratObject(counts, min.cells = 3, min.features =300)

counts <- Read10X(data.dir = "BC3/")
scRNAlist[[2]] <- CreateSeuratObject(counts, min.cells = 3, min.features =300)

counts <- Read10X(data.dir = "BC5/")
scRNAlist[[3]] <- CreateSeuratObject(counts, min.cells = 3, min.features =300)



save(scRNAlist,file = "scRNAlist.Rdata")
load("F:/gl/huada/scRNAlist.Rdata")

for (i in 1:length(scRNAlist)) {
  scRNAlist[[i]] <- NormalizeData(scRNAlist[[i]])
  scRNAlist[[i]] <- FindVariableFeatures(scRNAlist[[i]], selection.method = "vst",nfeatures = 3000)
}


seur.obj <- scRNAlist[[1]]

seur.obj <- seur.obj %>%
  Seurat::NormalizeData(verbose = F) %>%
  FindVariableFeatures(selection.method = 'vst') %>%
  ScaleData(verbose = F) %>%
  RunPCA(pc.genes = seur.obj@var.genes,npcs = 100,verbose= F) %>%
  FindNeighbors(dims= 1:10) %>%
  FindClusters(resolution= 0.5) %>%
  RunUMAP(dims= 1:10)

seur.obj2 <- scRNAlist[[2]]
seur.obj2 <- seur.obj2 %>%
  Seurat::NormalizeData(verbose = F) %>%
  FindVariableFeatures(selection.method = 'vst') %>%
  ScaleData(verbose = F) %>%
  RunPCA(pc.genes = seur.obj2@var.genes,npcs = 100,verbose= F) %>%
  FindNeighbors(dims= 1:10) %>%
  FindClusters(resolution= 0.5) %>%
  RunUMAP(dims= 1:10)

#确保两个数据集的特征数相等
shared_genes <- intersect(rownames(seur.obj),row.names(seur.obj2))
?intersect

#计算cluster之和并归一化

seur.obj.matrix <- AggregateExpression(seur.obj,assays = 'RNA',features = shared_genes,slot = 'count')$RNA
#除每个cluster的重复
seur.obj.matrix <- seur.obj.matrix / rep(colSums(seur.obj.matrix),each = nrow(seur.obj.matrix))
seur.obj.matrix <- log10(seur.obj.matrix *100000 +1)

seur.obj.matrix2 <- AggregateExpression(seur.obj2,assays = 'RNA',features = shared_genes,slot = 'count')$RNA
seur.obj.matrix2 <- seur.obj.matrix2 / rep(colSums(seur.obj.matrix2),each = nrow(seur.obj.matrix2))
seur.obj.matrix2 <- log10(seur.obj.matrix2 *100000 +1)

?AggregateExpression
?rep
##为了提高准确性,先根据目标数据集中的给定细胞类型和整体细胞类型中位数的倍数变化,选择前200个;然后根据目标数据集中给定的细胞类型和其他细胞类型最大值的倍数变化,选择前200个,对两个结果合并。
cluster <- 3
seur.obj.gene <- seur.obj.matrix[,cluster + 1]
gene_fc <- seur.obj.gene / apply(seur.obj.matrix,1,median)
gene_list1 <- names(sort(gene_fc,decreasing = T)[1:200])

gene_fc <- seur.obj.gene / apply(seur.obj.matrix[,-(cluster +1)],1,max)
gene_list2 <- names(sort(gene_fc,decreasing = T)[1:200])
gene_list <- unique(c(gene_list1,gene_list2))

#3,在相关系数不为负的限制下,求Ta

Ta <- seur.obj.matrix[gene_list,cluster + 1]
Mb <- seur.obj.matrix2[gene_list,]
install.packages('lsei')
library(lsei)
solv <- nnls(Mb,Ta)
corr <- solv$x
corr

#调换预测数据集和目标数据集,重新进行上述计算
# 批量计算各个类群的回归系数
cluster <- 0
ncol(seur.obj.matrix)
list1 <- list()
for (cluster in seq(1,ncol(seur.obj.matrix))){
  seur.obj.gene <- seur.obj.matrix[,cluster]
  gene_fc <- seur.obj.gene / apply(seur.obj.matrix,1,median)
  gene_list1 <- names(sort(gene_fc,decreasing = T)[1:200])
  
  gene_fc <- seur.obj.gene / apply(seur.obj.matrix[,-(cluster)],1,max)
  gene_list2 <- names(sort(gene_fc,decreasing = T)[1:200])
  gene_list <- unique(c(gene_list1,gene_list2))
  Ta <- seur.obj.matrix[gene_list,cluster]
  Mb <- seur.obj.matrix2[gene_list,]
  solv <- nnls(Mb,Ta)
  corr <- solv$x
  list1[[cluster]] <- corr
}
cluster <- c()
list2 <- list()
for (cluster in seq(1,ncol(seur.obj.matrix2))){
  seur.obj.gene <- seur.obj.matrix2[,cluster]
  gene_fc <- seur.obj.gene / apply(seur.obj.matrix2,1,median)
  gene_list1 <- names(sort(gene_fc,decreasing = T)[1:200])
  
  gene_fc <- seur.obj.gene / apply(seur.obj.matrix2[,-(cluster)],1,max)
  gene_list2 <- names(sort(gene_fc,decreasing = T)[1:200])
  gene_list <- unique(c(gene_list1,gene_list2))
  Ta <- seur.obj.matrix2[gene_list,cluster]
  Mb <- seur.obj.matrix[gene_list,]
  solv <- nnls(Mb,Ta)
  corr <- solv$x
  list2[[cluster]] <- corr
}
mat1 <- do.call(rbind,list1)
mat2 <- do.call(rbind,list2)
?do.call
b <- 2*(mat1 + 0.01) * t(mat2 + 0.01)
?t
row.names(b) <- paste0('c',1:nrow(b)-1)
colnames(b) <- paste0('c',1:ncol(b)-1)

pheatmap::pheatmap(b,cluster_rows = F,cluster_cols = FALSE)

library(pheatmap)
install.packages('psych')
library(psych)
colnames(seur.obj@meta.data)
Idents(seur.obj) ='Seurat_clusters'
exp=AverageExpression(seur.obj)
exp
?Idents
coorda <- corr.test(exp$RNA,exp$RNA,method='spearman')
pheatmap(coorda$r)

对于不同发育时间,如果强行整合可能导致明显的批次效应,掩盖最初的结果。

2.代码说明

(1)for循环构建scRNA列表

scRNAlist <- list()
for(i in 1:length(dir)){
 counts <- Read10X(data.dir = dir[i])
 scRNAlist[[i]] <- CreateSeuratObject(counts, min.cells = 3, min.features =300)
}

工作环境文文件夹中有i组数据,将所有数据合并至一个list,即scRNAlist。

(2) intersect()

在两个向量上执行集并、交、(非对称!)差、相等和隶属。union(x, y)
intersect(x, y)
setdiff(x, y)
setequal(x, y)
is.element(el, set)
#is.element(x, y) is identical to x %in% y.

(3) AggregateExpression

返回每个标识类的聚合(求和)表达式值AggregateExpression(
object,
assays = NULL,
features = NULL,
return.seurat = FALSE,
group.by = “ident”,
add.ident = NULL,
slot = “data”,
verbose = TRUE,

)

objectSeurat object
assaysWhich assays to use. Default is all assays
featuresFeatures to analyze. Default is all features in the assay
return.seuratWhether to return the data as a Seurat object. Default is FALSE
group.byCategories for grouping (e.g, ident, replicate, celltype); ‘ident’ by default
add.ident(Deprecated) Place an additional label on each cell prior to pseudobulking (very useful if you want to observe cluster pseudobulk values, separated by replicate, for example)
slotSlot(s) to use; if multiple slots are given, assumed to follow the order of ‘assays’ (if specified) or object’s assays
verbosePrint messages and show progress bar

(4) rep()

rep复制x中的值。它是一个泛型函数。

each

非负整数。x的每个元素每次都重复。其他输入将被强制转换为整型或双精度向量,并获取第一个元素。如果NA或无效,则视为1。

(5)nnls()

当部分或全部解值受到非负性约束时,这些函数对于解决最小二乘或二次规划问题特别有用。可以进一步限制神经网络限制系数有一个固定的正和。

(6)do.call()

根据名称或函数以及要传递给它的参数列表构造并执行函数调用。do.call(what, args, quote = FALSE, envir = parent.frame())

图片[1]-单细胞学习(五):通过聚类角度判断哪些cluster可能属于同一细胞类型-吃了吃了
图片[2]-单细胞学习(五):通过聚类角度判断哪些cluster可能属于同一细胞类型-吃了吃了
图片[3]-单细胞学习(五):通过聚类角度判断哪些cluster可能属于同一细胞类型-吃了吃了
© 版权声明
THE END
喜欢就支持一下吧
点赞7 分享
评论 抢沙发
头像
欢迎您留下宝贵的见解!
提交
头像

昵称

取消
昵称表情代码图片

    暂无评论内容