1.总体代码
##清空环境
rm(list=ls())
#设置工作路径
###加载所需要的包
library(Seurat)
library(tidyverse)
library(dplyr)
library(patchwork)
x=list.files()
dir = c('BC2/', "BC21/")
names(dir) = c('BC2', 'BC21')
?Read10X
counts <- Read10X(data.dir =dir)
scRNA1 = CreateSeuratObject(counts,min.cells = 3, min.features = 200)
table(scRNA1@meta.data$orig.ident)
dir[1]
scRNAlist <- list()
for(i in 1:length(dir)){
counts <- Read10X(data.dir = dir[i])
scRNAlist[[i]] <- CreateSeuratObject(counts, min.cells = 3, min.features =300)
}
counts <- Read10X(data.dir = "BC2/")
scRNAlist[[1]] <- CreateSeuratObject(counts, min.cells = 3, min.features =300)
counts <- Read10X(data.dir = "BC3/")
scRNAlist[[2]] <- CreateSeuratObject(counts, min.cells = 3, min.features =300)
counts <- Read10X(data.dir = "BC5/")
scRNAlist[[3]] <- CreateSeuratObject(counts, min.cells = 3, min.features =300)
save(scRNAlist,file = "scRNAlist.Rdata")
load("F:/gl/huada/scRNAlist.Rdata")
for (i in 1:length(scRNAlist)) {
scRNAlist[[i]] <- NormalizeData(scRNAlist[[i]])
scRNAlist[[i]] <- FindVariableFeatures(scRNAlist[[i]], selection.method = "vst",nfeatures = 3000)
}
seur.obj <- scRNAlist[[1]]
seur.obj <- seur.obj %>%
Seurat::NormalizeData(verbose = F) %>%
FindVariableFeatures(selection.method = 'vst') %>%
ScaleData(verbose = F) %>%
RunPCA(pc.genes = seur.obj@var.genes,npcs = 100,verbose= F) %>%
FindNeighbors(dims= 1:10) %>%
FindClusters(resolution= 0.5) %>%
RunUMAP(dims= 1:10)
seur.obj2 <- scRNAlist[[2]]
seur.obj2 <- seur.obj2 %>%
Seurat::NormalizeData(verbose = F) %>%
FindVariableFeatures(selection.method = 'vst') %>%
ScaleData(verbose = F) %>%
RunPCA(pc.genes = seur.obj2@var.genes,npcs = 100,verbose= F) %>%
FindNeighbors(dims= 1:10) %>%
FindClusters(resolution= 0.5) %>%
RunUMAP(dims= 1:10)
#确保两个数据集的特征数相等
shared_genes <- intersect(rownames(seur.obj),row.names(seur.obj2))
?intersect
#计算cluster之和并归一化
seur.obj.matrix <- AggregateExpression(seur.obj,assays = 'RNA',features = shared_genes,slot = 'count')$RNA
#除每个cluster的重复
seur.obj.matrix <- seur.obj.matrix / rep(colSums(seur.obj.matrix),each = nrow(seur.obj.matrix))
seur.obj.matrix <- log10(seur.obj.matrix *100000 +1)
seur.obj.matrix2 <- AggregateExpression(seur.obj2,assays = 'RNA',features = shared_genes,slot = 'count')$RNA
seur.obj.matrix2 <- seur.obj.matrix2 / rep(colSums(seur.obj.matrix2),each = nrow(seur.obj.matrix2))
seur.obj.matrix2 <- log10(seur.obj.matrix2 *100000 +1)
?AggregateExpression
?rep
##为了提高准确性,先根据目标数据集中的给定细胞类型和整体细胞类型中位数的倍数变化,选择前200个;然后根据目标数据集中给定的细胞类型和其他细胞类型最大值的倍数变化,选择前200个,对两个结果合并。
cluster <- 3
seur.obj.gene <- seur.obj.matrix[,cluster + 1]
gene_fc <- seur.obj.gene / apply(seur.obj.matrix,1,median)
gene_list1 <- names(sort(gene_fc,decreasing = T)[1:200])
gene_fc <- seur.obj.gene / apply(seur.obj.matrix[,-(cluster +1)],1,max)
gene_list2 <- names(sort(gene_fc,decreasing = T)[1:200])
gene_list <- unique(c(gene_list1,gene_list2))
#3,在相关系数不为负的限制下,求Ta
Ta <- seur.obj.matrix[gene_list,cluster + 1]
Mb <- seur.obj.matrix2[gene_list,]
install.packages('lsei')
library(lsei)
solv <- nnls(Mb,Ta)
corr <- solv$x
corr
#调换预测数据集和目标数据集,重新进行上述计算
# 批量计算各个类群的回归系数
cluster <- 0
ncol(seur.obj.matrix)
list1 <- list()
for (cluster in seq(1,ncol(seur.obj.matrix))){
seur.obj.gene <- seur.obj.matrix[,cluster]
gene_fc <- seur.obj.gene / apply(seur.obj.matrix,1,median)
gene_list1 <- names(sort(gene_fc,decreasing = T)[1:200])
gene_fc <- seur.obj.gene / apply(seur.obj.matrix[,-(cluster)],1,max)
gene_list2 <- names(sort(gene_fc,decreasing = T)[1:200])
gene_list <- unique(c(gene_list1,gene_list2))
Ta <- seur.obj.matrix[gene_list,cluster]
Mb <- seur.obj.matrix2[gene_list,]
solv <- nnls(Mb,Ta)
corr <- solv$x
list1[[cluster]] <- corr
}
cluster <- c()
list2 <- list()
for (cluster in seq(1,ncol(seur.obj.matrix2))){
seur.obj.gene <- seur.obj.matrix2[,cluster]
gene_fc <- seur.obj.gene / apply(seur.obj.matrix2,1,median)
gene_list1 <- names(sort(gene_fc,decreasing = T)[1:200])
gene_fc <- seur.obj.gene / apply(seur.obj.matrix2[,-(cluster)],1,max)
gene_list2 <- names(sort(gene_fc,decreasing = T)[1:200])
gene_list <- unique(c(gene_list1,gene_list2))
Ta <- seur.obj.matrix2[gene_list,cluster]
Mb <- seur.obj.matrix[gene_list,]
solv <- nnls(Mb,Ta)
corr <- solv$x
list2[[cluster]] <- corr
}
mat1 <- do.call(rbind,list1)
mat2 <- do.call(rbind,list2)
?do.call
b <- 2*(mat1 + 0.01) * t(mat2 + 0.01)
?t
row.names(b) <- paste0('c',1:nrow(b)-1)
colnames(b) <- paste0('c',1:ncol(b)-1)
pheatmap::pheatmap(b,cluster_rows = F,cluster_cols = FALSE)
library(pheatmap)
install.packages('psych')
library(psych)
colnames(seur.obj@meta.data)
Idents(seur.obj) ='Seurat_clusters'
exp=AverageExpression(seur.obj)
exp
?Idents
coorda <- corr.test(exp$RNA,exp$RNA,method='spearman')
pheatmap(coorda$r)
对于不同发育时间,如果强行整合可能导致明显的批次效应,掩盖最初的结果。
2.代码说明
(1)for循环构建scRNA列表
scRNAlist <- list()
for(i in 1:length(dir)){
counts <- Read10X(data.dir = dir[i])
scRNAlist[[i]] <- CreateSeuratObject(counts, min.cells = 3, min.features =300)
}
工作环境文文件夹中有i组数据,将所有数据合并至一个list,即scRNAlist。
(2) intersect()
在两个向量上执行集并、交、(非对称!)差、相等和隶属。union(x, y)
intersect(x, y)
setdiff(x, y)
setequal(x, y)
is.element(el, set)
#is.element(x, y) is identical to x %in% y.
(3) AggregateExpression
返回每个标识类的聚合(求和)表达式值AggregateExpression(
object,
assays = NULL,
features = NULL,
return.seurat = FALSE,
group.by = “ident”,
add.ident = NULL,
slot = “data”,
verbose = TRUE,
…
)
object | Seurat object |
---|---|
assays | Which assays to use. Default is all assays |
features | Features to analyze. Default is all features in the assay |
return.seurat | Whether to return the data as a Seurat object. Default is FALSE |
group.by | Categories for grouping (e.g, ident, replicate, celltype); ‘ident’ by default |
add.ident | (Deprecated) Place an additional label on each cell prior to pseudobulking (very useful if you want to observe cluster pseudobulk values, separated by replicate, for example) |
slot | Slot(s) to use; if multiple slots are given, assumed to follow the order of ‘assays’ (if specified) or object’s assays |
verbose | Print messages and show progress bar |
(4) rep()
rep复制x中的值。它是一个泛型函数。
each
非负整数。x的每个元素每次都重复。其他输入将被强制转换为整型或双精度向量,并获取第一个元素。如果NA或无效,则视为1。
(5)nnls()
当部分或全部解值受到非负性约束时,这些函数对于解决最小二乘或二次规划问题特别有用。可以进一步限制神经网络限制系数有一个固定的正和。
(6)do.call()
根据名称或函数以及要传递给它的参数列表构造并执行函数调用。do.call(what, args, quote = FALSE, envir = parent.frame())
暂无评论内容