.libPaths() #The data is too large to go to the Linux server for processing .libPaths(c( "/home/data/t040413/R/x86_64-pc-linux-gnu-library/4.2","/home/data/refdir/Rlib/", "/usr/local /lib/R/library")) getwd() dir.create("./enrichments") setwd("/home/data/t040413/silicosis/fibroblast_myofibroblast/enrichments/") ####### Difference Analysis ###############################33 library(clusterProfiler) library(Seurat) library(SeuratWrappers) library(ggplot2) library(dplyr) library(stringr) library(openxlsx) library(org.Mm.eg.db) library("reshape") library("RColorBrewer") # #remotes::install_github('satijalab/seurat-wrappers') # #usethis::create_github_token() #usethis::edit_r_environ() getwd() file="/home/data/t040413/silicosis/fibroblast_myofibroblast/enrichments/0.69" dir. create(file) setwd(file) getwd() ?loadWorkbook #library(xlsx) #Reading files is much slower than openxlsx! ! ! ! ! ## Double check the file path! ! ! ! ! ! ! ! ! ! ! ! markers = read.table("/home/data/t040413/silicosis/fibroblast_myofibroblast/markers_after_reclustername.txt",header = T) head(markers) #View(markers) names(table(markers$cluster)) #markers$"Myofibroblast/vascular smooth muscle cell"<-"myo-fib" #markers$cluster[markers$cluster=="Myofibroblast/vascular smooth muscle cell"]="fib-myo" #Change the illegal name of the 17th cluster to a normal name, because when naming the exported xlsx used custer's name names(table(markers$cluster)) k <- keys(org.Mm.eg.db, keytype = "ENTREZID") gene.list <- select(org.Mm.eg.db, keys = k, columns = c("SYMBOL", "ENSEMBL"), keytype = "ENTREZID")## 73306 3 head(gene.list) #Self-built function Enter the go enrichment entry and the name of the file to return the corresponding pdf file and xlsx file clusterProfilerOut = function(enrichRes,outfile){ enrichRes$function.gene.num = unlist(strsplit(enrichRes[,"BgRatio"],"/"))[seq(1,length(unlist(strsplit(enrichRes[,"BgRatio"], "/"))),2)] enrichRes$GeneRatio = enrichRes$Count / as.numeric(enrichRes$function.gene.num) #calculate generatio write.xlsx(enrichRes,paste0(outfile, ".xlsx"),col.names=T,row.names=F) if (dim(enrichRes)[1]>=10) {enrichRes.use = enrichRes[1:10,]} else{enrichRes.use = enrichRes[1:dim(enrichRes)[1],]}#Displayed rich Set entries no more than 10 enrichRes.use = enrichRes.use[order(enrichRes.use$GeneRatio, decreasing=T),] #Display in descending order of generatio enrichRes.use$Description = factor(enrichRes.use$Description,levels=rev(enrichRes.use$Description)) gap = (max(enrichRes.use$GeneRatio) - min(enrichRes.use$GeneRatio))/5 # make and output pdf pdf(paste0(outfile, ".pdf")) p = ggplot(enrichRes.use,mapping = aes(x=GeneRatio,y=Description)) + geom_point(aes(size=Count,color=p.adjust)) + theme_bw() + scale_color_gradient(low = "blue", high = "red") + scale_x_continuous(expand = c(gap, gap)) + ylab(NULL) + scale_y_discrete(labels=function(x) str_wrap(x, width=60)) print(p) dev.off() } getwd() #At this time, manually copy the all_cluster_markers.xlsx file to the working directory path=getwd() #dir(path) files=dir(path) files #Get all file names in the working directory getwd() ''' path="G:/silicosis/sicosis/silicosis_ST/yll/0214/harmony_cluster/d_all/r=0.6_11clusters_0.69fc" dir. create(path) setwd(path) ''' getwd() head(markers) #library(xlsx) for (i in files) { #i="all_cluster_markers.xlsx" input = read.table(paste(path, i, sep = "/"),header = T)#Read xlsx file Note that sometimes you need to add row.names and sometimes you don’t need to add head(input) # input$cluster[input$cluster=="Myofibroblast/vascular smooth muscle cell"]="fib-myo" #Change the illegal name of the 17th cluster to a normal name, because when naming the exported xlsx used custer's name input$symbol <- input$gene head(input) input$FeatureName.entrez = gene.list[match(input$symbol, gene.list$SYMBOL),"ENTREZID"]#Add the column FeatureName.entrez corresponding to symbol and entrezid on the basis of the input file head(input) #input$change=ifelse(input$avg_log2FC>0,"up","down") input.sub<-input[input$p_val_adj<0.001,] #Take out all p values less than 0.001 print(head(input. sub)) dim(input.sub) input.sub = na.omit(input[,c("cluster","FeatureName.entrez","avg_log2FC","p_val_adj")]) head(input.sub) colnames(input.sub) = c("cluster","gene", "log2FC", "PValue") print(head(input. sub)) subset_data=input.sub head(subset_data) cluster_number<-names(table(subset_data$cluster)) #Get the number of clusters in the file length(cluster_number) cluster_number str(cluster_number) names(table(subset_data$cluster))[1] subset_data[subset_data$cluster=="0",] ##Pay attention to the naming of the cluster in the file. It is best not to have special characters such as slash|, which is easy to make mistakes #Pay attention to whether the p value needs to be constrained? #Note whether a plus or minus sign is required to indicate up and down? #for (cluster_num in 0:(length(cluster_number)-1)) { #for cluster is a number ## Only look at the up-regulated genes, pay attention to modify the name! ! ! ! ! ! ! ! ! ! ! ! ! subset_data = subset(input.sub, log2FC>0.69 &PValue<0.05) head(subset_data) cluster_number<-names(table(subset_data$cluster)) #Get the number of clusters in the file cluster_number for (cluster_num in cluster_number) { ##For cluster is a character ego=enrichGO(gene=subset_data[subset_data$cluster==cluster_num,]$gene, #!!!!!! All must be changed!!!!!!!!!!!!!!!!!!!!! to change keyType = "ENTREZID", OrgDb = org.Mm.eg.db, ont = "ALL", readable = TRUE, pvalueCutoff = 1, qvalueCutoff = 1, pAdjustMethod = "BH") res = ego[ego$p.adjust<0.05,] print(dim(res)) print(table(res$ONTOLOGY)) if(nrow(res) != 0){#go, use a custom function to draw pictures, pay attention to remove the file attribute name of i when naming clusterProfilerOut(res, paste0(unlist(strsplit(i, ".xl"))[1],cluster_num,"_up_enrichGO")) } ego.KEGG = enrichKEGG(gene=subset_data[subset_data$cluster==cluster_num,]$gene, organism = "mmu",keyType = "kegg", pvalueCutoff = 1, pAdjustMethod = "BH", qvalueCutoff = 1) res = ego.KEGG[ego.KEGG$p.adjust<0.05,] print(dim(res)) if(nrow(res) !=0){#kegg clusterProfilerOut(res,paste0(unlist(strsplit(i, ".xl"))[1],cluster_num,"_up_enrichKEGG")) } save(ego,ego.KEGG,file = paste0(cluster_num,"_enrichments_up.Rdata")) } ## Only look at down-regulated genes, pay attention to modify the name! ! ! ! ! ! ! ! ! ! ! ! ! subset_data = subset(input.sub, log2FC<(-0.69) &PValue<0.05) head(subset_data) cluster_number<-names(table(subset_data$cluster)) #Get the number of clusters in the file cluster_number for (cluster_num in cluster_number) { ##For cluster is a character ego=enrichGO(gene=subset_data[subset_data$cluster==cluster_num,]$gene, #!!!!!! All must be changed!!!!!!!!!!!!!!!!!!!!! to change keyType = "ENTREZID", OrgDb = org.Mm.eg.db, ont = "ALL", readable = TRUE, pvalueCutoff = 1, qvalueCutoff = 1, pAdjustMethod = "BH") res = ego[ego$p.adjust<0.05,] print(dim(res)) print(table(res$ONTOLOGY)) if(nrow(res) != 0){#go, use a custom function to draw pictures, pay attention to remove the file attribute name of i when naming clusterProfilerOut(res, paste0(unlist(strsplit(i, ".xl"))[1],cluster_num,"_down_enrichGO")) } ego.KEGG = enrichKEGG(gene=subset_data[subset_data$cluster==cluster_num,]$gene, organism = "mmu",keyType = "kegg", pvalueCutoff = 1, pAdjustMethod = "BH", qvalueCutoff = 1) res = ego.KEGG[ego.KEGG$p.adjust<0.05,] print(dim(res)) if(nrow(res) !=0){#kegg clusterProfilerOut(res,paste0(unlist(strsplit(i, ".xl"))[1],cluster_num,"_down_enrichKEGG")) } } ## No distinction between up and down ################################### subset_data = subset(input.sub, abs(log2FC)>0.69 &PValue<0.05) head(subset_data) cluster_number<-names(table(subset_data$cluster)) #Get the number of clusters in the file cluster_number for (cluster_num in cluster_number) { ##For cluster is a character ego=enrichGO(gene=subset_data[subset_data$cluster==cluster_num,]$gene, #!!!!!! All must be changed!!!!!!!!!!!!!!!!!!!!! to change keyType = "ENTREZID", OrgDb = org.Mm.eg.db, ont = "ALL", readable = TRUE, pvalueCutoff = 1, qvalueCutoff = 1, pAdjustMethod = "BH") res = ego[ego$p.adjust<0.05,] print(dim(res)) print(table(res$ONTOLOGY)) if(nrow(res) != 0){#go, use a custom function to draw pictures, pay attention to remove the file attribute name of i when naming clusterProfilerOut(res, paste0(unlist(strsplit(i, ".xl"))[1],cluster_num,"_both_enrichGO")) } ego.KEGG = enrichKEGG(gene=subset_data[subset_data$cluster==cluster_num,]$gene, organism = "mmu",keyType = "kegg", pvalueCutoff = 1, pAdjustMethod = "BH", qvalueCutoff = 1) res = ego.KEGG[ego.KEGG$p.adjust<0.05,] print(dim(res)) if(nrow(res) !=0){#kegg clusterProfilerOut(res,paste0(unlist(strsplit(i, ".xl"))[1],cluster_num,"_both_enrichKEGG")) } } } dev.off() path="D:/ARDS_scripts_1012/ARDS/Step2_harmony_f200_R3/ws/Fibroblast/0.5/sepsis_Fibroblast_r0.5.rds" load(path) table(subset_data$stim,subset_data$seurat_clusters) DimPlot(subset_data,group.by = "stim") DimPlot(subset_data, split.by = "stim", group.by = "seurat_clusters", label = TRUE)