enrichments enrichment analysis findallmarkers results in enrichment analysis mice

.libPaths()

#The data is too large to go to the Linux server for processing
.libPaths(c( "/home/data/t040413/R/x86_64-pc-linux-gnu-library/4.2","/home/data/refdir/Rlib/", "/usr/local /lib/R/library"))


getwd()

dir.create("./enrichments")
setwd("/home/data/t040413/silicosis/fibroblast_myofibroblast/enrichments/")

####### Difference Analysis
###############################33
library(clusterProfiler)
library(Seurat)
library(SeuratWrappers)
library(ggplot2)
library(dplyr)
library(stringr)
library(openxlsx)
library(org.Mm.eg.db)
library("reshape")
library("RColorBrewer")

#
#remotes::install_github('satijalab/seurat-wrappers')
#
#usethis::create_github_token()
#usethis::edit_r_environ()

getwd()
file="/home/data/t040413/silicosis/fibroblast_myofibroblast/enrichments/0.69"
dir. create(file)
setwd(file)
getwd()

?loadWorkbook
#library(xlsx) #Reading files is much slower than openxlsx! ! ! ! !
## Double check the file path! ! ! ! ! ! ! ! ! ! ! !
markers = read.table("/home/data/t040413/silicosis/fibroblast_myofibroblast/markers_after_reclustername.txt",header = T)
head(markers)
#View(markers)
names(table(markers$cluster))
#markers$"Myofibroblast/vascular smooth muscle cell"<-"myo-fib"

#markers$cluster[markers$cluster=="Myofibroblast/vascular smooth muscle cell"]="fib-myo" #Change the illegal name of the 17th cluster to a normal name, because when naming the exported xlsx used custer's name
names(table(markers$cluster))

k <- keys(org.Mm.eg.db, keytype = "ENTREZID")
gene.list <- select(org.Mm.eg.db, keys = k, columns = c("SYMBOL", "ENSEMBL"), keytype = "ENTREZID")## 73306 3
head(gene.list)

#Self-built function Enter the go enrichment entry and the name of the file to return the corresponding pdf file and xlsx file
clusterProfilerOut = function(enrichRes,outfile){
  enrichRes$function.gene.num = unlist(strsplit(enrichRes[,"BgRatio"],"/"))[seq(1,length(unlist(strsplit(enrichRes[,"BgRatio"], "/"))),2)]
  enrichRes$GeneRatio = enrichRes$Count / as.numeric(enrichRes$function.gene.num) #calculate generatio
  write.xlsx(enrichRes,paste0(outfile, ".xlsx"),col.names=T,row.names=F)
  
  if (dim(enrichRes)[1]>=10) {enrichRes.use = enrichRes[1:10,]} else{enrichRes.use = enrichRes[1:dim(enrichRes)[1],]}#Displayed rich Set entries no more than 10
  enrichRes.use = enrichRes.use[order(enrichRes.use$GeneRatio, decreasing=T),] #Display in descending order of generatio
  enrichRes.use$Description = factor(enrichRes.use$Description,levels=rev(enrichRes.use$Description))
  gap = (max(enrichRes.use$GeneRatio) - min(enrichRes.use$GeneRatio))/5
  
  # make and output pdf
  pdf(paste0(outfile, ".pdf"))
  p = ggplot(enrichRes.use,mapping = aes(x=GeneRatio,y=Description)) +
    geom_point(aes(size=Count,color=p.adjust)) + theme_bw() +
    scale_color_gradient(low = "blue", high = "red") +
    scale_x_continuous(expand = c(gap, gap)) + ylab(NULL) +
    scale_y_discrete(labels=function(x) str_wrap(x, width=60))
  print(p)
  dev.off()
}



getwd() #At this time, manually copy the all_cluster_markers.xlsx file to the working directory
path=getwd()


#dir(path)

files=dir(path)
files #Get all file names in the working directory
getwd()


'''
path="G:/silicosis/sicosis/silicosis_ST/yll/0214/harmony_cluster/d_all/r=0.6_11clusters_0.69fc"
dir. create(path)
setwd(path)
'''
getwd()
head(markers)
#library(xlsx)

for (i in files) {
  #i="all_cluster_markers.xlsx"
  input = read.table(paste(path, i, sep = "/"),header = T)#Read xlsx file Note that sometimes you need to add row.names and sometimes you don’t need to add
  head(input)
  # input$cluster[input$cluster=="Myofibroblast/vascular smooth muscle cell"]="fib-myo" #Change the illegal name of the 17th cluster to a normal name, because when naming the exported xlsx used custer's name
  
  input$symbol <- input$gene
  head(input)
  input$FeatureName.entrez = gene.list[match(input$symbol, gene.list$SYMBOL),"ENTREZID"]#Add the column FeatureName.entrez corresponding to symbol and entrezid on the basis of the input file
  head(input)
  #input$change=ifelse(input$avg_log2FC>0,"up","down")
  input.sub<-input[input$p_val_adj<0.001,] #Take out all p values less than 0.001
  print(head(input. sub))
  dim(input.sub)
  
  input.sub = na.omit(input[,c("cluster","FeatureName.entrez","avg_log2FC","p_val_adj")])
  head(input.sub)
  colnames(input.sub) = c("cluster","gene", "log2FC", "PValue")
  print(head(input. sub))
  subset_data=input.sub
  head(subset_data)
  
  
  cluster_number<-names(table(subset_data$cluster)) #Get the number of clusters in the file
  length(cluster_number)
  cluster_number
  str(cluster_number)
  
  names(table(subset_data$cluster))[1]
  subset_data[subset_data$cluster=="0",]
  
  
  ##Pay attention to the naming of the cluster in the file. It is best not to have special characters such as slash|, which is easy to make mistakes
  #Pay attention to whether the p value needs to be constrained?
  #Note whether a plus or minus sign is required to indicate up and down?
  #for (cluster_num in 0:(length(cluster_number)-1)) { #for cluster is a number
  
  ## Only look at the up-regulated genes, pay attention to modify the name! ! ! ! ! ! ! ! ! ! ! ! !
  subset_data = subset(input.sub, log2FC>0.69 &PValue<0.05)
  head(subset_data)
  cluster_number<-names(table(subset_data$cluster)) #Get the number of clusters in the file
  cluster_number
  for (cluster_num in cluster_number) { ##For cluster is a character
    
    
    ego=enrichGO(gene=subset_data[subset_data$cluster==cluster_num,]$gene, #!!!!!! All must be changed!!!!!!!!!!!!!!!!!!!!! to change
                 keyType = "ENTREZID",
                 OrgDb = org.Mm.eg.db,
                 ont = "ALL", readable = TRUE,
                 pvalueCutoff = 1, qvalueCutoff = 1, pAdjustMethod = "BH")
    res = ego[ego$p.adjust<0.05,]
    print(dim(res))
    print(table(res$ONTOLOGY))
    
    if(nrow(res) != 0){#go, use a custom function to draw pictures, pay attention to remove the file attribute name of i when naming
      clusterProfilerOut(res, paste0(unlist(strsplit(i, ".xl"))[1],cluster_num,"_up_enrichGO"))
    }
    
    ego.KEGG = enrichKEGG(gene=subset_data[subset_data$cluster==cluster_num,]$gene,
                          organism = "mmu",keyType = "kegg", pvalueCutoff = 1, pAdjustMethod = "BH", qvalueCutoff = 1)
    res = ego.KEGG[ego.KEGG$p.adjust<0.05,]
    print(dim(res))
    
    if(nrow(res) !=0){#kegg
      clusterProfilerOut(res,paste0(unlist(strsplit(i, ".xl"))[1],cluster_num,"_up_enrichKEGG"))
    }
    
    
    
    save(ego,ego.KEGG,file = paste0(cluster_num,"_enrichments_up.Rdata"))
    
  }
  
  
  
  
  
  
  ## Only look at down-regulated genes, pay attention to modify the name! ! ! ! ! ! ! ! ! ! ! ! !
  subset_data = subset(input.sub, log2FC<(-0.69) &PValue<0.05)
  head(subset_data)
  cluster_number<-names(table(subset_data$cluster)) #Get the number of clusters in the file
  cluster_number
  for (cluster_num in cluster_number) { ##For cluster is a character
    
    
    ego=enrichGO(gene=subset_data[subset_data$cluster==cluster_num,]$gene, #!!!!!! All must be changed!!!!!!!!!!!!!!!!!!!!! to change
                 keyType = "ENTREZID",
                 OrgDb = org.Mm.eg.db,
                 ont = "ALL", readable = TRUE,
                 pvalueCutoff = 1, qvalueCutoff = 1, pAdjustMethod = "BH")
    res = ego[ego$p.adjust<0.05,]
    print(dim(res))
    print(table(res$ONTOLOGY))
    
    if(nrow(res) != 0){#go, use a custom function to draw pictures, pay attention to remove the file attribute name of i when naming
      clusterProfilerOut(res, paste0(unlist(strsplit(i, ".xl"))[1],cluster_num,"_down_enrichGO"))
    }
    
    ego.KEGG = enrichKEGG(gene=subset_data[subset_data$cluster==cluster_num,]$gene,
                          organism = "mmu",keyType = "kegg", pvalueCutoff = 1, pAdjustMethod = "BH", qvalueCutoff = 1)
    res = ego.KEGG[ego.KEGG$p.adjust<0.05,]
    print(dim(res))
    
    if(nrow(res) !=0){#kegg
      clusterProfilerOut(res,paste0(unlist(strsplit(i, ".xl"))[1],cluster_num,"_down_enrichKEGG"))
    }
    
  }
  
  
  
  
  
  ## No distinction between up and down ###################################
  subset_data = subset(input.sub, abs(log2FC)>0.69 &PValue<0.05)
  head(subset_data)
  cluster_number<-names(table(subset_data$cluster)) #Get the number of clusters in the file
  cluster_number
  for (cluster_num in cluster_number) { ##For cluster is a character
    
    
    ego=enrichGO(gene=subset_data[subset_data$cluster==cluster_num,]$gene, #!!!!!! All must be changed!!!!!!!!!!!!!!!!!!!!! to change
                 keyType = "ENTREZID",
                 OrgDb = org.Mm.eg.db,
                 ont = "ALL", readable = TRUE,
                 pvalueCutoff = 1, qvalueCutoff = 1, pAdjustMethod = "BH")
    res = ego[ego$p.adjust<0.05,]
    print(dim(res))
    print(table(res$ONTOLOGY))
    
    if(nrow(res) != 0){#go, use a custom function to draw pictures, pay attention to remove the file attribute name of i when naming
      clusterProfilerOut(res, paste0(unlist(strsplit(i, ".xl"))[1],cluster_num,"_both_enrichGO"))
    }
    
    ego.KEGG = enrichKEGG(gene=subset_data[subset_data$cluster==cluster_num,]$gene,
                          organism = "mmu",keyType = "kegg", pvalueCutoff = 1, pAdjustMethod = "BH", qvalueCutoff = 1)
    res = ego.KEGG[ego.KEGG$p.adjust<0.05,]
    print(dim(res))
    
    if(nrow(res) !=0){#kegg
      clusterProfilerOut(res,paste0(unlist(strsplit(i, ".xl"))[1],cluster_num,"_both_enrichKEGG"))
    }
    
  }
  
}


dev.off()







path="D:/ARDS_scripts_1012/ARDS/Step2_harmony_f200_R3/ws/Fibroblast/0.5/sepsis_Fibroblast_r0.5.rds"
load(path)
table(subset_data$stim,subset_data$seurat_clusters)

DimPlot(subset_data,group.by = "stim")
DimPlot(subset_data, split.by = "stim",
        group.by = "seurat_clusters",
        label = TRUE)