getwd() dir.create("/home/data/t040413/silicosis/fibroblast_myofibroblast/monocle5.17") setwd("/home/data/t040413/silicosis/fibroblast_myofibroblast/monocle5.17") #The data is too large to go to the Linux server for processing .libPaths(c("/home/data/refdir/Rlib/", "/home/data/t040413/R/x86_64-pc-linux-gnu-library/4.2", "/usr/local /lib/R/library")) library(Seurat) load("/home/data/t040413/silicosis/fibroblast_myofibroblast/subset_data_fibroblast_myofibroblast.rds") DotPlot(subset_data,features = c("Inmt","Gpx3","Fn1","Acta2","Serpinf1","Pi16", "Col15a","Adamdec1","Npnt","Fbln1","Bmp4", 'Lrrc15',"Hhip","Comp", "Cxcl15","Cxcl12","Ccl19")) + RotatedAxis() #BiocManager::install("monocle",force = TRUE) #library(monocle) #devtools::load_all("/home/data/t040413/R/yll/usr/local/lib/R/site-library/monocle/") #library(monocle) DimPlot(subset_data, label = T) head([email protected]) subset_data$cell.type=subset_data$seurat_clusters Idents(subset_data)=subset_data$seurat_clusters #subset_data$cell.type #Idents(subset_data)=subset_data$Idents.subset_data. table(Idents(subset_data)) DefaultAssay(subset_data) new.metadata <- merge([email protected], data.frame(Idents(subset_data)), by = "row.names",sort = FALSE) head(new. metadata) rownames(new.metadata)<-new.metadata[,1] head([email protected]) identical(rownames(new.metadata),rownames([email protected])) [email protected] <- new.metadata table(subset_data$cell.type,Idents(subset_data)) head(subset_data) expression_matrix <- as(as.matrix(subset_data@assays$RNA@counts), 'sparseMatrix') head(expression_matrix) identical(colnames(expression_matrix),rownames(new.metadata)) devtools::load_all("/home/data/t040413/ipf/diseased_lung_covid20/monocle/") library(Seurat) head([email protected]) cell_metadata <- new('AnnotatedDataFrame', [email protected]) head([email protected]) head(cell_metadata) gene_annotation <- new('AnnotatedDataFrame',data=data.frame(gene_short_name = row.names(subset_data), row.names = row.names(subset_data))) head(gene_annotation) fData(gene_annotation) length(Idents(subset_data)) DimPlot(subset_data,group.by = "cell.type",label = T) DimPlot(subset_data, label = T) monocle_cds <- monocle::newCellDataSet(expression_matrix, phenoData = cell_metadata, featureData = gene_annotation, lowerDetectionLimit = 0.5, expressionFamily = negbinomial. size()) #################################################### #################################### ##Normalized###### cds <- monocle_cds cds <- estimateSizeFactors(cds) cds <- estimateDispersions(cds) ## Removing 110 outliers #The cell.type below is the meta information of subset_Data #library("BiocGenerics")#Parallel computing #devtools::load_all("/home/data/t040413/ipf/diseased_lung_covid20/monocle/") #https://www.jianshu.com/p/5d6fd4561bc0# diff_test_res <- differentialGeneTest(cds,fullModelFormulaStr = "~ cell.type") pData(cds) deg=subset(diff_test_res,qval<0.01) deg=deg[order(deg$qval,decreasing = T),] head(deg) ordergene=rownames(deg) pdf("train.ordergenes.pdf") plot_ordering_genes(cds) dev.off() length(ordergene) ### inference the pseudotrajectory ############################################## ################################################## # step1: select genes for ordering setOrderingFilter() # ordering_genes <- row.names (subset(diff_test_res, qval < 0.01)) length(ordering_genes)#6354 cds <- setOrderingFilter(cds, ordering_genes) # step2: dimension reduction=> reduceDimension() DDRTree # cds <- reduceDimension(cds, max_components = 2, method = 'DDRTree') #source("./order_cells.R") #unloadNamespace('monocle') #devtools::load_all("../monocle_2.26.0 (1).tar/monocle_2.26.0 (1)/monocle/") #devtools::load_all("/home/data/t040413/ipf/diseased_lung_covid20/monocle/") cds <- orderCells(cds) getwd() save(diff_test_res, cds, subset_data,file = "/home/data/t040413/silicosis/fibroblast_myofibroblast/monocle5.17/cds_noyet_root_diff_genes.Rdata") devtools::load_all("/home/data/t040413/ipf/diseased_lung_covid20/monocle/") load("/home/data/t040413/silicosis/fibroblast_myofibroblast/monocle5.17/cds_noyet_root_diff_genes.Rdata") pData(cds) deg=subset(diff_test_res,qval<0.01) deg=deg[order(deg$qval,decreasing = T),] head(deg) ordergene=rownames(deg) 1#Color according to cds@phenoData@data information pdf("1.pseudutime.cell.type.pre.order.pdf") plot_cell_trajectory(cds, color_by = "cell.type") dev.off() pdf("1.pseudutime.stim.pre.order.pdf") plot_cell_trajectory(cds, color_by = "stim") dev.off() pdf("1.pseudutime.State.pre.order.pdf") plot_cell_trajectory(cds, color_by = "State") dev.off() ###### split ######## pdf("2.split.pseudutime.Seurat.cell.type.pdf") plot_cell_trajectory(cds, color_by = 'cell.type') + facet_wrap(~cell.type) dev.off() pdf("2.split.pseudutime.stim.pdf") plot_cell_trajectory(cds, color_by = "stim") + facet_wrap(~stim) dev.off() #https://www.jianshu.com/p/5d6fd4561bc0 pdf("4.split.pseudutime.Seurat.State.pdf") plot_cell_trajectory(cds, color_by = 'cell.type') + facet_wrap(~State) dev.off() pdf("3.split.pseudutime.Seurat.cell.type_State.pdf") plot_cell_trajectory(cds, color_by = 'State') + facet_wrap(~cell.type) dev.off() getwd() table(pData(cds)$State,pData(cds)$cell.type) openxlsx::write.xlsx(table(pData(cds)$State,pData(cds)$cell.type), "State_cellType_summary.xlsx", colnames=T, rownames=T) table(pData(cds)$State,pData(cds)$stim) openxlsx::write.xlsx(table(pData(cds)$State,pData(cds)$stim), "State_Stim_summary.xlsx", colnames=T, rownames=T) ##we set the state 2 as root ########state 2 with most cells in Endothelial cells #Who is set here as root? ? DimPlot(subset_data, label = T) table(Idents(subset_data)) DefaultAssay(subset_data)<-"RNA" DimPlot(subset_data, label = T) dev.off() table(subset_data$cell.type) getwd() phenoData(cds) %>%varMetadata() data(phenoData(cds)) View(phenoData(cds)) cds <- orderCells(cds,root_state=1) pdf("4.pseudutime.Pseudotime.pdf") p=plot_cell_trajectory(cds, color_by = "Pseudotime") print(p) dev.off() 2# Cell density map along the time axis library(ggpubr) df=pData(cds) #take out cds@phenoData@data content head(df) ggplot(df,aes(x = Pseudotime,colour=cell.type,fill=cell.type)) + geom_density(bw=0.5, size=1, alpha=0.5) + theme_classic2() 3# Extract cells of interest for visualization # For example, if you are interested in state 7 df=pData(cds) cells_7=subset(df,State=="7") %>%rownames() #Save the quasi-time series analysis table write.csv(df,file = "./pseudotime.csv") 4# Visualize the specified gene 4.1# Visualize the ordergene top5 gene and take out its object keygenes=head(ordergene,5) cds_subset=cds[keygenes,] # Visualize genes plot_genes_in_pseudotime(cds_subset = cds_subset, color_by = "cell.type") ##Visualization: in state/celltype/pseudotime p1 <- plot_genes_in_pseudotime(cds_subset, color_by = "State") p2 <- plot_genes_in_pseudotime(cds_subset, color_by = "cell.type") p3 <- plot_genes_in_pseudotime(cds_subset, color_by = "Pseudotime") plotc <- p1|p2|p3 plotc ggsave("./Genes_pseudotimeplot.pdf", plot = plotc, width = 16, height = 8) 4.2# Specified gene s.genes=unique(c( "Fn1", "Dcn", #"Fap", "Fbln1", #"Ccl19", # inflammatory 0 'Ccl6',"Il1b","Lyz2", #ap 1 #'H2-Aa',"H2-Ab1","H2-Eb1", # inflammatory? 5 # "Pf4", #inflamm grem1 7 "Grem1", #universal fib 2 "Dpt","Serpinf1","Pi16","Cd34", #specialized fib 3 "Selenbp1","Sod3", "Inmt","Gpx3","Npnt","Scube2","Wnt2","Vcam1", #Hhip 4 "Hhip","Mgp", #myo6 "Acta2","Myh11", #ccl21 8 "Ccl21a", # acta 9 #Endothelial-like fib 10 "Egfl7" #delete 11 ) ) plot_genes_jitter(cds[s.genes,], grouping = "State") getwd() ggsave(filename = "markersgene.pdf",width = 8, height = 100, limitsize = FALSE) plot_genes_in_pseudotime(cds[s.genes,], color_by = "State") ggsave(filename = "markersgene_genes_in_pseudotime.pdf",width = 8, height = 100, limitsize = FALSE) 5# Pseudo-chronological display of single gene expression changes pData(cds)$Inmt=log2(exprs(cds)["Inmt",] + 1) plot_cell_trajectory(cds,color_by = "Inmt") plot_cell_trajectory(cds,color_by = "Inmt") + scale_color_continuous() pData(cds)$Serpinf1=log2(exprs(cds)["Serpinf1",] + 1) plot_cell_trajectory(cds,color_by = "Serpinf1") plot_cell_trajectory(cds,color_by = "Serpinf1") + scale_color_continuous() 6# Pseudotemporal differential gene heat map diff_test=diff_test_res sig_gene_names <- row.names(subset(diff_test, qval < 1e-04)) p2 = plot_pseudotime_heatmap(cds[sig_gene_names,], num_clusters=5, show_rownames=T, return_heatmap=T) plot_pseudotime_heatmap(cds[s.genes,], num_clusters=5, show_rownames=T, return_heatmap=T) ggsave("pseudotime/pseudotime_heatmap2_selectedmarkergenes.png", width = 5, height = 130, limitsize = FALSE) 6.1# Extract the genes of each cluster separately and analyze a certain subgroup extraction p2$tree_row p2$gtable clusters=cutree(p2$tree_row,k=5) clustering = data.frame(clusters) head (clustering) clustering[,1]=as.character(clustering[,1]) colnames(clustering)="Gene_Clusters" table (clustering) write.csv(clustering,file = "./time_gene_clustering_all.csv") 7#Calculate differential genes according to pseudotime diff_pseudotime_test_res <- differentialGeneTest(cds[s.genes,], fullModelFormulaStr = "~ sm.ns(Pseudotime)") plot_pseudotime_heatmap(cds[rownames(diff_pseudotime_test_res),],num_clusters=5, show_rownames=T, return_heatmap=T) diff_test_res %>% head() 8##Analysis of branch points BEAM_res=BEAM(cds[ordergene[1:400],], branch_point = 1, cores =1, progenitor_method='sequential_split') #progenitor_method = c('sequential_split', 'duplicate'), BEAM_res=BEAM(cds[s.genes,], branch_point = 1, cores =1, progenitor_method='sequential_split') #progenitor_method = c('sequential_split', 'duplicate'), BEAM_res=BEAM(cds[rownames(subset(diff_test_res,qval<0.01)),], branch_point = 1, cores =1, progenitor_method='sequential_split') #progenitor_method = c('sequential_split', 'duplicate'), head(BEAM_res) # will return the significance of each gene, and the significant genes are those genes that change with different branches # This step is very slow BEAM_res=BEAM_res[,c("gene_short_name","pval","qval")] head(BEAM_res) dim(BEAM_res) saveRDS(BEAM_res, file = "BEAM_res.rds") col_sum=colSums(exprs(cds)[rownames(BEAM_res),]) col_sum #https://zhuanlan.zhihu.com/p/378365295 plot_genes_branched_heatmap(cds[rownames(BEAM_res),col_sum!=0], branch_point = 1, #Specify the branch drawn, num_clusters = 7, show_rownames = T) #Whether to return some important information tmp1=plot_genes_branched_heatmap(cds[rownames(BEAM_res),col_sum!=0], branch_point = 1, #Specify the branch drawn, num_clusters = 7, show_rownames = T, return_heatmap = T ) #Whether to return some important information ggsave("beam_branched_heatmap.pdf",width = 7, height = 70, limitsize = FALSE) pdf("branched_heatmap.pdf",width = 5,height = 6) tmp1$ph_res dev.off() ls() 8.1# Take out the genes of the corresponding cluster, sort and save the differential genes according to the heat map results #head(df) #https://zhuanlan.zhihu.com/p/378365295 #hp.genes=p$ph_res$tree_row$labels[p$ph_res$tree_row$order] 8.2##Extract the details of drawing # Set the max. print option to a higher value options(max. print = 99999) sink("a.txt") plot_genes_branched_heatmap(cds[rownames(BEAM_res),col_sum!=0], branch_point = 1, num_clusters = 7, cores = 1, return_heatmap=TRUE) dev.off() sink() sink(NULL) # Check if the output is redirected head(deg) if (is. null(sink. number())) { # Output is not redirected, proceed with printing head(deg) } else { # Output is redirected, stop the redirection and print sink(NULL) head(deg) } a=read.table("./a.txt",sep = "\t") 8.3 #enrichment analysis gene_group=tmp1$annotation_row gene_group$gene=rownames(gene_group) library(clusterProfiler) library(org.Hs.eg.db) allcluster_go = data. frame() for (i in unique(gene_group$Cluster)) { small_gene_group=filter(gene_group,gene_group$Cluster==i) df_name=bitr(small_gene_group$gene, fromType="SYMBOL", toType=c("ENTREZID"), OrgDb="org.Hs.eg.db") go <- enrichGO(gene = unique(df_name$ENTREZID), OrgDb = org.Hs.eg.db, keyType = 'ENTREZID', ont = "BP", pAdjustMethod = "BH", pvalueCutoff = 0.05, qvalueCutoff = 0.2, readable = TRUE) go_res = go@result if (dim(go_res)[1] != 0) { go_res$cluster=i allcluster_go = rbind(allcluster_go, go_res) } } head(allcluster_go[,c("ID","Description","qvalue","cluster")]) getwd() ##############https://cloud.tencent.com/developer/article/1692225 #####################################3 #Once we have a trajectory, we can use differentialGeneTest() to find genes #that have an expression pattern that varies according to pseudotime. getwd() FeaturePlot(subset_data,features = "Inmt",split.by = "stim",label = T) pdf("/home/data/t040413/silicosis/fibroblast_myofibroblast/2_featureplot_clusters_Split_Inmt.pdf",width = 19,height = 5) p=FeaturePlot(subset_data,features = "Inmt",split.by = "stim",label = T) print(p) dev.off() plot_pseudotime_heatmap(cds[c('CX3CR1',"SPP1"),], num_clusters = 5, # cores = 4, show_rownames = T) #############################cds content fData(cds) %>%head() pData(cds) %>%head() subset(fData(cds), gene_short_name %in% c("TPM1", "MYH3", "CCNB2", "GAPDH")) colours=seq(1,12,1) p1=plot_cell_trajectory(cds,x=1,y=2,color_by = "cell.type") + theme(legend.position = "none",panel.border = element_blank()) + # remove the first legend scale_color_manual(values =colours ) #colours() p2=plot_complex_cell_trajectory(cds,x=1,y=2, color_by = "cell.type") + scale_color_manual(values = colours) + #colours() theme(legend. title = element_blank()) p1|p2 ############# Change map of genes of interest head([email protected]) plot_genes_jitter(cds[c("TPM1", "MYH3", "CCNB2", "GAPDH"),], grouping = "cell.type", color_by = "cell.type", plot_trend = TRUE) + facet_wrap( ~ feature_label, scales= "free_y") #######Quasi-time series heat map sig_gene_names=markers_for_eachcluster %>% group_by(cluster) %>% top_n(n = 5,wt = avg_log2FC) %>% ##There is a big difference between adding quotation marks and not adding them select(gene) %>% ungroup() %>% pull (gene) getwd() p1 = plot_pseudotime_heatmap(cds[sig_gene_names,], num_clusters=3, show_rownames=T, return_heatmap=T) ggsave("pseudotime/pseudotime_heatmap1.png", plot = p1, width = 5, height = 8) #################################################### ################################ #https://davetang.org/muse/2017/10/01/getting-started-monocle/