silicosis monocle2 fibroblast



getwd()
dir.create("/home/data/t040413/silicosis/fibroblast_myofibroblast/monocle5.17")
setwd("/home/data/t040413/silicosis/fibroblast_myofibroblast/monocle5.17")
#The data is too large to go to the Linux server for processing
.libPaths(c("/home/data/refdir/Rlib/", "/home/data/t040413/R/x86_64-pc-linux-gnu-library/4.2", "/usr/local /lib/R/library"))
library(Seurat)

load("/home/data/t040413/silicosis/fibroblast_myofibroblast/subset_data_fibroblast_myofibroblast.rds")



DotPlot(subset_data,features = c("Inmt","Gpx3","Fn1","Acta2","Serpinf1","Pi16",
                                 "Col15a","Adamdec1","Npnt","Fbln1","Bmp4",
                                 'Lrrc15',"Hhip","Comp",
                                 "Cxcl15","Cxcl12","Ccl19")) + RotatedAxis()


#BiocManager::install("monocle",force = TRUE)
#library(monocle)
#devtools::load_all("/home/data/t040413/R/yll/usr/local/lib/R/site-library/monocle/")

#library(monocle)

DimPlot(subset_data, label = T)
head([email protected])
subset_data$cell.type=subset_data$seurat_clusters
Idents(subset_data)=subset_data$seurat_clusters #subset_data$cell.type
#Idents(subset_data)=subset_data$Idents.subset_data.


table(Idents(subset_data))
DefaultAssay(subset_data)
new.metadata <- merge([email protected],
                      data.frame(Idents(subset_data)),
                      by = "row.names",sort = FALSE)
head(new. metadata)
rownames(new.metadata)<-new.metadata[,1]
head([email protected])
identical(rownames(new.metadata),rownames([email protected]))

[email protected] <- new.metadata
table(subset_data$cell.type,Idents(subset_data))
head(subset_data)

expression_matrix <- as(as.matrix(subset_data@assays$RNA@counts), 'sparseMatrix')
head(expression_matrix)
identical(colnames(expression_matrix),rownames(new.metadata))


devtools::load_all("/home/data/t040413/ipf/diseased_lung_covid20/monocle/")
library(Seurat)

head([email protected])
cell_metadata <- new('AnnotatedDataFrame', [email protected])
head([email protected])
head(cell_metadata)

gene_annotation <- new('AnnotatedDataFrame',data=data.frame(gene_short_name = row.names(subset_data),
                                                            row.names = row.names(subset_data)))
head(gene_annotation)
fData(gene_annotation)

length(Idents(subset_data))
DimPlot(subset_data,group.by = "cell.type",label = T)
DimPlot(subset_data, label = T)

monocle_cds <- monocle::newCellDataSet(expression_matrix,
                                       phenoData = cell_metadata,
                                       featureData = gene_annotation,
                                       lowerDetectionLimit = 0.5,
                                       expressionFamily = negbinomial. size())

#################################################### ####################################

##Normalized######
cds <- monocle_cds
cds <- estimateSizeFactors(cds)
cds <- estimateDispersions(cds) ## Removing 110 outliers #The cell.type below is the meta information of subset_Data
#library("BiocGenerics")#Parallel computing
#devtools::load_all("/home/data/t040413/ipf/diseased_lung_covid20/monocle/")
#https://www.jianshu.com/p/5d6fd4561bc0#

diff_test_res <- differentialGeneTest(cds,fullModelFormulaStr = "~ cell.type")

pData(cds)
deg=subset(diff_test_res,qval<0.01)
deg=deg[order(deg$qval,decreasing = T),]
head(deg)
ordergene=rownames(deg)

pdf("train.ordergenes.pdf")
plot_ordering_genes(cds)
dev.off()

length(ordergene)
### inference the pseudotrajectory ############################################## ##################################################
# step1: select genes for ordering setOrderingFilter() #
ordering_genes <- row.names (subset(diff_test_res, qval < 0.01))
length(ordering_genes)#6354

cds <- setOrderingFilter(cds, ordering_genes)
# step2: dimension reduction=> reduceDimension() DDRTree #
cds <- reduceDimension(cds, max_components = 2, method = 'DDRTree')

#source("./order_cells.R")
#unloadNamespace('monocle')
#devtools::load_all("../monocle_2.26.0 (1).tar/monocle_2.26.0 (1)/monocle/")
#devtools::load_all("/home/data/t040413/ipf/diseased_lung_covid20/monocle/")


cds <- orderCells(cds)
getwd()
save(diff_test_res, cds,
     subset_data,file = "/home/data/t040413/silicosis/fibroblast_myofibroblast/monocle5.17/cds_noyet_root_diff_genes.Rdata")


devtools::load_all("/home/data/t040413/ipf/diseased_lung_covid20/monocle/")

load("/home/data/t040413/silicosis/fibroblast_myofibroblast/monocle5.17/cds_noyet_root_diff_genes.Rdata")


pData(cds)
deg=subset(diff_test_res,qval<0.01)
deg=deg[order(deg$qval,decreasing = T),]
head(deg)
ordergene=rownames(deg)



1#Color according to cds@phenoData@data information
pdf("1.pseudutime.cell.type.pre.order.pdf")
plot_cell_trajectory(cds, color_by = "cell.type")
dev.off()

pdf("1.pseudutime.stim.pre.order.pdf")
plot_cell_trajectory(cds, color_by = "stim")
dev.off()

pdf("1.pseudutime.State.pre.order.pdf")
plot_cell_trajectory(cds, color_by = "State")
dev.off()
###### split ########
pdf("2.split.pseudutime.Seurat.cell.type.pdf")
plot_cell_trajectory(cds, color_by = 'cell.type') + facet_wrap(~cell.type)
dev.off()

pdf("2.split.pseudutime.stim.pdf")
plot_cell_trajectory(cds, color_by = "stim") + facet_wrap(~stim)
dev.off()

#https://www.jianshu.com/p/5d6fd4561bc0

pdf("4.split.pseudutime.Seurat.State.pdf")
plot_cell_trajectory(cds, color_by = 'cell.type') + facet_wrap(~State)
dev.off()


pdf("3.split.pseudutime.Seurat.cell.type_State.pdf")
plot_cell_trajectory(cds, color_by = 'State') + facet_wrap(~cell.type)
dev.off()

getwd()
table(pData(cds)$State,pData(cds)$cell.type)
openxlsx::write.xlsx(table(pData(cds)$State,pData(cds)$cell.type), "State_cellType_summary.xlsx", colnames=T, rownames=T)

table(pData(cds)$State,pData(cds)$stim)
openxlsx::write.xlsx(table(pData(cds)$State,pData(cds)$stim), "State_Stim_summary.xlsx", colnames=T, rownames=T)
##we set the state 2 as root ########state 2 with most cells in Endothelial cells
#Who is set here as root? ?
DimPlot(subset_data, label = T)
table(Idents(subset_data))
DefaultAssay(subset_data)<-"RNA"
DimPlot(subset_data, label = T)
dev.off()

table(subset_data$cell.type)
getwd()
phenoData(cds) %>%varMetadata()
data(phenoData(cds))

View(phenoData(cds))

cds <- orderCells(cds,root_state=1)


pdf("4.pseudutime.Pseudotime.pdf")
p=plot_cell_trajectory(cds, color_by = "Pseudotime")
print(p)
dev.off()


2# Cell density map along the time axis
library(ggpubr)
df=pData(cds) #take out cds@phenoData@data content
head(df)
ggplot(df,aes(x = Pseudotime,colour=cell.type,fill=cell.type)) +
  geom_density(bw=0.5, size=1, alpha=0.5) +
  theme_classic2()

3# Extract cells of interest for visualization
# For example, if you are interested in state 7
df=pData(cds)
cells_7=subset(df,State=="7") %>%rownames()

#Save the quasi-time series analysis table
write.csv(df,file = "./pseudotime.csv")


4# Visualize the specified gene
4.1# Visualize the ordergene top5 gene and take out its object
keygenes=head(ordergene,5)
cds_subset=cds[keygenes,]
# Visualize genes
plot_genes_in_pseudotime(cds_subset = cds_subset, color_by = "cell.type")

##Visualization: in state/celltype/pseudotime
p1 <- plot_genes_in_pseudotime(cds_subset, color_by = "State")
p2 <- plot_genes_in_pseudotime(cds_subset, color_by = "cell.type")
p3 <- plot_genes_in_pseudotime(cds_subset, color_by = "Pseudotime")
plotc <- p1|p2|p3
plotc
ggsave("./Genes_pseudotimeplot.pdf", plot = plotc, width = 16, height = 8)

4.2# Specified gene
s.genes=unique(c(
  "Fn1", "Dcn", #"Fap",
  "Fbln1", #"Ccl19",
  # inflammatory 0
  'Ccl6',"Il1b","Lyz2",
  
  #ap 1
  #'H2-Aa',"H2-Ab1","H2-Eb1",
  
  
  # inflammatory? 5
  # "Pf4",
  
  
  
  #inflamm grem1 7
  "Grem1",
  
  
  #universal fib 2
  "Dpt","Serpinf1","Pi16","Cd34",
  
  
  #specialized fib 3
  "Selenbp1","Sod3", "Inmt","Gpx3","Npnt","Scube2","Wnt2","Vcam1",
  
  #Hhip 4
  "Hhip","Mgp",
  
  
  
  #myo6
  "Acta2","Myh11",
  
  #ccl21 8
  "Ccl21a",
  
  
  # acta 9
  
  #Endothelial-like fib 10
  "Egfl7"
  
  #delete 11
  
  
)
)
plot_genes_jitter(cds[s.genes,], grouping = "State")
getwd()
ggsave(filename = "markersgene.pdf",width = 8, height = 100, limitsize = FALSE)

plot_genes_in_pseudotime(cds[s.genes,], color_by = "State")

ggsave(filename = "markersgene_genes_in_pseudotime.pdf",width = 8, height = 100, limitsize = FALSE)


5# Pseudo-chronological display of single gene expression changes
pData(cds)$Inmt=log2(exprs(cds)["Inmt",] + 1)
plot_cell_trajectory(cds,color_by = "Inmt")
plot_cell_trajectory(cds,color_by = "Inmt") + scale_color_continuous()


pData(cds)$Serpinf1=log2(exprs(cds)["Serpinf1",] + 1)
plot_cell_trajectory(cds,color_by = "Serpinf1")
plot_cell_trajectory(cds,color_by = "Serpinf1") + scale_color_continuous()



6# Pseudotemporal differential gene heat map
diff_test=diff_test_res
sig_gene_names <- row.names(subset(diff_test, qval < 1e-04))
p2 = plot_pseudotime_heatmap(cds[sig_gene_names,], num_clusters=5,
                             show_rownames=T, return_heatmap=T)
plot_pseudotime_heatmap(cds[s.genes,], num_clusters=5,
                        show_rownames=T, return_heatmap=T)

ggsave("pseudotime/pseudotime_heatmap2_selectedmarkergenes.png", width = 5, height = 130,
       limitsize = FALSE)

6.1# Extract the genes of each cluster separately and analyze a certain subgroup extraction
p2$tree_row
p2$gtable
clusters=cutree(p2$tree_row,k=5)
clustering = data.frame(clusters)
head (clustering)
clustering[,1]=as.character(clustering[,1])
colnames(clustering)="Gene_Clusters"
table (clustering)
write.csv(clustering,file = "./time_gene_clustering_all.csv")

7#Calculate differential genes according to pseudotime
diff_pseudotime_test_res <- differentialGeneTest(cds[s.genes,],
            fullModelFormulaStr = "~ sm.ns(Pseudotime)")

plot_pseudotime_heatmap(cds[rownames(diff_pseudotime_test_res),],num_clusters=5,
                        show_rownames=T, return_heatmap=T)



diff_test_res %>% head()
8##Analysis of branch points
BEAM_res=BEAM(cds[ordergene[1:400],], branch_point = 1, cores =1,
              progenitor_method='sequential_split') #progenitor_method = c('sequential_split', 'duplicate'),

BEAM_res=BEAM(cds[s.genes,], branch_point = 1, cores =1,
              progenitor_method='sequential_split') #progenitor_method = c('sequential_split', 'duplicate'),


BEAM_res=BEAM(cds[rownames(subset(diff_test_res,qval<0.01)),], branch_point = 1, cores =1,
              progenitor_method='sequential_split') #progenitor_method = c('sequential_split', 'duplicate'),
head(BEAM_res)
# will return the significance of each gene, and the significant genes are those genes that change with different branches
# This step is very slow
BEAM_res=BEAM_res[,c("gene_short_name","pval","qval")]
head(BEAM_res)
dim(BEAM_res)
saveRDS(BEAM_res, file = "BEAM_res.rds")

col_sum=colSums(exprs(cds)[rownames(BEAM_res),])
col_sum
#https://zhuanlan.zhihu.com/p/378365295
plot_genes_branched_heatmap(cds[rownames(BEAM_res),col_sum!=0],
                            branch_point = 1, #Specify the branch drawn,
                            num_clusters = 7,
                            show_rownames = T) #Whether to return some important information

tmp1=plot_genes_branched_heatmap(cds[rownames(BEAM_res),col_sum!=0],
                            branch_point = 1, #Specify the branch drawn,
                            num_clusters = 7,
                            show_rownames = T,
                            return_heatmap = T ) #Whether to return some important information

ggsave("beam_branched_heatmap.pdf",width = 7, height = 70, limitsize = FALSE)
pdf("branched_heatmap.pdf",width = 5,height = 6)
tmp1$ph_res
dev.off()

ls()


8.1# Take out the genes of the corresponding cluster, sort and save the differential genes according to the heat map results
#head(df) #https://zhuanlan.zhihu.com/p/378365295
#hp.genes=p$ph_res$tree_row$labels[p$ph_res$tree_row$order]




8.2##Extract the details of drawing
# Set the max. print option to a higher value
options(max. print = 99999)
sink("a.txt")
plot_genes_branched_heatmap(cds[rownames(BEAM_res),col_sum!=0],
                            branch_point = 1, num_clusters = 7, cores = 1, return_heatmap=TRUE)
dev.off()
sink()
sink(NULL)
# Check if the output is redirected
head(deg)
if (is. null(sink. number())) {
  # Output is not redirected, proceed with printing
  head(deg)
} else {
  # Output is redirected, stop the redirection and print
  sink(NULL)
  head(deg)
}




a=read.table("./a.txt",sep = "\t")


8.3 #enrichment analysis
gene_group=tmp1$annotation_row
gene_group$gene=rownames(gene_group)

library(clusterProfiler)
library(org.Hs.eg.db)
allcluster_go = data. frame()
for (i in unique(gene_group$Cluster)) {
  small_gene_group=filter(gene_group,gene_group$Cluster==i)
  df_name=bitr(small_gene_group$gene, fromType="SYMBOL", toType=c("ENTREZID"), OrgDb="org.Hs.eg.db")
  go <- enrichGO(gene = unique(df_name$ENTREZID),
                 OrgDb = org.Hs.eg.db,
                 keyType = 'ENTREZID',
                 ont = "BP",
                 pAdjustMethod = "BH",
                 pvalueCutoff = 0.05,
                 qvalueCutoff = 0.2,
                 readable = TRUE)
  go_res = go@result
  if (dim(go_res)[1] != 0) {
    go_res$cluster=i
    allcluster_go = rbind(allcluster_go, go_res)
  }
}
head(allcluster_go[,c("ID","Description","qvalue","cluster")])


getwd()
##############https://cloud.tencent.com/developer/article/1692225
#####################################3
#Once we have a trajectory, we can use differentialGeneTest() to find genes
#that have an expression pattern that varies according to pseudotime.



getwd()
FeaturePlot(subset_data,features = "Inmt",split.by = "stim",label = T)

pdf("/home/data/t040413/silicosis/fibroblast_myofibroblast/2_featureplot_clusters_Split_Inmt.pdf",width = 19,height = 5)
p=FeaturePlot(subset_data,features = "Inmt",split.by = "stim",label = T)
print(p)
dev.off()

plot_pseudotime_heatmap(cds[c('CX3CR1',"SPP1"),],
                        num_clusters = 5,
                        # cores = 4,
                        show_rownames = T)

#############################cds content
fData(cds) %>%head()
pData(cds) %>%head()

subset(fData(cds),
       gene_short_name %in% c("TPM1", "MYH3", "CCNB2", "GAPDH"))




colours=seq(1,12,1)
p1=plot_cell_trajectory(cds,x=1,y=2,color_by = "cell.type") +
  theme(legend.position = "none",panel.border = element_blank()) + # remove the first legend
  scale_color_manual(values =colours ) #colours()

p2=plot_complex_cell_trajectory(cds,x=1,y=2,
                                color_by = "cell.type") +
  scale_color_manual(values = colours) + #colours()
  theme(legend. title = element_blank())

p1|p2




############# Change map of genes of interest
head([email protected])

plot_genes_jitter(cds[c("TPM1", "MYH3", "CCNB2", "GAPDH"),],
                  grouping = "cell.type", color_by = "cell.type", plot_trend = TRUE) +
  facet_wrap( ~ feature_label, scales= "free_y")


#######Quasi-time series heat map
sig_gene_names=markers_for_eachcluster %>%
  group_by(cluster) %>% top_n(n = 5,wt = avg_log2FC) %>% ##There is a big difference between adding quotation marks and not adding them
  select(gene) %>% ungroup() %>%
  pull (gene)

getwd()
p1 = plot_pseudotime_heatmap(cds[sig_gene_names,], num_clusters=3,
                             show_rownames=T, return_heatmap=T)
ggsave("pseudotime/pseudotime_heatmap1.png", plot = p1, width = 5, height = 8)



#################################################### ################################
#https://davetang.org/muse/2017/10/01/getting-started-monocle/