Article Directory
- Reproduce picture
- Set the working path and load related R packages
- Read the data set
- data visualization
- Calculate mean and standard deviation
- Visualization process
Following figures published in “Nature” is a good way to learn plotting. Today I reproduce a dual-axis graph from a Nature article: a grouped bar chart (with error bars) on the left axis and a line-and-point chart on the right axis.
Reproduce image
Panel A of the figure is what we are going to reproduce today. It is a dual-axis chart that combines a grouped bar chart with a line-and-point chart.
Set the working path and load related R packages
# Session setup ----
# NOTE(review): rm(list = ls()) and setwd() are kept so the script behaves as
# the article describes, but both are discouraged in shared scripts; the path
# below is specific to the author's machine and must be adapted.
rm(list = ls()) # Clear the current environment variables
setwd("C:/Users/Zz/Desktop/Official Account SES") # Set the working path

# Load R packages
library(ggplot2)
library(tidyverse)
Read data set
# Read the raw measurements; cData.csv is looked up in the working directory.
cData <- read_csv("cData.csv")

# Preview the first rows.
head(cData)
#   Weeks Type               lfValue rgValue
#   <dbl> <chr>                <dbl>   <dbl>
# 1    20 By week of testing    2500    1.3
# 2    20 By week of testing    2550    1.5
# 3    20 By week of testing    2450    1.45
# 4    21 By week of testing    2750    1.2
# 5    21 By week of testing    2780    1.25
# 6    21 By week of testing    2680    1.18
Data visualization
# Species composition stacked area chart
# NOTE(review): this snippet reads `top10` (columns Depth, Abundance, Phylum,
# Treat), which is not created anywhere in the visible part of this article --
# confirm where it comes from before running.
library(ggplot2)
library(ggalluvial)

# One colour per phylum, shared by the fill and colour scales.
# NOTE(review): "#2BB077" appears twice in this palette -- confirm whether a
# distinct colour was intended for one of the two entries.
phylum_palette <- c(
  "#EB7369", "#CF8B0B", "#9D9F20", "#2BB077", "#2BB077",
  "#1BB3B7", "#29A4DE", "#8989C1", "#B174AD", "#DE66A1"
)

ggplot(
  data = top10,
  aes(
    x = Depth,
    y = Abundance,
    fill = reorder(Phylum, -Abundance),
    color = reorder(Phylum, -Abundance),
    stratum = reorder(Phylum, -Abundance),
    alluvium = reorder(Phylum, -Abundance)
  )
) +
  geom_alluvium(
    aes(fill = reorder(Phylum, -Abundance)),
    alpha = 0.7,
    decreasing = FALSE
  ) +
  # NOTE(review): the stratum fill is ordered by +Abundance while every other
  # mapping uses -Abundance; kept as in the original -- confirm it is intended.
  geom_stratum(
    aes(fill = reorder(Phylum, Abundance)),
    width = 0.3,
    size = 0.1,
    color = "black"
  ) +
  scale_y_continuous(expand = c(0, 0)) +
  theme_bw() +
  facet_grid(. ~ Treat, scales = "fixed") +
  scale_fill_manual(values = phylum_palette, name = "Phylum") +
  scale_color_manual(values = phylum_palette) +
  guides(color = "none") +
  # Theme line/rect widths use `linewidth`; `size` is deprecated for these
  # elements since ggplot2 3.4.0.
  theme(
    panel.grid = element_blank(),
    panel.spacing.x = unit(0, units = "cm"),
    strip.background = element_rect(
      color = "white", fill = "white", linetype = "solid", linewidth = 1
    ),
    strip.placement = "outside",
    axis.line.y.left = element_line(color = "black", linewidth = 0.7),
    axis.line.x.bottom = element_line(color = "black", linewidth = 0.7),
    strip.text.x = element_text(size = 14, face = "bold"),
    axis.text = element_text(face = "bold", size = 12, color = "black"),
    axis.title = element_text(face = "bold", size = 14, color = "black"),
    legend.title = element_text(face = "bold", size = 12, color = "black"),
    legend.text = element_text(face = "bold", size = 12, color = "black"),
    axis.ticks.x = element_line(linewidth = 1),
    axis.ticks.y = element_line(linewidth = 1)
  ) +
  labs(x = "Depth", y = "Relative Abundance of Phylum (%)")
The data set contains four columns: two numeric measures, lfValue (plotted on the left axis) and rgValue (plotted on the right axis), and two grouping variables, Weeks and Type.
Before visualizing, we need to first think about the elements in the picture and what they are composed of.
- Calculate the mean and standard deviation for each group or treatment;
Calculate mean and standard deviation
# Mean and standard deviation of both measures for every Weeks x Type group.
# `.groups = "drop"` returns an ungrouped tibble, so later mutate() calls are
# not silently applied per week and summarize() emits no regrouping message.
cData_summary <- cData %>%
  group_by(Weeks, Type) %>%
  summarize(
    avg_lfValue = mean(lfValue),
    sd_lfValue = sd(lfValue),
    avg_rgValue = mean(rgValue),
    sd_rgValue = sd(rgValue),
    .groups = "drop"
  )

cData_summary
#    Weeks Type               avg_lfValue sd_lfValue avg_rgValue sd_rgValue
#    <dbl> <chr>                    <dbl>      <dbl>       <dbl>      <dbl>
#  1    20 By week of onset         2623.       25.2        1.98     0.0764
#  2    20 By week of testing       2500        50          1.42     0.104
#  3    21 By week of onset         3543.       40.4        1.74     0.0361
#  4    21 By week of testing       2737.       51.3        1.21     0.0361
#  5    22 By week of onset         2770        26.5        1.28     0.0300
#  6    22 By week of testing       2160        60          1.10     0.0839
#  7    23 By week of onset         2143.       40.4        1.31     0.0208
#  8    23 By week of testing       1777.       75.1        1.02     0.0153
#  9    24 By week of onset         1823.       25.2        1.15     0.0300
# 10    24 By week of testing       1667.       61.1        1.07     0.0265
# 11    25 By week of onset         1690        36.1        1.23     0.0208
# 12    25 By week of testing       1610        36.1        1.2      0.0300
# 13    26 By week of onset         1607.       30.6        1.18     0.0252
# 14    26 By week of testing       1673.       30.6        1.16     0.0361
Visualization process
# Dual-axis figure: dodged bars with SD error bars on the left axis, and a
# line + point series for the second measure on the right axis.

# ggplot2 has a single y scale, so the right-axis values are multiplied by
# this factor to sit on the left axis' range; the secondary axis divides by
# the same factor to label them in their original units.
rate_scale <- 1950

# Fix the legend / dodge order of the two series once, for every layer.
plot_data <- cData_summary %>%
  mutate(
    Type = factor(Type, levels = c("By week of testing", "By week of onset"))
  )

# Same dodge width for all layers so bars, error bars, lines and points align.
dodge <- position_dodge(0.75)

ggplot(plot_data, aes(x = Weeks)) +
  geom_col(
    aes(y = avg_lfValue, fill = Type),
    alpha = 0.5,
    position = dodge,
    width = 0.75
  ) +
  geom_errorbar(
    aes(
      y = avg_lfValue,
      ymin = avg_lfValue - sd_lfValue,
      ymax = avg_lfValue + sd_lfValue,
      group = Type
    ),
    color = "black",
    position = dodge,
    width = 0.2
  ) +
  geom_line(
    aes(y = avg_rgValue * rate_scale, group = Type, color = Type),
    position = dodge,
    linewidth = 0.8
  ) +
  geom_point(
    aes(y = avg_rgValue * rate_scale, color = Type),
    position = dodge,
    size = 2.5
  ) +
  scale_x_continuous(breaks = seq(20, 26, 1)) +
  scale_y_continuous(
    # "\n" splits the long title over two lines (the original string held a
    # literal backslash and the typo "sympotomatic").
    name = "Number of laboratory-confirmed\nsymptomatic cases",
    sec.axis = sec_axis(
      ~ . / rate_scale,
      name = "Test positivity rate (%)",
      breaks = seq(0, 2, 1)
    ),
    limits = c(0, 4000),
    breaks = seq(0, 4000, 500),
    expand = c(0, 0)
  ) +
  scale_color_manual(values = c("#FE8F3C", "#1E899A")) +
  scale_fill_manual(values = c("#FE8F3C", "#1E899A")) +
  theme_bw() +
  theme(
    # NOTE(review): a numeric legend.position is deprecated from ggplot2 3.5.0
    # in favour of legend.position.inside; kept so older versions still work.
    legend.position = c(0.9, 0.9),
    legend.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.text.x.bottom = element_text(color = "black", size = 12),
    axis.text.y.left = element_text(color = "black", size = 12),
    axis.text.y.right = element_text(color = "#44909A", size = 12),
    axis.title.y.right = element_text(color = "#44909A", size = 12, angle = 90),
    axis.line.y.right = element_line(color = "#44909A"),
    axis.ticks.y.right = element_line(color = "#44909A"),
    axis.title = element_text(color = "black", size = 12)
  ) +
  # Empty titles hide the legend headers for both the colour and fill guides.
  labs(x = "Week", color = "", fill = "")
The reproduction matches the original figure quite closely. For details, refer to the code in this article. If you have any questions, feel free to leave a comment for discussion~