Learning to plot from Nature papers | Dual-axis chart: grouped bar chart + line-and-point plot

Article Directory

  • Reproduce picture
  • Set the working path and load related R packages
  • Read the data set
  • data visualization
  • Calculate mean and standard deviation
  • Visualization process

In this series we learn plotting by reproducing figures from Nature papers. Today's figure is a dual-axis chart from a Nature article: a grouped bar chart read against the left axis and a line-and-point plot read against the right axis.

Reproduce image


Panel A of the figure is what we are going to reproduce today. It is a dual-axis chart that combines a grouped bar chart with a line-and-point plot.

Set the working path and load related R packages

# Remove every object from the global environment. Attached packages and
# options() are not affected by this call.
rm(list = ls())
# Set the working directory so that relative paths such as "cData.csv" resolve
# there. The path is specific to the author's machine; change it before
# running this script elsewhere.
setwd("C:/Users/Zz/Desktop/Official Account SES")
# Load R packages (library(tidyverse) also attaches ggplot2, so the explicit
# ggplot2 call is redundant but harmless).
library(ggplot2)
library(tidyverse)

Read data set

# Read the data set from cData.csv (path is relative to the working directory).
cData <- read_csv("cData.csv")
# Preview the first rows; the output below was pasted from the console.
head(cData)
#   Weeks Type               lfValue rgValue
#   <dbl> <chr>                <dbl>   <dbl>
# 1    20 By week of testing    2500    1.3
# 2    20 By week of testing    2550    1.5
# 3    20 By week of testing    2450    1.45
# 4    21 By week of testing    2750    1.2
# 5    21 By week of testing    2780    1.25
# 6    21 By week of testing    2680    1.18

Data visualization

# Species composition alluvial (stacked-area style) chart, one facet per Treat.
# NOTE(review): `top10` is not created anywhere in the visible part of this
# file, and its columns (Depth, Abundance, Phylum, Treat) are not the cData
# columns used in the rest of the article -- confirm this snippet belongs here.
library(ggplot2)
library(ggalluvial)

# One palette shared by the fill and colour scales so the two cannot drift
# apart. The original vector listed "#2BB077" twice, which gave two phyla the
# same colour; the first copy is replaced by a distinct green.
phylum_palette <- c(
  "#EB7369", "#CF8B0B", "#9D9F20", "#4DAF4A", "#2BB077",
  "#1BB3B7", "#29A4DE", "#8989C1", "#B174AD", "#DE66A1"
)

ggplot(
  data = top10,
  aes(
    x = Depth, y = Abundance,
    fill = reorder(Phylum, -Abundance),
    color = reorder(Phylum, -Abundance),
    stratum = reorder(Phylum, -Abundance),
    alluvium = reorder(Phylum, -Abundance)
  )
) +
  # Flows connecting the same phylum across depths.
  geom_alluvium(
    aes(fill = reorder(Phylum, -Abundance)),
    alpha = 0.7, decreasing = FALSE
  ) +
  # Stacked boxes at each depth, outlined in black.
  # NOTE(review): this layer reorders by +Abundance while every other mapping
  # uses -Abundance; kept as in the original -- confirm it is intentional.
  geom_stratum(
    aes(fill = reorder(Phylum, Abundance)),
    width = 0.3, size = 0.1, color = "black"
  ) +
  scale_y_continuous(expand = c(0, 0)) +
  theme_bw() +
  facet_grid(. ~ Treat, scales = "fixed") +
  scale_fill_manual(values = phylum_palette, name = "Phylum") +
  scale_color_manual(values = phylum_palette) +
  # Colour duplicates fill, so only the fill legend is shown.
  guides(color = "none") +
  # Line and rectangle elements use `linewidth` (ggplot2 >= 3.4.0), matching
  # the geom_line() call later in this file; `size` is kept for text only.
  theme(
    panel.grid = element_blank(),
    panel.spacing.x = unit(0, units = "cm"),
    strip.background = element_rect(
      color = "white", fill = "white",
      linetype = "solid", linewidth = 1
    ),
    strip.placement = "outside",
    axis.line.y.left = element_line(color = "black", linewidth = 0.7),
    axis.line.x.bottom = element_line(color = "black", linewidth = 0.7),
    strip.text.x = element_text(size = 14, face = "bold"),
    axis.text = element_text(face = "bold", size = 12, color = "black"),
    axis.title = element_text(face = "bold", size = 14, color = "black"),
    legend.title = element_text(face = "bold", size = 12, color = "black"),
    legend.text = element_text(face = "bold", size = 12, color = "black"),
    axis.ticks.x = element_line(linewidth = 1),
    # No trailing comma after the last element: an empty argument in theme()
    # fails when the arguments are collected with a plain list(...).
    axis.ticks.y = element_line(linewidth = 1)
  ) +
  labs(x = "Depth", y = "Relative Abundance of Phylum (%)")

The data set contains four variables: two numeric measurements (lfValue, plotted against the left axis, and rgValue, plotted against the right axis) and two grouping variables (Weeks and Type).

Before visualizing, we need to first think about the elements in the picture and what they are composed of.

  • Calculate the mean and standard deviation for each group or treatment;

Calculate mean and standard deviation

# Mean and standard deviation of both measurements for every Weeks x Type
# combination; these feed the bars, error bars, lines and points of the plot.
cData_summary <- cData %>%
  group_by(Weeks, Type) %>%
  summarize(
    avg_lfValue = mean(lfValue),
    sd_lfValue = sd(lfValue),
    avg_rgValue = mean(rgValue),
    sd_rgValue = sd(rgValue),
    # Return an ungrouped tibble. Without this, summarize() keeps the result
    # grouped by Weeks and prints a message about the regrouping.
    .groups = "drop"
  )
cData_summary
#    Weeks Type               avg_lfValue sd_lfValue avg_rgValue sd_rgValue
#    <dbl> <chr>                    <dbl>      <dbl>       <dbl>      <dbl>
#  1    20 By week of onset         2623.       25.2        1.98     0.0764
#  2    20 By week of testing       2500        50          1.42     0.104
#  3    21 By week of onset         3543.       40.4        1.74     0.0361
#  4    21 By week of testing       2737.       51.3        1.21     0.0361
#  5    22 By week of onset         2770        26.5        1.28     0.0300
#  6    22 By week of testing       2160        60          1.10     0.0839
#  7    23 By week of onset         2143.       40.4        1.31     0.0208
#  8    23 By week of testing       1777.       75.1        1.02     0.0153
#  9    24 By week of onset         1823.       25.2        1.15     0.0300
# 10    24 By week of testing       1667.       61.1        1.07     0.0265
# 11    25 By week of onset         1690        36.1        1.23     0.0208
# 12    25 By week of testing       1610        36.1        1.2      0.0300
# 13    26 By week of onset         1607.       30.6        1.18     0.0252
# 14    26 By week of testing       1673.       30.6        1.16     0.0361

Visualization process

# Dual-axis figure: dodged bars with SD error bars are read against the left
# axis (avg_lfValue); lines and points are read against the right axis
# (avg_rgValue).

# ggplot2 draws every layer on the primary (left) axis, so the right-axis
# series is multiplied by this factor for drawing and the secondary axis
# divides by it again to label the original values. The same constant must be
# used in both places, hence a single named value.
rate_scale <- 1950

# One dodge specification shared by all layers so that bars, error bars,
# lines and points line up horizontally.
dodge_pos <- position_dodge(0.75)

# Fix the level order of Type once: it controls the dodge order and which of
# the manual colours each level receives.
plot_data <- cData_summary %>%
  mutate(
    Type = factor(Type, levels = c("By week of testing", "By week of onset"))
  )

ggplot(plot_data, aes(x = Weeks)) +
  # Left axis: group means as semi-transparent dodged bars.
  geom_col(
    aes(y = avg_lfValue, fill = Type),
    alpha = 0.5, position = dodge_pos, width = 0.75
  ) +
  # Mean +/- one standard deviation for each bar.
  geom_errorbar(
    aes(
      y = avg_lfValue,
      ymin = avg_lfValue - sd_lfValue, ymax = avg_lfValue + sd_lfValue,
      group = Type
    ),
    color = "black", position = dodge_pos, width = 0.2
  ) +
  # Right axis: rescaled means as a line per Type ...
  geom_line(
    aes(y = avg_rgValue * rate_scale, group = Type, color = Type),
    position = dodge_pos, linewidth = 0.8
  ) +
  # ... with a point at every week.
  geom_point(
    aes(y = avg_rgValue * rate_scale, color = Type),
    position = dodge_pos, size = 2.5
  ) +
  scale_x_continuous(breaks = seq(20, 26, 1)) +
  scale_y_continuous(
    # "\n" is a real line break; the original literal contained a backslash
    # followed by a raw newline (and the typo "sympotomatic"), which showed up
    # verbatim in the axis title.
    name = "Number of laboratory-confirmed\nsymptomatic cases",
    sec.axis = sec_axis(
      ~ . / rate_scale,
      name = "Test positivity rate (%)",
      breaks = seq(0, 2, 1)
    ),
    limits = c(0, 4000),
    breaks = seq(0, 4000, 500),
    expand = c(0, 0)
  ) +
  scale_color_manual(values = c("#FE8F3C", "#1E899A")) +
  scale_fill_manual(values = c("#FE8F3C", "#1E899A")) +
  theme_bw() +
  theme(
    # Legend inside the panel, near the top-right corner. ggplot2 >= 3.5.0
    # prefers legend.position = "inside" with legend.position.inside; the
    # numeric form is kept here for compatibility with older versions.
    legend.position = c(0.9, 0.9),
    legend.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.text.x.bottom = element_text(color = "black", size = 12),
    axis.text.y.left = element_text(color = "black", size = 12),
    # The right axis is drawn in teal to tie it to the teal series.
    axis.text.y.right = element_text(color = "#44909A", size = 12),
    axis.title.y.right = element_text(color = "#44909A", size = 12, angle = 90),
    axis.line.y.right = element_line(color = "#44909A"),
    axis.ticks.y.right = element_line(color = "#44909A"),
    axis.title = element_text(color = "black", size = 12)
  ) +
  # Empty strings blank out the colour and fill legend titles.
  labs(
    x = "Week",
    color = "",
    fill = ""
  )

The reproduced figure matches the original closely. See the code above for the details, and feel free to leave a comment if you have any questions.