How to use the forestplot package to draw a forest plot showing the magnitude of multiple effects

[Speaking Genes in Brief] The forest plot can display the effect size and confidence interval of multiple research results.

About last week, a friend asked me for the code to draw a forest map, and threw a script in the past. Later, I realized that this ancestral code seems to have not been updated for a long time. In today’s article, let’s learn about forestplot, a package that specializes in drawing forests.

Text

library(forestplot)
library(dplyr)

Text table

forestplot can use text tables to draw graphs:

# Cochrane data from the 'rmeta'-package
base_data <- tibble::tibble(mean = c(0.578, 0.165, 0.246, 0.700, 0.348, 0.139, 1.017),
                            lower = c(0.372, 0.018, 0.072, 0.333, 0.083, 0.016, 0.365),
                            upper = c(0.898, 1.517, 0.833, 1.474, 1.455, 1.209, 2.831),
                            study = c("Auckland", "Block", "Doran", "Gamsu",
                                      "Morrison", "Papageorgiou", "Tauesch"),
                            deaths_steroid = c("36", "1", "4", "14", "3", "1", "8"),
                            deaths_placebo = c("60", "5", "11", "20", "7", "7", "10"),
                            OR = c("0.58", "0.16", "0.25", "0.70", "0.35", "0.14", "1.02"))

base_data |>
  forestplot(labeltext = c(study, deaths_steroid, deaths_placebo, OR),
             clip = c(0.1, 2.5),
             xlog = TRUE) |>
  fp_set_style(box = "royalblue",
               line = "darkblue",
               summary = "royalblue") |>
  fp_add_header(study = c("", "Study"),
                deaths_steroid = c("Deaths", "(steroid)"),
                deaths_placebo = c("Deaths", "(placebo)"),
                OR = c("", "OR")) |>
  fp_append_row(mean = 0.531,
                lower = 0.386,
                upper = 0.731,
                study = "Summary",
                OR = "0.53",
                is.summary = TRUE) |>
  fp_set_zebra_style("#EFEFEF")

7962e58e7dc4fb22663703671495a4dc.png

Summary line

Add lines for the summary rows, and the resulting forest plot resembles a three-line table:

base_data |>
  forestplot(labeltext = c(study, deaths_steroid, deaths_placebo, OR),
             clip = c(0.1, 2.5),
             xlog = TRUE) |>
  fp_add_lines() |>
  fp_set_style(box = "royalblue",
               line = "darkblue",
               summary = "royalblue",
               align = "lrrr",
               hrz_lines = "#999999") |>
  fp_add_header(study = c("", "Study"),
                deaths_steroid = c("Deaths", "(steroid)") |>
                  fp_align_center(),
                deaths_placebo = c("Deaths", "(placebo)") |>
                  fp_align_center(),
                OR = c("", fp_align_center("OR"))) |>
  fp_append_row(mean = 0.531,
                lower = 0.386,
                upper = 0.731,
                study = "Summary",
                OR = "0.53",
                is.summary = TRUE)

0aeed28d0ab930bcd28c01e1b84a8b1d.png

Of course, the summary line can also be in another form:

base_data |>
  forestplot(labeltext = c(study, deaths_steroid, deaths_placebo, OR),
             clip = c(0.1, 2.5),
             xlog = TRUE) |>
  fp_add_lines(h_3 = gpar(lty = 2),
               h_11 = gpar(lwd = 1, columns = 1:4, col = "#000044")) |>
  fp_set_style(box = "royalblue",
               line = "darkblue",
               summary = "royalblue",
               align = "lrrr",
               hrz_lines = "#999999") |>
  fp_add_header(study = c("", "Study"),
                deaths_steroid = c("Deaths", "(steroid)") |>
                  fp_align_center(),
                deaths_placebo = c("Deaths", "(placebo)") |>
                  fp_align_center(),
                OR = c("", fp_align_center("OR"))) |>
  fp_append_row(mean = 0.531,
                lower = 0.386,
                upper = 0.731,
                study = "Summary",
                OR = "0.53",
                is.summary = TRUE)

7e5d630c9bb39a5858306e69c6bb905d.png

Add vertices to whiskers

base_data |>
  forestplot(labeltext = c(study, deaths_steroid, deaths_placebo, OR),
             clip = c(0.1, 2.5),
             vertices = TRUE,
             xlog = TRUE) |>
  fp_add_lines(h_3 = gpar(lty = 2),
               h_11 = gpar(lwd = 1, columns = 1:4, col = "#000044")) |>
  fp_set_style(box = "royalblue",
               line = "darkblue",
               summary = "royalblue",
               align = "lrrr",
               hrz_lines = "#999999") |>
  fp_add_header(study = c("", "Study"),
                deaths_steroid = c("Deaths", "(steroid)") |>
                  fp_align_center(),
                deaths_placebo = c("Deaths", "(placebo)") |>
                  fp_align_center(),
                OR = c("", fp_align_center("OR"))) |>
  fp_append_row(mean = 0.531,
                lower = 0.386,
                upper = 0.731,
                study = "Summary",
                OR = "0.53",
                is.summary = TRUE)

b49d3e6041d51a22de2946d236dbd85f.png

Positioning graphic elements

base_data |>
  forestplot(labeltext = c(study, deaths_steroid, deaths_placebo, OR),
             clip = c(0.1, 2.5),
             vertices = TRUE,
             xlog = TRUE) |>
  fp_add_lines(h_3 = gpar(lty = 2),
               h_11 = gpar(lwd = 1, columns = 1:4, col = "#000044")) |>
  fp_set_style(box = "royalblue",
               line = "darkblue",
               summary = "royalblue",
               hrz_lines = "#999999") |>
  fp_add_header(study = c("", "Study"),
                deaths_steroid = c("Deaths", "(steroid)"),
                deaths_placebo = c("Deaths", "(placebo)"),
                OR = c("", "OR")) |>
  fp_append_row(mean = 0.531,
                lower = 0.386,
                upper = 0.731,
                study = "Summary",
                OR = "0.53",
                is.summary = TRUE) |>
  fp_decorate_graph(box = gpar(lty = 2, col = "lightgray"),
                    graph. pos = 4) |>
  fp_set_zebra_style("#f9f9f9")

46e4824a8799a7252e4c9e2a2e2c598b.png

Using expressions

Sometimes it is convenient to use non-ascii letters if we are giving a regression output. In this section we will use my research to compare health-related quality of life at 1 year after total hip arthroplasty in Sweden and Denmark:

data(dfHRQoL)

dfHRQoL |>
  filter(group == "Sweden") |>
  mutate(est = sprintf("%.2f", mean), .after = labeltext) |>
  forestplot(labeltext = c(labeltext, est),
             xlab = "EQ-5D index") |>
  fp_add_header(est = expression(beta)) |>
  fp_set_style(box = "royalblue",
               line = "darkblue",
               summary = "royalblue")

d78812f34cc3d9a1a798ca1f962c80b3.png

Change font

Changing the font might give the table a completely different feel:

# You can set directly the font to desired value, the next three lines are just for handling MacOs on CRAN
font <- "mono"
if (grepl("Ubuntu", Sys. info()["version"])) {
  font <- "HersheyGothicEnglish"
}
dfHRQoL |>
  filter(group == "Sweden") |>
  mutate(est = sprintf("%.2f", mean), .after = labeltext) |>
  forestplot(labeltext = c(labeltext, est),
             xlab = "EQ-5D index") |>
  fp_add_header(est = "Est.") |>
  fp_set_style(box = "royalblue",
               line = "darkblue",
               summary = "royalblue",
               txt_gp = fpTxtGp(label = gpar(fontfamily = font)))

3910ec28a165701dcaba63d77fdca386.png

Confidence interval

Sometimes the confidence exceeds a certain range, and the confidence interval can be truncated by adding an arrow:

dfHRQoL |>
  filter(group == "Sweden") |>
  mutate(est = sprintf("%.2f", mean), .after = labeltext) |>
  forestplot(labeltext = c(labeltext, est),
             clip = c(-.1, Inf),
             xlab = "EQ-5D index") |>
  fp_set_style(box = "royalblue",
               line = "darkblue",
               summary = "royalblue")

62c3569e953aaf787642437f0bb4f7c5.png

Custom box size

dfHRQoL |>
  filter(group == "Sweden") |>
  mutate(est = sprintf("%.2f", mean), .after = labeltext) |>
  forestplot(labeltext = c(labeltext, est),
             boxsize = 0.2,
             clip = c(-.1, Inf),
             xlab = "EQ-5D index") |>
  fp_set_style(box = "royalblue",
               line = "darkblue",
               summary = "royalblue")

5a7db6d3d62a4e87079cccb5890c7615.png

If you want to keep the relative size, you need to provide a wrapper for the transform box’s draw function. Here’s how to do that, and how to combine multiple forestplots into one image:

fp_sweden <- dfHRQoL |>
  filter(group == "Sweden") |>
  mutate(est = sprintf("%.2f", mean), .after = labeltext) |>
  forestplot(labeltext = c(labeltext, est),
             title = "Sweden",
             clip = c(-.1, Inf),
             xlab = "EQ-5D index",
             new_page = FALSE)

fp_denmark <- dfHRQoL |>
  filter(group == "Denmark") |>
  mutate(est = sprintf("%.2f", mean), .after = labeltext) |>
  forestplot(labeltext = c(labeltext, est),
             title = "Denmark",
             clip = c(-.1, Inf),
             xlab = "EQ-5D index",
             new_page = FALSE)

library(grid)
grid. newpage()
borderWidth <- unit(4, "pt")
width <- unit(convertX(unit(1, "npc") - borderWidth, unitTo = "npc", valueOnly = TRUE)/2, "npc")
pushViewport(viewport(layout = grid.layout(nrow = 1,
                                           ncol = 3,
                                           widths = unit.c(width,
                                                           borderWidth,
                                                           width))
                      )
             )
pushViewport(viewport(layout. pos. row = 1,
                      layout. pos. col = 1))
fp_sweden |>
  fp_set_style(box = "royalblue",
               line = "darkblue",
               summary = "royalblue")
upViewport()
pushViewport(viewport(layout. pos. row = 1,
                      layout. pos. col = 2))
grid.rect(gp = gpar(fill = "#dddddd", col = "#eeeeee"))
upViewport()
pushViewport(viewport(layout. pos. row = 1,
                      layout. pos. col = 3))
fp_denmark |>
  fp_set_style(box = "royalblue",
               line = "darkblue",
               summary = "royalblue")
upViewport(2)

a08653c9a1f62589ea94631004c269e7.png

Multiple confidence bands

dfHRQoL |>
  group_by(group) |>
  forestplot(clip = c(-.1, 0.075),
             ci.vertices = TRUE,
             ci.vertices.height = 0.05,
             boxsize = .1,
             xlab = "EQ-5D index") |>
  fp_add_lines("steelblue") |>
  fp_add_header("Variable") |>
  fp_set_style(box = c("blue", "darkred") |> lapply(function(x) gpar(fill = x, col = "#555555")),
               default = gpar(vertices = TRUE))

5e9cd8e1eebe3a320615e80e79e47bc0.png

Estimated metrics

It is possible to choose between many different estimation metrics. Using the example above, we can set the Danish result to be a circle.

dfHRQoL |>
  group_by(group) |>
  forestplot(fn.ci_norm = c(fpDrawNormalCI, fpDrawCircleCI),
             boxsize = .25, # We set the box size to better visualize the type
             line.margin = .1, # We need to add this to avoid crowding
             clip = c(-.125, 0.075),
             xlab = "EQ-5D index") |>
  fp_set_style(box = c("blue", "darkred") |> lapply(function(x) gpar(fill = x, col = "#555555")),
               default = gpar(vertices = TRUE)) |>
  fp_set_zebra_style("#F5F9F9")

2af7f00612933c00d048244c0383399c.png

Choose a line style

dfHRQoL |>
  group_by(group) |>
  forestplot(fn.ci_norm = c(fpDrawNormalCI, fpDrawCircleCI),
             boxsize = .25, # We set the box size to better visualize the type
             line.margin = .1, # We need to add this to avoid crowding
             clip = c(-.125, 0.075),
             lty.ci = c(1, 2),
             xlab = "EQ-5D index") |>
  fp_set_style(box = c("blue", "darkred") |> lapply(function(x) gpar(fill = x, col = "#555555")),
               default = gpar(vertices = TRUE))

cc3ec6d5cef87747bc0fd0888649e1a5.png

Legend

Legends are added automatically when using group_by, but we can also control them directly via the legend parameter:

dfHRQoL |>
  group_by(group) |>
  forestplot(legend = c("Swedes", "Danes"),
             fn.ci_norm = c(fpDrawNormalCI, fpDrawCircleCI),
             boxsize = .25, # We set the box size to better visualize the type
             line.margin = .1, # We need to add this to avoid crowding
             clip = c(-.125, 0.075),
             xlab = "EQ-5D index") |>
  fp_set_style(box = c("blue", "darkred") |> lapply(function(x) gpar(fill = x, col = "#555555")))

2eb53cc5c89af127bbc11aa32caf16c0.png

It can be further customized by setting the legend_args argument with the fpLegend function:

dfHRQoL |>
  group_by(group) |>
  forestplot(legend = c("Swedes", "Danes"),
             legend_args = fpLegend(pos = list(x = .85, y = 0.25),
                                    gp = gpar(col = "#CCCCCC", fill = "#F9F9F9")),
             fn.ci_norm = c(fpDrawNormalCI, fpDrawCircleCI),
             boxsize = .25, # We set the box size to better visualize the type
             line.margin = .1, # We need to add this to avoid crowding
             clip = c(-.125, 0.075),
             xlab = "EQ-5D index") |>
  fp_set_style(box = c("blue", "darkred") |> lapply(function(x) gpar(fill = x, col = "#555555")))

b79ece7151f3fb49525cdec933d40c1e.png

Other

If the automation’s ticks don’t match what you want, they can be easily changed using the xticks parameter:

dfHRQoL |>
  group_by(group) |>
  forestplot(fn.ci_norm = c(fpDrawNormalCI, fpDrawCircleCI),
             boxsize = .25, # We set the box size to better visualize the type
             line.margin = .1, # We need to add this to avoid crowding
             clip = c(-.125, 0.075),
             xticks = c(-.1, -0.05, 0, .05),
             xlab = "EQ-5D index") |>
  fp_set_style(box = c("blue", "darkred") |> lapply(function(x) gpar(fill = x, col = "#555555")))

fe3ca5cc0a64971150bcd27072737075.png

Scales can be further customized by adding a “labels” attribute to the scale, here’s an example that suppresses the tick text for every other tick:

xticks <- seq(from = -.1, to = .05, by = 0.025)
xtlab <- rep(c(TRUE, FALSE), length.out = length(xticks))
attr(xticks, "labels") <- xtlab

dfHRQoL |>
  group_by(group) |>
  forestplot(fn.ci_norm = c(fpDrawNormalCI, fpDrawCircleCI),
             boxsize = .25, # We set the box size to better visualize the type
             line.margin = .1, # We need to add this to avoid crowding
             clip = c(-.125, 0.075),
             xticks = xticks,
             xlab = "EQ-5D index") |>
  fp_set_style(box = c("blue", "darkred") |> lapply(function(x) gpar(fill = x, col = "#555555")))

161c1b1809d5e7c5620f92cb68a6a2d7.png

Sometimes you have a very tall graph and you want to add guide lines so that the tick marks are easier to see:

dfHRQoL |>
  group_by(group) |>
  forestplot(fn.ci_norm = c(fpDrawNormalCI, fpDrawCircleCI),
             boxsize = .25, # We set the box size to better visualize the type
             line.margin = .1, # We need to add this to avoid crowding
             clip = c(-.125, 0.075),
             xticks = c(-.1, -0.05, 0, .05),
             zero = 0,
             xlab = "EQ-5D index") |>
  fp_set_style(box = c("blue", "darkred") |> lapply(function(x) gpar(fill = x, col = "#555555"))) |>
  fp_decorate_graph(grid = structure(c(-.1, -.05, .05),
                              gp = gpar(lty = 2, col = "#CCCCFF")))

c659bfa2001e07415ed748cc8564b79c.png

It is easy to customize which gridlines to use and what type they should be by adding a gpar object to the vector:

dfHRQoL |>
  group_by(group) |>
  forestplot(fn.ci_norm = c(fpDrawNormalCI, fpDrawCircleCI),
             boxsize = .25, # We set the box size to better visualize the type
             line.margin = .1, # We need to add this to avoid crowding
             clip = c(-.125, 0.075),
             xlab = "EQ-5D index") |>
  fp_set_style(box = c("blue", "darkred") |> lapply(function(x) gpar(fill = x, col = "#555555"))) |>
  fp_decorate_graph(grid = structure(c(-.1, -.05, .05),
                                     gp = gpar(lty = 2, col = "#CCCCFF")))

9b75df0bfdf184eb84d919157ad864d2.png

If you’re not familiar with struct calls, it’s equivalent to making a vector and then setting an attribute, for example:

grid_arg <- c(-.1, -.05, .05)
attr(grid_arg, "gp") <- gpar(lty = 2, col = "#CCCCFF")

identical(grid_arg,
          structure(c(-.1, -.05, .05),
                    gp = gpar(lty = 2, col = "#CCCCFF")))
# Returns TRUE

references:

https://cran.r-project.org/web/packages/forestplot/vignettes/forestplot.html

3a93f0e9c06811e16f319afbf8309367.png