# Web address for this script file: http://www.dorak.info/r/metafor_hr.R 
# sample dataset for practice: http://www.dorak.info/r/data.csv

########################## GENERAL INFORMATION ON metafor PACKAGE ###########################
			      	  DO NOT RUN AS AN R SCRIPT
				    SCRIPT IS GIVEN BELOW

R Documentation: https://www.rdocumentation.org/packages/metafor
Manual: https://cran.r-project.org/web/packages/metafor/metafor.pdf 
Package website: https://www.metafor-project.org/doku.php
Reference paper: Viechtbauer W. Conducting meta-analyses in R with the metafor package. J Stat Soft 2010;36(3):1-48 (https://www.jstatsoft.org/v036/i03)
Github page: https://wviechtb.github.io/metafor/reference/metafor-package.html
Video introduction: https://www.youtube.com/watch?v=9ZpZwPAMEoo (this is the first of 15 videos; more at https://www.youtube.com/user/MichaelTBrannick/videos)
A flow diagram of meta-analysis functions: type  vignette("diagram") in R after loading metafor (OR @ http://www.metafor-project.org/doku.php/diagram)
A contrast of meta and metafor packages for meta-analyses in R (Ecol Evol 2020): https://onlinelibrary.wiley.com/doi/epdf/10.1002/ece3.6747 

# the function  rma.uni()  is for the generic inverse variance method 
  # Function page: https://wviechtb.github.io/metafor/reference/metafor-package.html#the-rma-uni-function 

rma.uni(yi, vi, slab, method, digits = 2)
 Arguments: 
 - yi and vi: the observed outcomes (yi / TE) in natural logarithm and the corresponding sampling variances (vi / vTE) also calculated from lnCI-lower and lnCI-upper
 - slab: optional vector with labels for the studies (slab = study labels)
 - method: character string specifying whether a fixed- or a random/mixed-effects mode should be fitted 
   A fixed-effects model (with or without moderators) is fitted   when using method="FE"
   Random/mixed-effects models are fitted by setting method equal to one of the following: "DL", "HE", "SJ", 
   "ML", "REML", "EB", "HS", or "GENQ" (default is "REML"). See below.
  A random-effects model can be fitted with setting method to one of the various estimators for the amount of heterogeneity:
    method="DL" = DerSimonian-Laird estimator. SEE  https://pubmed.ncbi.nlm.nih.gov/26053175
    method="HE" = Hedges estimator
    method="HS" = Hunter-Schmidt estimator
    method="SJ" = Sidik-Jonkman estimator (recommended for random effect models in Weber et asl 2021 @ https://pubmed.ncbi.nlm.nih.gov/33264488)
    method="ML" = maximum-likelihood estimator
    method="REML" = restricted maximum-likelihood estimator (default). SEE: https://pubmed.ncbi.nlm.nih.gov/30067315 
    method="EB" = empirical Bayes estimator
    method="PM" = Paule-Mandel estimator
    method="GENQ" = generalized Q-statistic estimator
 - digits: integer specifying the number of decimal places to which the printed results should be rounded (if unspecified, the default is 4). You do not need more than 2 decimals in the P value or effect size.
 - note that tau-squared is t2 (check the Greek letters @ https://www.rapidtables.com/math/symbols/greek_alphabet.html)

# the  plot()  function is for the generation of the plots
plot(rma_object, qqplot = TRUE)  # produces four plots from the results object of rma(): These are a forest plot, funnel plot,   radial plot, 
                                   and a plot of the standardized residuals unless qqplot=TRUE, then, last plot is replaced by a normal QQ plot of the standardized residuals.  
# Each plot is explained here: http://www.metafor-project.org/doku.php/plots 
# Each of those four plots (and more) can be produced separately by using the following functions: 
  - funnel plots (funnel() function)
  - forest plots (forest() and addpoly() functions)
  - Baujat plots (baujat() function)
  - L'Abbe plots (labbe() function)
  - radial (Galbraith) plots (radial() function)
  - GOSH plots (gosh() function)
  - profile likelihood plots (profile() function)
  - normal quantile-quantile (QQ) plots (qqnorm() function)

# The metafor package also includes over 40 datasets from published meta-analyses that can be used for teaching and illustration purposes.

################################ RUNNING METAFOR ######################################
                                    R SCRIPT

# Start R and load the library "metafor"
# Install if necessary with: 
install.packages("metafor") 
library(metafor)		# loads the library metafor

### For the script below and folow-up scrip in metaviz.R, the input file is "ma_df.csv" derived from "HR_Variance_Calculation.xlsx" by entering the HR and its 95% CI
### The results are assigned to an R object called "results" and the rest of the script (and metaviz.R) uses that R object for lots more analyses and graphical display 

## Import the CSV file containing the results from included studies (study name, effect size, 95% CI limits, natural logarithm (ln) transformations:
df <- read.csv("data.csv")	# reads the dataset into R and names it 'df' which should include the yi (HR values-ln transformed) and vi (variance of HR-ln transformed) columns
df				# displays df (the dataset assigned to an R object)

## Run meta-analysis:
   # Here, the results are assigned to an R object called 'results', which is an rma object.

# Heterogeneity estimation method: PM
results <- rma.uni(yi = df$yi, vi = df$vi, slab = df$StudyName, method = "PM", digits = 2)
results	  # # with PM method (recommended by Veroniki et al, 2015 [https://onlinelibrary.wiley.com/doi/10.1002/jrsm.1164] and Pertopoulou & Mavridis, 2019 [https://pubmed.ncbi.nlm.nih.gov/28815652])

  # Heterogeneity estimation method: REML (which is the default if no method is specified) 
    # results <- rma.uni(yi = df$yi, vi = df$vi, slab = df$StudyName, method = "REML", digits = 2)
    # results # with REML method (recommended method by Langan et al, 2019 [https://pubmed.ncbi.nlm.nih.gov/30067315])

  # Heterogeneity estimation method: DL
    # results <- rma.uni(yi = df$yi, vi = df$vi, slab = df$StudyName, method = "DL", digits = 2)
    # results # with DL method (recommended as the alternative method by Langan et al, 2015 [https://pubmed.ncbi.nlm.nih.gov/26053175])

  # Heterogeneity estimation method: SJ
    # results <- rma.uni(yi = df$yi, vi = df$vi, slab = df$StudyName, method = "SJ", digits = 2)
    # results # with SJ model (recommended by Weber et al, 2021 [https://pubmed.ncbi.nlm.nih.gov/33264488])

# Meta-Analysis based on the Method by Henmi and Copas (2010) [https://onlinelibrary.wiley.com/doi/epdf/10.1002/sim.4029]:
  # Function page: https://rdrr.io/cran/metafor/man/hc.html
hc(results)
  # An alternative method for random effects model (with no moderators) explained in the paper by Henmi & Copas, 2010 [https://onlinelibrary.wiley.com/doi/epdf/10.1002/sim.4029]

# Permutation test (an alternative statistical result):
  # Function page: https://rdrr.io/cran/metafor/man/permutest.html
    # A very low power test when the number of studies is small (<6). Only suitable when k = 6 or greater.
permutest(results)

# To see the full results as a data frame:
results_df  <-  data.frame(results)
results_df

# To export the results data frame as a CSV:
write.csv(results_df, "results.csv")

## Plots:
   # See Charting the landscape of graphical displays for meta-analysis and systematic reviews- a comprehensive review, taxonomy, and feature analysis (BMC Med Res Meth 2020) @ https://bmcmedresmethodol.biomedcentral.com/articles/10.1186/s12874-020-0911-9
# Forest plot:
  # Function page: https://rdrr.io/cran/metafor/man/forest.rma.html
    # The function forest.default() allows a lot of annotation (SEE: https://rdrr.io/cran/metafor/man/forest.default.html)
forest(results, addpred = TRUE, order = "obs", showweights = TRUE, header = TRUE, transf = exp)		# ordered by observed effect sizes
  #  forest(results, addpred = TRUE, order = "pre", showweights = TRUE, header = TRUE, transf = exp)	# ordered by precision (CIs) ~ weight 

# Funnel plot to assess publication bias in meta-analysis:
  # Function page: https://wviechtb.github.io/metafor/reference/funnel.html
funnell(results)
  # OR with options: 
funnel(results, refline = 0)		# centres the funnel plot to the null effect line rather than the summary effect size point estimate
funnel(results, refline2 = 0)		# superposes a second funnel plot with its centre at the effect size = null 
funnel(results, legend = TRUE)	# adds a legend for the background colour
funnel(results, yaxis = "vi")		# changes the default standard error to variance for y-axis in the plot 
funnel(results, back = "lightblue", shade = "lightgray", hlines = "pink", col = "darkblue")	# changes the colours used in the plot
  # default colours: back = "lightgray", shade = "white", hlines = "white", col = "black"
funnel(results, label = TRUE)		# displays the study labels for each study (default is label = FALSE)
funnel(results, label = "out")	# displays the study labels only for the studies falling outside the funnel 
 # funnel() with options:
funnel(results, label = "out", back = "lightblue", shade = "lightgray", hlines = "pink", col = "darkblue", refline2 = 0, legend = TRUE)

## Interpretation of funnel plot (Cochrane Handbook @ https://handbook-5-1.cochrane.org/chapter_10/10_4_1_funnel_plots.htm)
   # The name ‘funnel plot’ arises from the fact that precision of the estimated intervention effect increases as the size of the study increases (~ variance decreases). Effect estimates from small studies will therefore scatter more widely at the bottom of the graph, with the spread narrowing among larger studies. In the absence of bias, the plot should approximately resemble a symmetrical (inverted) funnel. This is illustrated in Panel A of Figure 10.4.a, in which the effect estimates in the larger studies are close to the true intervention odds ratio of 0.4.
   # If there is publication bias, for example because smaller studies without statistically significant effects (shown as open circles in Figure 10.4.a, Panel A) remain unpublished, this will lead to an asymmetrical appearance of the funnel plot with a gap in a bottom corner of the graph (Panel B). In this situation the effect calculated in a meta-analysis will tend to overestimate the intervention effect (Egger 1997a, Villar 1997). The more pronounced the asymmetry, the more likely it is that the amount of bias will be substantial.
   # X-axis is the effect size/ES (treatment effect/TE) and the y-axis may be one of several metrics (default = standard error (SE)). When the standard error/SE is used on the y axis of a funnel plot, it is conventional to reverse the axis (0 on top rather than at the bottom) so that the most precise studies are displayed at the top of the plot. 
   # This creates a funnel shape where the most precise studies (smaller SEs) with estimates of effect size near the true value are closer to the null effect (OR/RR/HR = 1) and closely scattered around it (null effect), 
   #  but those studies with large SEs (due to small sample size among other reasons) have a wider distribution of ESs. If there is no symmetry towards the bottom of the funnel, those missing studies represent the missing ones (not published due to publication bias). 
   # If no bias is present, the shape of plotted studies should be symmetrical about the true population effect size and get narrower as the sample size increases (or standard error is decreased).
   # In an ideal funnel plot, studies are all on top of the funnel (lowest SE or largest sample size; presumably also all well powered) and symmetrically distributed in the horizontal line). 
   # See examples at # metafor website: https://www.metafor-project.org/doku.php/plots:contour_enhanced_funnel_plot; 
			   # Cochrane Handbook @ https://handbook-5-1.cochrane.org/chapter_10/10_4_1_funnel_plots.htm; 
			   # Recommendations for examining and interpreting funnel plot asymmetry in meta-analyses of randomised controlled trials (BMJ 2011) @ https://www.bmj.com/content/343/bmj.d4002; 
			   # Charting the landscape of graphical displays for meta-analysis and systematic reviews- a comprehensive review, taxonomy, and feature analysis (BMC Med Res Meth 2020) @ https://bmcmedresmethodol.biomedcentral.com/articles/10.1186/s12874-020-0911-9 

# Generating multiple plots based on the R object 'results' (an rma.uni object) using a single function:
plot(results, addpred = TRUE, showweights = TRUE, header = TRUE, transf = exp, qqplot = TRUE) 
  # generates four plots (incl. forest and funnel plots with effect sizes and CI limits transformed back to linear scale from log scale) 
    # Forest plot: overall (summary) results 
    # Funnel plot: to assess publication bias 
    # Radial plot: model validation plot assessing the distribution of residual values 
    # QQ plot: model validation plot assessing the distribution of residual values
  # options used in the function: 
    # addpred = TRUE adds the prediction interval (default is FALSE)
    # showweights = TRUE adds the weights for each study (default is FALSE)
    # header = TRUE adds the header for the columns (default is FALSE)
    # transf = exp  applies the exponential transformation (anti-log) to the values (HR etc) used in the plot (otherwise they will be in log scale)

# Contour-enhanced funnel plot: 
  # Function page: https://www.metafor-project.org/doku.php/plots:contour_enhanced_funnel_plot
funnel(results, level = c(90, 95, 99), shade = c("white", "gray55", "gray75"), refline = 0, legend = TRUE)
  # Interpretation of contour-enhanced funnel plot: 
    # Reference: Peters et al. JCE 2008 (https://www.sciencedirect.com/science/article/pii/S0895435607004350)
      # This contour overlay aids the interpretation of the funnel plot. 
      # For example, if studies appear to be missing in areas of statistical non-significance, then this adds credence to the possibility that the asymmetry may be due to publication bias. 
      # Conversely, if the supposed missing studies are in areas of higher statistical significance, this would suggest the cause of the asymmetry may be more likely to be due to factors other than publication bias, such as variable study quality."

  # Contour-Enhanced Funnel Plots for Meta-Analysis (https://journals.sagepub.com/doi/10.1177/1536867X0800800206):
      # "Although asymmetry in the appearance of a funnel plot is often interpreted as being caused by publication bias, in reality the asymmetry could be due to other factors that cause systematic differences in the results of large and small studies, for example, confounding factors such as differential study quality."
      # "Funnel plots can be enhanced by adding contours of statistical significance to aid in interpreting the funnel plot. If studies appear to be missing in areas of low statistical significance, then it is possible that the asymmetry is due to publication bias. If studies appear to be missing in areas of high statistical significance, then publication bias is a less likely cause of the funnel asymmetry."

## Validation tests:
# Trimfill method for assessing publication bias (for estimating and adjusting for the number and outcomes of missing studies):
  # Function page for trimfill(): https://rdrr.io/cran/metafor/man/trimfill.html 
trimfill(results) 
  # shows a hypothetical result after trimming extreme results and adding/filling presumed missing results
    # if this result is not considerably different from the original summary result, publication bias is unlikely

# Statistical assessment of publication bias (funnel plot asymmetry) - equivalent to meta::metabias.meta() :
 # Rank Correlation Test for Funnel Plot Asymmetry (non-parametric):
ranktest(results)
 # Output: Kendall's tau (effect size) and P value; if P < 0.05, it is evidence for publication bias (funnel plot asymmetry)

 # Regression Test for Funnel Plot Asymmetry (parametric):
regtest(results)
 # Output: test for funnel plot asymmetry with Z (effect size) and P value; if P<0.05, it is evidence for publication bias (funnel plot asymmetry)
   # Quantifying Publication Bias in Meta-Analysis: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5953768 (Egger's test and other metrics) 
   # "Various statistical tests have been proposed for publication bias in the funnel plot, such as Begg’s rank test (Begg and Mazumdar, 1994) and Egger’s regression test (Egger et al., 1997) and its extensions (e.g., Macaskill et al., 2001; Rothstein et al., 2005; Harbord et al., 2006; Peters et al., 2006). 
   # "The rank test examines the correlation between the effect sizes and their corresponding sampling variances; a strong correlation implies publication bias." 
   # "Egger’s test regresses the standardized effect sizes on their precisions; in the absence of publication bias, the regression intercept is expected to be zero." 
   # "Note that Egger's regression is equivalent to a weighted regression of the effect sizes on their standard errors, weighted by the inverse of their variances; the weighted regression’s slope, instead of the intercept, is expected to be zero in the absence of publication bias (Rothstein et al., 2005)."
     # Egger et al. Bias in meta-analysis detected by a simple, graphical test. BMJ 1997 (https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2127453/pdf/9310563.pdf)

# Test of Excess Significance
  # Function page: https://wviechtb.github.io/metafor/reference/tes.html 
tes(results)
  # Output: P value for the test of excess significance (if P < 0.05, there is likely to be an excess of significant studies, i.e., publication bias) 

# Fitting selection models (selmodel) to identify the model of potential publication bias in a meta-analysis: 
  # Function page: https://wviechtb.github.io/metafor/reference/selmodel.html (see explanation in this page and the examples)
sel <- selmodel(results, type="power")		# there are other types to specify the type of selection model
sel

# The results object from  metafor  can be used as input to generate advanced visualisations using the R package metaviz (https://rdrr.io/cran/metaviz)