# WIKIPEDIA: The survival function is a function that gives the probability that a patient of interest will survive beyond any specified time.
# A key assumption of the exponential survival function is that the hazard rate is constant.
# This script is for a survival analysis example using the "survival" package and leukemia dataset built-in the same package (with alternative methods using the package "coin")
# In this example, first "leukemia" dataset is renamed "df" and the column names are modified to make the command more informative
# The standard survival analysis function: fit <- survfit(Surv(survivaltime, event) ~ group, data = df)
library("survival") # load the R package "survival"
data(leukemia) # load the dataset "leukemia"
df <- leukemia # rename the dataset "df" (optional)
df$survivaltime <- df$time # rename the time variable as "survivaltime"
df$event <- df$status # rename the event variable as event
df$group <- df$x # rename the treatment group as "group"
## Getting the survival object called "fit":
fit <- survfit(Surv(survivaltime, event) ~ group, data = df)
# model the survival data
## Kaplan-Meier Plot:
plot(fit) # plot the K-M curve based on the created model
summary(fit) # get the details of the survival model used to create the K-M curve
# A little improvement on the basic plot:
plot(fit, lty = c(1,2), col = c("green", "red"), xlab = "Time (days)", ylab = "Survival Probability", main = "An Example Survival Curve")
legend(115, 1.01, legend = c("CT Maintained", "CT not Maintained"), col = c("green", "red"), lty = 1:2, cex = 0.8)
# The numbers in legend(x,y) correspond to x and y coordinates where the legend box is wanted to be placed. This can be replaced by "topright" as below.
# OR
# plot(fit, lty = c(1,2), col = c("green", "red"), xlab = "Time (days)", ylab = "Survival Probability", main = "An Example Survival Curve")
# legend("topright", legend = c("CT Maintained", "CT not Maintained"), col = c("green", "red"), lty = 1:2, cex = 0.8)
## Log-rank statistics for equality of survivor functions by survival::survdiff()
# SEE: https://rdrr.io/cran/survival/man/survdiff.html
surv_diff <- survdiff(Surv(survivaltime, event) ~ group, data = df)
surv_diff
## Alternative way of doing a log-rank test by survival::coxph()
# SEE: https://rdrr.io/cran/survival/man/coxph.html
coxph_model <- coxph_model <- coxph(Surv(survivaltime, event) ~ group, data = df) # creates a coxph object
# when no co-variates are included in a Cox PH model, it yields a result similar to the log-rank test
summary(coxph_model) # log-rank test result (univariable Cox PH)
## Log-rank test equivalents [ coin::logrank_test() ]:
# https://www.rdocumentation.org/packages/coin/versions/1.3-1/topics/SurvivalTests
#logrank_test(object, ties.method = c("mid-ranks", "Hothorn-Lausen", "average-scores"), # default: "mid-ranks"
# type = c("logrank", "Gehan-Breslow", "Tarone-Ware", "Prentice", # default: "log-rank"
# "Prentice-Marek", "Andersen-Borgan-Gill-Keiding",
# "Fleming-Harrington", "Gaugler-Kim-Liao", "Self"),
# rho = NULL, gamma = NULL, … # depend on the selections abpve (defaults : NULL)
# )
install.packages("coin")
library(coin)
logrank_test(Surv(survivaltime, event) ~ group, data = df) # log-rank statistics using default values (with coin package)
logrank_test(Surv(survivaltime, event) ~ group, data = df, # using alternative methods
distribution = "exact", ties.method = "average-scores", type = "Prentice")
## Cox Proportional Hazard Modelling (multivariable)
# The dataset used above "leukemia" has no co-variates.
# For this part the dataset survival::veteran will be used.
library(survival)
data(veteran)
survival_object = Surv(veteran$time, veteran$status)
coxph_model <- coxph(survival_object ~ trt + celltype + karno + diagtime + age + prior, data = veteran) # creates a coxph object
# if we want to include interactions, they can be added to the model like any other covariate but as products using the (:) operator like: + age:karno
# if we want to run a stratified analysis for the strata of any variable (for example, a variable that violates the PH assumption):
# we use the strata() argument like: + strata(prior) by adding it to the covariates list
summary(coxph_model)
# The multivariable model shows that the trt variable (treatment) has no statistically significant association with time-to-death but the potential confounders "celltype" and "karno" are associated with time-to-death.
# For more details on Cox PH analysis, see the script: survival_coxph.R
# Forest plot for hazard ratios for each variable included in the model using survminer::ggforest() :
# SEE: https://www.rdocumentation.org/packages/survminer/versions/0.4.9/topics/ggforest
library(survminer)
ggforest(coxph_model, data = veteran)
## Checking the Assumptions of Cox Proportional Hazard Modelling
# See the script: survival_coxph.R
#######################################################################################################
# For a more detailed survival analysis, statistics and plotting, see the script: survival_survminer.R
# See also: R Graphics Manual > Survival: http://www.imsbio.co.jp/RGM/R_image_list?facetValue=Survival