# WIKIPEDIA: The survival function is a function that gives the probability that a patient of interest will survive beyond any specified time. # A key assumption of the exponential survival function is that the hazard rate is constant. # This script is for a survival analysis example using the "survival" package and leukemia dataset built-in the same package (with alternative methods using the package "coin") # In this example, first "leukemia" dataset is renamed "df" and the column names are modified to make the command more informative # The standard survival analysis function: fit <- survfit(Surv(survivaltime, event) ~ group, data = df) library("survival") # load the R package "survival" data(leukemia) # load the dataset "leukemia" df <- leukemia # rename the dataset "df" (optional) df$survivaltime <- df$time # rename the time variable as "survivaltime" df$event <- df$status # rename the event variable as event df$group <- df$x # rename the treatment group as "group" ## Getting the survival object called "fit": fit <- survfit(Surv(survivaltime, event) ~ group, data = df) # model the survival data ## Kaplan-Meier Plot: plot(fit) # plot the K-M curve based on the created model summary(fit) # get the details of the survival model used to create the K-M curve # A little improvement on the basic plot: plot(fit, lty = c(1,2), col = c("green", "red"), xlab = "Time (days)", ylab = "Survival Probability", main = "An Example Survival Curve") legend(115, 1.01, legend = c("CT Maintained", "CT not Maintained"), col = c("green", "red"), lty = 1:2, cex = 0.8) # The numbers in legend(x,y) correspond to x and y coordinates where the legend box is wanted to be placed. This can be replaced by "topright" as below. # OR # plot(fit, lty = c(1,2), col = c("green", "red"), xlab = "Time (days)", ylab = "Survival Probability", main = "An Example Survival Curve") # legend("topright", legend = c("CT Maintained", "CT not Maintained"), col = c("green", "red"), lty = 1:2, cex = 0.8) ## Log-rank statistics for equality of survivor functions by survival::survdiff() # SEE: https://rdrr.io/cran/survival/man/survdiff.html surv_diff <- survdiff(Surv(survivaltime, event) ~ group, data = df) surv_diff ## Alternative way of doing a log-rank test by survival::coxph() # SEE: https://rdrr.io/cran/survival/man/coxph.html coxph_model <- coxph_model <- coxph(Surv(survivaltime, event) ~ group, data = df) # creates a coxph object # when no co-variates are included in a Cox PH model, it yields a result similar to the log-rank test summary(coxph_model) # log-rank test result (univariable Cox PH) ## Log-rank test equivalents [ coin::logrank_test() ]: # https://www.rdocumentation.org/packages/coin/versions/1.3-1/topics/SurvivalTests #logrank_test(object, ties.method = c("mid-ranks", "Hothorn-Lausen", "average-scores"), # default: "mid-ranks" # type = c("logrank", "Gehan-Breslow", "Tarone-Ware", "Prentice", # default: "log-rank" # "Prentice-Marek", "Andersen-Borgan-Gill-Keiding", # "Fleming-Harrington", "Gaugler-Kim-Liao", "Self"), # rho = NULL, gamma = NULL, … # depend on the selections abpve (defaults : NULL) # ) install.packages("coin") library(coin) logrank_test(Surv(survivaltime, event) ~ group, data = df) # log-rank statistics using default values (with coin package) logrank_test(Surv(survivaltime, event) ~ group, data = df, # using alternative methods distribution = "exact", ties.method = "average-scores", type = "Prentice") ## Cox Proportional Hazard Modelling (multivariable) # The dataset used above "leukemia" has no co-variates. # For this part the dataset survival::veteran will be used. library(survival) data(veteran) survival_object = Surv(veteran$time, veteran$status) coxph_model <- coxph(survival_object ~ trt + celltype + karno + diagtime + age + prior, data = veteran) # creates a coxph object # if we want to include interactions, they can be added to the model like any other covariate but as products using the (:) operator like: + age:karno # if we want to run a stratified analysis for the strata of any variable (for example, a variable that violates the PH assumption): # we use the strata() argument like: + strata(prior) by adding it to the covariates list summary(coxph_model) # The multivariable model shows that the trt variable (treatment) has no statistically significant association with time-to-death but the potential confounders "celltype" and "karno" are associated with time-to-death. # For more details on Cox PH analysis, see the script: survival_coxph.R # Forest plot for hazard ratios for each variable included in the model using survminer::ggforest() : # SEE: https://www.rdocumentation.org/packages/survminer/versions/0.4.9/topics/ggforest library(survminer) ggforest(coxph_model, data = veteran) ## Checking the Assumptions of Cox Proportional Hazard Modelling # See the script: survival_coxph.R ####################################################################################################### # For a more detailed survival analysis, statistics and plotting, see the script: survival_survminer.R # See also: R Graphics Manual > Survival: http://www.imsbio.co.jp/RGM/R_image_list?facetValue=Survival