## FROM: http://www.sthda.com/english/wiki/survival-analysis-basics # R package "survival": https://cran.r-project.org/web/packages/survival/vignettes/survival.pdf # R package "survminer": https://cran.r-project.org/web/packages/survminer/readme/README.html # STHDA guide to survival analysis: http://www.sthda.com/english/wiki/survival-analysis-basics # STHDA guide for "survminer": http://www.sthda.com/english/wiki/survminer-r-package-survival-data-analysis-and-visualization ## Install the packages if not already installed install.packages(c("survival", "survminer")) ## Load the packages library("survival") library("survminer") ## Load the dataset "lung" to be used in the example below data(lung) # loads the dataset head(lung) # shows the top six rows for information # Fields in the dataset # inst: Institution code # time: Survival time in days # status: censoring status 1=censored, 2=dead # "censored" includes survived ones till the end of follow-up # age: Age in years # sex: Male=1 Female=2 # ph.ecog: ECOG performance score (0=good - 5=bad) # ph.karno: Karnofsky performance score (bad=0 to good=100) rated by physicians # pat.karno: Karnofsky performance score as rated by patients # meal.cal: Calories consumed at meals # wt.loss: Weight loss in last six months ## Compute survival curves for each sex with survfit() fit <- survfit(Surv(time, status) ~ sex, data = lung) print(fit) # prints the results (n, events, median and 95% CI for each stratum) ## Summary of survival curves summary(fit) # "fit" is the name given to the model above ## Access to the short summary table summary(fit)$table # The median survival times for each group represent the time at which the survival probability, S(t), is 0.5 ## Plotting survival curve # Change color, linetype by strata, risk.table color by strata ggsurvplot(fit, pval = TRUE, conf.int = TRUE, risk.table = TRUE, # Add risk table risk.table.col = "strata", # Change risk table color by groups linetype = "strata", # Change line type by groups surv.median.line = "hv", # Specify median survival ggtheme = theme_bw(), # Change ggplot2 theme palette = c("#E7B800", "#2E9FDF") ) ## Customising the survival curves ggsurvplot( fit, # survfit object with calculated statistics generated above (using survfit function) pval = TRUE, # show p-value of log-rank test conf.int = TRUE, # show confidence intervals for # point estimaes of survival curves conf.int.style = "step", # customize style of confidence intervals xlab = "Time in days", # customize X axis label break.time.by = 200, # break X axis in time intervals by 200 ggtheme = theme_light(), # customize plot and risk table with a theme risk.table = "abs_pct", # absolute number and percentage at risk risk.table.y.text.col = T, # colour risk table text annotations risk.table.y.text = FALSE, # show bars instead of names in text annotations # in legend of risk table ncensor.plot = TRUE, # plot the number of censored subjects at time t surv.median.line = "hv", # add the median survival pointer legend.labs = c("Male", "Female"), # change legend labels palette = c("#E7B800", "#2E9FDF") # custom color palettes ) ## Shortening the survival curve (from 1000 to 600 days) using the argument xlim ggsurvplot(fit, conf.int = TRUE, risk.table.col = "strata", # Change risk table color by groups ggtheme = theme_bw(), # Change ggplot2 theme palette = c("#E7B800", "#2E9FDF"), xlim = c(0, 600)) ## Plotting cumulative events ggsurvplot(fit, conf.int = TRUE, risk.table.col = "strata", # Change risk table color by groups ggtheme = theme_bw(), # Change ggplot2 theme palette = c("#E7B800", "#2E9FDF"), fun = "event") ## Log-rank statistics for equality of survivor functions ## Comparing survival curves between males and females with log-rank statistics surv_diff <- survdiff(Surv(time, status) ~ sex, data = lung) surv_diff # * * * * * * * * * # #### A much more basic survival analysis and plot from the R script "survival.R" # The standard survival analysis function: fit <- survfit(Surv(survivaltime, event) ~ group, data = df) library("survival") data(leukemia) str(leukemia) #'data.frame': 23 obs. of 3 variables: # time : num 9 13 13 18 23 28 31 34 45 48 ... # status: num 1 1 0 1 1 0 1 1 0 1 ... # x : Factor w/ 2 levels "Maintained","Nonmaintained": 1 1 1 1 1 1 1 1 1 1 ... # In the dataset "leukemia" # time: survival time # status: censoring status 1=censored, 2=dead # x: the two groups to be compared (factor) surv_diff <- survdiff(Surv(time, status) ~ x, data = leukemia) # log-rank test surv_diff # statistics results surv_fit <- survfit(Surv(time, status) ~ x, data = leukemia) # An R object to be used in plotting (K-M plot) surv_fit # prints the results (n, events, median and 95% CI for each stratum) plot(surv_fit) # simple K-M plot ## Survival Analysis Links: # OpenIntro: https://www.openintro.org/download.php?file=survival_analysis_in_R # https://www.datacamp.com/community/tutorials/survival-analysis-R # https://www.emilyzabor.com/tutorials/survival_analysis_in_r_tutorial.html # https://www.statmethods.net/stats/power.html # https://rviews.rstudio.com/2017/09/25/survival-analysis-with-r/ # http://www.sthda.com/english/wiki/survival-analysis-basics # https://stats.idre.ucla.edu/r/examples/asa/r-applied-survival-analysis-ch-2/ # https://www.r-bloggers.com/survival-analysis-with-r/ # https://www.r-bloggers.com/steps-to-perform-survival-analysis-in-r/ # http://sphweb.bumc.bu.edu/otlt/MPH-Modules/BS/R/R7_LogisticRegression-Survival/R7_LogisticRegression-Survival4.html # R Packages for Survival Analysis: # https://cran.r-project.org/web/packages/survival/vignettes/survival.pdf # https://cran.r-project.org/web/packages/survminer/readme/README.html # https://rdrr.io/cran/gsDesign/man/nSurvival.html