## FROM: http://www.sthda.com/english/wiki/survival-analysis-basics
# R package "survival": https://cran.r-project.org/web/packages/survival/vignettes/survival.pdf
# R package "survminer": https://cran.r-project.org/web/packages/survminer/readme/README.html
# STHDA guide to survival analysis: http://www.sthda.com/english/wiki/survival-analysis-basics
# STHDA guide for "survminer": http://www.sthda.com/english/wiki/survminer-r-package-survival-data-analysis-and-visualization
## Install the packages if not already installed
install.packages(c("survival", "survminer"))
## Load the packages
library("survival")
library("survminer")
## Load the dataset "lung" to be used in the example below
data(lung) # loads the dataset
head(lung) # shows the top six rows for information
# Fields in the dataset
# inst: Institution code
# time: Survival time in days
# status: censoring status 1=censored, 2=dead
# "censored" includes survived ones till the end of follow-up
# age: Age in years
# sex: Male=1 Female=2
# ph.ecog: ECOG performance score (0=good - 5=bad)
# ph.karno: Karnofsky performance score (bad=0 to good=100) rated by physicians
# pat.karno: Karnofsky performance score as rated by patients
# meal.cal: Calories consumed at meals
# wt.loss: Weight loss in last six months
## Compute survival curves for each sex with survfit()
fit <- survfit(Surv(time, status) ~ sex, data = lung)
print(fit) # prints the results (n, events, median and 95% CI for each stratum)
## Summary of survival curves
summary(fit) # "fit" is the name given to the model above
## Access to the short summary table
summary(fit)$table
# The median survival times for each group represent the time at which the survival probability, S(t), is 0.5
## Plotting survival curve
# Change color, linetype by strata, risk.table color by strata
ggsurvplot(fit,
pval = TRUE, conf.int = TRUE,
risk.table = TRUE, # Add risk table
risk.table.col = "strata", # Change risk table color by groups
linetype = "strata", # Change line type by groups
surv.median.line = "hv", # Specify median survival
ggtheme = theme_bw(), # Change ggplot2 theme
palette = c("#E7B800", "#2E9FDF")
)
## Customising the survival curves
ggsurvplot(
fit, # survfit object with calculated statistics generated above (using survfit function)
pval = TRUE, # show p-value of log-rank test
conf.int = TRUE, # show confidence intervals for
# point estimaes of survival curves
conf.int.style = "step", # customize style of confidence intervals
xlab = "Time in days", # customize X axis label
break.time.by = 200, # break X axis in time intervals by 200
ggtheme = theme_light(), # customize plot and risk table with a theme
risk.table = "abs_pct", # absolute number and percentage at risk
risk.table.y.text.col = T, # colour risk table text annotations
risk.table.y.text = FALSE, # show bars instead of names in text annotations
# in legend of risk table
ncensor.plot = TRUE, # plot the number of censored subjects at time t
surv.median.line = "hv", # add the median survival pointer
legend.labs =
c("Male", "Female"), # change legend labels
palette =
c("#E7B800", "#2E9FDF") # custom color palettes
)
## Shortening the survival curve (from 1000 to 600 days) using the argument xlim
ggsurvplot(fit,
conf.int = TRUE,
risk.table.col = "strata", # Change risk table color by groups
ggtheme = theme_bw(), # Change ggplot2 theme
palette = c("#E7B800", "#2E9FDF"),
xlim = c(0, 600))
## Plotting cumulative events
ggsurvplot(fit,
conf.int = TRUE,
risk.table.col = "strata", # Change risk table color by groups
ggtheme = theme_bw(), # Change ggplot2 theme
palette = c("#E7B800", "#2E9FDF"),
fun = "event")
## Log-rank statistics for equality of survivor functions
## Comparing survival curves between males and females with log-rank statistics
surv_diff <- survdiff(Surv(time, status) ~ sex, data = lung)
surv_diff
# * * * * * * * * * #
#### A much more basic survival analysis and plot from the R script "survival.R"
# The standard survival analysis function: fit <- survfit(Surv(survivaltime, event) ~ group, data = df)
library("survival")
data(leukemia)
str(leukemia)
#'data.frame': 23 obs. of 3 variables:
# time : num 9 13 13 18 23 28 31 34 45 48 ...
# status: num 1 1 0 1 1 0 1 1 0 1 ...
# x : Factor w/ 2 levels "Maintained","Nonmaintained": 1 1 1 1 1 1 1 1 1 1 ...
# In the dataset "leukemia"
# time: survival time
# status: censoring status 1=censored, 2=dead
# x: the two groups to be compared (factor)
surv_diff <- survdiff(Surv(time, status) ~ x, data = leukemia) # log-rank test
surv_diff # statistics results
surv_fit <- survfit(Surv(time, status) ~ x, data = leukemia) # An R object to be used in plotting (K-M plot)
surv_fit # prints the results (n, events, median and 95% CI for each stratum)
plot(surv_fit) # simple K-M plot
## Survival Analysis Links:
# OpenIntro: https://www.openintro.org/download.php?file=survival_analysis_in_R
# https://www.datacamp.com/community/tutorials/survival-analysis-R
# https://www.emilyzabor.com/tutorials/survival_analysis_in_r_tutorial.html
# https://www.statmethods.net/stats/power.html
# https://rviews.rstudio.com/2017/09/25/survival-analysis-with-r/
# http://www.sthda.com/english/wiki/survival-analysis-basics
# https://stats.idre.ucla.edu/r/examples/asa/r-applied-survival-analysis-ch-2/
# https://www.r-bloggers.com/survival-analysis-with-r/
# https://www.r-bloggers.com/steps-to-perform-survival-analysis-in-r/
# http://sphweb.bumc.bu.edu/otlt/MPH-Modules/BS/R/R7_LogisticRegression-Survival/R7_LogisticRegression-Survival4.html
# R Packages for Survival Analysis:
# https://cran.r-project.org/web/packages/survival/vignettes/survival.pdf
# https://cran.r-project.org/web/packages/survminer/readme/README.html
# https://rdrr.io/cran/gsDesign/man/nSurvival.html