# Cox proportional hazard (PH) modeling with library(survival)and data(veteran) # FROM: https://rviews.rstudio.com/2017/09/25/survival-analysis-with-r # Kleinbaum, D.G. and Klein, M. Survival Analysis, A Self Learning Text Springer (2005) also uses veterans dataset as an example # USAGE: coxph(survival_object, data, method) # survival_object = Surv(time, event); event = status variable; method = "efron" by default; other options are "breslow" and "exact". # SEE: http://www.sthda.com/english/wiki/cox-proportional-hazards-model, especially for the interpretation of multivariable Cox proportional hazard model results # Generic functions in R such as "summary()" and "plot()" are used in conjunction with survival functions to display survival estimates and plots. # A sample script to do a multivariable Cox PH analysis using the dataset veteran (within the survival package): library(survival) data(veteran) args(coxph) # check the arguments of coxph() coxph_model <- coxph(Surv(time, status) ~ trt + celltype + karno + diagtime + age + prior, data = veteran) # trt is the treatment group variable (like caco) # celltype + karno + diagtime + age + prior are the co-variates (potential confounders) summary(coxph_model) # OR library(survival) data(veteran) survival_object = Surv(veteran$time, veteran$status) coxph_model <- coxph(survival_object ~ trt + celltype + karno + diagtime + age + prior, data = veteran) # creates a coxph object # if we want to include interactions, they can be added to the model like any other covariate but as products using the (:) operator like: + age:karno # if we want to run a stratified analysis for the strata of any variable (for example, a variable that violates the PH assumption): # we use the strata() argument like: + strata(prior) by adding it to the covariates list summary(coxph_model) # The multivariable model shows that the trt variable (treatment) has no statistically significant association with time-to-death but the potential confounders "celltype" and "karno" are associated with time-to-death. # Forest plot for hazard ratios for each variable included in the model using survminer::ggforest() : # SEE: https://www.rdocumentation.org/packages/survminer/versions/0.4.9/topics/ggforest library(survminer) ggforest(coxph_model, data = veteran) ## Testing proportional hazards assumption # URL: http://www.sthda.com/english/wiki/cox-model-assumptions # See also: https://stats.idre.ucla.edu/r/examples/asa/r-applied-survival-analysis-ch-6 # The proportional hazards (PH) assumption can be checked using statistical tests and graphical diagnostics based on the scaled Schoenfeld residuals. # In principle, the Schoenfeld residuals are independent of time. A plot that shows a non-random pattern against time is evidence of violation of the PH assumption. # The function cox.zph() [in the survival package] provides a convenient solution to test the proportional hazards assumption for each covariate included in a Cox regression model fit (using Schoenfeld residuals). # For each covariate, the function cox.zph() correlates the corresponding set of scaled Schoenfeld residuals with time, to test for independence between residuals and time. Additionally, it performs a global test for the model as a whole. # The proportional hazard assumption is supported by a non-significant relationship between residuals and time, and refuted by a significant relationship. check.ph <- cox.zph(coxph) check.ph # P values for some covariates and global P are <0.05 suggesting the violation of time-independence of PH assumption plot(check.ph) # For graphocal inspection of the PH assumption by plotting the Schoenfeld residuals # Systematic departures from a horizontal line are indicative of non-PHs, therefore, violation of Cox model assumption that estimates ß1,ß2,ß3 (y-axis) do not vary much over time. # var argument of cox.zph(): The set of variables for which plots are desired. # By default, plots are produced in turn for each variable of a model. This may result in displaying the last variable's plot only. # Selection of a single variable allows other features to be added to the plot, e.g., a horizontal line at zero or a main title. # Usage of the var argument: plot(check.ph, var = 1) # plot for trt plot(check.ph, var = 2) # plot for celltype plot(check.ph, var = 3) # plot for karno (which shows violation of the PH assumption) plot(check.ph, var = 4) # plot for diagtime plot(check.ph, var = 5) # plot for age plot(check.ph, var = 6) # plot for prior (the last covariate in the model) ## It is possible to do a graphical diagnostic using the function ggcoxzph() [in the survminer package], which produces, for each covariate, graphs of the scaled Schoenfeld residuals against the transformed time. # ggcoxzph() also produces P values for the violation of the PH assumption (check the P value for karno co-variate) library(survminer) ggcoxzph(check.ph) ## Testing influential observations # To test influential observations or outliers, the function ggcoxdiagnostics()[in survminer package] provides a convenient solution for checking influential observations. #### Springer book p. 93: Fig. 7.3 Change in coefficient (dfbeta) residuals for age  #### The plot is shown in Fig. 7.3. There we see that no single patient changes the estimate of the “age” coefficient by more than 0.003, which is less than 10% of the value of that coefficient. Still, we see that patients 46 and 68 have the most influence over the parameter estimate for age, and one may check these data points to ensure that there are no errors in recording the data.  ggcoxdiagnostics(coxph, type = "dfbeta", linear.predictions = TRUE) # fit: an object of class coxph.object # type: the type of residuals to present on Y axis. Allowed values include one of c("martingale", "deviance", "score", "schoenfeld", "dfbeta", "dfbetas", "scaledsch", "partial"). Default is martingale. # Specifying the argument type = “dfbeta”, plots the estimated changes in the regression coefficients upon deleting each observation in turn. # linear.predictions: a logical value indicating whether to show linear predictions for observations (TRUE) or just indexed of observations (FALSE) on X axis. # It’s also possible to check outliers by visualizing the deviance residuals. The deviance residual is a normalized transform of the martingale residual. These residuals should be roughtly symmetrically distributed about zero with a standard deviation of 1. # Positive values correspond to individuals that “died too soon” compared to expected survival times. # Negative values correspond to individual that “lived too long”. # Very large or small values are outliers, which are poorly predicted by the model. ggcoxdiagnostics(coxph, type = "deviance", linear.predictions = TRUE) ## Testing non-linearity # Plotting the Martingale residuals against continuous covariates is a common approach used to detect nonlinearity. # To assess the functional form of a continuous variable in a Cox proportional hazards model, the function ggcoxfunctional() [in the survminer R package] is used. # The function ggcoxfunctional() displays graphs of continuous covariates against martingale residuals of null cox proportional hazards model. # This might help to properly choose the functional form of continuous variable in the Cox model. Fitted lines with lowess function should be linear to satisfy the Cox proportional hazards model assumptions. ggcoxfunctional(Surv(time, status) ~ age + log(age) + sqrt(age), data = veteran) # There seems to be slight non-linearity for the age variable # To fit a non-linear (parametric) PH model, Weibull method can be used (in coxph() function) ## Running parametric models: # The survreg() function runs parametric accelerated failure time (AFT) models. # The key assumption for an AFT model is not hazard ratios are constant over time, but instead survival time accelerates (or decelerates) by a constant factor when comparing different levels of covariates. # The "parametric" AFT model uses the Weibull distribution, which is a special case of exponential distribution (with the parameter p = 1) # A Weibull AFT model is run with the survreg() function: survival_object = Surv(veteran$time, veteran$status) coxph.weibull <- survreg(survival_object ~ trt + celltype + karno + diagtime + age + prior, data = veteran, dist = "weibull") # runs the Weibull AFT model summary(coxph.weibull) # the estimate for the Weibull shape parameter is obtained by taking the reciprocal of the scale factor in this output # the larger the shape parameter (>1), the shorter the tail on the right of the Weibull distribution # the smaller the shape parameter (<1), the longer the tail on the right of the Weibull distribution # values near 1 indicates no difference from exponential distribution ## Overplotting Non-parametric (K-M) vs Parametric (Weibull) survival models ## FROM: https://stackoverflow.com/questions/9151591/how-to-plot-the-survival-curve-generated-by-survreg-package-survival-of-r # Using the dataset "veteran" # create a Surv object survobj <- with(veteran, Surv(time, status)) # plot kaplan-meier estimate, per treatment group (i.e., trt variable) fKM <- survfit(survobj ~ as.factor(trt), data = veteran) plot(fKM) # plot Cox PH survival curves, per trt sCox <- coxph(survobj ~ as.factor(trt), data=veteran) lines(survfit(sCox, newdata=data.frame(trt=1)), col='green') lines(survfit(sCox, newdata=data.frame(trt=2)), col='green') # plot weibull survival curves, per sex, sWei <- survreg(survobj ~ as.factor(trt), dist='weibull', data=veteran) lines(predict(sWei, newdata=list(trt=1), type="quantile", p=seq(.01,.99,by=.01)), seq(.99,.01,by=-.01), col="red") lines(predict(sWei, newdata=list(trt=2), type="quantile", p=seq(.01,.99,by=.01)), seq(.99,.01,by=-.01), col="red") * * * * * * ## Using the dataset "lung" # create a Surv object s <- with(lung, Surv(time, status)) # plot kaplan-meier estimate, per sex fKM <- survfit(s ~ sex, data=lung) plot(fKM) # plot Cox PH survival curves, per sex sCox <- coxph(s ~ as.factor(sex), data=lung) lines(survfit(sCox, newdata=data.frame(sex=1)), col='green') lines(survfit(sCox, newdata=data.frame(sex=2)), col='green') # plot Weibull survival curves, per sex sWei <- survreg(s ~ as.factor(sex), dist='weibull', data=lung) lines(predict(sWei, newdata=list(sex=1), type="quantile", p=seq(.01, .99, by = .01)), seq(.99, .01, by = -.01), col="red") lines(predict(sWei, newdata=list(sex=2), type="quantile", p=seq(.01, .99, by = .01)), seq(.99, .01, by = -.01), col="red") ############################################## SEARCH GOOGLE using "R plot for coxph.weibull" - http://rstudio-pubs-static.s3.amazonaws.com/5560_c24449c468224fd4af9f3e512a24e07d.html - https://cran.r-project.org/web/packages/SurvRegCensCov/vignettes/weibull.pdf - https://stackoverflow.com/questions/9151591/how-to-plot-the-survival-curve-generated-by-survreg-package-survival-of-r - https://rstudio-pubs-static.s3.amazonaws.com/5588_72eb65bfbe0a4cb7b655d2eee0751584.html READ: - https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6015946 - https://www.r-bloggers.com/survival-analysis-1 & https://www.r-bloggers.com/survival-analysis-2 - https://bmcbioinformatics.biomedcentral.com/articles/10.1186/s12859-020-3478-x - http://www.sthda.com/english/wiki/survminer-r-package-survival-data-analysis-and-visualization - Simsurv video: https://www.youtube.com/watch?v=fJTYsncvpvI