###################################### ## Survival Analysis - R Tutorial ## ###################################### # Please read the tutorial's general instructions first, and prepare # for loading an external datafile in case you have one to use. # The content of this file can be pasted directly into the R console. # It should blurp lost of errors only at the point where you # are supposed to have an external dataset available. # However, it is much handier to use the "display file" command # within the R menu to look at this file, and paste command by command # to the console, using control-V. Take your time to experiment a bit # with the listed commands. # You will need the "survival" library from Terry Therneau, # and MASS for some datasets. # We load them here. library(survival) library(MASS) #--------------------------------------------- # .Rsurv: Survival Analysis in R. #--------------------------------------------- # Content: # # I. Survival Curves # II. Parametric Survival Curves # III. Cox Proportional hazards model # ---------------------- # | I. Survival Curves | # ---------------------- # Usually, data files have to be converted into survival objects. # This is done using the Surv() function data(aml) # Acute myelogenous leukemia survival data summary(aml) sresp<-Surv(aml$time, aml$status) # take a look at the result: sresp # In such a "Surv" survival object # a plus is added to censored observations s1<-survfit(sresp~x,data=aml) # Function survfit() computes an estimate of a survival curve # for censored data using either the Kaplan-Meier or the # Fleming-Harrington method. summary(s1) plot(s1,lty=2:3,col=2:3) legend(80,0.8,c("maintained","no-nmaintained"),lty=2:3,col=2:3) # crosses represent censoring times in the plot. survdiff(sresp~x,data=aml) # This carries out a log-rank test. Consult Venables and Ripley (2002) # on their appropriateness. # ------------------------- # | II. Parametric Models | # ------------------------- # These are a bit out of fashion, but useful for exploration. # Different parametric models make different assumptions concerning # the hazard rate and the survival function. Consult a book to # find out more detail. # Keep in mind that the Weibull model generalizes the exponential. data(leuk) summary(leuk) # Contains survival times of leukemia patients. # Covariates are white-blood count and ag, a test result (present or absent). summary(s2<-survreg(Surv(time)~ag*log(wbc),data=leuk,dist="exponential")) summary(s3<-survreg(Surv(time)~ag+log(wbc),data=leuk,dist="exponential")) anova(s2,s3) summary(s4<-survreg(Surv(time)~ag+log(wbc),data=leuk,dist="weibull")) # scale is nearly one , so the weibull model # is not much better than the exponential. summary(s5<-survreg(Surv(time)~ag+log(wbc),data=leuk,dist="loglogistic")) summary(s6<-survreg(Surv(time)~log(wbc),data=leuk,dist="weibull")) anova(s4,s6) # --------------------------------------- # | III. Cox Proportional hazards model | # --------------------------------------- summary(scox1<-coxph(Surv(time)~ag+log(wbc),data=leuk)) update(scox1,~.-1) # remove the intercept. summary(scox2<-coxph(Surv(time)~strata(ag)+log(wbc),data=leuk)) # Coxph() fits a Cox proportional hazards regression model. # Here we see the command strata() appear, which means that we # fit separate baseline hazards per "ag" stratum. # Time dependent variables, time dependent strata, multiple events per subject, # and other extensions can incorporated using the counting process # formulation of Andersen and Gill. # Consult Therneau and Grambsch, Venables and Ripley (2002) for examples. #--------------------------------------------------------------------- # Most of the material in this short tutorial comes from # Modern Applied Statistics with S # by W. N. Venables and B. D. Ripley (2002) # T. M. Therneau and P. M. Grambsch (2000) # Modelling Survival Data. # Tom Van Dooren, version 16/10/2002