follic=read.table('c:/office/ProfDev/AnnieRobert/CRex/follic.csv', sep=',',header=T,na.strings='.') ## use dftime as time variable cens=(follic$rcens==1)+2*(follic$rcens==0 & follic$stat==1) follic$rmtime=apply(cbind(follic$dftime,follic$maltime),1,min) follic$rcens2=follic$rcens*(follic$dftime<=follic$maltime) cens2=(follic$rcens2==1)+ 2*(follic$rcens2==0 & (follic$stat==1 | follic$mcens==1)) library(cmprsk) par(las=1) ### a) fit=cuminc(follic$dftime,cens) forplot=list(list(fit$'1 1'$time,fit$'1 1'$est)) plot.cuminc(forplot,wh=c(1,2),xlim=c(0,35),ylim=c(0,1), xlab='Time to relapse',ylab='Probability of relapse') #### b) fit=cuminc(follic$rmtime,cens2) forplot=list(list(fit$'1 1'$time,fit$'1 1'$est)) ## to superimpose par(new=T) plot.cuminc(forplot,wh=c(1,2),xlim=c(0,35),ylim=c(0,1),lty=2, xlab='Time to relapse',ylab='Probability of relapse') #### c) fitkm=survfit(Surv(dftime,rcens)~1,data=follic) lines(fitkm,col='red',fun='event',mark.time=F) #### d) fitkm=survfit(Surv(rmtime,rcens2)~1,data=follic) lines(fitkm,col='red',lty=2,fun='event',mark.time=F) legend(0,1,lty=c(1,2,1,2),bty='n',col=c('black','black','red','red'), legend=c('Only death as CR','Death and second malignancy as CR', '1-KM for a)','1-KM for b)')) ### e) fit=cuminc(follic$dftime,cens) at10=timepoints(fit,times=10) at10 est=1-at10$est[1] se=sqrt(at10$var[1]) alpha=0.05 zalpha=qnorm(1-alpha/2) A=zalpha*se/(est*log(est)) L1=est*(exp(A)) L2=est*(exp(-A)) c(est,L1,L2) 1-c(est,L1,L2) ## estimator 1-est ## the two limits of the confidence intervals 1-L1 1-L2 ####2) x=(follic$age>60)+0 fit=cuminc(follic$dftime,cens,x) fit ## plot for relapse forplot=list(list(fit$'0 1'$time,fit$'0 1'$est), list(fit$'1 1'$time,fit$'1 1'$est)) plot.cuminc(forplot,wh=c(0,1),lty=c(1,2), curvlab=c('Age<=60','Age>60'), xlab='Time to relapse',ylab='Probability of relapse') ## plot for competing risks forplot=list(list(fit$'0 2'$time,fit$'0 2'$est), list(fit$'1 2'$time,fit$'1 2'$est)) plot.cuminc(forplot,wh=c(0,1),lty=c(1,2), curvlab=c('Age<=60','Age>60'), xlab='Time to death without relapse', ylab='Probability of death without a relapse') ## 3) x=cbind(age=follic$age, stage=follic$clinstg, bulk=follic$blktxcat) fit=crr(follic$dftime,cens,x) fit coef=fit$coef se=sqrt(diag(fit$var)) hr=exp(coef) pv=2*(1-pnorm(abs(coef/se))) data.frame(var=colnames(x),coef,hr,se,pv) px=cbind(age=c(30,60),stage=c(1,2),bulk=c(0,1)) px pfit=predict.crr(fit,cov1=px) plot.predict.crr(pfit,lty=c(1,2), xlab='Time to relapse', ylab='Predicted probability of relapse') legend(0,0.75,lty=c(1,2),bty='n', legend=c('30 years old, stage I, without bulk', '60 years old, stage II with bulk')) ## 4) x=cbind(age=follic$age, stage=follic$clinstg, bulk=follic$blktxcat) fit=crr(follic$dftime,cens,x) fit coef=fit$coef se=sqrt(diag(fit$var)) hr=exp(coef) pv=2*(1-pnorm(abs(coef/se))) data.frame(var=colnames(x),coef,hr,se,pv) ## a) plot(fit$uftime,fit$res[,1],xlab='Unique failure times', ylab='Schoenfeld residuals',main='Age') lines(lowess(fit$uftime,fit$res[,1],iter=0)) plot(fit$uftime,fit$res[,2],xlab='Unique failure times', ylab='Schoenfeld residuals',main='Clinical stage') lines(lowess(fit$uftime,fit$res[,2],iter=0)) plot(fit$uftime,fit$res[,3],xlab='Unique failure times', ylab='Schoenfeld residuals',main='Bulk') lines(lowess(fit$uftime,fit$res[,3],iter=0)) ## b) tf=function(x) { y=cbind(x,x,x) return(y) } fitT=crr(follic$dftime,cens,cov1=x,cov2=x,tf=tf) fitT coef=fitT$coef se=sqrt(diag(fitT$var)) hr=exp(coef) pv=2*(1-pnorm(abs(coef/se))) data.frame(var=c(colnames(x),paste(colnames(x),'*time')),coef,hr,se,pv) ## c) fit=crr(follic$dftime,cens,cov1=x) fitT=crr(follic$dftime,cens,cov1=x,cov2=x,tf=tf) px=cbind(age=c(30,60),stage=c(1,2),bulk=c(0,1)) px pfit=predict.crr(fit,cov1=px) plot.predict.crr(pfit,lty=c(1,2),xlim=c(0,25),ylim=c(0,1), xlab='Time to relapse', ylab='Predicted probability of relapse') legend(0,1,lty=c(1,2),bty='n', legend=c('30 years old, stage I, without bulk', '60 years old, stage II with bulk')) par(new=T) pfitT=predict.crr(fitT,cov1=px,cov2=px) plot.predict.crr(pfitT,lty=c(1,2),xlim=c(0,25),ylim=c(0,1),col='red', xlab='Time to relapse', ylab='Predicted probability of relapse') legend(15,1,lty=c(1,2),bty='n',col='red',text.col='red', legend=c('30 years old, stage I, without bulk', '60 years old, stage II with bulk')) ## 5) fit=coxph(Surv(dftime,rcens)~age+clinstg+blktxcat+ch,data=follic) summary(fit) ###Sample size calculation ## 1 alpha=0.05 zalpha=qnorm(1-alpha/2) beta=0.2 zbeta=qnorm(1-beta) HR=1.8 nev=(((zalpha+zbeta)*2)/log(HR))**2 nev ## 2 cens=(follic$rcens==1)+2*(follic$rcens==0 & follic$stat==1) fit=survreg(Surv(dftime,(cens==1))~1,data=follic,subset=(ch=='Y'),dist='exponential') hev=exp(-fit$coef) fit=survreg(Surv(dftime,(cens==2))~1,data=follic,subset=(ch=='Y'),dist='exponential') hcr=exp(-fit$coef) hev2=hev/HR hcr2=hcr ## 3 and 4 Fpev=function(a,f,hev,hcr,hev2,hcr2) { h=hev+hcr pev=hev/h*(1-(exp(-h*f)-exp(-h*(a+f)))/(a*h)) h2=hev2+hcr2 pev2=hev2/h2*(1-(exp(-h2*f)-exp(-h2*(a+f)))/(a*h2)) pevm=(pev+pev2)/2 return(pevm) } pev=Fpev(6,2,hev,hcr,hev2,hcr2) pev nev/pev pev=Fpev(6,3,hev,hcr,hev2,hcr2) nev/pev pev=Fpev(7,2,hev,hcr,hev2,hcr2) nev/pev pev=Fpev(7,3,hev,hcr,hev2,hcr2) nev/pev ## 5 , also assume that the hcr is the same in the two trt arms ## also assume that the time to event of interest is independent to the time for competing risks ## and that the HR refers to the ratio of the subhazards t0=5 Fev=hev/(hev+hcr)*(1-exp(-(hev+hcr)*t0)) Fev Fev2=hev2/(hev2+hcr2)*(1-exp(-(hev2+hcr2)*t0)) Fev2 ## 6 fsim=function(a,f,hev,hcr,HR,n) { hev2=hev/HR hcr2=hcr timecens=runif((2*n),f,a+f) timeevent=c(rexp(n,hev),rexp(n,hev2)) timecr=c(rexp(n,hcr),rexp(n,hcr)) time=apply(cbind(timecens,timeevent,timecr),1,min) cens=(time==timeevent)+2*(time==timecr) trt=rep(c(1,0),each=n) fit=coxph(Surv(time,(cens==1))~trt) coef=fit$coef pv=summary(fit)$coef[5] hr=exp(coef) fit=crr(time,cens,trt) coefcr=fit$coef pvcr=1-pchisq(fit$coef^2/fit$var,1) hrcr=exp(coefcr) nev=sum(cens==1) ncr=sum(cens==2) result=data.frame(nev,coef,hr,pv,ncr,coefcr,hrcr,pvcr) return(result) } result1=data.frame(nev=0,coef=0,hr=0,pv=0,ncr=0,coefcr=0,hrcr=0,pvcr=0) nsim=1000 set.seed(1) for (i in 1:nsim) { result1[i,]=fsim(a=7,f=3,hev=0.063,hcr=0.014,HR=1.8,n=175) } power=sum(result1$pv<=0.05)/nsim effectsize=exp(mean(result1$coef)) nev=mean(result1$nev) powercr=sum(result1$pvcr<=0.05)/nsim effectsizecr=exp(mean(result1$coefcr)) ncr=mean(result1$ncr) follic=read.table('c:/office/ProfDev/AnnieRobert/CRex/follic.csv', sep=',',header=T,na.strings='.') names(follic) library(cmprsk) par(las=1) ## par sets plotting parameters ## 1 a) fit=survfit(Surv(survtime,stat)~1,data=follic,conf.type='log-log') plot(fit,conf.int=F,xaxs='r', xlab='Time to death',ylab='Survival') ## xaxs parameter makes the x-axis start before 0 ## b) median(follic$survtime[follic$stat==0]) ## the survival can be reported at 10 years ## c) summary(fit,times=10) ## d) sfit=summary(fit) sfit1=cbind(sfit$time,sfit$surv) sfit1[sfit1[,2]>0.48 & sfit1[,2]<0.51,] ## median survival is 15.3 years ## e) fit=survreg(Surv(survtime,stat)~1,data=follic) summary(fit) ## default is the Weibull distribution ## there is evidence that the scale is not 1 (p value=0.0175) ## 2 a) table(follic$ch) fit=survfit(Surv(survtime,stat)~ch,data=follic) plot(fit,lty=c(1,2),xaxs='r', xlab='Time to death',ylab='Survival') legend(0,0.2,lty=c(1,2),bty='n', legend=c('No chemo','Chemo')) table(follic$clinstg) fit=survfit(Surv(survtime,stat)~clinstg,data=follic) plot(fit,lty=c(1,2),xaxs='r', xlab='Time to death',ylab='Survival') legend(0,0.2,lty=c(1,2),bty='n', legend=c('Stage I','Stage II')) table(follic$blktxcat) fit=survfit(Surv(survtime,stat)~blktxcat,data=follic) plot(fit,lty=c(1,2),xaxs='r', xlab='Time to death',ylab='Survival') legend(0,0.2,lty=c(1,2),bty='n', legend=c('No bulk','Bulk')) ## b) fit=survdiff(Surv(survtime,stat)~ch,data=follic) fit fit=survdiff(Surv(survtime,stat)~clinstg,data=follic) fit fit=survdiff(Surv(survtime,stat)~blktxcat,data=follic) fit ## c) fit=coxph(Surv(survtime,stat)~ch,data=follic) fit fit=coxph(Surv(survtime,stat)~clinstg,data=follic) fit fit=coxph(Surv(survtime,stat)~blktxcat,data=follic) fit ## 3 a) and b) fit=coxph(Surv(survtime,stat)~ch,data=follic) fit fit=coxph(Surv(survtime,stat)~ch+clinstg,data=follic) fit fit=coxph(Surv(survtime,stat)~ch+clinstg+blktxcat,data=follic) fit ## c) ## note how the coefficient for chemo gets further from zero as we add more covariates ## this is because chemo was probably given to patients with more severe disease ## like stage II or bulk present ## thus when not controlling for the severity of disease ## the effect of chemo is underestimated ## 4 a) and b) fit=survreg(Surv(survtime,stat)~clinstg,data=follic,subset=(ch=='Y')) summary(fit) ## the exponential seems appropriate ## c) fit2=coxph(Surv(survtime,stat)~clinstg,data=follic,subset=(ch=='Y')) summary(fit2) fit1=survreg(Surv(survtime,stat)~clinstg,data=follic, dist='exponential',subset=(ch=='Y')) summary(fit1) ## the hazard ratio from the parametric fit exp(-fit1$coef[2]) ## the hazard ratio from the Cox PH model is exp(fit2$coef) ## they are very close because the exponential distribution fulfills ## the assumption of the proportionality of hazards ## d) ## the p-value from Cox PH is slightly larger than from exponential. ## when the parametric model is appropriate usually it is more efficient than a semi (or non) parametric model ## 5 ## create the fake survival time survtime2=follic$survtime*(follic$stat==1)+2*(follic$stat==0) ## a) and b) fit=survfit(Surv(survtime,stat)~1,data=follic) fit2=survfit(Surv(survtime2,stat)~1,data=follic) plot(fit,lty=1,xaxs='r',mark.time=F,conf.int=F, xlab='Time to death',ylab='Survival') lines(fit2,lty=2) legend(0,0.2,lty=c(1,2),bty='n', legend=c('Correct survival time','The fake survival time')) ## d)when the follow id performed preferentially then the survival estimates are biased ## c) fit=coxph(Surv(survtime,stat)~clinstg,data=follic) summary(fit) fit2=coxph(Surv(survtime2,stat)~clinstg,data=follic) summary(fit2) ## d) Not only the estimates of survival are biased but also ## the estimates for the effect of covariates ## 6 a) ## the failure is given by rcens and the time to failure by dftime. ## creating the censor variable cens1=(follic$rcens==1)+2*(follic$stat==1 & follic$rcens==0) ## b) fitcr=cuminc(follic$dftime,cens1) timepoints(fitcr,times=c(3,5,10)) ## c) fit=survfit(Surv(dftime,rcens)~1,data=follic) summary(fit,times=c(3,5,10)) ## the difference between 1-KM and the estimates from cuminc (1-summary(fit,times=c(3,5,10))$surv)-timepoints(fitcr,times=c(3,5,10))$est[1,] ## d) ## the estimates of 1-km are always larger than the ones where competing risks were considered ## e) cens2=(follic$mcens==1)+2*(follic$stat==1 & follic$mcens==0) fitcr=cuminc(follic$maltime,cens2) timepoints(fitcr,times=c(3,5,10)) fit=survfit(Surv(maltime,mcens)~1,data=follic) summary(fit,times=c(3,5,10)) ## the difference between 1-KM and the estimates from cuminc (1-summary(fit,times=c(3,5,10))$surv)-timepoints(fitcr,times=c(3,5,10))$est[1,] ## f) ## The difference depends on how mauch competing risks there is ## ## Sample size calculation ## 1) fit=survreg(Surv(survtime,stat)~1,data=follic,subset=(ch=='Y')) summary(fit) l0=exp(-fit$coef) ## survival at 5 years for the standard arm s0=exp(-l0*5) ## survival at 5 years for the experimental arm s1=s0+0.1 ## the hazard in the experimental arm l1=-log(s1)/5 ## the hazard for the experimental arm hr=log(s0)/log(s1) ## or hr=l0/l1 ## the hazard overall l=-log(mean(s0,s1))/5 ## number of patients accrued per year 50*0.6 ## 60 % of 50 ## 2) f=function(a,f,ny,hr,l,alpha) { N=ny*a pev=1-(exp(-l*f)-exp(-l*(a+f)))/(l*a) zalpha==qnorm(1-alpha/2) zbeta=sqrt(pev*N)*log(hr)/2-zalpha power=pnorm(zbeta) return(power) } ## 2) f(a=5,f=2,ny=30,hr=2.2,l=0.03,alpha=0.05) ## 3) f(a=5,f=2,ny=60,hr=2.2,l=0.03,alpha=0.05) ## 4) f(a=10,f=2,ny=30,hr=2.2,l=0.03,alpha=0.05) ## 5) The power is larger when one institution accrues for 10 years than when 2 institutions accrue each for 5 years. Although the number of patients is the same the number of events is different. There is more follow-up when the accrual is longer. However, a study planned for 10 years is less likely to be finished. A new therapy may appear and the one tested may become obsolete before having a chance to be tested. lung=read.table("C:/office/ProfDev/AnnieRobert/SurvEx/lung.csv", sep=',',header=T,na.strings='.') library(survival) par(las=1) ## ex 1 a) fit=survfit(Surv(survtime,stat)~1,data=lung,conf.type='log-log') plot(fit,conf.int=F,xlab='Time to death',ylab='Survival') ## b) summary(lung$survtime[lung$stat==0]) ## c) summary(fit,times=c(3,4,5)) ## d) abline(h=0.5,lty=2) ## the median does not exist ## e) Weibull is the default fit=survreg(Surv(survtime,stat)~1,data=lung) summary(fit) fit=survreg(Surv(survtime,stat)~1,data=lung,dist='exponential') summary(fit) ## ex 2 a) table(lung$hist) fit=survfit(Surv(survtime,stat)~hist,data=lung) plot(fit,lty=c(1,2),xlab='Time to death',ylab='Survival') legend(0,0.2,lty=c(1,2),bty='n',legend=c('AD','SQ')) table(lung$smk) fit=survfit(Surv(survtime,stat)~smk,data=lung) plot(fit,lty=c(1,2),xlab='Time to death',ylab='Survival') legend(0,0.2,lty=c(1,2),bty='n',legend=c('No smokers','Smokers')) table(lung$stage) fit=survfit(Surv(survtime,stat)~stage,data=lung) plot(fit,lty=c(1,2),xlab='Time to death',ylab='Survival') legend(0,0.2,lty=c(1,2),bty='n',legend=paste('Stage',c('I','II'))) ## b) fit=coxph(Surv(survtime,stat)~hist+smk+stage,data=lung) sres=cox.zph(fit) pv=signif(sres$table[,3],2) plot(sres,var=1, main="Lung\nHistology",sub=paste('p-value',pv[1])) abline(h=coef(fit)[1],lty=2,col='blue',) plot(sres,var=2, main="Lung\nSmoking",sub=paste('p-value',pv[2])) abline(h=coef(fit)[2],lty=2,col='blue',) plot(sres,var=3, main="Lung\nStage",sub=paste('p-value',pv[3])) abline(h=coef(fit)[3],lty=2,col='blue',) ## c) in SAS only *******SAS only proc phreg data=lung; model survtime*stat(0)=smkn tsmkn t2smkn; tsmkn=survtime*smkn; t2smkn=(survtime**2)*smkn; run; ********** ## d) table(lung$smk) fit=survfit(Surv(survtime,stat)~smk,data=lung) plot(fit,lty=c(1,2),xlab='Time to death',ylab='Survival') legend(0,0.2,lty=c(1,2),bty='n',legend=c('No smokers','Smokers')) fit=coxph(Surv(survtime,stat)~smk,data=lung) pfit=survfit(fit,newdata=data.frame(survtime=c(20,100),stat=c(0,0),smk=c('N','Y'))) lines(pfit,col='red') ## e) fit=coxph(Surv(survtime,stat)~hist,data=lung) summary(fit) fit=coxph(Surv(survtime,stat)~smk,data=lung) summary(fit) fit=coxph(Surv(survtime,stat)~stage,data=lung) summary(fit) fit=coxph(Surv(survtime,stat)~hist+smk,data=lung) summary(fit) fit=coxph(Surv(survtime,stat)~smk+stage,data=lung) summary(fit) fit=coxph(Surv(survtime,stat)~hist+stage,data=lung) summary(fit) fit=coxph(Surv(survtime,stat)~hist+smk+stage,data=lung) summary(fit) ## 3 a) fit=coxph(Surv(survtime,stat)~1,data=lung) plot(lung$m1,resid(fit),xlab='m1',ylab='O-E') lines(lowess(lung$m1,resid(fit),iter=0)) plot(lung$m2,resid(fit),xlab='m2',ylab='O-E') lines(lowess(lung$m2,resid(fit),iter=0)) plot(lung$m4,resid(fit),xlab='m4',ylab='O-E') lines(lowess(lung$m4,resid(fit),iter=0)) ## b) fit=coxph(Surv(survtime,stat)~m1,data=lung) sres=cox.zph(fit) pv=signif(sres$table[3],2) plot(sres, main="Lung, m1",sub=paste('p-value',pv)) abline(h=coef(fit),lty=2,col='blue',) fit=coxph(Surv(survtime,stat)~m2,data=lung) sres=cox.zph(fit) pv=signif(sres$table[3],2) plot(sres, main="Lung, m2",sub=paste('p-value',pv)) abline(h=coef(fit),lty=2,col='blue',) fit=coxph(Surv(survtime,stat)~m4,data=lung) sres=cox.zph(fit) pv=signif(sres$table[3],2) plot(sres, main="Lung, m4",sub=paste('p-value',pv)) abline(h=coef(fit),lty=2,col='blue',) ## c) fit=coxph(Surv(survtime,stat)~m2,data=lung) summary(fit) extractAIC(fit) fit=coxph(Surv(survtime,stat)~m2+I(m2^2),data=lung) summary(fit) extractAIC(fit) m4mod=(-1)*(lung$m4<(-1))+lung$m4*(lung$m4>=(-1)) fit=coxph(Surv(survtime,stat)~m4,data=lung) summary(fit) extractAIC(fit) fit=coxph(Surv(survtime,stat)~m4mod,data=lung) summary(fit) extractAIC(fit) ## d) fit=coxph(Surv(survtime,stat)~1,data=lung) plot(lung$m2,resid(fit),xlab='m2',ylab='O-E') lines(lowess(lung$m2,resid(fit),iter=0)) fit=coxph(Surv(survtime,stat)~m2+I(m2^2),data=lung) ## the cut-off point need to be appx symetrtical to the point calculated in e) ## also one needs to have enough of the sample in each of the categories ## this should not be regarded as 'testing the quadratic effect', ## but rather as illustrating the quadratic effect found earlier m2cut=cut(lung$m2,c(-6,-0.7,0.1,6)) table(m2cut,lung$stat) fit=survfit(Surv(survtime,stat)~m2cut,data=lung) plot(fit,lty=c(1,2,3),mark.time=F,xaxs='r', xlab='Time to death',ylab='Survival', main='Illustration of the quadratic effect of m2') legend(0,0.2,lty=c(1,2,3),bty='n', legend=paste('m2 in ',levels(m2cut))) ## e) -fit$coef[1]/(2*fit$coef[2]) ## 4) fit=coxph(Surv(survtime,stat)~hist+stage+smk+m1,data=lung) summary(fit) fit=coxph(Surv(survtime,stat)~hist+stage+smk+m2,data=lung) summary(fit) fit=coxph(Surv(survtime,stat)~hist+stage+smk+m2+I(m2^2),data=lung) summary(fit) fit=coxph(Surv(survtime,stat)~hist+stage+smk+m4,data=lung) summary(fit) ## B Power and sample size alpha=0.05 beta=0.2 zalpha=qnorm(1-alpha/2) zbeta=qnorm(1-beta) ## 1 a) ## calculate power for the seen effect size, for the given number of events nev=50 m=median(lung$m1) fit=coxph(Surv(survtime,stat)~(m1>m),data=lung) zbetacalc=sqrt(nev)*abs(fit$coef)/2-zalpha power=pnorm(zbetacalc) ## note abs(fit$coef)=log(exp(fit$coef)) ## to use PASS ## calculate the effect size whihc is possible to be detected coef=(zalpha+zbeta)*2/sqrt(nev) ## the coefficient is slightly larger than waht was observed in the original dataset ## b) m1 as continuous ## calculate the power nev=50 s=sqrt(var(lung$m1)) fit=coxph(Surv(survtime,stat)~m1,data=lung) zbetacalc=sqrt(nev)*abs(fit$coef)*s-zalpha power=pnorm(zbetacalc) ## to use PASS ## calculate the effect size which is possible to be detected coef=(zalpha+zbeta)/(sqrt(nev)*s) ## the coefficient is larger than what was observed in the original dataset ## the recommendation is to try to increase the sample size ## 2 a) a=5 ## accrual time f=2 ## follow-up time nperyear=50 f=function(alpha=0.05,beta=0.2,a,f,nperyer) { zalpha=qnorm(1-alpha/2) zbeta=qnorm(1-beta) N=nperyear*a lowrisk=round((N*2/3),0) highrisk=round((N*1/3),0) N=lowrisk+highrisk ## approximate the hazard rate for the whole group fit=survreg(Surv(survtime,stat)~1,data=lung,dist='exponential') lambda=exp(-fit$coef) pev=1-(exp(-lambda*f)-exp(-lambda*(a+f)))/(lambda*a) nev=pev*N s=sqrt(1/3*2/3) coef=(zalpha+zbeta)/(s*sqrt(nev)) hr=exp(coef) result=data.frame(nev,pev,N,hr) return(result) } f(alpha=0.05,beta=0.2,a=5,f=2,nperyer=50) f(alpha=0.05,beta=0.2,a=5,f=3,nperyer=50) f(alpha=0.05,beta=0.2,a=6,f=2,nperyer=50) f(alpha=0.05,beta=0.2,a=5,f=3,nperyer=50) ## in PASS input: pev, coef (=log hazard ratio), s and N ## b) ## use the coefficients obtained in the model Cox PH model f=function(alpha=0.05, beta=0.2, a, f, coef1, coef2, s, nperyear) { N=a*nperyear x=rnorm(N,mean=0,sd=s) h=exp(coef1*x+coef2*x^2) timeinit=rexp(N,h) timecens=runif(N,f, (a+f)) time=apply(cbind(timeinit,timecens),1,min) stat=(time==timeinit)+0 fit=coxph(Surv(time,stat)~x+I(x^2)) sfit=summary(fit) c1=sfit$coef[1,1] c2=sfit$coef[2,1] p1=sfit$coef[1,5] p2=sfit$coef[2,5] result=data.frame(c1,c2,p1,p2) return(result) } result1=data.frame(c1=0,c2=0,p1=0,p2=0) nsim=2000 set.seed(3) for (i in 1:nsim) { result1[i,]=f(alpha=0.05, beta=0.2, a=5, f=2, coef1=0.044, coef2=0.065, s=1.5, nperyear=50) } summary(result1) power=sum(result1$p2<=0.05)/nsim power ### result2=data.frame(c1=0,c2=0,p1=0,p2=0) nsim=2000 set.seed(3) for (i in 1:nsim) { result2[i,]=f(alpha=0.05, beta=0.2, a=6, f=2, coef1=0.044, coef2=0.065, s=1.5, nperyear=50) } summary(result2) power=sum(result2$p2<=0.05)/nsim power ## the power for a=5 and f=2 (N=250) ## is larger when the covariate is continuous than when is categorical