################################################################################# # # MEAN CUMULATIVE COUNT # # Last updated February 26, 2015. ################################################################################# # R code to accompany: # # Dong H, Robison LL, Leisenring WM, Martin LJ, Armstrong GT, Yasui Y. (in press). # Estimating the burden of recurrent events in the presence of competing risks: # The method of mean cumulative count. American Journal of Epidemiology. # # NOTE: # Our purpose of providing a simple hypothetical example and the computation code is # that it would serve as a useful tutorial for researchers who want to learn how to # apply the method of Mean Cumulative Count. Should you have any question, please conctact # Huiru Dong by email: huiru@ualberta.ca. # # R version 3.1.1 (2014-07-10) # Platform: x86_64-w64-mingw32/x64 (64-bit) ############ ------------- PARAMETERS -------------- ######### ############----- id: Participants's ID ############----- time: Follow up time of participant to interest/competing-risk event/censoring ############----- cause: Event of interest is coded as 1; competing-risk event as 2; censoring as 0 ############----- type: Character string specifying the type of calcultion. # Possible values are "MCC" (MCC calculation by equation) and "SCI" (sum of cumulative incidence). ################################################################################# ######### Run this only once to install the package ######### #install.packages("reshape") #install.packages("etm") library(reshape) library(splines) library(survival) library(etm) ################### This is the main function to run ################### MCC=function(id,time,cause,type) { if(type=="MCC"){ outdata=MCC_eq(id,time,cause) } if(type=="SCI"){ outdata=SCI(id,time,cause) } return(outdata) } ################### Calculate the Sum of Cumulative Incidence ########## SCI=function(id,time,cause) { indata=data.frame(id,time,cause) ii=order(indata$id,indata$time,-indata$cause) ##if time is the same for 0/1 or 0/2 of the same person, make 0 last sort_data=indata[ii,] sort_id=sort_data$id #The time interval we are interested# freq=1 time.interval=seq(min(time),max(time),freq) #Calculate the total number of participants: N# Nodup_id=unique(id) N=length(unique(id)) ######## Huiru's code has loop for each person and loop for each id, which was very slow (I actually cannot ### have it work when there in my CCSS --too much time) I will revise it to avoid looping pfr person 1: no. of person sort_data$first <- ave(sort_data$time, sort_data$id, FUN = seq_along) M=max(sort_data$first) #maximum numer of events ### Take the last row so we know the maximum number of events per person event.number=do.call(rbind, lapply(split(sort_data, sort_data$id), tail, 1))[,c("id","first")] colnames(event.number)=c("id","maxE") alldata=merge(sort_data,event.number,by.x="id",by.y="id") #### make data for cumulative incidence, with dimension M*N data.MCC=NULL for(i in 1:M){ if(i==1){ data_temp=alldata[alldata$first==1,] data_temp$m_event=i data.MCC=rbind(data.MCC,data_temp) } if(i>1){ ## for those with i or more records, take the ith record the_ith=alldata[alldata$first==i,] ##for those with 0){ ## only if there are people whose last row is an event lastr$cause=0 sort_data=rbind(sort_data,lastr[,c("id","time","cause")]) ii=order(sort_data$id,sort_data$time,-sort_data$cause) ##if time is the same for 0/1 or 0/2 of the same person, make 0 last sort_data=sort_data[ii,] } ntotal=length(unique(id)) indata=sort_data time=sort_data$time cause=sort_data$cause id=sort_data$id count=rep(1,length(id)) freq_cause=aggregate(count~time+cause, data=indata,sum) lifetable_1 <- cast(freq_cause, time~cause,value="count",fill=0) colnames(lifetable_1)[colnames(lifetable_1)=="1"]="event" colnames(lifetable_1)[colnames(lifetable_1)=="0"]="censor" colnames(lifetable_1)[colnames(lifetable_1)=="2"]="cmprk" lifetable_1 ### need to consider the situation that there is no censor 0, or event 1, competing risk 2 in the data at all. cause_in=unique(freq_cause$cause) if(0 %in% cause_in==FALSE){ censor=rep(0,dim(lifetable_1)[1]) lifetable_1=data.frame(lifetable_1,censor) } if(1 %in% cause_in==FALSE){ event=rep(0,dim(lifetable_1)[1]) lifetable_1=data.frame(lifetable_1,event) } if(2 %in% cause_in==FALSE){ cmprk=rep(0,dim(lifetable_1)[1]) lifetable_1=data.frame(lifetable_1,cmprk) } ## n at risk at j = n at risk at j-1 -C-R, so get the running sum of C and R over time sum_censor=cumsum(lifetable_1[,"censor"]) sum_cmprk=cumsum(lifetable_1[,"cmprk"]) lifetable_2=cbind(lifetable_1,sum_censor,sum_cmprk) nrisk=ntotal-(sum_censor+sum_cmprk) nrisk_previous=c(ntotal,nrisk[1:(length(nrisk)-1)]) ## at the first time point, n at risk is the original number lifetable=data.frame(time=lifetable_1$time,nrisk=nrisk_previous,lifetable_1[,c("censor","event","cmprk")]) lifetable surv_prob=1-lifetable$cmprk/lifetable$nrisk overall_surv=cumprod(surv_prob) overall_surv_previous=c(1,overall_surv[1:(length(overall_surv)-1)]) ###KM(Tj-1) is used in the MCC equation. Ave_events=overall_surv_previous*lifetable$event/lifetable$nrisk MCC=cumsum(Ave_events) MCCtable=data.frame(lifetable,MCC) MCC.final=do.call(rbind, lapply(split(MCCtable, MCCtable$MCC), head, 1))[,c("time","MCC")] rownames(MCC.final)=NULL return(MCC.final) } #-------------------------------------------------------------------------------------------------# # ------------- Run the function with examples ------------# ### -----Simple example with 3 ids, and everyone had an event and then immediately censored Participant_ID=c(1,1,2,2,3,3) Exit_Time=c(8,8,9,9,10,10) Cause=c(1,0,1,0,1,0) simple2=data.frame(Participant_ID, Exit_Time, Cause) simple2 MCC(id=Participant_ID, time=Exit_Time,cause=Cause,type="MCC") ### In this example, MCC is larger than SCI at the end. MCC(id=Participant_ID, time=Exit_Time,cause=Cause,type="SCI") ### ---- Example data used in the Paper ---- ### Participant_ID=c(1,2,3,4,4,4,4,5,5) Exit_Time=c(15.2,1.3,7.4,3.7,9.6,11.5,15.2,4.5,5.3) Cause=c(0,0,2,1,1,1,0,1,2) MCC(id=Participant_ID, time=Exit_Time,cause=Cause,type="MCC") MCC(id=Participant_ID, time=Exit_Time,cause=Cause,type="SCI") # id=Participant_ID; time=Exit_Time; cause=Cause ### ---- Example data: multiple people had events at 4.2 ---- ### id=c(1,2,2,2,2,3,4,4,4,5,5,6,6,6,6,7,7,7,7) time=c(0.4,1.3,3,4.8,5.3,1.5,1,3,4.8,4.2,5.3,2.6,4.2,5.4,5.8,1.3,2,4.2,5.7) cause=c(0,1,1,1,0,2,1,1,2,1,0,1,1,1,1,1,1,1,2) MCC(id=id, time=time,cause=cause,type="MCC") MCC(id=id, time=time,cause=cause,type="SCI") ### CCSS SMN example########### data=read.csv("R:/Biostatistics/Biostatistics/Common/Qi/webpage/MCC/smndata.csv") mcc=MCC(id=data$ccssid, time=data$timesmn,cause=data$SMNcat,type="MCC") sci=MCC(id=data$ccssid, time=data$timesmn,cause=data$SMNcat,type="SCI") show=merge(mcc,sci,by.x="time",by.y="Time") show$diff=show$MCC-show$SumCIs summary(show$diff) # id=data$ccssid; time=data$timesmn; cause=data$SMNcat