/**************************************************************************/
/********** Algorithms for actual and actuarial Analysis ************/
/**************************************************************************/
/********************************************************************************/
/* Usage */
/* */
/* %Greenwod(inset =, outset =, timevar =, eventvar= , lastevnt=) */
/* A value must be given for each of the five macro parameters */
/********************************************************************************/
/***********************************************************************************************************************/
/* */
/* Computes actual incidence of events, together with standard errors from the matrix form of Greenwood's formula. */
/* */
/* Coded into SAS by Bill Anderson, September 1999 */
/* The code uses macro assignments instead of array multplication -- perhaps the latter would be cleaner */
/***********************************************************************************************************************/
/***********************************************************************************************************************/
/* Input data set: SAS dataset or view Unchanged by the algorithm */
/* */
/* Output data set: New SAS data set, whose name is a macro parameter */
/* */
/* Other interactions: Creates data sets temp and tempset. These are not needed at end of execution of the macro. */
/* */
/* Input data set variables used */
/* */
/* The variable names are passed as parameters to the macro; the values of the variables are as further stated */
/* below. */
/* */
/* &timevar The numeric variable containing outcome times. There will be exactly one outcome for each time; */
/* multiple outcomes are handled by multiple records, with the same value of the outcome time. */
/* */
/* &eventvar the numeric variable containing event info. Values: */
/* 1 --- &lastevnt transitions to a new state */
/* others censored, for this use of the macro */
/* THE CODE ASSUMES THAT EVENTVAR CONTAINS INTEGERS ONLY, AND DOES NOT CHECK FOR THIS CONDITION. IF */
/* THE CONDITION IS VIOLATED, THE RESULTS ARE UNDEFINED. */
/* */
/* &lastevnt The highest event value that will be analyzed. This must be an integer >= 1. The SAS statements in */
/* the code will in principle allow values of &lastevnt <= 99, but the number of variables will become */
/* so large as to cause poor execution long before that time. (Interpretation of large values of */
/* &lastevnt is questionable, so this limitation is one of form, rather than substance.) */
/* */
/* In a typical use of the macro, event 1 will be an event of primary interest, such as structural */
/* valve failure, event 2 will be death, and other values will represent different forms of censoring. */
/* Using the macro with &lastevnt = 1 will produce the ordinary Kaplan-Meier analysis with death being */
/* just one form of censoring; using the macro with &lastevnt = 2 will produce the actual incidence of */
/* the event of interest, as well as the actual incidence of death. */
/* */
/* Output Variable names are fixed in the macro, and some labels are supplied. */
/* */
/* &timevar The same name as in the input data set. There is one output record for each distinct value in the */
/* input data set, as long as there is at least one event occurring at that time; other times are ignored.*/
/* */
/* atrisk The number of patients at risk at the time */
/* */
/* event1 - event&lastevnt */
/* The number of events of that type at that time. */
/* */
/* censored The number of censored observations. */
/* */
/* */
/* incid0 The estimated probability of remaining in state 0. This is the ordinary Kaplan-Meier estimate of */
/* freedom from all events. */
/* */
/* incid1 - incid&lastevnt */
/* The incidence of that event, up to and including the time; freedom from event is 1 - incidence. */
/* */
/* error0 - error&lastevnt */
/* The standard errors of each incidence estimate. These are the square roots of the corresponding */
/* entries of the variance matrix; the user may capture the entire variance matrix for other use. */
/* */
/***********************************************************************************************************************/
%macro Greenwod(inset =, outset =, timevar =, eventvar =, lastevnt =);
data tempset (keep = &timevar &eventvar); set &inset;
/* Kill undefined times, and reclassify events */
if &timevar >= 0;
if &eventvar < 1 | &eventvar > &lastevnt then &eventvar = 0; /* Censor these events */
run;
/* Count total at risk */
/* Add dummy observations to ensure that all designated events are reflected by variables in the output data set */
/* These will have missing time, and will be killed in a couple of steps */
data tempset; set tempset nobs = numpats end = last;
output;
if last then do; &timevar = .; do &eventvar = 1 to &lastevnt; output; end; end;
call symput('numpats', put(numpats, 10.));
run;
proc sort data = tempset;
by &timevar;
run;
proc freq noprint data = tempset;
by &timevar;
tables &eventvar/out = temp;
run;
proc transpose data = temp out = &outset;
by &timevar;
id &eventvar;
var count;
run;
data &outset; set &outset; if &timevar ^= .; run; /* kill the dummy observations */
/* Now add an at risk column and put missing counts to zero */
/* Also make the variable names as promised */
data &outset; set &outset;
rename _0 = censored;
%do event = 1 %to &lastevnt; rename _&event = event&event; %end;
attrib atrisk label = 'Patients at Risk';
retain atrisk newrisk &numpats;
drop newrisk;
atrisk = newrisk;
if _0 = . then _0 = 0; newrisk = atrisk - _0;
%do event = 1 %to &lastevnt;
if _&event = . then _&event = 0;
newrisk = newrisk - _&event;
%end;
run;
/* Now do the actual computations */
data &outset; set &outset;
/* Some labels for variables coming in */
attrib
censored label = 'Censored Observations'
%do event = 1 %to &lastevnt; event&event label = "Incidents of Event &event" %end;
;
/* Components of present incidence vector */
/* (In the general model, this is the first row of the transition matrix) */
/* Only the first row is variable */
attrib
pk0_0 label = 'Freedom from all events'
%do event = 1 %to &lastevnt;
pk0_&event label = "Incidence of event &event"
%end;;
/* Components of previous incidence vector */
attrib
pkb0_0 label = 'Previous freedom from all events'
%do event = 1 %to &lastevnt;
pkb0_&event label = "Previous incidence of event &event"
%end;;
/* Components of present covariance matrix */
attrib
%do row = 0 %to &lastevnt; %do col = 0 %to &lastevnt;
ck&row._&col label = "Variance component (&row, &col)"
%end; %end;;
/* Components of previous covariance matrix */
attrib
%do row = 0 %to &lastevnt; %do col = 0 %to &lastevnt;
ckb&row._&col label = "Previous variance component (&row, &col)"
%end; %end;;
/* Standard Errors */
attrib error0 label = 'Standard Error of Freedom from All Events'
%do event = 1 %to &lastevnt; error&event label = "Standard Error of Incidence of Event &event" %end;;
/* Components of variance addition due to new terms */
/* These are from the multinomial distribution */
attrib
%do row = 0 %to &lastevnt; %do col = 0 %to &lastevnt;
va&row._&col label = "Additional variance component (&row, &col)"
%end; %end;;
/* Components of differential transition Matrix */
/* All but the first row are constant, but are made explicit for clarity of code */
attrib
%do row = 0 %to &lastevnt; %do col = 0 %to &lastevnt;
da&row._&col label = "Differential transition (&row, &col)"
%end; %end;;
/* retain variables needed for the recursion */
/* also initialize, for use the first time around */
retain
pkb0_0 1 %do event = 1 %to &lastevnt; pkb0_&event 0 %end;
%do row = 0 %to &lastevnt; %do col = 0 %to &lastevnt;
ckb&row._&col 0
%end; %end;;
/* Now to the general case */
/* Compute terms needed for the update */
/* Differential transition matrix */
da0_0 = 1;
%do event = 1 %to &lastevnt;
da0_&event = event&event/atrisk;
da0_0 = da0_0 - da0_&event;
%end;
/* The rest of this section is logically redundant, and kept in to clarify the matrix multiplication */
%do row = 1 %to &lastevnt; %do col = 0 %to &lastevnt;
%if &row = &col %then %do; da&row._&col = 1; %end;
%else %do; da&row._&col = 0; %end;
%end; %end;
/* Additional variance component */
/* The matrix is symmetric; for clarity all terms are explicitly created */
%do row = 0 %to &lastevnt; %do col = 0 %to &lastevnt;
%if &row = &col %then %do; va&row._&col = da0_&row*(1 - da0_&row)/atrisk; %end;
%else %do; va&row._&col = -da0_&row*da0_&col/atrisk; %end;
%end; %end;
/* Updating the incidence vector is one matrix multiplication */
%do col = 0 %to &lastevnt;
pk0_&col = 0;
%do event = 0 %to &lastevnt;
pk0_&col = pk0_&col + pkb0_&event*da&event._&col;
%end; %end;
/* Updating the Variance matrix is more complicated */
/* There is the sum of two products of three matrices each */
/* The first part of the first product has the temporary name t1 */
%do row = 0 %to &lastevnt; %do col = 0 %to &lastevnt;
t1&row._&col = 0;
%do event = 0 %to &lastevnt;
t1&row._&col = t1&row._&col + da&event._&row*ckb&event._&col;
%end; %end; %end;
/* Finishing the first product */
%do row = 0 %to &lastevnt; %do col = 0 %to &lastevnt;
ck&row._&col = 0;
%do event = 0 %to &lastevnt;
ck&row._&col = ck&row._&col + t1&row._&event*da&event._&col;
%end; %end; %end;
/* Now add in the second term */
%do row = 0 %to &lastevnt; %do col = 0 %to &lastevnt;
ck&row._&col = ck&row._&col + va&row._&col*pkb0_0**2;
%end; %end;
/* Finally fix up the terms to be retained for next time around */
%do event = 0 %to &lastevnt; pkb0_&event = pk0_&event; %end;
%do row = 0 %to &lastevnt; %do col = 0 %to &lastevnt;
ckb&row._&col = ck&row._&col;
%end; %end;
/* And the output standard errors */
%do event = 0 %to &lastevnt; error&event = sqrt(ck&event._&event); %end;
run;
data &outset /* keep only those variables likely to be of interest */
(keep = &timevar atrisk censored
%do event = 1 %to &lastevnt; event&event %end;
%do event = 0 %to &lastevnt; pk0_&event %end;
%do event = 0 %to &lastevnt; error&event %end;);
set &outset;
run;
/***********************************************************************************************************************/
/* */
/* Based on the Markov Chain approach, as presented in the textbook of Andersen, Borgan, Gill, and Keiding. */
/* */
/* The survival process is modeled as an n state Markov chain, where n = &lastevnt + 1. The initial state is state 0, */
/* and states 1, ..., &lastevnt are absorbing states. The first row of the transition matrix consists of the */
/* incidence functionss for the various states, with the (0,0) element being the freedom from all events. The */
/* remaining rows of the transition matrix are structurally constant in this application. */
/* */
/* The variance matrix would be n^2 x n^2, and the variance is given iteratively by formula 4.4.19 of Andersen, et al. */
/* However, since all but the last row is structurally constant, the variance reduces to n x n, and the variance is */
/* */
/* More specifically, let P_k be the transition matrix from the start to event time k, C_k its variance matrix, and */
/* VA_k the matrix of variances of the new terms. Then Greenwood's formula is */
/* C_{k+1} = P_{k+1}^{transpose}*C_k*P_{k+1} + VA_{k+1}*S_k^2, */
/* where S_k is the overall freedom from all events, and * represents matrix product. */
/* */
/* This recursion is 4.4.19 of Andersen, et al. It is an easy algebraic identity to see that this recursion gives */
/* the familiar Greenwood formula in the scalar case; it also agrees with the formula of Gaynor et al. in the actual */
/* survival (competing risks) case. */
/* */
/* The SAS macro code defines all elements of the transition matrix, and ignores the symmetry of the variance. This */
/* Is a tradeoff between execution speed and code clarity. */
/* */
/* References: */
/* */
/* Andersen, PK, Borgan, O, Gill, R., and Keiding, N, Statistical Models based on Counting Processes, Springer, 1993 */
/* */
/* Gaynor, JJ, et al., On the use of Cause-Specific Failure and Conditional Failure Probabilities: Examples for */
/* Clinical Oncology, JASA, vol 88, 1993, pages 400 - 409. */
/***********************************************************************************************************************/
%mend;
/* Sample Driver */
/*
options mprint;
data myset1;
infile '[projtg.lfeinstein.amlmini]trm_age55.ci';
input grp $ day event compete;
* if (grp='2');
data myset;
set myset1;
if (event=1) then evnt=1;
if (compete=1) then evnt=2;
if (event=0 and compete=0) then evnt=0;
etime=day;
%Greenwod(inset = myset, outset = actset, timevar = etime, eventvar = evnt, lastevnt = 2)
data out;
set actset;
ci=pk0_1;
se=error1;
lcl=ci-1.96*se;
ucl=ci+1.96*se;
options ls=79;
proc print data=out;
var etime ci se lcl ucl;
*/