Manuscript title: Application of advanced statistics in ophthalmology

Authors: Qiao Fan, Yik-Ying Teo, Seang-Mei Saw

Supplementary materials

#####################

## R codes (Table 3)

######################

## Longitudinal data analysis using different approaches: GEE, mixed-effects model and linear regression for reduced dataset.

## Read the original (wide-format) data into R.
## The file is expected in the current working directory and must have a
## header row with the variable names.
scorm <- read.table("scorm_example.txt", header = TRUE)

## Change the data structure from wide format to long format.
##
## Variable coding:
##   school:      1 or 2
##   age:         0 = 7 years old, 1 = 8 years old, 2 = 9 years old
##   gender:      1 = male, 2 = female
##   race:        0 = non-Chinese, 1 = Chinese
##   rse1 - rse4: spherical equivalent (SE) measured at year 1 to year 4,
##                right eye
##   lse1 - lse4: SE measured at year 1 to year 4, left eye
##   se1 - se4:   SE measured at year 1 to year 4, average of both eyes
##   bookwk:      books read per week: 0 = 2 or less, 1 = more than 2
##   IID:         individual ID
##
## Follow-up time is coded as 0 = year 1, 1 = year 2, 2 = year 3, 3 = year 4.

## Right eye: one row per subject and visit.
## `idvar = "IID"` already names an existing column, so no separate `ids`
## vector is needed.
dat1 <- subset(scorm, select = c(IID, school, age, gender, race,
                                 rse1, rse2, rse3, rse4, bookwk))
long1 <- reshape(dat1,
                 idvar = "IID",
                 times = c(0, 1, 2, 3),
                 timevar = "time",
                 v.names = "se",
                 varying = list(c("rse1", "rse2", "rse3", "rse4")),
                 direction = "long")

### Eye indicator: 1 = right, 2 = left
long1$eye <- 1

## Left eye: same layout as the right eye.
dat2 <- subset(scorm, select = c(IID, school, age, gender, race,
                                 lse1, lse2, lse3, lse4, bookwk))
long2 <- reshape(dat2,
                 idvar = "IID",
                 times = c(0, 1, 2, 3),
                 timevar = "time",
                 v.names = "se",
                 varying = list(c("lse1", "lse2", "lse3", "lse4")),
                 direction = "long")
long2$eye <- 2

## Stack both eyes and sort by subject, then eye, so that all rows of one
## subject (and of one eye within a subject) are contiguous. The GEE fits
## rely on clusters being stored as contiguous rows.
datf2 <- rbind(long1, long2)
longft <- datf2[order(datf2$IID, datf2$eye), ]

### Create the interaction of school and follow-up time
longft$school_time <- longft$school * longft$time

### Mixed-effects model, full data analysis on repeated measures,
### adjusting for inter-eye correlation (Model 1)

### Load the nlme package; to install it, run in R: install.packages("nlme")
library(nlme)

## Random intercepts at the subject level and at the eye-within-subject level.
## lme() defaults to na.action = na.fail and stops on missing values; SE has
## missing visits (NA), so incomplete rows are dropped explicitly.
fit1 <- summary(lme(se ~ school + factor(time) + factor(age) + gender + race +
                      bookwk + school_time,
                    random = ~ 1 | IID / eye,
                    data = longft,
                    na.action = na.omit))

## Print the model fitting results
fit1

### GEE model, full data analysis on repeated measures,
### adjusting for inter-eye correlation (Model 2)
library(geepack)

### Specify the working correlation structure as "exchangeable" in this
### example. Empirical correlation structure estimation from GEE is actually
### quite robust to the pre-specified working correlation. Using "ar1"
### (autoregressive in time series data) yielded similar results.
### Note: the family object is `gaussian` (lower case) and the geeglm()
### argument for the working correlation is `corstr`.
fit2 <- summary(geeglm(se ~ school + factor(time) + factor(age) + gender +
                         race + bookwk + school_time + factor(eye),
                       id = IID,
                       data = longft,
                       family = gaussian,
                       corstr = "exchangeable"))
fit2

### Mixed-effects model, full data analysis on repeated measures,
### ignoring inter-eye correlation (Model 3)

### An IIDcluster number is created for Model 3 and Model 4 specifically.
### Different eyes of the same individual are placed in different IIDclusters,
### i.e. the two eyes of one individual are treated as independent
### observations.
### NOTE(review): the code creating IIDcluster was not part of the original
### listing; it is reconstructed here as one integer id per (IID, eye) pair,
### following the description above.
longft$IIDcluster <- as.integer(interaction(longft$IID, longft$eye,
                                            drop = TRUE))

## lme() stops on missing values by default (na.action = na.fail), so rows
## with missing SE are dropped explicitly.
fit3 <- summary(lme(se ~ school + factor(time) + factor(age) + gender + race +
                      bookwk + school_time,
                    random = ~ 1 | IIDcluster,
                    data = longft,
                    na.action = na.omit))
fit3

### GEE model, full data analysis on repeated measures,
### ignoring inter-eye correlation (Model 4)
fit4 <- summary(geeglm(se ~ school + factor(time) + factor(age) + gender +
                         race + bookwk + school_time,
                       id = IIDcluster,
                       data = longft,
                       family = gaussian,
                       corstr = "exchangeable"))
fit4

#############################################################

### Modeling the average refractive error of paired eyes longitudinally

## Create the data for the average SE analysis: one row per subject and
## visit, with the both-eye average SE (se1 - se4) as the outcome `avgse`.
## Follow-up time is coded 0, 1, 2, 3 for year 1 to year 4.
dat1 <- subset(scorm, select = c(IID, school, age, gender, race,
                                 se1, se2, se3, se4, bookwk))
datf2 <- reshape(dat1,
                 idvar = "IID",
                 times = c(0, 1, 2, 3),
                 timevar = "time",
                 v.names = "avgse",
                 varying = list(c("se1", "se2", "se3", "se4")),
                 direction = "long")

## Sort by subject, then visit, so each subject's rows are contiguous.
## The result is named `longft2`, the name used by the interaction below and
## by Models 5 and 6.
longft2 <- datf2[order(datf2$IID, datf2$time), ]

## Interaction of school and follow-up time
longft2$school_time <- longft2$school * longft2$time

### Mixed-effects model, reduced data analysis on repeated measures for
### average SE (Model 5)
## Random intercept at the subject level. lme() stops on missing values by
## default (na.action = na.fail), so incomplete rows are dropped explicitly.
fit5 <- summary(lme(avgse ~ school + factor(time) + factor(age) + gender +
                      race + bookwk + school_time,
                    random = ~ 1 | IID,
                    data = longft2,
                    na.action = na.omit))
fit5

### GEE model, reduced data analysis on repeated measures for average SE
### (Model 6)
## Exchangeable working correlation within subject; the family object is
## `gaussian` (lower case) and the geeglm() argument is `corstr`.
fit6 <- summary(geeglm(avgse ~ school + factor(time) + factor(age) + gender +
                         race + bookwk + school_time,
                       id = IID,
                       data = longft2,
                       family = gaussian,
                       corstr = "exchangeable"))
fit6

## Model 7: ordinary linear regression on the reduced dataset. Only the
## right-eye SE at the last visit (rse4, year 4) is used as the outcome, so
## the wide-format data `scorm` are analysed directly.
Fit7 <- summary(
  lm(
    formula = rse4 ~ school + factor(age) + gender + race + bookwk,
    data = scorm
  )
)

## Print the model fitting results
Fit7

## Long format data structure used in the analysis for GEE and mixed-effect analysis (Model 1 and Model 2).

IID school age gender race bookwk time se eye school_time

4 1 0 2 1 1 0 0.975 1 0

4 1 0 2 1 1 1 0.7 1 1

4 1 0 2 1 1 2 0.4 1 2

4 1 0 2 1 1 3 0.2 1 3

4 1 0 2 1 1 0 0.775 2 0

4 1 0 2 1 1 1 0.375 2 1

4 1 0 2 1 1 2 0.725 2 2

4 1 0 2 1 1 3 0.6 2 3

6 1 0 1 0 1 0 -0.05 1 0

6 1 0 1 0 1 1 0.21875 1 1

6 1 0 1 0 1 2 NA 1 2

6 1 0 1 0 1 3 0.075 1 3

6 1 0 1 0 1 0 0.125 2 0

6 1 0 1 0 1 1 0.225 2 1

6 1 0 1 0 1 2 NA 2 2

6 1 0 1 0 1 3 -0.3 2 3

……

NA – missing data

4