# Wednesday, June 18, 2025

# Compare Different Methods for Estimating Binary Proportions from Repeated Measurement Data

## Data 1 (mild case, under-dispersion)
# n[i] is the number of repeated binary observations recorded for subject i.
n <- c(5, 3, 1, 4, 5, 2, 5, 1, 2, 4)
# Long format: one row per observation; subjects are numbered in order.
d <- data.frame(
  subject = rep(seq_along(n), times = n),
  response = c(
    1, 1, 1, 1, 1,  # subject 1
    0, 1, 1,        # subject 2
    1,              # subject 3
    1, 0, 1, 1,     # subject 4
    1, 1, 0, 1, 1,  # subject 5
    1, 1,           # subject 6
    0, 1, 1, 0, 1,  # subject 7
    0,              # subject 8
    0, 1,           # subject 9
    1, 1, 0, 1      # subject 10
  )
)

## Data 2 (normal case, over-dispersion)
# n[i] is the number of repeated binary observations recorded for subject i.
n <- c(3, 2, 4, 5, 2, 6, 4, 3, 5, 2)
# Long format: one row per observation. The responses are written as one
# vector per subject and flattened in subject order.
d <- data.frame(
  subject = rep(seq_along(n), times = n),
  response = unlist(list(
    c(1, 1, 1),           # subject 1
    c(0, 0),              # subject 2
    c(1, 0, 1, 1),        # subject 3
    c(0, 0, 0, 0, 0),     # subject 4
    c(1, 1),              # subject 5
    c(1, 1, 1, 1, 1, 1),  # subject 6
    c(0, 0, 1, 0),        # subject 7
    c(1, 1, 1),           # subject 8
    c(0, 1, 0, 1, 0),     # subject 9
    c(0, 0)               # subject 10
  ))
)

# Total number of observations (rows of `d`), pooled over all subjects.
N <- nrow(d)

## Naive: Wilson score interval (continuity-corrected)
# Pools every observation as if independent, i.e. ignores that observations
# are repeated within subject. prop.test() applies Yates' continuity
# correction by default, so this is the continuity-corrected Wilson interval;
# pass correct = FALSE to get the plain Wilson score interval instead.
prop.test(sum(d$response), N)

## Naive: Clopper Pearson ("exact")
# Same pooling as above, using the exact binomial interval.
binom.test(sum(d$response), N)

## Event-level: Cluster Bootstrap
# Passes the subject ids and the 0/1 responses to stat.bootstrap.cluster().
# NOTE(review): stat.bootstrap.cluster() is neither defined nor loaded anywhere
# in this file -- confirm which script/package provides it before running.
# Presumably it resamples whole subjects; if so, the result changes from run to
# run unless a seed is set first -- TODO confirm.
stat.bootstrap.cluster(id = d$subject, val = d$response) 

## Rao-Scott Method
# Per-subject event counts (x) and observation counts (n). Both are computed
# from `d`, so they cannot drift out of sync with the data frame; tapply()
# returns both in the same (sorted subject id) order.
x <- as.vector(tapply(d$response, d$subject, sum))
n <- as.numeric(tapply(d$response, d$subject, length))
# NOTE(review): preprocess.clustered.binary.data() is not defined in this
# file; presumably it returns adjusted counts in $x and $n -- confirm.
a <- preprocess.clustered.binary.data(x, n)
# prop.test() applies Yates' continuity correction by default.
prop.test(a$x, a$n)

## Mixed effects model
# Logistic regression with a fixed intercept and a random intercept for each
# subject; estimate_relation() (from modelbased) is then called on the fit.
library(lme4)
library(modelbased)
g <- glmer(
  formula = response ~ 1 + (1 | subject),
  data = d,
  family = binomial
)
estimate_relation(g)

## GEE model
# Intercept-only logistic GEE with observations clustered by subject.
# No `corstr` is passed, so geeglm() falls back to its default working
# correlation ("independence" per the geepack documentation).
# estimate_relation() relies on modelbased having been attached already.
library(geepack)
f <- geeglm(
  formula = response ~ 1,
  data = d,
  id = subject,
  family = binomial
)
estimate_relation(f)


# Monday, March 31, 2025

# Example Analysis of Binary Data with Repeated Measurement Structure

# Example data in long format: 10 subjects, one row per binary observation.
d <- data.frame(
  subject = rep(1:10, times = c(5, 3, 1, 4, 5, 2, 5, 1, 2, 4)),
  response = c(
    1, 1, 1, 1, 1,  # subject 1
    0, 1, 1,        # subject 2
    1,              # subject 3
    1, 0, 1, 1,     # subject 4
    1, 1, 0, 1, 1,  # subject 5
    1, 1,           # subject 6
    0, 1, 1, 0, 1,  # subject 7
    0,              # subject 8
    0, 1,           # subject 9
    1, 1, 0, 1      # subject 10
  )
)
# Total number of observations, pooled over subjects.
N <- nrow(d)

## Naive: Wilson score interval (continuity-corrected)
# Pools every observation as if independent, i.e. ignores that observations
# are repeated within subject. prop.test() applies Yates' continuity
# correction by default, so this is the continuity-corrected Wilson interval;
# pass correct = FALSE to get the plain Wilson score interval instead.
prop.test(sum(d$response), N)

## Naive: Clopper Pearson ("exact")
# NOTE(review): an earlier note on this line recorded the output as
# "0.52632  (0.28864, 0.75553)". 0.52632 is 10/19, whereas the `d` defined in
# this section has 24 events in 32 observations (0.75), so that output came
# from different data -- re-run before quoting it.
binom.test(sum(d$response), N)

## Event-level: Cluster Bootstrap
# Passes the subject ids and the 0/1 responses to stat.bootstrap.cluster().
# NOTE(review): stat.bootstrap.cluster() is neither defined nor loaded anywhere
# in this file -- confirm which script/package provides it before running.
# Presumably it resamples whole subjects; if so, the result changes from run to
# run unless a seed is set first -- TODO confirm.
stat.bootstrap.cluster(id = d$subject, val = d$response) 

## Rao-Scott Method
# Per-subject event counts (x) and observation counts (n). Both are computed
# from `d` rather than typed in a second time, so they always match the data
# frame; tapply() returns both in the same (sorted subject id) order.
x <- as.vector(tapply(d$response, d$subject, sum))
n <- as.numeric(tapply(d$response, d$subject, length))
# NOTE(review): preprocess.clustered.binary.data() is not defined in this
# file; presumably it returns adjusted counts in $x and $n -- confirm.
a <- preprocess.clustered.binary.data(x, n)
# prop.test() applies Yates' continuity correction by default.
prop.test(a$x, a$n)

## Mixed effects model
# Logistic regression with a fixed intercept and a random intercept for each
# subject; estimate_relation() (from modelbased) is then called on the fit.
library(lme4)
library(modelbased)
g <- glmer(
  formula = response ~ 1 + (1 | subject),
  data = d,
  family = binomial
)
estimate_relation(g)

## GEE model
# Intercept-only logistic GEE with observations clustered by subject and an
# exchangeable working correlation within subject.
# estimate_relation() relies on modelbased having been attached already.
library(geepack)
f <- geeglm(
  formula = response ~ 1,
  data = d,
  id = subject,
  family = binomial,
  corstr = "exchangeable"
)
estimate_relation(f)