A.3 Swine Disease Breakout Data

# sim1_da1.csv the 1st simulated data similar
# sim1_da2 and sim1_da3 sim1.csv simulated data,
# the first simulation dummy.sim1.csv dummy
# variables for the first simulated data with all
# the baseline code for simulation

nf <- 800
for (j in 1:20) {
    set.seed(19870 + j)
    x <- c("A", "B", "C")
    sim.da1 <- NULL
    for (i in 1:nf) {
        # sample(x, 120, replace=TRUE)->sam
        sim.da1 <- rbind(sim.da1, sample(x, 120, replace = TRUE))
    }
    
    sim.da1 <- data.frame(sim.da1)
    col <- paste("Q", 1:120, sep = "")
    row <- paste("Farm", 1:nf, sep = "")
    colnames(sim.da1) <- col
    rownames(sim.da1) <- row
    
    # use class.ind() function in nnet package to
    # encode dummy variables
    library(nnet)
    dummy.sim1 <- NULL
    for (k in 1:ncol(sim.da1)) {
        tmp = class.ind(sim.da1[, k])
        colnames(tmp) = paste(col[k], colnames(tmp))
        dummy.sim1 = cbind(dummy.sim1, tmp)
    }
    dummy.sim1 <- data.frame(dummy.sim1)
    
    # set 'C' as the baseline delete baseline dummy
    # variable
    
    base.idx <- 3 * c(1:120)
    dummy1 <- dummy.sim1[, -base.idx]
    
    # simulate independent variable for different
    # values of r simulate based on one value of r each
    # time r=0.1, get the link function
    s1 <- c(rep(c(1/10, 0, -1/10), 40), rep(c(1/10, 
        0, 0), 40), rep(c(0, 0, 0), 40))
    link1 <- as.matrix(dummy.sim1) %*% s1 - 40/3/10
    
    # r=0.25
    # c(rep(c(1/4,0,-1/4),40),rep(c(1/4,0,0),40),rep(c(0,0,0),40))->s1
    # as.matrix(dummy.sim1)%*%s1-40/3/4->link1
    
    # r=0.5
    # c(rep(c(1/2,0,-1/2),40),rep(c(1/2,0,0),40),rep(c(0,0,0),40))->s1
    # as.matrix(dummy.sim1)%*%s1-40/3/2->link1
    
    # r=1
    # c(rep(c(1,0,-1),40),rep(c(1,0,0),40),rep(c(0,0,0),40))->s1
    # as.matrix(dummy.sim1)%*%s1-40/3->link1
    
    # r=2
    # c(rep(c(2,0,-2),40),rep(c(2,0,0),40),rep(c(0,0,0),40))->s1
    # as.matrix(dummy.sim1)%*%s1-40/3/0.5->link1
    
    
    # calculate the outbreak probability
    hp1 <- exp(link1)/(exp(link1) + 1)
    
    # based on the probability hp1, simulate response
    # variable: res
    res <- rep(9, nf)
    for (i in 1:nf) {
        res[i] <- sample(c(1, 0), 1, prob = c(hp1[i], 
            1 - hp1[i]))
    }
    
    # da1 with response variable, without group
    # indicator da2 without response variable, with
    # group indicator da3 without response variable,
    # without group indicator
    
    dummy1$y <- res
    da1 <- dummy1
    y <- da1$y
    ind <- NULL
    for (i in 1:120) {
        ind <- c(ind, rep(i, 2))
    }
    
    da2 <- rbind(da1[, 1:240], ind)
    da3 <- da1[, 1:240]
    
    # save simulated data
    write.csv(da1, paste("sim", j, "_da", 1, ".csv", 
        sep = ""), row.names = F)
    write.csv(da2, paste("sim", j, "_da", 2, ".csv", 
        sep = ""), row.names = F)
    write.csv(da3, paste("sim", j, "_da", 3, ".csv", 
        sep = ""), row.names = F)
    write.csv(sim.da1, paste("sim", j, ".csv", sep = ""), 
        row.names = F)
    write.csv(dummy.sim1, paste("dummy.sim", j, ".csv", 
        sep = ""), row.names = F)
}