14.2 Swine Disease Breakout Data

# sim1_da1.csv the 1st simulated data similar
# sim1_da2 and sim1_da3 sim1.csv simulated data,
# the first simulation dummy.sim1.csv dummy
# variables for the first simulated data with all
# the baseline code for simulation

nf <- 800
for (j in 1:20) {
    set.seed(19870 + j)
    x <- c("A", "B", "C")
    sim.da1 <- NULL
    for (i in 1:nf) {
        # sample(x, 120, replace=TRUE)->sam
        sim.da1 <- rbind(sim.da1, sample(x, 120, replace = TRUE))
    }
    
    sim.da1 <- data.frame(sim.da1)
    col <- paste("Q", 1:120, sep = "")
    row <- paste("Farm", 1:nf, sep = "")
    colnames(sim.da1) <- col
    rownames(sim.da1) <- row
    
    # use class.ind() function in nnet package to encode
    # dummy variables
    library(nnet)
    dummy.sim1 <- NULL
    for (k in 1:ncol(sim.da1)) {
        tmp = class.ind(sim.da1[, k])
        colnames(tmp) = paste(col[k], colnames(tmp))
        dummy.sim1 = cbind(dummy.sim1, tmp)
    }
    dummy.sim1 <- data.frame(dummy.sim1)
    
    # set 'C' as the baseline delete baseline dummy variable
    
    base.idx <- 3 * c(1:120)
    dummy1 <- dummy.sim1[, -base.idx]
    
    # simulate independent variable for different values of
    # r simulate based on one value of r each time r=0.1,
    # get the link function
    
    s1 <- c(rep(c(1/10, 0, -1/10), 40), 
            rep(c(1/10, 0, 0), 40), 
            rep(c(0, 0, 0), 40))
    link1 <- as.matrix(dummy.sim1) %*% s1 - 40/3/10
    
    # Other settings  ---------------------------
    # r = 0.25
    # s1 <- c(rep(c(1/4, 0, -1/4), 40), 
    #        rep(c(1/4, 0, 0), 40), 
    #        rep(c(0, 0, 0), 40))
    # link1 <- as.matrix(dummy.sim1) %*% s1 - 40/3/4
    
    # r = 0.5
    # s1 <- c(rep(c(1/2, 0, -1/2), 40), 
    #        rep(c(1/2, 0, 0), 40), 
    #        rep(c(0, 0, 0), 40))
    # link1 <- as.matrix(dummy.sim1) %*% s1 - 40/3/2
    
    # r = 1
    # s1 <- c(rep(c(1, 0, -1), 40), 
    #        rep(c(1, 0, 0), 40), 
    #        rep(c(0, 0, 0), 40))
    # link1 <- as.matrix(dummy.sim1) %*% s1 - 40/3
    
    # r = 2
    # s1 <- c(rep(c(2, 0, -2), 40), 
    #        rep(c(2, 0, 0), 40), 
    #        rep(c(0, 0, 0), 40))
    # 
    # link1 <- as.matrix(dummy.sim1) %*% s1 - 40/3/0.5
    
    # calculate the outbreak probability
    hp1 <- exp(link1)/(exp(link1) + 1)
    
    # based on the probability hp1, simulate response
    # variable: res
    res <- rep(9, nf)
    for (i in 1:nf) {
        res[i] <- sample(c(1, 0), 1, prob = c(hp1[i], 1 - 
            hp1[i]))
    }
    
    # da1 with response variable, without group indicator
    # da2 without response variable, with group indicator
    # da3 without response variable, without group indicator
    
    dummy1$y <- res
    da1 <- dummy1
    y <- da1$y
    ind <- NULL
    for (i in 1:120) {
        ind <- c(ind, rep(i, 2))
    }
    
    da2 <- rbind(da1[, 1:240], ind)
    da3 <- da1[, 1:240]
    
    # save simulated data
    write.csv(da1, paste("sim", j, "_da", 1, ".csv", sep = ""), 
        row.names = F)
    write.csv(da2, paste("sim", j, "_da", 2, ".csv", sep = ""), 
        row.names = F)
    write.csv(da3, paste("sim", j, "_da", 3, ".csv", sep = ""), 
        row.names = F)
    write.csv(sim.da1, paste("sim", j, ".csv", sep = ""), 
        row.names = F)
    write.csv(dummy.sim1, paste("dummy.sim", j, ".csv", 
        sep = ""), row.names = F)
}