# sim1_da1.csv the 1st simulated data similar
# sim1_da2 and sim1_da3 sim1.csv simulated data,
# the first simulation dummy.sim1.csv dummy
# variables for the first simulated data with all
# the baseline code for simulation
nf <- 800
for (j in 1:20) {
set.seed(19870 + j)
x <- c("A", "B", "C")
sim.da1 <- NULL
for (i in 1:nf) {
# sample(x, 120, replace=TRUE)->sam
sim.da1 <- rbind(sim.da1, sample(x, 120, replace = TRUE))
}
sim.da1 <- data.frame(sim.da1)
col <- paste("Q", 1:120, sep = "")
row <- paste("Farm", 1:nf, sep = "")
colnames(sim.da1) <- col
rownames(sim.da1) <- row
# use class.ind() function in nnet package to encode
# dummy variables
library(nnet)
dummy.sim1 <- NULL
for (k in 1:ncol(sim.da1)) {
tmp = class.ind(sim.da1[, k])
colnames(tmp) = paste(col[k], colnames(tmp))
dummy.sim1 = cbind(dummy.sim1, tmp)
}
dummy.sim1 <- data.frame(dummy.sim1)
# set 'C' as the baseline delete baseline dummy variable
base.idx <- 3 * c(1:120)
dummy1 <- dummy.sim1[, -base.idx]
# simulate independent variable for different values of
# r simulate based on one value of r each time r=0.1,
# get the link function
s1 <- c(rep(c(1/10, 0, -1/10), 40),
rep(c(1/10, 0, 0), 40),
rep(c(0, 0, 0), 40))
link1 <- as.matrix(dummy.sim1) %*% s1 - 40/3/10
# Other settings ---------------------------
# r = 0.25
# s1 <- c(rep(c(1/4, 0, -1/4), 40),
# rep(c(1/4, 0, 0), 40),
# rep(c(0, 0, 0), 40))
# link1 <- as.matrix(dummy.sim1) %*% s1 - 40/3/4
# r = 0.5
# s1 <- c(rep(c(1/2, 0, -1/2), 40),
# rep(c(1/2, 0, 0), 40),
# rep(c(0, 0, 0), 40))
# link1 <- as.matrix(dummy.sim1) %*% s1 - 40/3/2
# r = 1
# s1 <- c(rep(c(1, 0, -1), 40),
# rep(c(1, 0, 0), 40),
# rep(c(0, 0, 0), 40))
# link1 <- as.matrix(dummy.sim1) %*% s1 - 40/3
# r = 2
# s1 <- c(rep(c(2, 0, -2), 40),
# rep(c(2, 0, 0), 40),
# rep(c(0, 0, 0), 40))
#
# link1 <- as.matrix(dummy.sim1) %*% s1 - 40/3/0.5
# calculate the outbreak probability
hp1 <- exp(link1)/(exp(link1) + 1)
# based on the probability hp1, simulate response
# variable: res
res <- rep(9, nf)
for (i in 1:nf) {
res[i] <- sample(c(1, 0), 1, prob = c(hp1[i], 1 -
hp1[i]))
}
# da1 with response variable, without group indicator
# da2 without response variable, with group indicator
# da3 without response variable, without group indicator
dummy1$y <- res
da1 <- dummy1
y <- da1$y
ind <- NULL
for (i in 1:120) {
ind <- c(ind, rep(i, 2))
}
da2 <- rbind(da1[, 1:240], ind)
da3 <- da1[, 1:240]
# save simulated data
write.csv(da1, paste("sim", j, "_da", 1, ".csv", sep = ""),
row.names = F)
write.csv(da2, paste("sim", j, "_da", 2, ".csv", sep = ""),
row.names = F)
write.csv(da3, paste("sim", j, "_da", 3, ".csv", sep = ""),
row.names = F)
write.csv(sim.da1, paste("sim", j, ".csv", sep = ""),
row.names = F)
write.csv(dummy.sim1, paste("dummy.sim", j, ".csv",
sep = ""), row.names = F)
}