# Simulation Codes for obtaining Tables 2 and 3 in the paper 
# ``About predictions in spatial autoregressive models:
#   Optimal and almost optimal strategies'' (2016)
#
# Number of cores: simulations done with 48 cores 
#################################################################

insample <- function(j, path.dir, param.RData, nb.simul=1000)
{ # - j: an integer, giving the number of cores to be used
  # - path.directory: a character, giving the main path directory 
  # - param.RData: a character, giving the name of the file containing initial parameters to be tested ("param-in.RData")
  # - nb.simul: an integer, the number of simulations 
  
  # we load the file contianing the parameters to be tested
  load(paste(path.dir, param.RData, sep=""))
  # the number of combinaisons
  # NB : list.insample.param has been created in GLT.html and contained
  # the list with different configurations which has been tested
  nb.poss <- prod(unlist(lapply(list.insample.param , length)))
  # the array with the different possibilities 
  arr.poss <- array(1:nb.poss, unlist(lapply(list.insample.param, length)))
  n <- nrow(x) 
  ind.param <- as.numeric(which(arr.poss==j, arr.ind=T))
  rho.j <- list.insample.param[[1]][ind.param[1]]
  W.listw <- list.insample.param[[2]][[ind.param[2]]]
  sigma.j <- list.insample.param[[3]][ind.param[3]]
  W <- spdep::listw2mat(W.listw)
  
  MSE=matrix(0,nb.simul,3)
  for (l in 1:nb.simul)
  {
    A <- diag(1,n) - rho.j*W
  # set.seed(j*l);
  Y0 <- x%*%beta0 + rnorm(n, 0, sigma.j)
  y <- solve(A, Y0)
  don <- as.data.frame(x[,-1])
  don <- data.frame(don, y)
  
  # estimate the parameters of the SAR model
  sar.res <- spdep::lagsarlm(y~., data=don, W.listw, method="eigen")
  hat.rho <- sar.res$rho
  
  hat.A <- diag(1,n) - hat.rho*W
  
  # Calcul de Xbeta
  xbeta = x%*%coefficients(sar.res)[2:5]
  
  # Estimation TC
  y_TC = solve(hat.A, xbeta)
  # y_TC.spdep = as.numeric(predict.sarlm(sar.res, listw = W.kn, zero.policy = F, type = "TC")) 
  # all(round(y_TC,2)==round(y_TC.spdep,2))
  
  # Estimation TS
  y_TS = xbeta + hat.rho*(W%*%y)
  # y_TS.spdep =  as.numeric(predict.sarlm(sar.res, listw = W.kn, zero.policy = F, type = "TS")) 
  # all(round(y_TS,2)==round(y_TS.spdep,2))
  
  # estimation BP
  Qs <- (crossprod(hat.A))/sar.res$s2
  
  y_BP <- y_TC-diag(1/diag(Qs))%*%(Qs-diag(diag(Qs)))%*%(y-y_TC)
  # y_BP.spdep = as.numeric(predict.sarlm(sar.res, listw = W.kn, zero.policy = F, type = "BP")) 
  # all(round(y_BP,2)==round(y_BP.spdep,2))
  
  # computation of the residuals
  e_TS <- (y-y_TS)
  e_TC <- (y-y_TC)
  e_BP <- (y-y_BP) 
  
  # computation of the R2
  ym <- y-mean(y)
  rsqr1_TS = sum(e_TS^2)
  rsqr1_TC = sum(e_TC^2)
  rsqr1_BP = sum(e_BP^2)
  
  # computation of the MSE
  MSE[l,1] = mean(e_TS^2)
  MSE[l,2] = mean(e_TC^2)
  MSE[l,3] = mean(e_BP^2)
  }
  return(MSE)
}

# An example of use of parallel computing :
# we load the data 
path.dir <- "Z:/Thibault Pro/research/statistique spatiale/prédiction Michel Christine/article spatial economic analysis/codes/"
load(paste(path.dir, "/parameters/param-in.RData", sep=""))
# the number of possible combinaisons
nb.poss <- prod(unlist(lapply(list.insample.param, length)))

# To obtain the simulations results
require("snowfall")
sfInit(parallel=TRUE, cpus=nb.poss)
result.in <- sfClusterApplyLB(1:nb.poss, insample, path.dir=path.dir, param.RData="/parameters/param-in.RData", nb.simul=1000)
sfStop()
# one could save the files like this
save(result.in, file=paste(path.dir,"result/res-in.RData",sep=""))


