library(MuMIn)
options(na.action = "na.fail")
library(xtable)

source("RJMCMCfunctions.R")

simdata <- function(N){
  # PREDICTORS
  P <- 2    # number of predictors, if want more will need a larger rotation matrix
  preds1 <- matrix(runif(2*N*P),nrow=2*N,ncol=P)   # three predictors (SET1)
  #rot1   <- c( 0.36, 0.48, -0.8, -0.80, 0.60, 0.0, 0.48, 0.64, 0.6)  
  rot1   <- c( 0.96, -0.26, 0.26, 0.96)  
  rot1m  <- matrix(rot1, nrow=P, ncol=P)           # rotation matrix; t(rot1m) = solve(rot1m)
  preds2 <- preds1 %*% rot1m                     # rotated predictors (SET2)
  preds2 <- preds2 + matrix(runif(2*N*P, min=0, max=0.2), nrow=2*N, ncol=P) # with a bit of noise
  preds  <- cbind(preds1, preds2)
  
  # RESPONSE
  b0 <- 1; b1 <- 4; b2 <- 2;   #regression coefficients 
  y1 <- b0 + b1 * preds[,1] + b2 * preds[,2]   # response as determined by preds1
  bs <- c(b1, b2) %*% rot1m; b3 <- bs[1]; b4 <- bs[2]; #rotated coefficients
  y2 <- b0 + b3*preds[,3] + b4*preds[,4]   # response as determined by preds2
  tmp <- sample(1:(2*N), size=N)   #some sites get y1 and some y2
  y <- y1; y[tmp] <- y2[tmp]
  y <- y + rnorm(2*N, sd=1)  # some noise, for more realism
  
  # COMPILE ALL DATA
  mydf<-data.frame(y=y, p=preds)    #dataframe with response and all predictors
}

stacking <- function(test.preds, test.obs){
  # this function computes the optimal weight for a single train/test split;
  # from the models fitted to the training data it uses the predictions to the test;
  # then it optimises the weight vector across the models for combining these 
  # predictions to the observed data in the test;
  # trick 1: each weight is between 0 and 1: w <- exp(-w)
  # trick 2: weights sum to 1: w <- w/sum(w)
  #
  # weights are weights for each model, between -infty and +infty!
  # preds are predictions from each of the models
  
  if (NCOL(test.preds) >= length(test.obs)) stop("Increase the test set! More models than test points.")
  
  # now do an internal splitting into "folds" data sets:
  weightsopt <- function(ww){ 
    # function to compute RMSE on test data
    w <- c(1, exp(ww)); w <- w/sum(w) ## w all in (0,1) SIMON; set weight1 always to 1, other weights are scaled accordingly (this leads to a tiny dependence of optimal weights on whether model1 is any good or utter rubbish; see by moving the 1 to the end instead -> 3rd digit changes)
    pred <- as.vector(test.preds %*% w)
    return(sqrt(mean((pred - test.obs)^2)))
  }
  
  ops <- optim(par=runif(NCOL(test.preds)-1), weightsopt, method="BFGS")
  if (ops$convergence != 0) stop("Optimisation not converged!")
  round(c(1, exp(ops$par))/sum(c(1, exp(ops$par))), 4)
}


csweights <- function(R, eps=1E-6, maxit=50, verbose=FALSE){
  # implements Garthwaite & Mubwandarikwa's cos-square scheme (their appendix)
  # eps and maxit are chosen without much testing; not converging within 20 iterations doesn't actually mean that something is wrong; mostly it works much faster, though, i.e. within only a few iterations.
  require(expm)
  D1 <- diag(rep(2, NCOL(R)))
  D2 <- diag(NCOL(R))
  counter = 0
  while (any( abs(diag(D1) - diag(D2)) > eps)){
    ED <- eigen(D1 %*% R %*% D1)
    Q <- ED$vectors
    Lambda <- diag(ED$values)
    ## test:
    #Q %*% Lambda %*% solve(Q) # fine
    Lambda12 <- sqrtm(Lambda)
    E <- solve(D1) %*% Q %*% Lambda12 %*% solve(Q)
    D2 <- D1
    D1 <- diag( diag(Re(E)))
    counter <- counter + 1
    if (verbose) cat(counter, " ")
    if (counter >= maxit){
      warning("Maximum number of iterations reached without convergence!")
      break
    }
  }
  w <- diag(D2)^2 / sum(diag(D2)^2)
  return(w)
}
# end preamble
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

R <- 100 # put this to at least 100
M <- 16 # number of models
weights.arr <- array(NA, dim=c(R, 21+M, M)) # repeats, approaches, models
results.mat <- matrix(NA, nrow=R, ncol=21+M)
colnames(results.mat) <- c("single best", "full", "1/M", "median", "rjMCMC", "rjMCMC median", "Bayes Factor", "AIC", "BIC", "Cp", "WAIC", "LOOrmse", "LOOR2", "EBMA", "boot", "stacking", "JMA", "Bates-Granger", "cos-squared", "MBMC lm", "MBMC rf", paste0("m", 1:M))
for (r in 85:R){
  set.seed(r)
  if (r %in% c(50, 85)) set.seed(r*10) # some simulation cause convergence problems
  N <- 70 # number of data points
  simdats <- simdata(N)
  train <- simdats[1:N,]
  test <- simdats[(N+1) : (2*N),]
  rm(simdats)
  
  m.all <- lm(y~., data=train) # fit linear terms only; use only first half of the data
  mytab <- dredge(m.all, rank=AIC)
  mytab
  model.list <- get.models(mytab, subset=NA)
  # sort the model list from 1 : M (to make all outputs follow the same sequence, from simplest to full model):
  model.list <- model.list[order(as.numeric(rownames(mytab)))]
  M <- length(model.list)
  rm(m.all)
  
  truth <- test[,1] ## until we have the true values from the simulation, we use the test as stand-in
  preds <- sapply(model.list, predict, newdata=test)
  
  singleRMSEs <- apply(preds, 2, function(x) (sqrt(mean((x - truth)^2))))
  RMSEsinglebest <- min(singleRMSEs, na.rm=T)
  sbw <- rep(0, M)
  sbw[which(singleRMSEs == RMSEsinglebest)] <- 1
  weights.arr[r, 1, ] <- sbw  
  rm(sbw)
  
  # full model
  RMSEfull <- singleRMSEs[16]
  weights.arr[r, 2, ] <- c(rep(0, 15), 1)
  
  # equal weight
  weighted1overM <- preds %*% rep(1/M, M)
  (RMSE1overM <- sqrt(mean((weighted1overM - truth)^2)))
  weights.arr[r, 3, ] <- rep(1/M, M)
  rm(weighted1overM)
  
  # median
  medians <- apply(preds, 1, median)
  (RMSEmedian <- sqrt(mean((medians - truth)^2)))
  weights.arr[r,4,] <- rep(NA, 16)
  rm(medians)
  
  # rjMCMC
  #source("RJMCMCfunctions.R")
  X <- as.matrix(cbind(1, train[,-1])) # include 1 for the intercept
  y <- train$y
  # starting values for coefficients
  beta_vec <- runif(ncol(X), -1, 1)
  # starting value for sigma
  sigma <- rgamma(1, 1, 1)
  llikhood <- sum(dnorm(y, X%*%beta_vec, sigma, log = T))
  niter <- 1e5 #4000000
  burnin <- niter/2
  thin <- 20
  # store values
  beta_mat <- matrix(0, nrow = (niter-burnin)/thin, ncol = length(beta_vec))
  sigma_vec <- c()
  #choose prior parameters for coefficients
  prior_sigma_beta <- 3
  prior_pars <- c(0, prior_sigma_beta)
  #choose parameters of proposal distributions for updates
  prop_sigma_beta <- 3
  prop_pars <- c(0, prop_sigma_beta)
  # now run rjMCMC:
  for (iter in 1:niter){
    temp1 <- updateparam(beta_vec, sigma, llikhood, prior_beta = "norm", prior_beta_par = prior_pars, prop_beta = "norm", prop_beta_par = prop_pars[2])
    beta_vec <- temp1$beta_vec
    sigma <- temp1$sigma
    llikhood <- temp1$llikhood
    # between models update
    temp2 <- updatemodel(beta_vec, sigma, llikhood,  prior_beta = "norm", prior_beta_par = prior_pars, prop_beta = "norm", prop_beta_par = prop_pars)
    # read current values for beta, current model and log-likelihood value
    beta_vec <- temp2$beta_vec
    llikhood <- temp2$llikhood
    # Store output 
    if (iter>burnin & (iter-burnin) %% thin == 0){
      ind_st  <-  ceiling((iter-burnin)/thin)
      beta_mat[ind_st, ] <- beta_vec
      sigma_vec[ind_st] <- sigma
    } 
    #there are 16 possible models, count the number of times each model appears
    model_mat <- matrix(0, nrow = nrow(beta_mat), ncol = 1)
    for (sim in 1:nrow(beta_mat)){
      model_mat[sim,] <- paste(c((1:length(beta_vec))[beta_mat[sim,1:length(beta_vec)]!=0]), sep="", collapse = "+")
    }
  }  
  model_weights <- sort(round(table(model_mat)/nrow(model_mat), 3), TRUE)
  allmodelnames <- c("1", paste0("1+", unlist(sapply(1:4, function(n) apply(combn(2:5, n), 2, paste0, collapse="+")))))
  weightsrjMCMC <- rep(0, M)
  names(weightsrjMCMC) <- allmodelnames
  # put values into vector of M models:
  for (i in 1:length(model_weights)){# loop through weights of rjMCMC
    ind <- which(allmodelnames == names(model_weights[i]) )
    weightsrjMCMC[ind] <- model_weights[i]
  }
  # CHECK sequence of models to be the same as after dredge!!
  rightSequence <- c(1,2,3,6,4,7,9,12,5,8,10,13,11,14,15,16) # this is the order in which rjMCMC-models should occor
  #allmodelnames[rightSequence] # check this against varIndMatrix below
  weightsrjMCMC <- weightsrjMCMC[rightSequence]
  weights.arr[r,5,] <- weightsrjMCMC
  weightedPredsrjMCMC <- preds %*% weightsrjMCMC
  RMSErjMCMC <- sqrt(mean((weightedPredsrjMCMC - truth)^2))
  # rjMCMCmedian
  testpredsrjMCMC <- as.matrix(cbind(1, test[,-1])) %*% t(beta_mat)
  RMSErjMCMCmedian <- sqrt(mean((apply(testpredsrjMCMC, 1, median) - truth)^2))
  weights.arr[r,6,] <- weightsrjMCMC # uses same combination as rjMCMC before
  rm(X, Y, temp1, temp2, beta_vec, sim, weightsrjMCMC, weightedPredsjrMCMC, model_mat, llikhood, ind_st, iter, i)
  gc()
  
  # Bayes Factor
  # first set up an indicator matrix for all models:
  varIndMatrix <- ifelse(is.na(mytab[, 1:5]), 0, 1)
  varIndMatrix <- varIndMatrix[order(as.numeric(rownames(varIndMatrix))),] # sort sequence, same as for model.list!
  X <- cbind(1, train[1:N, -1]) # only the predictors, plus intercept as first column
  Y <- train[1:N, 1]
  likelihood <- function(x, option = 1){
    # a function to switch between the four models and compute the respective likelihood
    # x is a vector with betas, plus a parameter for sd, i.e. NCOL(X)+1
    res = as.matrix(X) %*% (x[-(NCOL(X)+1)] * varIndMatrix[option, ]) # sets all parameters to 0 that are not in the model
    ll = sum(dnorm(res - Y), sd = x[NCOL(X)+1], log = T)
    return(ll)
  }
  prior <- function(x){
    # double-exponential/Laplace prior, with lambda set to 10
    ll = sum(dexp(abs(10*x)), log = T)
    return(ll)
  }
  # We loop the setup, and lapply the run:
  setups <- list()
  for (m in 1:M){
    setups[[m]] <- createBayesianSetup(likelihood = function(x) likelihood(x, option  = m), prior=createPrior(density = prior, lower = c(rep(-5, NCOL(X)), 0.0001), upper = c(rep(5, NCOL(X)),5)))
  }
  resBayesFits <- lapply(setups, runMCMC, sampler = "Metropolis", settings = list(iterations = 30000, startValue = rep(0, NCOL(X)+1)))
  # extract the marginal likelihoods:
  ML <- unlist(sapply(resBayesFits, function(x) marginalLikelihood(x)[1]))
  # compute Bayes Factor weights:
  (weightsBF <- exp(ML) / sum(exp(ML)))
  #weightsBF
  weightedPredsBF <- preds %*% weightsBF
  RMSEBF <- sqrt(mean((weightedPredsBF - truth)^2))
  weights.arr[r, 7, ] <- weightsBF
  rm(X, Y, m, resBayesFits, ML, weightedPredsBF)
  gc()
  
  # AIC-based model weights
  AICs <- sapply(model.list, AICc)
  weightsAIC <- exp(-0.5*(AICs - min(AICs)))/sum(exp(-0.5*(AICs-min(AICs))))
  weights.arr[r, 8, ] <- weightsAIC
  weightedPredsAIC <- preds %*% weightsAIC
  RMSEAIC <- sqrt(mean((weightedPredsAIC - truth)^2))
  rm(AICs, weightsAIC, weightedPredsAIC)
  
  # BIC-based model weights
  BICs <-  sapply(model.list, BIC)
  weightsBIC <- exp(-0.5*(BICs-min(BICs)))/sum(exp(-0.5*(BICs-min(BICs))))
  weights.arr[r, 9, ] <- weightsBIC
  weightedPredsBIC <- preds %*% weightsBIC
  RMSEBIC <- sqrt(mean((weightedPredsBIC - truth)^2))
  rm(BICs, weightsBIC, weightedPredsBIC)
  
  # Mallows' Cp weights
  Cps <- sapply(model.list, Cp)
  weightsCp <- exp(-0.5*(Cps-min(Cps)))/sum(exp(-0.5*(Cps - min(Cps))))
  weightedPredsCp <- preds %*% weightsCp
  weights.arr[r, 10,] <- weightsCp
  RMSECp <- sqrt(mean((weightedPredsCp - truth)^2))
  rm(Cps, weightsCp, weightedPredsCp)
  
  # Widely applicable information criterion (WAIC)
  waics <- sapply(model.list, function(x) WAIC(glm(x))$WAIC2) # requires reformulation as glm due to a little programming bug
  weightsWAIC <- exp(-0.5*waics)/sum(exp(-0.5*waics))
  weights.arr[r, 11, ] <- weightsWAIC
  weightedPredsWAIC <- preds %*% weightsWAIC
  RMSEWAIC <- sqrt(mean((weightedPredsWAIC - truth)^2))
  rm(waics, weightsWAIC, weightedPredsWAIC)
  
  # Leave-one-out cross-validation (LOOCV)
  looAll <- matrix(NA, nrow=N, ncol=M)
  for (i in 1:N){
    fm.loo <- lapply(model.list, function(x) update(x, .~., data=train[-i, ]))
    looAll[i,] <- suppressWarnings(sapply(fm.loo, function(x) predict(x, newdata=train[i, ,drop=F])))
  }
  # now choose a criterion for evaluation, e.g. RMSE:
  RMSE <- apply(looAll, 2, function(x) sqrt(mean((x-truth)^2)))
  # or R2: 
  R2 <- apply(looAll, 2, function(x) cor(x, truth)^2)
  # turn into weights:
  weightsRMSE <- (exp(-1*(RMSE-min(RMSE))))/sum(exp(-1*(RMSE-min(RMSE))))
  weights.arr[r, 12, ] <- weightsRMSE
  weightsR2 <- (exp((R2-min(R2))))/sum(exp((R2-min(R2))))
  weights.arr[r, 13, ] <- weightsR2
  weightedPredslooRMSE <- preds %*% weightsRMSE
  RMSEloormse <- sqrt(mean((weightedPredslooRMSE - truth)^2))
  weightedPredslooR2 <- preds %*% weightsR2
  RMSElooR2 <- sqrt(mean((weightedPredslooR2 - truth)^2))
  rm(looAll, fm.loo, RMSE, R2, weightsRMSE, weightsR2, weightedPredslooR2, weightedPredslooRMSE)
  
  # BMA-EM
  #require(EBMAforecast)
  set.seed(1)
  trainsplit <- sample(rep(c(T, F), N/2))
  trainsub1 <- train[trainsplit, ]
  trainsub2 <- train[!trainsplit, ]
  trainsub1fits <- lapply(model.list, update, .~. , data=trainsub1) # re-fit models on train1
  bmafits <- sapply(trainsub1fits, predict, newdata=trainsub2) # predict them to train2
  bmaY <- trainsub2$y
  EBMAdata <- makeForecastData(.predCalibration=bmafits, .outcomeCalibration=bmaY, .modelNames=paste0("m", 1:M)) # make dataset
  EBMAfit <- calibrateEnsemble(EBMAdata, model="normal") # compute weights
  weights.arr[r, 14, ] <- EBMAfit@modelWeights
  weightedPredsEBMA <- preds %*% EBMAfit@modelWeights
  RMSEebma <- sqrt(mean((weightedPredsEBMA - truth)^2))
  rm(bmafits, bmaY, EBMAfit, EBMAdata, weightedPredsEBMA, trainsplit, trainsub1, trainsub2, trainsub1fits)
  
  # Naive bootstrap
  Nboots <- 1000
  bestCounter <- rep(0, M)
  for (i in 1:Nboots){
    bsfits <- lapply(model.list, update, .~. , data=train[sample(nrow(train), nrow(train), replace=T),]) # re-fit models on train1
    bsAICs <- sapply(bsfits, AIC)
    bestCounter[which.min(bsAICs)] <- bestCounter[which.min(bsAICs)] + 1
  }
  weightsboot <- bestCounter/sum(bestCounter)
  weights.arr[r, 15, ] <- weightsboot
  weightedPredsBoot <- preds %*% weightsboot
  RMSEboot <- sqrt(mean((weightedPredsBoot - truth)^2))
  rm(bestCounter, bsfits, bsAICs, weightsboot, weightedPredsBoot)
  
  # Stacking
  Nstack <- 100
  weightsStack <- matrix(NA, ncol=M, nrow=Nstack)
  colnames(weightsStack) <- paste0("m", 1:M)
  i = 0
  while (i < Nstack){
    trainsplit <- sample(rep(c(T, F), N/2))
    trainsub1 <- train[trainsplit, ]
    trainsub2 <- train[!trainsplit, ]
    trainsub1fits <- lapply(model.list, update, .~. , data=trainsub1) # re-fit models on train1
    stackpreds <- sapply(trainsub1fits, predict, newdata=trainsub2) # predict them to train2
    optres <- try(stacking(test.preds=stackpreds, test.obs=trainsub2$y), silent=T)
    if (inherits(optres, "try-error"))  next;
    i = i + 1
    weightsStack[i,] <- optres
    rm(trainsplit, trainsub1, trainsub2, trainsub1fits, stackpreds)
  }
  weightsStacking <- colSums(weightsStack)/sum(weightsStack)
  weights.arr[r, 16, ] <- weightsStacking
  weightedPredsStack <- preds %*% weightsStacking
  RMSEstack <- sqrt(mean((weightedPredsStack - truth)^2))
  rm(weightsStacking, weightedPredsStack, weightsStack)
  
  # Jackknife
  # 1. fit the candidate models, omitting one data point at a time:
  J <- matrix(NA, nrow(train), M) # matrix with jackknifed predictions
  for (i in 1:nrow(train)){#1:NROW(Anguilla_train)){
    jfits <- lapply(model.list, update, .~. , data=train[-i,]) # re-fit models on train1
    J[i,] <- sapply(jfits, predict, newdata=train[i, , drop=F]) # predict them to train2
    rm(jfits)
  }
  # 2. compute RMSE for a value of w, given J:
  weightsopt <- function(ww, J){ 
    # function to compute RMSE on test data
    # at some point to also use likelihood instead of RMSE, but primarily for 0/1 data
    w <- c(1, exp(ww)); w <- w/sum(w) 
    Jpred <- J %*% w
    return(sqrt(mean((Jpred - train$y)^2)))
  }
  ops <- optim(par=runif(NCOL(J)-1), weightsopt, method="BFGS", control=list(maxit=5000), J=J)
  if (ops$convergence != 0) stop("Not converged!")
  round(weightsJMA <- c(1, exp(ops$par))/sum(c(1, exp(ops$par))),3)
  weights.arr[r, 17, ] <- weightsJMA
  weightedPredsJMA <- preds %*% weightsJMA
  RMSEjma <- sqrt(mean((weightedPredsJMA - truth)^2))
  rm(J, ops, weightedPredsJMA)
  
  # Bates-Granger
  set.seed(1)
  trainsplit <- sample(rep(c(T, F), N/2))
  trainsub1 <- train[trainsplit, ]
  trainsub2 <- train[!trainsplit, ]
  trainsub1fits <- lapply(model.list, update, .~. , data=trainsub1) # re-fit models on trainsub1
  trainsub2preds <- sapply(trainsub1fits, predict, newdata=trainsub2) # predict them to train2
  trainsub2resid <- trainsub2$y - trainsub2preds
  Sigma <- cov(trainsub2resid[,-c(4,6,7,8,10:16)]) # PROBLEM: models are nested ... removed models manually ...
  #Sigma <- cov(trainsub2resid)
  ones <- rep(1, nrow(Sigma))
  weightsBG <- rep(0, M)
  weightsBG[c(1,2,3,5,9)] <- solve(t(ones) %*% solve(Sigma) %*% ones)%*%ones%*%solve(Sigma)
  weights.arr[r, 18, ] <- weightsBG
  weightedPredsBG <- preds %*% weightsBG
  RMSEbg <- sqrt(mean((weightedPredsBG - truth)^2))
  rm(trainsplit, trainsub1, trainsub2, trainsub1fits, trainsub2preds, trainsub2resid, ones, weightsBG, weightedPredsBG, Sigma)
  
  # cos-squared
  # We drop the intercept-only model from the set of models, i.e. assign a weight of 0.
  RR <- cor(preds[,-1])
  weightsCS <- c(0, csweights(RR, verbose=F, maxit=500))
  weights.arr[r, 19, ] <- weightsCS
  weightedPredsCS <- preds %*% weightsCS
  RMSEcs <- sqrt(mean((weightedPredsCS - truth)^2))
  rm(RR, weightsCS, weightedPredsCS)
  
  # Model-based model combinations
  set.seed(1)
  trainsplit <- sample(rep(c(T, F), N/2))
  trainsub1 <- train[trainsplit, ]
  trainsub2 <- train[!trainsplit, ]
  trainsub1fits <- lapply(model.list, update, .~. , data=trainsub1) # re-fit models on trainsub1
  mbmcfits <- sapply(trainsub1fits, predict, newdata=trainsub2) # predict them to train2
  colnames(mbmcfits) <- paste0("mbmcfits", 1:M)
  summary(mbmc1 <- lm(trainsub2$y ~ ., data=as.data.frame(mbmcfits))) # this is a simple linear model
  #require(mgcv)
  #summary(mbmc1b <- gam(train2$Y ~ s(mbmcfits1, k=5, bs="cs")+s(mbmcfits2, k=5, bs="cs")+s(mbmcfits3, k=5, bs="cs")+s(mbmcfits4, k=5, bs="cs"), data=as.data.frame(mbmcfits), method="ML")) 
  # alternatively, we can use a machine-learning algorithm, e.g. ANN or randomForest:
  require(randomForest)
  (mbmc2 <- randomForest(x=mbmcfits, y=trainsub2$y))
  mbmcpreds <- sapply(trainsub1fits, predict, newdata=test)
  colnames(mbmcpreds) <- paste0("mbmcfits", 1:M)
  # MBMC prediction with the linear model:
  weightedPredsMBMC1 <- predict(mbmc1, newdata=as.data.frame(mbmcpreds))
  RMSEmbmc1 <- sqrt(mean((weightedPredsMBMC1 - truth)^2))
  # MBMC prediction with the wild model:
  #weightedPredsMBMC1b <- predict(mbmc1b, newdata=as.data.frame(mbmcpreds))
  #(RMSEmbmc1b <- sqrt(mean((weightedPredsMBMC1b - truth)^2)))
  # MBMC prediction with randomForest:
  weightedPredsMBMC2 <- predict(mbmc2, newdata=as.data.frame(mbmcpreds))
  RMSEmbmc2 <- sqrt(mean((weightedPredsMBMC2 - truth)^2))
  rm(trainsplit, trainsub1, trainsub2, trainsub1fits, mbmcfits, mbmcpreds, mbmc1, mbmc2, weightedPredsMBMC1, weightedPredsMBMC2)
  
  RMSEapproaches <- c(RMSEsinglebest, RMSEfull, RMSE1overM, RMSEmedian, RMSErjMCMC, RMSErjMCMCmedian, RMSEBF, RMSEAIC, RMSEBIC, RMSECp, RMSEWAIC, RMSEloormse, RMSElooR2, RMSEebma, RMSEboot, RMSEstack, RMSEjma, RMSEbg, RMSEcs, RMSEmbmc1, RMSEmbmc2, singleRMSEs)
  results.mat[r,] <- RMSEapproaches
  save(results.mat, weights.arr, file="App_case1new_comparison_results.Rdata")
  rm(simdats, train, test, RMSEsinglebest, RMSEfull, RMSE1overM, RMSEmedian, RMSErjMCMC, RMSErjMCMCmedian, RMSEBF, RMSEAIC, RMSEBIC, RMSECp, RMSEWAIC, RMSEloormse, RMSElooR2, RMSEebma, RMSEboot, RMSEstack, RMSEjma, RMSEbg, RMSEcs, RMSEmbmc1, RMSEmbmc2, singleRMSEs, y, preds)
  
  print(r)
  
}#end for-loop

#   
# 
# # re-run cos-squared without the intercept-only model:
# for (r in 1:R){
#   set.seed(r)
#   if (r %in% c(50, 85)) set.seed(r*10) # some simulation cause convergence problems
#   N <- 70 # number of data points
#   simdats <- simdata(N)
#   train <- simdats[1:N,]
#   test <- simdats[(N+1) : (2*N),]
#   rm(simdats)
#   
#   m.all <- lm(y~., data=train) # fit linear terms only; use only first half of the data
#   mytab <- dredge(m.all, rank=AIC)
#   mytab
#   model.list <- get.models(mytab, subset=NA)
#   # sort the model list from 1 : M (to make all outputs follow the same sequence, from simplest to full model):
#   model.list <- model.list[order(as.numeric(rownames(mytab)))]
#   M <- length(model.list)
#   rm(m.all)
#   
#   truth <- test[,1] ## until we have the true values from the simulation, we use the test as stand-in
#   preds <- sapply(model.list, predict, newdata=test)
#   
#   # cos-squared
#   RR <- cor(preds[,-1])
#   weightsCS <- c(0, csweights(RR, verbose=F, maxit=500))
#   weights.arr[r, 19, ] <- weightsCS
#   weightedPredsCS <- preds %*% weightsCS
#   RMSEcs <- sqrt(mean((weightedPredsCS - truth)^2))
#   results.mat[r, 19] <- RMSEcs
#   rm(RR, weightsCS, weightedPredsCS, RMSEcs)
# }

  
  
# CHECK FOR 0-only data in rjMCMC due to NAs for weights!
  
apply(weights.arr, c(2,3), mean, na.rm=T)

sort(colMeans(results.mat), decreasing=F)
sort(apply(results.mat, 2, sd), decreasing=F)


library(xtable)
out <- round(apply(weights.arr, c(2,3), mean, na.rm=T), 3)
#out <- round(colMeans(results.mat), 4)
out[22:37, ] <- diag(1, 16)
oo <- order(apply(results.mat, 2, median))
out.ordered <- cbind(out, apply(results.mat, 2, median))[oo, ]
print(xtable(out.ordered, digits=c(1,rep(2, 16),3)), file="case1weights.txt")

oo <- order(apply(results.mat, 2, median))
pdf("Case1_100runs_new.pdf", width=9, height=5)
par(mar=c(8,4,1,1))
boxplot(results.mat[, oo], las=2, col="grey", border="white", ylab="prediction error (RMSE)", cex.lab=1.3, ylim=c(1, 1.8)) #
abline(v=4.5, col="grey")

dev.off()
