# Prepare the modelling data from the `contours_occitanie` sf object:
#   * response: vote shares (left / right / others) and their ilr coordinates,
#     stored as new columns of `contours_occitanie_no0`;
#   * explanatory variables: ilr coordinates of the age, diploma and
#     employment compositions plus five rates and the log of NBRINS,
#     collected in the data frame `x2_df`.

# Attach dependencies with library() so that a missing package stops the
# script immediately (require() would only return FALSE and let it go on).
library(dplyr)
library(sf)
library(missForest)
# NOTE(review): ilr() is not provided by any package attached in this script.
# Its `V =` argument matches compositions::ilr -- confirm that the package is
# attached before this script is run.

# Expected to provide `contours_occitanie` (used right below).
load("R/data_cantons.RData")

# Drop the rows where any of the three vote counts is 0.
contours_occitanie_no0 <- contours_occitanie %>%
  filter(left != 0, right != 0, others != 0)

# Create the percentages associated with each of the parties.
contours_occitanie_no0 <- contours_occitanie_no0 %>%
  mutate(
    percent_left = left / (left + right + others),
    percent_right = right / (left + right + others),
    percent_others = others / (left + right + others)
  )

# Create the ILR coordinates of the vote composition (geometry dropped first
# so that only the three count columns are passed to ilr()).
y_ilr <- ilr(
  contours_occitanie_no0[, c("left", "right", "others")] %>%
    st_set_geometry(NULL)
)
contours_occitanie_no0 <- contours_occitanie_no0 %>%
  mutate(
    y_ilr_1 = as.numeric(y_ilr[, 1]),
    y_ilr_2 = as.numeric(y_ilr[, 2])
  )

# Impute income_rate with missForest, using the three other rates as
# predictors. NaN is recoded to NA first so that it is treated as missing.
# Only the imputed income_rate column is written back.
# NOTE(review): no seed is set here, so the imputed values are presumably not
# reproducible from one run to the next -- confirm whether the caller seeds.
data.to.imp <- contours_occitanie_no0 %>%
  st_set_geometry(NULL) %>%
  select(unemp_rate, employ_evol, owner_rate, income_rate)
data.to.imp$income_rate[is.nan(data.to.imp$income_rate)] <- NA
data.imp <- missForest(data.to.imp)
contours_occitanie_no0$income_rate <- data.imp$ximp[, "income_rate"]

# Prepare the matrices V used to transform the data into ilr space
# (one row per ilr coordinate; they are transposed when passed to ilr()).
V_dip <- rbind(
  c(1 / sqrt(6), 1 / sqrt(6), -2 / sqrt(6)),
  c(1 / sqrt(2), -1 / sqrt(2), 0)
)
V_employ <- rbind(
  c(-1 / 2 / sqrt(5), -1 / 2 / sqrt(5), -1 / 2 / sqrt(5), -1 / 2 / sqrt(5),
    2 / sqrt(5)),
  c(-sqrt(3) / 6, -sqrt(3) / 6, -sqrt(3) / 6, sqrt(3 / 4), 0),
  c(-sqrt(2 / 3) / 2, -sqrt(2 / 3) / 2, sqrt(2 / 3), 0, 0),
  c(sqrt(1 / 2), -sqrt(1 / 2), 0, 0, 0)
)

# Create the independent variables.

# Age, without "mineur": the five age classes are merged into three.
age <- contours_occitanie_no0[
  , c("age_1824", "age_2540", "age_4055", "age_5564", "age_65")
] %>%
  st_set_geometry(NULL)
age_1840 <- age[, "age_1824"] + age[, "age_2540"]
age_4064 <- age[, "age_4055"] + age[, "age_5564"]
age_65 <- age[, "age_65"]
age3 <- as.data.frame(cbind(age_1840, age_4064, age_65))

# Diploma: "no_diplom" and "capbep" are merged into a single part.
diplome <- contours_occitanie_no0[
  , c("no_diplom", "capbep", "bac", "diplom_sup")
] %>%
  st_set_geometry(NULL)
No_CAPBEP <- diplome[, "no_diplom"] + diplome[, "capbep"]
diplome3 <- cbind(diplome[, c("diplom_sup", "bac")], No_CAPBEP)

# Employment: five-part composition.
employ <- contours_occitanie_no0[, c("AZ", "BE", "FZ", "GU", "OQ")] %>%
  st_set_geometry(NULL)

# Create the matrix of the independent variables: ilr coordinates of the
# three compositions, the five rate columns, and log(NBRINS).
x2 <- cbind(
  ilr(age3),
  ilr(diplome3, V = t(V_dip)),
  ilr(employ, V = t(V_employ)),
  as.matrix(
    contours_occitanie_no0[
      , c("unemp_rate", "employ_evol", "owner_rate", "income_rate", "foreign")
    ] %>%
      st_set_geometry(NULL)
  ),
  log(contours_occitanie_no0$NBRINS)
)
colnames(x2) <- c(
  "age3_ilr1", "age3_ilr2",
  "diplome3_ilr1", "diplome3_ilr2",
  "employ_ilr1", "employ_ilr2", "employ_ilr3", "employ_ilr4",
  "unemp_rate", "employ_evol", "owner_rate", "income_rate", "foreign",
  "voters"
)
x2_df <- data.frame(x2)

# Remove the intermediate objects; the other objects created above are kept.
rm(age3, age, diplome, employ, x2, age_1840, age_4064, age_65)