# import data

################################
# 1993 
# We copy the url of the data:
  
# Location of the 1993 workbook; it is saved locally before being read.
url_leg_1993 <- "http://static.data.gouv.fr/4d/31c587f56b293e3b48c7ba2951fdbe09c4f5396317e0793a5e3ac33b396e22.xls"
curl::curl_download(url_leg_1993, "base_1993.xls")

# Column names: 15 constituency-level columns, followed by 7 columns for each
# of the 18 candidate slots (suffixed _1 ... _18).
nom_var_1993 <- c(
  "code_dep", "nom_dep", "code_circ", "nom_circ",
  "inscrits",
  "abstent", "absten/inscrits",
  "votants", "votants/inscrits",
  "blancs", "blancs/inscrits", "blancs/votants",
  "exprimes", "exprimes/inscrits", "exprimes/votants",
  # outer() builds a 7 x 18 matrix of "<field>_<slot>" labels; flattening it
  # column by column yields all fields of slot 1, then slot 2, and so on.
  as.vector(outer(
    c("sexe", "nom", "prenom", "nuance", "voix", "voix/inscrits", "voix/exprimes"),
    1:18,
    paste,
    sep = "_"
  ))
)

# Read the first-round sheet, skipping its first row and applying our own
# column names instead.
don_leg_1993 <- readxl::read_xls(
  path = "base_1993.xls",
  sheet = "Circo leg T1",
  skip = 1,
  col_names = nom_var_1993
)


################################
# 1997 
# We copy the url of the data:

# Location of the 1997 workbook; it is saved locally before being read.
url_leg_1997 <- "http://static.data.gouv.fr/bb/66d1298c1ac462a7bf1cd3209a30fca62d1e2702561ece1453c2a127952ec1.xls"
curl::curl_download(url_leg_1997, "base_1997.xls")

# Column names: 15 constituency-level columns, followed by 7 columns for each
# of the 29 candidate slots (suffixed _1 ... _29).
nom_var_1997 <- c(
  "code_dep", "nom_dep", "code_circ", "nom_circ",
  "inscrits",
  "abstent", "absten/inscrits",
  "votants", "votants/inscrits",
  "blancs", "blancs/inscrits", "blancs/votants",
  "exprimes", "exprimes/inscrits", "exprimes/votants",
  # paste() recycles the 7 field names along the 203 slot indices.
  paste(
    c("sexe", "nom", "prenom", "nuance", "voix", "voix/inscrits", "voix/exprimes"),
    rep(1:29, each = 7),
    sep = "_"
  )
)

# Read the first-round sheet, skipping its first row and applying our own
# column names instead.
don_leg_1997 <- readxl::read_xls(
  path = "base_1997.xls",
  sheet = "Circo leg T1",
  skip = 1,
  col_names = nom_var_1997
)


################################
# 2002
# We copy the url of the data:

# Location of the 2002 workbook; it is saved locally before being read.
url_leg_2002 <- "http://static.data.gouv.fr/9c/96f64b379471595907b6abb0267e7b9e36a1d9ccd06521e377c8330c3130de.xls"
curl::curl_download(url_leg_2002, "base_2002.xls")

# Column names: 15 constituency-level columns, followed by 7 columns for each
# of the 27 candidate slots (suffixed _1 ... _27).
nom_var_2002 <- c(
  "code_dep", "nom_dep", "code_circ", "nom_circ",
  "inscrits",
  "abstent", "absten/inscrits",
  "votants", "votants/inscrits",
  "blancs", "blancs/inscrits", "blancs/votants",
  "exprimes", "exprimes/inscrits", "exprimes/votants",
  paste0(
    rep(
      c("sexe", "nom", "prenom", "nuance", "voix", "voix/inscrits", "voix/exprimes"),
      times = 27
    ),
    "_",
    rep(seq_len(27), each = 7)
  )
)

# Read the first-round sheet, skipping its first row and applying our own
# column names instead.
don_leg_2002 <- readxl::read_xls(
  path = "base_2002.xls",
  sheet = "Circo leg T1",
  skip = 1,
  col_names = nom_var_2002
)

################################
# 2007
# We copy the url of the data:

# Location of the 2007 workbook; it is saved locally before being read.
url_leg_2007 <- "http://static.data.gouv.fr/21/0c89caeccc170effac852b1ffa2e3b318827fa7b0fd418c92b66ed8f986aef.xls"
curl::curl_download(url = url_leg_2007, destfile = "base_2007.xls")

# Column names: 15 constituency-level columns, then 7 columns for each of the
# candidate slots 1 to 19, then one extra column named "bug", then the 7
# columns of candidate slot 20.
# NOTE(review): "bug" presumably labels a stray column in the source sheet
# between slots 19 and 20 -- confirm against the workbook.
nom_var_2007 <- c("code_dep", "nom_dep", "code_circ", "nom_circ", "inscrits",
             "abstent", "absten/inscrits", "votants", "votants/inscrits",
             "blancs", "blancs/inscrits", "blancs/votants",
             "exprimes", "exprimes/inscrits", "exprimes/votants",
             paste(rep(c("sexe", "nom", "prenom", "nuance", "voix", "voix/inscrits", "voix/exprimes"), 19),
                   rep(1:19, each = 7), sep = "_"),
             "bug", paste(c("sexe", "nom", "prenom", "nuance", "voix", "voix/inscrits", "voix/exprimes"), 20, sep = "_"))


# Read the first-round sheet (note the capital "L" in this year's sheet name),
# skipping its first row and applying our own column names instead.
don_leg_2007 <- readxl::read_xls(path = "base_2007.xls",
                                 sheet = "Circo Leg T1",
                                 col_names = nom_var_2007,
                                 skip = 1)

# Drop the placeholder column. The call is namespace-qualified, like the other
# package calls in this script, so it works without attaching dplyr/magrittr.
don_leg_2007 <- dplyr::select(don_leg_2007, -bug)

################################
## Elections in 2012 

# We copy the url of the data:
# Location of the 2012 results file (polling-station level, both rounds in a
# single text file).
url_leg_2012 <- "https://www.data.gouv.fr/s/resources/elections-legislatives-2012-resultats-par-bureaux-de-vote/20150925-103435/LG12_Bvot_T1T2.txt"

# In 2012 each row holds the result of one candidate in one polling station.
# The file is read without a header row (the first 18 lines are skipped), so
# readr assigns default column names; the columns we need are then renamed by
# position.
don_leg_2012 <- readr::read_csv2(url_leg_2012, col_names = FALSE, skip = 18)
don_leg_2012 <- dplyr::rename(don_leg_2012, tour = 1, code_dep = 2,
                              code_circ = 5, code_com = 3, nom_com = 4,
                              code_cant = 6, b_vote = 7, inscrits = 8,
                              votants = 9, exprimes = 10,
                              candidate = 12, parti = 14, score = 15)

# Split the results by round. dplyr::filter is called explicitly because a
# bare filter() resolves to stats::filter() when dplyr is not attached.
don_leg_2012_T1 <- dplyr::filter(don_leg_2012, tour == 1)
don_leg_2012_T2 <- dplyr::filter(don_leg_2012, tour == 2)


### Table of registered voters per polling station

# First-round rows repeat the polling-station columns once per candidate, so
# duplicates are removed to keep one row per distinct combination.
bv_leg_2012 <- unique(don_leg_2012_T1[, c("code_dep", "code_com",
                                          "code_circ", "b_vote", "inscrits")])


################################
## Elections in 2017 

# We copy the url of the data:
# Locations of the 2017 result files, one per round.
url_leg_2017_T1 <- "https://www.data.gouv.fr/fr/datasets/r/80cb1309-9147-4bae-b6e2-79877d549b50"
url_leg_2017_T2 <- "https://www.data.gouv.fr/fr/datasets/r/8eb61f3e-dfdf-496e-85af-2859cd7383c3"


# In 2017 each row holds the results of all candidates in one polling station,
# so the number of filled columns varies with the number of candidates.
# Column names and types are therefore fixed before import.
# First round: 21 polling-station columns, then 8 columns for each of the 27
# candidate slots (suffixed _1 ... _27).
nom_var_2017 <- c("code_dep", "nom_dep", "code_circ", "nom_circ", "code_com", "nom_com",
             "b_vote", "inscrits", "abstent", "absten/inscrits", "votants",
             "votants/inscrits", "blancs", "blancs/inscrits", "blancs/votants",
             "nuls", "nuls/inscrits", "nuls/votants",
             "exprimes", "exprimes/inscrits", "exprimes/votants",
             paste(rep(c("panneau", "sexe", "nom", "prenom", "nuance", "voix",
                         "voix/inscrits", "voix/exprimes"), 27),
                   rep(1:27, each = 8), sep = "_"))

# Matching column types: 7 identifier columns read as text, 14 numeric count
# and ratio columns, then the 8-column pattern of each candidate slot.
type_var <- c(rep("character", 7),
              rep("numeric", 14),
              rep(c("integer", "character", "character", "character", "character",
                    "numeric", "numeric", "numeric"), 27))

# readr accepts a compact specification with one letter per column; the first
# letter of each type name ("c", "n", "i") is the corresponding code.
type_var_2 <- paste(substr(type_var, 1, 1), collapse = "")

# locale() is namespace-qualified, like the other package calls in this
# script, so the import works without attaching readr.
don_leg_2017_T1 <- readr::read_csv2(url_leg_2017_T1,
                                    locale = readr::locale(encoding = "ISO-8859-1"),
                                    col_names = nom_var_2017,
                                    col_types = type_var_2,
                                    skip = 1)


# At the second tour, the maximum number of candidates is 3:  
# Second-round column names: 21 polling-station columns, then 8 columns for
# each of the 3 candidate slots (suffixed _1 ... _3).
nom_var_2017_T2 <- c("code_dep", "nom_dep", "code_circ", "nom_circ", "code_com", "nom_com",
             "b_vote", "inscrits", "abstent", "absten/inscrits", "votants",
             "votants/inscrits", "blancs", "blancs/inscrits", "blancs/votants",
             "nuls", "nuls/inscrits", "nuls/votants",
             "exprimes", "exprimes/inscrits", "exprimes/votants",
             paste(rep(c("panneau", "sexe", "nom", "prenom", "nuance", "voix",
                         "voix/inscrits", "voix/exprimes"), 3),
                   rep(1:3, each = 8), sep = "_"))

# Matching column types: 7 identifier columns read as text, 14 numeric count
# and ratio columns, then the 8-column pattern of each candidate slot.
# This overwrites the type_var / type_var_2 built for the first round.
type_var <- c(rep("character", 7),
              rep("numeric", 14),
              rep(c("integer", "character", "character", "character", "character",
                    "numeric", "numeric", "numeric"), 3))

# Compact readr specification: one letter ("c", "n", "i") per column.
type_var_2 <- paste(substr(type_var, 1, 1), collapse = "")

# locale() is namespace-qualified, like the other package calls in this
# script, so the import works without attaching readr.
don_leg_2017_T2 <- readr::read_csv2(url_leg_2017_T2,
                                    locale = readr::locale(encoding = "ISO-8859-1"),
                                    col_names = nom_var_2017_T2,
                                    col_types = type_var_2,
                                    skip = 1)

### Data basis related to the number of voters 

# We build a table with the number of registered voters (inscrits) per polling station:
# Keep only the polling-station identifiers and the registered-voter count
# from the first-round table. The call is namespace-qualified so that it does
# not depend on dplyr/magrittr being attached.
bv_leg_2017 <- dplyr::select(don_leg_2017_T1,
                             code_dep, code_com, code_circ, b_vote, inscrits)

#save(don_leg_1993, don_leg_1997, don_leg_2002, don_leg_2007, 
#     bv_leg_2012, bv_leg_2017, file = "bv_leg.RData")