This document presents the R codes used to obtain the computational results included in the paper “One Man, One Vote” Part 2: Measurement of Malapportionment and Disproportionality and the Lorenz Curve - Applications.
To cite this work, please use:
de Mouzon, O., Laurent, T., and Le Breton, M. (2020). “One Man, One Vote” Part 2: Measurement of Malapportionment and Disproportionality and the Lorenz Curve, TSE WP.
Packages needed:
library(cowplot)
library(classInt)
library(ggridges)
library(sf)
library(tidyverse)
library(readxl)
library(latex2exp)
The data can be found at https://www.data.gouv.fr/fr/posts/les-donnees-des-elections/
This election occurs every 5 years which means than there are 6 elections beteen 1993 and 2017 (1993, 1997, 2002, 2007, 2012, 2017). First, we import the different data bases. The codes are available in the “source_leg.R” file:
source("http://www.thibault.laurent.free.fr/code/4CT/source_leg.R")
For each election, we have the results of the votes at the two rounds (more information about the election process, can be found e.g. https://fr.wikipedia.org/wiki/%C3%89lections_l%C3%A9gislatives_en_France). Among the variables collected, we have the number of people who have the right to vote, the number of voters and the votes obtained by the different candidates.
In this first application, we are only interested in the data related to the number of people who have the right to vote.
For each election, we create a data basis with the number of voters per circonscription and we create a rank variable which will be used to plot the Lorenz curve.
circ_1993 <- don_leg_1993 %>%
select(code_dep, code_circ, inscrits)
circ_1993$rank <- rank(-circ_1993$inscrits, ties.method = "random")
circ_1993$year <- "1993"
circ_1993 <- circ_1993[order(circ_1993$rank), ]
circ_1993$p <- circ_1993$rank/nrow(circ_1993)
circ_1993$L <- cumsum(circ_1993$inscrits)/sum(circ_1993$inscrits)
circ_1997 <- don_leg_1997 %>%
select(code_dep, code_circ, inscrits)
circ_1997 <- na.omit(circ_1997)
circ_1997$rank <- rank(-circ_1997$inscrits, ties.method = "random")
circ_1997$year <- "1997"
circ_1997 <- circ_1997[order(circ_1997$rank), ]
circ_1997$p <- circ_1997$rank/nrow(circ_1997)
circ_1997$L <- cumsum(circ_1997$inscrits)/sum(circ_1997$inscrits)
circ_2002 <- don_leg_2002 %>%
select(code_dep, code_circ, inscrits)
circ_2002$rank <- rank(-circ_2002$inscrits, ties.method = "random")
circ_2002$year <- "2002"
circ_2002 <- circ_2002[order(circ_2002$rank), ]
circ_2002$p <- circ_2002$rank/nrow(circ_2002)
circ_2002$L <- cumsum(circ_2002$inscrits)/sum(circ_2002$inscrits)
circ_2007 <- don_leg_2007 %>%
select(code_dep, code_circ, inscrits)
circ_2007$rank <- rank(-circ_2007$inscrits, ties.method = "random")
circ_2007$year <- "2007"
circ_2007 <- circ_2007[order(circ_2007$rank), ]
circ_2007$p <- circ_2007$rank/nrow(circ_2007)
circ_2007$L <- cumsum(circ_2007$inscrits)/sum(circ_2007$inscrits)
circ_2012 <- bv_leg_2012 %>%
group_by(code_dep, code_circ) %>%
summarize(inscrits = sum(inscrits))
## `summarise()` regrouping output by 'code_dep' (override with `.groups` argument)
circ_2012$rank <- rank(-circ_2012$inscrits, ties.method = "random")
circ_2012$year <- "2012"
circ_2012 <- circ_2012[order(circ_2012$rank), ]
circ_2012$p <- circ_2012$rank/nrow(circ_2012)
circ_2012$L <- cumsum(circ_2012$inscrits)/sum(circ_2012$inscrits)
circ_2017 <- bv_leg_2017 %>%
group_by(code_dep, code_circ) %>%
summarize(inscrits = sum(inscrits))
## `summarise()` regrouping output by 'code_dep' (override with `.groups` argument)
circ_2017$rank <- rank(-circ_2017$inscrits, ties.method = "random")
circ_2017$year <- "2017"
circ_2017 <- circ_2017[order(circ_2017$rank), ]
circ_2017$p <- circ_2017$rank/nrow(circ_2017)
circ_2017$L <- cumsum(circ_2017$inscrits)/sum(circ_2017$inscrits)
We are also interested in collecting the data related to the population census. Indeed, the method to allocate the deputies is related to the number of inhabitants which is different from the number of people who have the right to vote and which is given by the results of the election. Heafter, the data is available only for year 2013. The data can be found at https://www.insee.fr/fr/statistiques/2508230
We import the data:
pop_circ <- readxl::read_xls(
path = "population-circonscriptions-legislatives-en-2013.xls",
skip = 7)
pop_circ <- pop_circ %>%
select(1, 2, 4) %>%
rename(code_dpt = 1,
num_circ = 2,
pop_muni = 3)
We change the names of the departement DOM/TOM:
pop_circ$code_dpt[pop_circ$code_dpt == "971"] <- "ZA"
pop_circ$code_dpt[pop_circ$code_dpt == "972"] <- "ZB"
pop_circ$code_dpt[pop_circ$code_dpt == "973"] <- "ZC"
pop_circ$code_dpt[pop_circ$code_dpt == "974"] <- "ZD"
pop_circ$code_dpt[pop_circ$code_dpt == "975"] <- "ZS"
pop_circ$code_dpt[pop_circ$code_dpt == "976"] <- "ZM"
pop_circ$code_dpt[pop_circ$code_dpt == "978"] <- "ZX"
pop_circ$code_dpt[pop_circ$code_dpt == "986"] <- "ZW"
pop_circ$code_dpt[pop_circ$code_dpt == "987"] <- "ZP"
pop_circ$code_dpt[pop_circ$code_dpt == "988"] <- "ZN"
pop_circ <- pop_circ %>%
mutate(ID = paste0(code_dpt, "0", num_circ)) %>%
select(ID, pop_muni)
We are interested to make maps. The geographical circonscriptions have evolved across the time. The data correspond here to the geographical zones observed in 2017. The codes are given in the file “spatial_circon.R”:
source("http://www.thibault.laurent.free.fr/code/4CT/spatial_circon.R")
We represent the histogram of the number of people who have the right to vote with respect to the year of the election. First, we row-bind the data year by year:
circ <- rbind(
as.data.frame(circ_1993),
as.data.frame(circ_1997),
as.data.frame(circ_2002),
as.data.frame(circ_2007),
as.data.frame(circ_2012)[, -8],
as.data.frame(circ_2017)[, -8])
We then plot the distributions (Figure 1 in the chapter)
ggplot(circ,
aes(x = year,
y = inscrits)) +
geom_violin() +
geom_boxplot(width = .5, outlier.size = 2) +
labs(title = "Number of voters per election") +
ylab("Number of voters")
We consider the variable population observed in 2013 which is the variable supposed to be used to fix the geographical boundaries of the circonscriptions. We used the geographical boundaries of the 2017 election. For this election, 10 circonscriptions were allocated to the French citizens living in foreign countries. We did not include these circonscriptions hereafter.
We print the 10 smallest circonscriptions:
as.data.frame(geo_circ) %>%
arrange(pop_muni) %>%
select(nom_dpt, num_circ, pop_muni) %>%
head(10)
## nom_dpt num_circ pop_muni
## 1 SAINT-PIERRE-ET-MIQUELON 1 6057
## 2 WALLIS-ET-FUTUNA 1 12197
## 3 SAINT-MARTIN/SAINT-BARTHELEMY 1 44873
## 4 HAUTES-ALPES 2 63979
## 5 CANTAL 2 64494
## 6 ARIEGE 1 70240
## 7 TERRITOIRE DE BELFORT 1 72137
## 8 TERRITOIRE DE BELFORT 2 72181
## 9 CORSE-DU-SUD 1 74186
## 10 CORSE-DU-SUD 2 75048
We print the 10 biggest units:
as.data.frame(geo_circ) %>%
arrange(-pop_muni) %>%
select(nom_dpt, num_circ, pop_muni) %>%
head(10)
## nom_dpt num_circ pop_muni
## 1 LOIRE-ATLANTIQUE 5 151788
## 2 HAUTE-GARONNE 6 150898
## 3 LOIRE-ATLANTIQUE 6 150726
## 4 LOIRE-ATLANTIQUE 9 148647
## 5 NOUVELLE-CALEDONIE 2 148586
## 6 RHONE 6 147192
## 7 LOIRE-ATLANTIQUE 10 147016
## 8 VENDEE 1 145627
## 9 VAR 6 144986
## 10 GIRONDE 5 144832
We now look at the number of deputies observed per departement to check if the rule “a departement has at least one deputy and an additional deputy is allocated every additional 125,000 inhabitants” is indeed respected.
We prepare the data:
geo_dep <- geo_circ %>%
group_by(nom_dpt, code_dpt) %>%
summarize(pop_muni = sum(pop_muni),
inscrits_2012 = sum(inscrits_2012),
nb_representatives = n()) %>%
mutate(rep_per_hab = pop_muni/nb_representatives) %>%
filter(code_dpt != "ZZ") %>%
as.data.frame()
## `summarise()` regrouping output by 'nom_dpt' (override with `.groups` argument)
We plot the number of representatives per departement with respect to the population (Figure 2 in the article):
# only keep the label of the departement mentionned
geo_dep$code_dpt_2 <- geo_dep$code_dpt
geo_dep$code_dpt_2[! geo_dep$code_dpt_2 %in%
c("ZS", "ZW", "ZX", "ZN", "05", "39", "75" )] <- ""
ggplot(geo_dep) +
aes(x = (pop_muni), y = rep_per_hab, label = code_dpt_2) +
geom_point(aes(colour = factor(nb_representatives),
size = factor(nb_representatives))) +
scale_color_grey(start=0.8, end=0.2) +
geom_text(size = 3, hjust = -0.2, nudge_x = 30000) +
geom_hline(yintercept = sum(geo_dep$pop_muni) / sum(geo_dep$nb_representatives),
linetype="dashed", color = "red") +
geom_hline(yintercept = 125000,
linetype="dotted", color = "blue") +
xlab("Population per state") +
ylab("Number of inhabitants per deputy") +
guides(color=guide_legend(),
size = guide_legend()) +
labs(color='Seats',
size = 'Seats')
## Warning: Using size for a discrete variable is not advised.
We use the following criteria:
to allocate the number of deputy per departement and we check with the truth.
geo_dep$theoretical <- ceiling(geo_dep$pop_muni / 125000)
geo_dep$reste <- geo_dep$pop_muni %% 125000
We compare the theoretical and observed number of representatives and print those which are different.
geo_dep %>%
select(code_dpt, nb_representatives, theoretical, pop_muni, reste, -geometry) %>%
filter(nb_representatives - theoretical != 0) %>%
arrange(-reste)
## code_dpt nb_representatives theoretical pop_muni reste
## 1 44 10 11 1328620 78620
## 2 93 12 13 1552482 52482
## 3 31 10 11 1298562 48562
## 4 85 5 6 655506 30506
## 5 69 14 15 1779845 29845
## 6 83 8 9 1028583 28583
## 7 40 3 4 397226 22226
## 8 35 8 9 1019923 19923
## 9 74 6 7 769677 19677
## 10 ZN 2 3 268767 18767
## 11 63 5 6 640999 15999
## 12 68 6 7 758723 8723
## 13 17 5 6 633417 8417
## 14 81 3 4 381927 6927
## 15 42 6 7 756715 6715
## 16 33 12 13 1505517 5517
## 17 76 10 11 1254609 4609
## 18 91 10 11 1253931 3931
## 19 87 3 4 375856 856
## 20 82 2 3 250342 342
We plot on the map the number of voters in 2012 (not presented in the article). We keep the same intervals for creating the class:
my_class <- quantile(c(geo_circ$inscrits_2012, geo_circ$pop_muni),
seq(0, 1, 0.125),
na.rm = T)
geo_circ$quantile_2012 <- cut(geo_circ$inscrits_2012,
my_class,
include.lowest = T)
ggplot(data = geo_circ) +
geom_sf(aes(fill = quantile_2012)) +
scale_fill_brewer(palette = "OrRd")
We plot on the map the number of populations:
geo_circ$quanti_pop <- cut(geo_circ$pop_muni,
my_class,
include.lowest = T)
ggplot(data = geo_circ) +
geom_sf(aes(fill = quanti_pop)) +
scale_fill_brewer(palette = "OrRd")
cor(geo_circ$inscrits_2012, geo_circ$pop_muni, use = "complete.obs")
## [1] 0.6798447
Moreover, when we plot the ratio voters/population, we observe that there are some spatial patterns which indicate that the differences between the two measures are related to a spatial phenomena.
geo_circ$ratio <- geo_circ$inscrits_2012 / geo_circ$pop_muni
We plot on the map the ratio:
geo_circ$quanti_ratio_2017 <- cut(geo_circ$ratio,
c(0, 0.5, 0.6, 0.7, 0.75, 0.9),
include.lowest = T)
ggplot(data = geo_circ) +
geom_sf(aes(fill = quanti_ratio_2017)) +
scale_fill_brewer(palette = "OrRd")
circ_to_lorenz <- rbind(
rbind(c(NA, NA, NA, NA, 1993, 0, 0),
as.data.frame(circ_1993)),
rbind(c(NA, NA, NA, NA, 1997, 0, 0),
as.data.frame(circ_1997)),
rbind(c(NA, NA, NA, NA, 2002, 0, 0),
as.data.frame(circ_2002)),
rbind(c(NA, NA, NA, NA, 2007, 0, 0),
as.data.frame(circ_2007)),
rbind(c(NA, NA, NA, NA, 2012, 0, 0),
as.data.frame(circ_2012)[, -8]),
rbind(c(NA, NA, NA, NA, 2017, 0, 0),
as.data.frame(circ_2017)[, -8]))
We plot the Lorenz curve for each year (Figure 3 in the article). We do not keep the election in 1993 because the data was incomplete.
t1.rect1 <- data.frame(x1 = c(0, 0.01, 0.1, 0.5, 0.7, 0.9),
x2 = c(0.01, 0.1, 0.5, 0.7, 0.9, 1),
y1 = c(0, 0.01, 0.1, 0.4, 0.6, 0.85),
y2 = c(0.015, 0.15, 0.45, 0.65, 0.9, 1))
p <- ggplot(data = circ_to_lorenz %>% filter (year != 1993)) +
geom_line(aes(x = L, y = p, color = year, linetype = as.factor(year))) +
scale_color_grey(start=0.8, end=0.2) +
scale_x_continuous(name = "Cumulative voters", limits = c(0, 1)) +
scale_y_continuous(name = "Cumulative deputies", limits = c(0, 1)) +
geom_abline() +
geom_rect(t1.rect1, mapping = aes(xmin = x1, xmax = x2, ymin = y1, ymax = y2),
color = "grey23", lty = 2, fill = NA) +
guides(color=guide_legend(),
linetype = guide_legend()) +
labs(color='year',
linetype = 'year')
p
We decompose the Lorenz curve in small squares:
p1 <- ggplot(data = circ_to_lorenz %>% filter (year != 1993)) +
geom_line(aes(x = L, y = p, color = year)) +
scale_x_continuous(name = "Cumulative share of X") +
scale_y_continuous(name = "Cumulative share of Y") +
coord_cartesian(
xlim = c(0, 0.01),
ylim = c(0, 0.01),
expand = TRUE,
default = FALSE,
clip = "on") +
geom_abline()
p1
p2 <- ggplot(data = circ_to_lorenz %>% filter (year != 1993)) +
geom_line(aes(x = L, y = p, color = year)) +
scale_x_continuous(name = "Cumulative share of X") +
scale_y_continuous(name = "Cumulative share of Y") +
coord_cartesian(
xlim = c(0.01, 0.1),
ylim = c(0.015, 0.1),
expand = TRUE,
default = FALSE,
clip = "on") +
geom_abline()
p2
p3 <- ggplot(data = circ_to_lorenz %>% filter (year != 1993)) +
geom_line(aes(x = L, y = p, color = year)) +
scale_x_continuous(name = "Cumulative share of X") +
scale_y_continuous(name = "Cumulative share of Y") +
coord_cartesian(
xlim = c(0.1, 0.5),
ylim = c(0.15, 0.45),
expand = TRUE,
default = FALSE,
clip = "on") +
geom_abline()
p3
p4 <- ggplot(data = circ_to_lorenz %>% filter (year != 1993)) +
geom_line(aes(x = L, y = p, color = year)) +
scale_x_continuous(name = "Cumulative share of X") +
scale_y_continuous(name = "Cumulative share of Y") +
coord_cartesian(
xlim = c(0.5, 0.7),
ylim = c(0.4, 0.65),
expand = TRUE,
default = FALSE,
clip = "on") +
geom_abline()
p4
p5 <- ggplot(data = circ_to_lorenz %>% filter (year != 1993)) +
geom_line(aes(x = L, y = p, color = year)) +
scale_x_continuous(name = "Cumulative share of X") +
scale_y_continuous(name = "Cumulative share of Y") +
coord_cartesian(
xlim = c(0.7, 0.9),
ylim = c(0.6, 0.9),
expand = TRUE,
default = FALSE,
clip = "on") +
geom_abline()
p5
p6 <- ggplot(data = circ_to_lorenz %>% filter (year != 1993)) +
geom_line(aes(x = L, y = p, color = year)) +
scale_x_continuous(name = "Cumulative share of X") +
scale_y_continuous(name = "Cumulative share of Y") +
coord_cartesian(
xlim = c(0.9, 1),
ylim = c(0.85, 1),
expand = TRUE,
default = FALSE,
clip = "on") +
geom_abline()
p6
Moreover, we observe very few crossings between the curves. To check this, we compute the number of times a curve is below another one.
mat_circ <- matrix(0, 5, 5)
year <- c("1997", "2002", "2007", "2012", "2017")
for(k in 1:577) {
for (i in 1:4) {
for (j in (i + 1):5) {
if (circ[circ$rank == k & circ$year == year[i], "L"] >
circ[circ$rank == k & circ$year == year[j], "L"] ) {
mat_circ[j, i] <- mat_circ[j, i] + 1
} else {
mat_circ[i, j] <- mat_circ[i, j] + 1
}
}
}
}
We order the curves with respect to the total number of times a curve is below the others.
mat_circ <- cbind(mat_circ, apply(mat_circ, 1, sum))
colnames(mat_circ) <- c("1997", "2002", "2007", "2012", "2017", "Total")
row.names(mat_circ) <- c("1997", "2002", "2007", "2012", "2017")
mat_circ <- mat_circ[order(mat_circ[, "Total"]),]
mat_circ
## 1997 2002 2007 2012 2017 Total
## 2007 27 206 0 6 6 245
## 2002 14 0 371 7 8 400
## 1997 0 563 550 5 28 1146
## 2017 549 569 571 32 0 1721
## 2012 572 570 571 0 545 2258
Ranking (Figure 4 in the article) :
circ_by_col <-
cbind(as.data.frame(circ_1997)[, "L"],
as.data.frame(circ_2002)[, "L"],
as.data.frame(circ_2007)[, "L"],
as.data.frame(circ_2012)[, "L"],
as.data.frame(circ_2017)[, "L"]
)
# check equality
# any_dupli <- apply(circ_by_col, 1, anyDuplicated)
rank_years <- apply(circ_by_col, 1, rank)
df_ranking <- rbind(
data.frame(year = "1997", seat = 1:576, rank = rank_years[1, 1:576]),
data.frame(year = "2002", seat = 1:576, rank = rank_years[2, 1:576]),
data.frame(year = "2007", seat = 1:576, rank = rank_years[3, 1:576]),
data.frame(year = "2012", seat = 1:576, rank = rank_years[4, 1:576]),
data.frame(year = "2017", seat = 1:576, rank = rank_years[5, 1:576]))
df_ranking %>% ggplot(aes(x = seat, y = rank)) +
geom_line(size = 1, aes(color = year, linetype = as.factor(year))) +
scale_color_grey(start=0.8, end=0.2) +
scale_x_continuous(name = "seat") +
scale_y_continuous(name = "rank") +
coord_cartesian(
xlim = c(0, 40),
ylim = c(1, 5),
expand = TRUE,
default = FALSE,
clip = "on") +
theme(legend.position = "none")
ggsave("ranking_1.pdf", width = 4, height = 3.5)
df_ranking %>% ggplot(aes(x = seat, y = rank, color = year)) +
geom_line(size = 1, aes(color = year, linetype = as.factor(year))) +
scale_color_grey(start=0.8, end=0.2) +
scale_x_continuous(name = "seat") +
scale_y_continuous(name = "rank") +
coord_cartesian(
xlim = c(1, 577),
ylim = c(1, 5),
expand = TRUE,
default = FALSE,
clip = "on") +
guides(color=guide_legend(),
linetype = guide_legend()) +
labs(color='year',
linetype = 'year')
ggsave("ranking.pdf", width = 7, height = 3.5)
df_ranking %>% ggplot(aes(x = seat, y = rank, color = year)) +
geom_line(size = 1, aes(color = year, linetype = as.factor(year))) +
scale_color_grey(start=0.8, end=0.2) +
scale_x_continuous(name = "seat") +
scale_y_continuous(name = "rank") +
coord_cartesian(
xlim = c(540, 577),
ylim = c(1, 5),
expand = TRUE,
default = FALSE,
clip = "on") +
theme(legend.position = "none")
ggsave("ranking_3.pdf", width = 4, height = 3.5)
For computing the Gini index, we create a function which computes the area \(A\) above the Lorenz curve and then, we get the Gini index by using \(1 / 2 - A\):
gini <- function(x, y) {
# initialisation
n <- length(x)
my_int <- 0
# we add the origin if it does not exist
if (x[1] != 0 | y[1] != 0) {
x <- c(0, x)
y <- c(0, y)
}
for (i in 1:n) {
my_int <- my_int +
(x[i + 1] - x[i]) * y[i] +
1 / 2 * (x[i + 1] - x[i]) * (y[i + 1] - y[i])
}
return(1 / 2 - my_int)
}
gini(circ_1997$L, circ_1997$p)
## [1] 0.05166801
gini(circ_2002$L, circ_2002$p)
## [1] 0.05886212
gini(circ_2007$L, circ_2007$p)
## [1] 0.06128728
gini(circ_2012$L, circ_2012$p)
## [1] 0.04639541
gini(circ_2017$L, circ_2017$p)
## [1] 0.0496606
For computing the DK index, we consider:
DK <- function(x, y, type = "continuous") {
stopifnot(type %in% c("discrete", "continuous"))
if (type == "discrete") {
1 - x[which(y > 0.5)[1]]}
else {
ind <- findInterval(0.5, y)
y_1 <- y[ind]
y_2 <- y[ind + 1]
x_1 <- x[ind]
x_2 <- x[ind + 1]
1 - (x_1 + (0.5 - y_1) * (x_2 - x_1) / (y_2 - y_1))
}
}
DK(circ_1997$L, circ_1997$p, type = "discrete")
## [1] 0.4279511
DK(circ_2002$L, circ_2002$p, type = "discrete")
## [1] 0.4181568
DK(circ_2007$L, circ_2007$p, type = "discrete")
## [1] 0.4154632
DK(circ_2012$L, circ_2012$p, type = "discrete")
## [1] 0.4338471
DK(circ_2017$L, circ_2017$p, type = "discrete")
## [1] 0.4298181
DK(circ_1997$L, circ_1997$p)
## [1] 0.4288246
DK(circ_2002$L, circ_2002$p)
## [1] 0.4190185
DK(circ_2007$L, circ_2007$p)
## [1] 0.4163083
DK(circ_2012$L, circ_2012$p)
## [1] 0.4347184
DK(circ_2017$L, circ_2017$p)
## [1] 0.430682
We do not detail the preparation of the data which is included in the file “source_leg_appl_2.R”.
source("http://www.thibault.laurent.free.fr/code/4CT/source_leg_appl_2.R")
The idea is to compare the vote shares per party and the seat shares.
Figure 5 in the chapter book:
share_parti_93 <- rbind(cbind(parti_1993[ , c("parti", "x")],
share = parti_1993$ratio_parti,
time = "1st round votes"),
cbind(parti_1993[ , c("parti", "x")],
share = parti_1993$ratio_siege,
time = "seats obtained"))
share_parti_93$parti <- ordered(share_parti_93$parti,
levels = parti_1993$parti)
ggplot(share_parti_93,
aes(x = parti,
y = share,
fill = time)) +
geom_bar(stat="identity", position="dodge") +
coord_flip() +
scale_fill_grey() +
labs(title = "Results at the 1st round VS seats obtained in 1993",
x = "Party",
y = "Shares",
fill = "") +
theme_minimal() +
theme(legend.position="bottom")
share_parti_97 <- rbind(cbind(parti_1997[ , c("parti", "x")],
share = parti_1997$ratio_parti, time = "1st round"),
cbind(parti_1997[ , c("parti", "x")],
share = parti_1997$ratio_siege, time = "seats"))
share_parti_97$parti <- ordered(share_parti_97$parti, levels = parti_1997$parti)
ggplot(share_parti_97,
aes(x = parti,
y = share,
fill = time)) +
geom_bar(stat="identity", position="dodge") +
coord_flip() +
labs(title = "Results at the 1st round VS seats obtained in 1997",
x = "Party",
y = "Percentage",
fill = "") +
theme_minimal()
share_parti_02 <- rbind(cbind(parti_2002[ , c("parti", "x")],
share = parti_2002$ratio_parti, time = "1st round"),
cbind(parti_2002[ , c("parti", "x")],
share = parti_2002$ratio_siege, time = "seats"))
share_parti_02$parti <- ordered(share_parti_02$parti, levels = parti_2002$parti)
ggplot(share_parti_02,
aes(x = parti,
y = share,
fill = time)) +
geom_bar(stat="identity", position="dodge") +
coord_flip() +
labs(title = "Results at the 1st round VS seats obtained in 2002",
x = "Party",
y = "Percentage",
fill = "") +
theme_minimal()
share_parti_07 <- rbind(cbind(parti_2007[ , c("parti", "x")],
share = parti_2007$ratio_parti, time = "1st round"),
cbind(parti_2007[ , c("parti", "x")],
share = parti_2007$ratio_siege, time = "seats"))
share_parti_07$parti <- ordered(share_parti_07$parti, levels = parti_2007$parti)
ggplot(share_parti_07,
aes(x = parti,
y = share,
fill = time)) +
geom_bar(stat="identity", position="dodge") +
coord_flip() +
labs(title = "Results at the 1st round VS seats obtained in 2007",
x = "Party",
y = "Percentage",
fill = "") +
theme_minimal()
share_parti_12 <- rbind(cbind(parti_2012[ , c("parti", "x")],
share = parti_2012$ratio_parti, time = "1st round"),
cbind(parti_2012[ , c("parti", "x")],
share = parti_2012$ratio_siege, time = "seats"))
share_parti_12$parti <- ordered(share_parti_12$parti, levels = parti_2012$parti)
ggplot(share_parti_12,
aes(x = parti,
y = share,
fill = time)) +
geom_bar(stat="identity", position="dodge") +
coord_flip() +
labs(title = "Results at the 1st round VS seats obtained in 2012",
x = "Party",
y = "Percentage",
fill = "") +
theme_minimal()
share_parti_17 <- rbind(cbind(parti_2017[ , c("parti", "x")],
share = parti_2017$ratio_parti, time = "1st round"),
cbind(parti_2017[ , c("parti", "x")],
share = parti_2017$ratio_siege, time = "seats"))
share_parti_17$parti <- ordered(share_parti_17$parti, levels = parti_2017$parti)
ggplot(share_parti_17,
aes(x = parti,
y = share,
fill = time)) +
geom_bar(stat="identity", position="dodge") +
coord_flip() +
labs(title = "Results at the 1st round VS seats obtained in 2017",
x = "Party",
y = "Percentage",
fill = "") +
theme_minimal()
We now plot the Lorenz curve for the different elections (figure 6 in the chapter book):
parti <- rbind(
as.data.frame(parti_1993),
as.data.frame(parti_1997),
as.data.frame(parti_2002),
as.data.frame(parti_2007),
as.data.frame(parti_2012),
as.data.frame(parti_2017))
parti_to_lorenz <- rbind(
rbind(c(NA, NA, NA, NA, NA, 1993, 0, 0, NA, NA),
as.data.frame(parti_1993)),
rbind(c(NA, NA, NA, NA, NA, 1997, 0, 0, NA, NA),
as.data.frame(parti_1997)),
rbind(c(NA, NA, NA, NA, NA, 2002, 0, 0, NA, NA),
as.data.frame(parti_2002)),
rbind(c(NA, NA, NA, NA, NA, 2007, 0, 0, NA, NA),
as.data.frame(parti_2007)),
rbind(c(NA, NA, NA, NA, NA, 2012, 0, 0, NA, NA),
as.data.frame(parti_2012)),
rbind(c(NA, NA, NA, NA, NA, 2017, 0, 0, NA, NA),
as.data.frame(parti_2017)))
t1.rect1 <- data.frame(x1 = c(0, 0.35, 0.75),
x2 = c(0.35, 0.75, 1),
y1 = c(0, 0.05, 0.5),
y2 = c(0.15, 0.65, 1))
p <- ggplot(data = parti_to_lorenz) +
geom_line(aes(x = L, y = p, color = year, linetype = as.factor(year)),
size = 1.2) +
scale_color_grey(start=0.8, end=0.1) +
scale_x_continuous(name = "Cumulative share of parties", limits = c(0, 1)) +
scale_y_continuous(name = "Cumulative share of deputies", limits = c(0, 1)) +
geom_abline() +
guides(color=guide_legend(),
linetype = guide_legend()) +
labs(color='year',
linetype = 'year')
p
We decompose the Lorenz curve in small squares:
p1 <- ggplot(data = parti_to_lorenz) +
geom_line(aes(x = L, y = p, color = year)) +
scale_x_continuous(name = "Cumulative share of X") +
scale_y_continuous(name = "Cumulative share of Y") +
coord_cartesian(
xlim = c(0, 0.35),
ylim = c(0, 0.15),
expand = TRUE,
default = FALSE,
clip = "on") +
geom_abline()
p1
p2 <- ggplot(data = parti_to_lorenz) +
geom_line(aes(x = L, y = p, color = year)) +
scale_x_continuous(name = "Cumulative share of X") +
scale_y_continuous(name = "Cumulative share of Y") +
coord_cartesian(
xlim = c(0.35, 0.75),
ylim = c(0.05, 0.65),
expand = TRUE,
default = FALSE,
clip = "on") +
geom_abline()
p2
p3 <- ggplot(data = parti_to_lorenz) +
geom_line(aes(x = L, y = p, color = year)) +
scale_x_continuous(name = "Cumulative share of X") +
scale_y_continuous(name = "Cumulative share of Y") +
coord_cartesian(
xlim = c(0.75, 1),
ylim = c(0.5, 1),
expand = TRUE,
default = FALSE,
clip = "on") +
geom_abline()
p3
gini(parti_1993$L, parti_1993$p)
## [1] 0.1903433
gini(parti_1997$L, parti_1997$p)
## [1] 0.1804803
gini(parti_2002$L, parti_2002$p)
## [1] 0.1227368
gini(parti_2007$L, parti_2007$p)
## [1] 0.09461065
gini(parti_2012$L, parti_2012$p)
## [1] 0.1624615
gini(parti_2017$L, parti_2017$p)
## [1] 0.2013659
DK(parti_1993$L, parti_1993$p)
## [1] 0.2813604
DK(parti_1997$L, parti_1997$p)
## [1] 0.2988046
DK(parti_2002$L, parti_2002$p)
## [1] 0.3355661
DK(parti_2007$L, parti_2007$p)
## [1] 0.3991031
DK(parti_2012$L, parti_2012$p)
## [1] 0.3118674
DK(parti_2017$L, parti_2017$p)
## [1] 0.2641449
We import the data of the election
don.dep.2015 <- read.table("http://www.thibault.laurent.free.fr/code/4CT/DP15_Bvot_T1T2.txt", sep = ";",
header = T, stringsAsFactors = F, quote = "")
don.dep.2015 <- don.dep.2015[don.dep.2015$NUMTOUR == 1, ]
don.dep.2015 <- unique(don.dep.2015[, c("CODDPT", "CODCAN",
"CODSUBCOM", "CODBURVOT", "NBRINS")])
don.dep.2015 <- aggregate(don.dep.2015[, "NBRINS"],
by = list(CODDPT = don.dep.2015$CODDPT,
CODCAN = don.dep.2015$CODCAN),
sum)
We prepare the data departement per departement:
dep_split <- split(don.dep.2015, don.dep.2015$CODDPT)
Source of the data: https://www.data.gouv.fr/fr/datasets/elections-cantonales-1988-2011/
tour1_dep_2011 <- read.csv2("http://www.thibault.laurent.free.fr/code/4CT/cdsp_cant2011t1_canton.csv", sep = ",")
tour1_dep_2008 <- read.csv2("http://www.thibault.laurent.free.fr/code/4CT/cdsp_cant2008t1_canton.csv", sep = ",")
tour1_dep_2011 <- tour1_dep_2011[,
c("Code.du.département", "Code.du.canton", "Inscrits")]
tour1_dep_2008 <- tour1_dep_2008[, c("Code.département", "Code.canton", "Inscrits")]
names(tour1_dep_2011) <- c("CODDPT", "CODCAN", "x")
names(tour1_dep_2008) <- c("CODDPT", "CODCAN", "x")
dep_previous <- rbind(tour1_dep_2011, tour1_dep_2008)
dep_previous <- dep_previous[!duplicated(dep_previous[,
c("CODDPT", "CODCAN")]), ]
Change the code of some departements located outside the metropolitean France (971 = guadeloupe = ZA, 972 = MARTINIQUE, 973 = GUYANE, 974 = LA REUNION = ZD, 985 = MAYOTTE = ZM)
dep_previous$CODDPT <- as.character(dep_previous$CODDPT)
dep_previous$CODDPT[nchar(dep_previous$CODDPT) == 1] <- paste0("0",
dep_previous$CODDPT[nchar(dep_previous$CODDPT) == 1])
dep_previous$CODDPT[dep_previous$CODDPT == "971"] <- "ZA"
dep_previous$CODDPT[dep_previous$CODDPT == "974"] <- "ZD"
dep_previous$CODDPT[dep_previous$CODDPT == "985"] <- "ZM"
We split the data departement per departement
dep_split_previous <- split(dep_previous, dep_previous$CODDPT)
We prepare the function that permits to create the \((x,y)\) coordinates of the Lorenz curves for each departement per departement:
create_lp <- function(x, Election = "") {
x$Election <- Election
x$rank <- rank(-x$x, ties.method = "random")
x <- x[order(x$rank), ]
x$p <- x$rank/nrow(x)
x$L <- cumsum(x$x)/sum(x$x)
return(x)
}
dep_split <- lapply(dep_split, function(x)
create_lp(x, Election = "NEW"))
dep_split_previous <- lapply(dep_split_previous, function(x)
create_lp(x, Election = "OLD"))
We merge the data in one unique data.frame:
final_dep <- dep_split_previous[[1]]
for (k in 2:length(dep_split_previous))
final_dep <- rbind(final_dep, dep_split_previous[[k]])
for (k in 1:length(dep_split))
final_dep <- rbind(final_dep, dep_split[[k]])
We plot a Lorenz curve year by year (Figure 7):
p <- ggplot(data = final_dep) +
geom_line(aes(x = L, y = p, color = Election, linetype = as.factor(Election))) +
scale_color_grey(start=0.5, end=0.1) +
scale_x_continuous(name = "Cumulative voters", limits = c(0, 1)) +
scale_y_continuous(name = "Cumulative seats", limits = c(0, 1)) +
#geom_abline() +
facet_wrap(~ CODDPT) +
guides(color=guide_legend(),
linetype = guide_legend()) +
labs(color='Election',
linetype = 'Election') +
theme(axis.line=element_blank(),axis.text.x=element_blank(),
axis.text.y=element_blank(),axis.ticks=element_blank())
p
ggsave("departement_lorenz.pdf", width = 9, height = 9)
Lorenz for departements
We create the contours of the departements to store the results of the gini indices
library(sf)
geo_circ_simp <- geo_circ[, c("code_dpt", "inscrits_2012")] %>%
filter(! geo_circ$nom_dpt %in% c("MARTINIQUE", "GUYANE",
"NOUVELLE-CALEDONIE" , "POLYNESIE-FRANCAISE", "SAINT-PIERRE-ET-MIQUELON",
"WALLIS-ET-FUTUNA", "SAINT-MARTIN/SAINT-BARTHELEMY", "PARIS", "ZZ"))
dep_spatial <- aggregate(geo_circ_simp,
by = list(code_dpt = geo_circ_simp$code_dpt), FUN = mean) %>%
select(-inscrits_2012, -code_dpt.1)
Import the evolution of the population
download.file(url = "http://www.thibault.laurent.free.fr/code/4CT/TCRD_004.xls",
destfile = paste0(getwd(), "/TCRD_004.xls"))
df_pop <- read_xls("TCRD_004.xls",
skip = 3, n_max = 102, na = "-")
df_pop <- df_pop %>%
select(1, 5, 8) %>%
rename(code_dpt = 1,
pop_2016 = 2,
pop_1999 = 3) %>%
mutate(evol_pop = (pop_2016 / pop_1999) ^ (1 / 17) - 1) %>%
select(code_dpt, evol_pop)
df_pop[nrow(df_pop), "evol_pop"] <- 0.031
df_pop$code_dpt[df_pop$code_dpt == "971"] <- "ZA"
df_pop$code_dpt[df_pop$code_dpt == "974"] <- "ZD"
df_pop$code_dpt[df_pop$code_dpt == "976"] <- "ZM"
dep_spatial <- merge(dep_spatial, df_pop, by = "code_dpt")
We compute the gini index for each departement
for (k in 1:nrow(dep_spatial)) {
temp_file_1 <- final_dep %>%
filter(CODDPT == dep_spatial$code_dpt[k], Election == "OLD")
dep_spatial$Old[k] <- gini(temp_file_1$L, temp_file_1$p)
temp_file_2 <- final_dep %>%
filter(CODDPT == dep_spatial$code_dpt[k], Election == "NEW")
dep_spatial$New[k] <- gini(temp_file_2$L, temp_file_2$p)
}
We class the departements in 3 groups with respect to the evolution of the Gini index
dep_spatial$slope_gini <- abs((dep_spatial$New - dep_spatial$Old) / dep_spatial$Old)
bk <- round(classIntervals(dep_spatial$slope_gini, 3, "kmeans")$brks,
digits = 3)
ind <- findInterval(dep_spatial$slope_gini, bk, all.inside = TRUE)
dep_spatial$slope_gini <- factor(c("low", "medium", "high")[ind],
levels = c("low", "medium", "high"),
ordered = T)
We plot the evolution of the Gini index a Lorenz curve year by year (figure 8 in the chapter)
library(GGally)
ggparcoord(st_drop_geometry(dep_spatial), columns = 3:4,
groupColumn = 'slope_gini',
scale = 'globalminmax',
boxplot = T) +
scale_x_discrete(name = "Gini") +
scale_color_grey(start=0.1, end=0.8) +
labs(color='Classes') +
ggsave("gini_dep1.pdf", width = 14, height = 12, units = "cm")
We represent the Gini index before and after the reform:
ggplot(data = dep_spatial) +
geom_sf(aes(fill = slope_gini)) + theme(
axis.text.x = element_blank(),
axis.text.y = element_blank(),
axis.ticks = element_blank()) +
scale_fill_grey() +
labs(fill='Classes')
We compute the gini index for each departement
for (k in 1:nrow(dep_spatial)) {
temp_file_1 <- final_dep %>%
filter(CODDPT == dep_spatial$code_dpt[k], Election == "OLD")
dep_spatial$dk_old[k] <- DK(temp_file_1$L, temp_file_1$p)
temp_file_2 <- final_dep %>%
filter(CODDPT == dep_spatial$code_dpt[k], Election == "NEW")
dep_spatial$dk_new[k] <- DK(temp_file_2$L, temp_file_2$p)
}
We class the departements in 3 groups with respect to the evolution of the Gini index
dep_spatial$slope_dk <- abs(dep_spatial$dk_new - dep_spatial$dk_old)
bk <- round(classIntervals(dep_spatial$slope_dk, 3, "kmeans")$brks,
digits = 3)
ind <- findInterval(dep_spatial$slope_dk, bk, all.inside = TRUE)
dep_spatial$slope_dk <- factor(c("low", "medium", "high")[ind],
levels = c("low", "medium", "high"),
ordered = T)
We plot the evolution of the DK index a Lorenz curve year by year (figure 9)
library(GGally)
ggparcoord(st_drop_geometry(dep_spatial), columns = 6:7,
groupColumn = 'slope_dk',
scale = 'globalminmax',
boxplot = T) +
scale_x_discrete(name = "DK") +
scale_color_grey(start=0.1, end=0.8) +
labs(color='Classes') +
ggsave("DK_dep1.pdf", width = 14, height = 12, units = "cm")
We represent the Gini index before and after the reform:
ggplot(data = dep_spatial) +
geom_sf(aes(fill = slope_dk)) + theme(
axis.text.x = element_blank(),
axis.text.y = element_blank(),
axis.ticks = element_blank()) +
scale_fill_grey() +
labs(fill='Classes')
To load the data:
download.file(url = "http://www.thibault.laurent.free.fr/code/4CT/electoral_college.xls",
destfile = paste0(getwd(), "/electoral_college.xls"))
df_election_usa <- read_xls("electoral_college.xls", skip = 1, n_max = 51)
# For year 2000
df_election_usa_2000_college <- df_election_usa %>%
select(state, college_2004, pop_2000) %>%
rename(college = college_2004,
pop = pop_2000) %>%
mutate(ratio1 = college / pop,
rank_1 = rank(ratio1, ties.method = "random"),
year = "2000") %>%
arrange(rank_1) %>%
mutate(p = cumsum(college / sum(college)),
L = cumsum(pop / sum(pop)))
df_election_usa_2000_college <- rbind(c(NA, NA, NA, NA, NA, 2000, 0, 0, 0),
df_election_usa_2000_college)
df_election_usa_2000_rep <- df_election_usa %>%
select(state, college_2004, pop_2000) %>%
rename(college = college_2004,
pop = pop_2000) %>%
mutate(nb_representatives = college - 2,
ratio1 = nb_representatives / pop,
rank_1 = rank(ratio1, ties.method = "random"),
year = "2000") %>%
arrange(rank_1) %>%
mutate(p = cumsum(nb_representatives / sum(nb_representatives)),
L = cumsum(pop / sum(pop)))
df_election_usa_2000_rep <- rbind(c(NA, NA, NA, NA, NA, NA, 2000, 0, 0, 0),
df_election_usa_2000_rep)
# for year 2010
df_election_usa_2010_college <- df_election_usa %>%
select(state, college_2012, pop_2010) %>%
rename(college = college_2012,
pop = pop_2010) %>%
mutate(ratio1 = college / pop,
rank_1 = rank(ratio1, ties.method = "random"),
year = "2010") %>%
arrange(rank_1) %>%
mutate(p = cumsum(college / sum(college)),
L = cumsum(pop / sum(pop)))
df_election_usa_2010_college <- rbind(c(NA, NA, NA, NA, NA, 2010, 0, 0, 0),
df_election_usa_2010_college)
df_election_usa_2010_rep <- df_election_usa %>%
select(state, college_2012, pop_2010) %>%
rename(college = college_2012,
pop = pop_2010) %>%
mutate(nb_representatives = college - 2,
ratio1 = nb_representatives / pop,
rank_1 = rank(ratio1, ties.method = "random"),
year = "2010") %>%
arrange(rank_1) %>%
mutate(p = cumsum(nb_representatives / sum(nb_representatives)),
L = cumsum(pop / sum(pop)))
df_election_usa_2010_rep <- rbind(c(NA, NA, NA, NA, NA, NA, 2010, 0, 0, 0),
df_election_usa_2010_rep)
EC <- rbind(
as.data.frame(df_election_usa_2000_college),
as.data.frame(df_election_usa_2010_college))
rep <- rbind(
as.data.frame(df_election_usa_2000_rep),
as.data.frame(df_election_usa_2010_rep))
Lorenz curve on the Electoral college
p <- ggplot(data = EC) +
geom_line(aes(x = L, y = p, color = year)) +
scale_x_continuous(name = "Cumulative share of X", limits = c(0, 1)) +
scale_y_continuous(name = "Cumulative share of Y", limits = c(0, 1)) +
geom_abline()
p
Lorenz curve on the number of represententaives:
p <- ggplot(data = rep) +
geom_line(aes(x = L, y = p, color = year)) +
scale_x_continuous(name = "Cumulative share of X", limits = c(0, 1)) +
scale_y_continuous(name = "Cumulative share of Y", limits = c(0, 1)) +
geom_abline()
p
We plot the college and representative on the same graph (figure 10):
both <- rbind(
data.frame(type = "Seats", df_election_usa_2010_college[, c("p", "L")]),
data.frame(type = "Representatives", df_election_usa_2010_rep[, c("p", "L")]))
p <- ggplot(data = both) +
geom_line(aes(x = L, y = p, color = type, linetype = as.factor(type))) +
scale_color_grey(start=0.8, end=0.2) +
scale_x_continuous(name = "Cumulative voters", limits = c(0, 1)) +
scale_y_continuous(name = "Cumulative seats", limits = c(0, 1)) +
geom_abline() +
guides(color=guide_legend(),
linetype = guide_legend()) +
labs(color='type',
linetype = 'type')
p
gini(df_election_usa_2000_college[-1, ]$L, df_election_usa_2000_college[-1, ]$p)
## [1] 0.04821547
gini(df_election_usa_2000_rep[-1, ]$L, df_election_usa_2000_rep[-1, ]$p)
## [1] 0.01024389
gini(df_election_usa_2010_college[-1, ]$L, df_election_usa_2010_college[-1, ]$p)
## [1] 0.04843957
gini(df_election_usa_2010_rep[-1, ]$L, df_election_usa_2010_rep[-1, ]$p)
## [1] 0.01056356
DK(df_election_usa_2000_college[-1, ]$L, df_election_usa_2000_college[-1, ]$p)
## [1] 0.4334915
DK(df_election_usa_2000_rep[-1, ]$L, df_election_usa_2000_rep[-1, ]$p)
## [1] 0.4864924
DK(df_election_usa_2010_college[-1, ]$L, df_election_usa_2010_college[-1, ]$p)
## [1] 0.4330311
DK(df_election_usa_2010_rep[-1, ]$L, df_election_usa_2010_rep[-1, ]$p)
## [1] 0.4863197
We use some extrafonts in the figures. To save the graphics, user needs to install extrafont package and install extra fonts by :
extrafont::font_import()
To get Figure 1, compile the following code :
extrafont::loadfonts()
my_data <- data.frame(n = c(0, 0.32, 0.62, 0.82, 0.92, 1, 0, 1, 0, 1, 1, 1),
r = c(0, 0.2, 0.4, 0.6, 0.8, 1, 0, 1, 0, 0, 1, 1),
type = c(rep("Lorenz curve", 6),
rep("Perfect equality line", 2),
rep("Perfect inequality line", 4)))
p <- ggplot(data = my_data) +
geom_line(aes(x = n, y = r, color = type, linetype = type),
size = 1.2) +
scale_x_continuous(name = "Cumulative n", limits = c(0, 1),
expand = c(0, 0)) +
scale_y_continuous(name = "Cumulative r", limits = c(0, 1),
expand = c(0, 0)) +
theme(legend.title=element_blank(),
plot.title=element_text(size = 12, family="Helvetica"),
text=element_text(size = 14, family="Helvetica"),
axis.text.x=element_text(colour="black", size = 12),
axis.text.y=element_text(colour="black", size = 12))
ggsave("figure_1_lorenz.pdf", width = 8, height = 6)
To get Figure 2, compile the following code :
extrafont::loadfonts()
my_data <- data.frame(n = c(0, 0.75, 1, 0, 0.5, 1),
r = c(0, 0, 1, 0, 0, 1),
case = c("case 1", "case 1", "case 1",
"case 2", "case 2", "case 2"))
my_line <- df <- data.frame(x1 = 0, x2 = 1, y1 = 0, y2 = 1)
my_line_1 <- data.frame(x1 = 0, x2 = 0, y1 = 0, y2 = 1)
my_line_2 <- data.frame(x1 = 0, x2 = 1, y1 = 0, y2 = 0)
my_line_3 <- data.frame(x1 = 1, x2 = 1, y1 = 0, y2 = 1)
p <- ggplot(data = my_data) +
geom_segment(data = my_line, aes(x = x1, y = y1, xend = x2, yend = y2),
col = "darkgrey", lty = 2, size = 0.1) +
geom_segment(data = my_line_3, aes(x = x1, y = y1, xend = x2, yend = y2),
col = "darkgrey", lty = 2, size = 0.1) +
geom_segment(data = my_line_2, aes(x = x1, y = y1, xend = x2, yend = y2),
col = "black", lty = 1) +
geom_segment(data = my_line_1, aes(x = x1, y = y1, xend = x2, yend = y2),
col = "black", lty = 1) +
geom_line(aes(x = n, y = r, color = case, linetype = case),
size = 1.2) +
scale_x_continuous(name = "Cumulative n", limits = c(0, 1),
breaks = c(0, 0.5, 0.75, 1),
labels = c("0", TeX("$1-n_l$"), TeX("$1-n_k$"), "1"),
expand = c(0, 0)) +
scale_y_continuous(name = "Cumulative r", limits = c(0, 1),
breaks = c(0, 1), labels = c(0, 1),
expand = c(0, 0)) +
theme(legend.title=element_blank(),
plot.title=element_text(size = 12, family="Helvetica"),
text=element_text(size = 14, family="Helvetica"),
axis.text.x=element_text(colour="black", size = 12),
axis.text.y=element_text(colour="black", size = 12))
p
ggsave("figure_2_lorenz.pdf", width = 8, height = 6)
my_data <- data.frame(n = c(0, 0.75, 1, 0, 0.5, 1),
r = c(0, 0, 1, 0, 0, 1),
case = c("case 1", "case 1", "case 1",
"case 2", "case 2", "case 2"))
my_line <- df <- data.frame(x1 = 0, x2 = 1, y1 = 0, y2 = 1)
my_line_1 <- data.frame(x1 = 1, x2 = 1, y1 = 0, y2 = 1)
my_line_2 <- data.frame(x1 = 0, x2 = 1, y1 = 0, y2 = 0)
my_line_3 <- data.frame(x1 = 1, x2 = 1, y1 = 0, y2 = 1)
p <- ggplot(data = my_data) +
geom_line(aes(x = n, y = r, color = case, linetype = case), size = 1.2) +
coord_cartesian(
xlim = c(0, 1),
ylim = c(0, 1),
expand = TRUE,
default = FALSE,
clip = "on") +
scale_x_continuous(name = "Cumulative n", limits = c(0, 1),
breaks = c(0, 0.5, 0.75, 1),
labels = c("0", TeX("$1-n_l$"), TeX("$1-n_k$"), "1"),
expand = c(0, 0)) +
scale_y_continuous(name = "Cumulative r", limits = c(0, 1),
breaks = c(0, 1), labels = c(0, 1),
expand = c(0, 0)) +
geom_segment(data = my_line, aes(x = x1, y = y1, xend = x2, yend = y2),
col = "darkgrey", lty = 2) +
theme(axis.line = element_line(size=0.5, colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
plot.title=element_text(size = 12, family="Helvetica"),
text=element_text(size = 12, family="Helvetica"),
axis.text.x=element_text(colour="black", size = 10),
axis.text.y=element_text(colour="black", size = 10)) +
geom_segment(data = my_line_3, aes(x = x1, y = y1, xend = x2, yend = y2),
col = "darkgrey", lty = 2)
p + labs(color = "")
ggsave("figure_2_lorenz.pdf", width = 20, height = 14, units = "cm")
To get Figure 1 in the supplementary material (Theory), compile the following code :
my_Fun <- function(x) {
3/4 - 2 / pi * asin(sqrt(x)) - 1 / pi * asin(sqrt(1 - 2 * x))
}
my_line_1 <- data.frame(x1 = 0, x2 = 1/3,
y1 = 3 * acos(sqrt(3) / 3) / pi - 3/4,
y2 = 3 * acos(sqrt(3) / 3) / pi - 3/4)
p9 <- ggplot(data.frame(x = c(0, 1/3)), aes(x = x)) +
stat_function(fun = my_Fun)
p9 +
theme(axis.line = element_line(size=0.5, colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
plot.title=element_text(size = 12, family="Helvetica"),
text=element_text(size = 12, family="Helvetica"),
axis.text.x=element_text(colour="black", size = 10),
axis.text.y=element_text(colour="black", size = 10)) +
scale_x_continuous(name = "x", limits = c(0, 1/3),
expand = c(0, 0)) +
scale_y_continuous(name = TeX("$\\Gamma_{\\lambda }\\left(N,R\\right)$"),
breaks = c(3 * acos(sqrt(3) / 3) / pi - 3/4, 0.175, 0.2, 0.225, 0.25),
labels = c("0.162", "0.175", "0.2", "0.225", "0.25")) +
geom_segment(data = my_line_1, aes(x = x1, y = y1, xend = x2, yend = y2),
col = "darkgrey", lty = 2)
ggsave("figure_1_annexe.pdf", width = 16, height = 10, units = "cm")
To get Figure 2, compile the following code :
my_data <- data.frame(n = c(0, 0.5, 1, 0, 0.75, 1),
r = c(0, 0, 1, 0, 0, 1),
case = c("case 1", "case 1", "case 1",
"case 2", "case 2", "case 2"))
my_line <- df <- data.frame(x1 = 0, x2 = 1.3, y1 = 0, y2 = 1.3)
my_line_1 <- data.frame(x1 = 1, x2 = 1, y1 = 0, y2 = 1)
my_line_2 <- data.frame(x1 = 0, x2 = 1, y1 = 1, y2 = 1)
my_line_3 <- data.frame(x1 = 0, x2 = 1, y1 = 1/2, y2 = 0)
my_line_4 <- data.frame(x1 = 1/3, x2 = 1/3, y1 = 0, y2 = 1/3)
my_line_5 <- data.frame(x1 = 0, x2 = 1/3, y1 = 1/3, y2 = 1/3)
annotation <- data.frame(
x = c(1.2),
y = c(1.27),
label = "n2=n1"
)
d <- data.frame(x=c(0,1/3,0,0), y=c(0,1/3,1/2,0))
p <- ggplot(data = my_data) +
coord_cartesian(
xlim = c(0, 1.5),
ylim = c(0, 1.3),
expand = TRUE,
default = FALSE,
clip = "on") +
scale_x_continuous(name = "", limits = c(0, 1.5),
breaks = c(0, 1/3, 1),
labels = c("0", TeX("$\\frac{1}{3}$"), "1"),
expand = c(0, 0)) +
scale_y_continuous(name = "", limits = c(0, 1.3),
breaks = c(0, 1/3, 1/2, 1),
labels = c("0", TeX("$\\frac{1}{3}$"), TeX("$\\frac{1}{2}$"), "1"),
expand = c(0, 0)) +
geom_segment(data = my_line, aes(x = x1, y = y1, xend = x2, yend = y2),
col = "darkgrey") +
geom_segment(data = my_line_1, aes(x = x1, y = y1, xend = x2, yend = y2),
col = "darkgrey") +
geom_segment(data = my_line_2, aes(x = x1, y = y1, xend = x2, yend = y2),
col = "darkgrey") +
geom_segment(data = my_line_3, aes(x = x1, y = y1, xend = x2, yend = y2),
col = "darkgrey") +
geom_segment(data = my_line_4, aes(x = x1, y = y1, xend = x2, yend = y2),
col = "darkgrey", lty = 2) +
geom_segment(data = my_line_5, aes(x = x1, y = y1, xend = x2, yend = y2),
col = "darkgrey", lty = 2) +
theme(axis.line = element_line(size=0.5, colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
plot.title=element_text(size = 12, family="Helvetica"),
text=element_text(size = 12, family="Helvetica"),
axis.text.x=element_text(colour="black", size = 10),
axis.text.y=element_text(colour="black", size = 10)) +
geom_text(data=annotation, aes(x=x, y=y, label=label),
color="orange",
size=5, fontface="bold" ) +
geom_polygon(data=d, mapping=aes(x=x, y=y), alpha=0.2, color="blue", fill="blue")
p
ggsave("figure_2_annexe.pdf", width = 14, height = 14, units = "cm")
To get Figure 3, compile the following code :
my_Fun <- function(x) {
2 * sqrt(1 / x) + sqrt(1/(1 - 2 * x))
}
p9 <- ggplot(data.frame(x = c(0, 1/3)), aes(x = x)) +
stat_function(fun = my_Fun)
p9 +
theme(axis.line = element_line(size=0.5, colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
plot.title=element_text(size = 12, family="Helvetica"),
text=element_text(size = 12, family="Helvetica"),
axis.text.x=element_text(colour="black", size = 10),
axis.text.y=element_text(colour="black", size = 10)) +
scale_y_continuous(name = TeX("$\\Delta_{\\lambda }^{1}\\left(N,R\\right)$"))
ggsave("figure_3_annexe.pdf", width = 16, height = 10, units = "cm")