A quick port of Joël Gombin’s spReapportion package, without the
dependencies on the maptools and rgeos packages, which were both
retired in 2023, and with additional support for sf objects.
The sfReapportion function is intended as a drop-in
replacement for Joël’s spReapportion function: it takes
exactly the same arguments, and outputs exactly the same kind of
results, also as a data frame.
Use the following options with caution:
- weights with mode = "proportion" has only been lightly tested
- weight_matrix and weight_matrix_var have only been lightly tested

The package was ported in order to be used in this project.
If the package is not available from CRAN, install from GitHub instead:
install.packages("remotes")
remotes::install_github("briatte/sfReapportion")

library(sfReapportion)

# Joël's example
data(ParisPollingStations2012)
data(ParisIris)
data(RP_2011_CS8_Paris)

# reapportion the census data from the census tracts to the polling stations
CS_ParisPollingStations <- sfReapportion(
  ParisIris,                 # old geometry (census tracts)
  ParisPollingStations2012,  # new geometry (polling stations)
  RP_2011_CS8_Paris,         # census data to reapportion
  "DCOMIRIS",                # ID column in `ParisIris`
  "ID",                      # ID column in `ParisPollingStations2012`
  "IRIS"                     # ID column in `RP_2011_CS8_Paris`
)

# our results
> head(CS_ParisPollingStations)[, 1:4]
ID C11_POP15P C11_POP15P_CS1 C11_POP15P_CS2
1 750010001 1385.539 0.007534526 88.64129
2 750010002 1389.989 1.158961646 65.49323
3 750010003 1921.008 3.410431283 85.39031
4 750010004 1577.544 2.504085472 62.79095
5 750010005 1802.787 1.803202193 68.64467
6 750010006 1619.144 5.790202710 83.05042
# compare to Joël's test file
> head(readRDS("tests/testthat/CS_ParisPollingStations.rds"))[, 1:4]
ID C11_POP15P C11_POP15P_CS1 C11_POP15P_CS2
1 750010001 1385.539 0.007534526 88.64129
2 750010002 1389.989 1.158961646 65.49323
3 750010003 1921.008 3.410431283 85.39031
4 750010004 1577.544 2.504085472 62.79095
5 750010005 1802.787 1.803202193 68.64467
6 750010006 1619.144 5.790202710 83.05042
The results match those of the areal
package:
library(areal)
library(dplyr)
library(sf)

# target geometry: polling stations, as an sf object
ParisPollingStations2012_sf <- sf::st_as_sf(ParisPollingStations2012)

# source geometry: census tracts as an sf object, with the census data
# joined on the tract identifier
ParisIris_sf <- sf::st_as_sf(ParisIris) %>%
  left_join(RP_2011_CS8_Paris, by = c("DCOMIRIS" = "IRIS"))

# interpolate three of the census variables onto the polling stations
areal_equiv <- areal::aw_interpolate(
  ParisPollingStations2012_sf,
  tid = ID,
  source = ParisIris_sf,
  sid = DCOMIRIS,
  weight = "total",
  output = "sf",
  extensive = c("C11_POP15P", "C11_POP15P_CS1", "C11_POP15P_CS2")
)

# match display of previous results
> select(areal_equiv, ID, C11_POP15P, C11_POP15P_CS1, C11_POP15P_CS2) %>%
+ arrange(ID) %>%
+ sf::st_drop_geometry() %>%
+ head()
ID C11_POP15P C11_POP15P_CS1 C11_POP15P_CS2
1 750010001 1385.539 0.007534526 88.64129
2 750010002 1389.989 1.158961646 65.49323
3 750010003 1921.008 3.410431283 85.39031
4 750010004 1577.544 2.504085472 62.79095
5 750010005 1802.787 1.803202193 68.64467
6 750010006 1619.144 5.790202710 83.05042
The package contains further tests against the st_interpolate_aw
function of the sf
package.
The package contains an example weights matrix that contains voter
addresses located in the 20th arrondissement of Paris. The weighting
variable, nb_adresses, only approximately counts voters at
a given location (the number of voters at each address is unknown).
The data were obtained by subsetting from the Répertoire électoral unique (REU):
library(arrow)
library(dplyr)
library(sf)

# subset polling stations
bv20 <- arrow::read_parquet("table-bv-reu.parquet") %>%
  dplyr::filter(code_commune %in% c("75120")) %>%
  dplyr::select(id_brut_reu, libelle_reu, nb_adresses)

# subset voter addresses (slow)
addr20 <- arrow::read_parquet("table-adresses-reu.parquet") %>%
  dplyr::filter(id_brut_bv_reu %in% unique(bv20$id_brut_reu))

# convert to spatial points
# NOTE(review): summarise() only drops the last grouping level by default, so
# the saved object presumably stays grouped by geo_adresse and longitude --
# confirm whether an explicit ungroup() is wanted here
Paris20eAddresses <- addr20 %>%
  dplyr::group_by(geo_adresse, longitude, latitude) %>%
  dplyr::summarise(nb_adresses = sum(nb_adresses)) %>%
  sf::st_as_sf(coords = c("longitude", "latitude")) %>%
  sf::st_set_crs(4326)

# save to .rda (LazyData: true)
save(Paris20eAddresses, file = "Paris20eAddresses.rda", compress = "xz")

The results can be tested by subsetting the rest of the test data included in the package to the 20th arrondissement of Paris:
library(dplyr)
library(ggplot2)
library(sf)
library(sfReapportion)

# spatial points of voter addresses in Paris 20th district
data(Paris20eAddresses)
data(ParisPollingStations2012)
data(ParisIris)
data(RP_2011_CS8_Paris)

# subset geometry of polling stations (new geom, 76 polling stations)
# paste0() rather than paste(): the latter would insert a space between the
# "75020_" prefix and the polling station number
ParisPollingStations2012 <- sf::st_as_sf(ParisPollingStations2012) %>%
  dplyr::filter(arrondisse %in% c(20)) %>%
  dplyr::mutate(id_brut_bv_reu = paste0("75020_", num_bv))

# subset geometry of census tracts (old geom, 356 polygons)
# will throw a warning about polygons expected to be spatially constant
ParisIris <- sf::st_as_sf(ParisIris) %>%
  sf::st_intersection(ParisPollingStations2012)

# this is what we're reapportioning
ggplot(ParisIris) +
  geom_sf() +
  geom_sf(data = Paris20eAddresses, aes(size = nb_adresses), alpha = 1/4) +
  scale_size_area(max_size = 10) +
  theme_void()

# subset census data to reapportion (93 distinct census tracts)
RP_2011_CS8_Paris <- dplyr::filter(
  RP_2011_CS8_Paris,
  IRIS %in% ParisIris$DCOMIRIS
)

Comparing weighted and unweighted results is probably a good idea:
# unweighted
r1 <- sfReapportion(
  ParisIris, ParisPollingStations2012, RP_2011_CS8_Paris,
  "DCOMIRIS", "ID", "IRIS"
)

# weighted
# will throw a warning about `weight_matrix` having only been lightly tested
r2 <- sfReapportion(
  ParisIris, ParisPollingStations2012, RP_2011_CS8_Paris,
  "DCOMIRIS", "ID", "IRIS",
  weight_matrix = Paris20eAddresses,
  weight_matrix_var = "nb_adresses"
)

The differences are non-trivial, at least when looking at adult population and socio-professional categories as we do in this example:
> # unweighted
> head(r1[, 1:4])
ID C11_POP15P C11_POP15P_CS1 C11_POP15P_CS2
1 750200001 2026.819 0 56.46644
2 750200002 1465.669 0 34.65999
3 750200003 2308.119 0 60.81294
4 750200004 1934.005 0 53.48857
5 750200005 1886.533 0 45.53472
6 750200006 1873.657 0 44.32671
>
> # weighted
> head(r2[, 1:4])
ID C11_POP15P C11_POP15P_CS1 C11_POP15P_CS2
1 750200001 2006.434 0 54.75713
2 750200002 1543.915 0 35.65698
3 750200003 1930.616 0 51.33953
4 750200004 2135.882 0 60.72449
5 750200005 1925.147 0 46.37813
6 750200006 1953.638 0 47.51444
>
> # correlations
> round(diag(cor(r1[,-1], r2[,-1])), 2)
C11_POP15P C11_POP15P_CS1 C11_POP15P_CS2 C11_POP15P_CS3 C11_POP15P_CS4
0.30 0.75 0.58 0.46 0.30
C11_POP15P_CS5 C11_POP15P_CS6 C11_POP15P_CS7 C11_POP15P_CS8
0.48 0.70 0.26 0.52
- areal package
- areal package
- populR package

I also wrote a very short blog post to illustrate what the package does.