
Create readr column specification using regular expression matching
Source: R/pal.gen.R
cols_regex.Rd
Allows you to define one regular expression per desired column specification object; each expression is matched against the column names to determine which columns receive that specification.
Usage
cols_regex(..., .col_names, .default = readr::col_character())
Arguments
- ...
Named arguments where the names are (Perl-compatible) regular expressions and the values are column objects created by
col_*()
, or their abbreviated character names (as described in the col_types
parameter of readr::read_delim()
). Dynamic dots are supported.
- .col_names
Column names which should be matched by
...
.
- .default
Any named columns not matched by any of the regular expressions in
...
will be read with this column type.
Details
The main limitation of cols_regex()
is that it needs to know the input dataset's full set of .col_names
in advance, for which dsv_colnames()
can help.
See the examples for further details.
Examples
library(magrittr)
# for some hypothetical CSV data column names like these...
col_names <- c("VAR1_Text",
"VAR2_Text",
"VAR3_Text_Other",
"VAR1_Code_R1",
"VAR2_Code_R2",
"HAS_R1_Lag",
"HAS_R2_Lag",
"GARBAGEX67",
"GARBAGEY09")
# ...a column spec could be created concisely as follows:
col_regex <- list("_Text(_|$)" = "c",
"_Code(_|$)" = "i",
"^GARBAGE" = readr::col_skip())
pal::cols_regex(.col_names = col_names,
!!!col_regex,
.default = "l")
#> cols(
#> .default = col_logical(),
#> VAR1_Text = col_character(),
#> VAR2_Text = col_character(),
#> VAR3_Text_Other = col_character(),
#> VAR1_Code_R1 = col_integer(),
#> VAR2_Code_R2 = col_integer(),
#> GARBAGEX67 = col_skip(),
#> GARBAGEY09 = col_skip()
#> )
# we can parse some real data:
url <- "https://salim_b.gitlab.io/misc/Kantonsratswahl_Zuerich_2019_Ergebnisse_Gemeinden.csv"
raw_data <-
httr2::request(url) |>
httr2::req_perform() |>
httr2::resp_body_string()
col_spec <- pal::cols_regex("^(Gemeindenamen|Liste|Wahlkreis)$" = "c",
"(?i)anteil" = "d",
.default = "i",
.col_names = pal::dsv_colnames(raw_data))
print(col_spec)
#> cols(
#> .default = col_integer(),
#> Gemeindenamen = col_character(),
#> Liste = col_character(),
#> Wahlkreis = col_character(),
#> Stimmenanteil = col_double(),
#> `+/- (Stimmenanteil)` = col_double(),
#> Wähleranteil = col_double(),
#> `+/- (Wähleranteil)` = col_double(),
#> `Stimmenanteil 2015` = col_double(),
#> `Wähleranteil 2015` = col_double()
#> )
readr::read_csv(file = raw_data,
col_types = col_spec)
#> # A tibble: 1,770 × 15
#> Gemeindenamen `BFS-Nr.` `Listen-Nr.` Liste `Wahlkreis-Nr.` Wahlkreis Stimmen Stimmenanteil `+/- (Stimmenanteil)` Stimmenzusatz Wähler Wähleranteil
#> <chr> <int> <int> <chr> <int> <chr> <int> <dbl> <dbl> <int> <int> <dbl>
#> 1 Adlikon 21 1 SVP 16 Wahlkreis XVI, Ande… 468 56.0 -6.21 8 117 56.0
#> 2 Adlikon 21 2 SP 16 Wahlkreis XVI, Ande… 39 4.67 -0.19 0 10 4.67
#> 3 Adlikon 21 3 FDP 16 Wahlkreis XVI, Ande… 106 12.7 1.73 0 27 12.7
#> 4 Adlikon 21 4 GLP 16 Wahlkreis XVI, Ande… 36 4.31 2.94 0 9 4.31
#> 5 Adlikon 21 5 Grüne 16 Wahlkreis XVI, Ande… 54 6.46 0.86 0 14 6.46
#> 6 Adlikon 21 6 CVP 16 Wahlkreis XVI, Ande… 6 0.72 -0.03 0 2 0.72
#> 7 Adlikon 21 7 EVP 16 Wahlkreis XVI, Ande… 52 6.22 3.61 0 13 6.22
#> 8 Adlikon 21 8 AL 16 Wahlkreis XVI, Ande… 7 0.84 -0.28 0 2 0.84
#> 9 Adlikon 21 9 BDP 16 Wahlkreis XVI, Ande… 45 5.38 -1.46 4 11 5.38
#> 10 Adlikon 21 10 EDU 16 Wahlkreis XVI, Ande… 23 2.75 -0.98 0 6 2.75
#> # ℹ 1,760 more rows
#> # ℹ 3 more variables: `+/- (Wähleranteil)` <dbl>, `Stimmenanteil 2015` <dbl>, `Wähleranteil 2015` <dbl>
# we can also do basically the same in a more concise way without having to rely on
# `pal::dsv_colnames()`:
readr::read_csv(file = url,
col_types = list(.default = "c")) %>%
readr::type_convert(col_types = pal::cols_regex("^(Gemeindenamen|Liste|Wahlkreis)$" = "c",
"(?i)anteil" = "d",
.default = "i",
.col_names = colnames(.)))
#> # A tibble: 1,770 × 15
#> Gemeindenamen `BFS-Nr.` `Listen-Nr.` Liste `Wahlkreis-Nr.` Wahlkreis Stimmen Stimmenanteil `+/- (Stimmenanteil)` Stimmenzusatz Wähler Wähleranteil
#> <chr> <int> <int> <chr> <int> <chr> <int> <dbl> <dbl> <int> <int> <dbl>
#> 1 Adlikon 21 1 SVP 16 Wahlkreis XVI, Ande… 468 56.0 -6.21 8 117 56.0
#> 2 Adlikon 21 2 SP 16 Wahlkreis XVI, Ande… 39 4.67 -0.19 0 10 4.67
#> 3 Adlikon 21 3 FDP 16 Wahlkreis XVI, Ande… 106 12.7 1.73 0 27 12.7
#> 4 Adlikon 21 4 GLP 16 Wahlkreis XVI, Ande… 36 4.31 2.94 0 9 4.31
#> 5 Adlikon 21 5 Grüne 16 Wahlkreis XVI, Ande… 54 6.46 0.86 0 14 6.46
#> 6 Adlikon 21 6 CVP 16 Wahlkreis XVI, Ande… 6 0.72 -0.03 0 2 0.72
#> 7 Adlikon 21 7 EVP 16 Wahlkreis XVI, Ande… 52 6.22 3.61 0 13 6.22
#> 8 Adlikon 21 8 AL 16 Wahlkreis XVI, Ande… 7 0.84 -0.28 0 2 0.84
#> 9 Adlikon 21 9 BDP 16 Wahlkreis XVI, Ande… 45 5.38 -1.46 4 11 5.38
#> 10 Adlikon 21 10 EDU 16 Wahlkreis XVI, Ande… 23 2.75 -0.98 0 6 2.75
#> # ℹ 1,760 more rows
#> # ℹ 3 more variables: `+/- (Wähleranteil)` <dbl>, `Stimmenanteil 2015` <dbl>, `Wähleranteil 2015` <dbl>