Skip to contents

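Allows you to define one regular expression per desired column specification object. Each regular expression is matched against the column names, and the matching columns are assigned the respective column specification.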

Usage

cols_regex(..., .col_names, .default = readr::col_character())

Arguments

...

Named arguments where the names are (Perl-compatible) regular expressions and the values are column objects created by col_*(), or their abbreviated character names (as described in the col_types parameter of readr::read_delim()). Dynamic dots are supported.

.col_names

Column names which should be matched by the regular expressions in `...`.

.default

Any named columns not matched by any of the regular expressions in ... will be read with this column type.

Details

The main limitation of cols_regex() is that it needs to know the input dataset's full set of .col_names in advance, for which dsv_colnames() can help. See the examples for further details.

Examples

library(magrittr)

# for some hypothetical CSV data column names like these...
col_names <- c("VAR1_Text",
               "VAR2_Text",
               "VAR3_Text_Other",
               "VAR1_Code_R1",
               "VAR2_Code_R2",
               "HAS_R1_Lag",
               "HAS_R2_Lag",
               "GARBAGEX67",
               "GARBAGEY09")

# ...a column spec could be created concisely as follows:
col_regex <- list("_Text(_|$)" = "c",
                  "_Code(_|$)" = "i",
                  "^GARBAGE"   = readr::col_skip())

pal::cols_regex(.col_names = col_names,
                !!!col_regex,
                .default = "l")
#> cols(
#>   .default = col_logical(),
#>   VAR1_Text = col_character(),
#>   VAR2_Text = col_character(),
#>   VAR3_Text_Other = col_character(),
#>   VAR1_Code_R1 = col_integer(),
#>   VAR2_Code_R2 = col_integer(),
#>   GARBAGEX67 = col_skip(),
#>   GARBAGEY09 = col_skip()
#> )

# we can parse some real data:
url <- "https://salim_b.gitlab.io/misc/Kantonsratswahl_Zuerich_2019_Ergebnisse_Gemeinden.csv"

raw_data <-
  httr2::request(url) |>
  httr2::req_perform() |>
  httr2::resp_body_string()

col_spec <- pal::cols_regex("^(Gemeindenamen|Liste|Wahlkreis)$" = "c",
                            "(?i)anteil" = "d",
                            .default = "i",
                            .col_names = pal::dsv_colnames(raw_data))
print(col_spec)
#> cols(
#>   .default = col_integer(),
#>   Gemeindenamen = col_character(),
#>   Liste = col_character(),
#>   Wahlkreis = col_character(),
#>   Stimmenanteil = col_double(),
#>   `+/- (Stimmenanteil)` = col_double(),
#>   Wähleranteil = col_double(),
#>   `+/- (Wähleranteil)` = col_double(),
#>   `Stimmenanteil 2015` = col_double(),
#>   `Wähleranteil 2015` = col_double()
#> )

readr::read_csv(file = raw_data,
                col_types = col_spec)
#> # A tibble: 1,770 × 15
#>    Gemeindenamen `BFS-Nr.` `Listen-Nr.` Liste `Wahlkreis-Nr.` Wahlkreis            Stimmen Stimmenanteil `+/- (Stimmenanteil)` Stimmenzusatz Wähler Wähleranteil
#>    <chr>             <int>        <int> <chr>           <int> <chr>                  <int>         <dbl>                 <dbl>         <int>  <int>        <dbl>
#>  1 Adlikon              21            1 SVP                16 Wahlkreis XVI, Ande…     468         56.0                  -6.21             8    117        56.0 
#>  2 Adlikon              21            2 SP                 16 Wahlkreis XVI, Ande…      39          4.67                 -0.19             0     10         4.67
#>  3 Adlikon              21            3 FDP                16 Wahlkreis XVI, Ande…     106         12.7                   1.73             0     27        12.7 
#>  4 Adlikon              21            4 GLP                16 Wahlkreis XVI, Ande…      36          4.31                  2.94             0      9         4.31
#>  5 Adlikon              21            5 Grüne              16 Wahlkreis XVI, Ande…      54          6.46                  0.86             0     14         6.46
#>  6 Adlikon              21            6 CVP                16 Wahlkreis XVI, Ande…       6          0.72                 -0.03             0      2         0.72
#>  7 Adlikon              21            7 EVP                16 Wahlkreis XVI, Ande…      52          6.22                  3.61             0     13         6.22
#>  8 Adlikon              21            8 AL                 16 Wahlkreis XVI, Ande…       7          0.84                 -0.28             0      2         0.84
#>  9 Adlikon              21            9 BDP                16 Wahlkreis XVI, Ande…      45          5.38                 -1.46             4     11         5.38
#> 10 Adlikon              21           10 EDU                16 Wahlkreis XVI, Ande…      23          2.75                 -0.98             0      6         2.75
#> # ℹ 1,760 more rows
#> # ℹ 3 more variables: `+/- (Wähleranteil)` <dbl>, `Stimmenanteil 2015` <dbl>, `Wähleranteil 2015` <dbl>

# we can also do basically the same in a more concise way without having to rely on
# `pal::dsv_colnames()`:
readr::read_csv(file = url,
                col_types = list(.default = "c")) %>%
  readr::type_convert(col_types = pal::cols_regex("^(Gemeindenamen|Liste|Wahlkreis)$" = "c",
                                                  "(?i)anteil" = "d",
                                                  .default = "i",
                                                  .col_names = colnames(.)))
#> # A tibble: 1,770 × 15
#>    Gemeindenamen `BFS-Nr.` `Listen-Nr.` Liste `Wahlkreis-Nr.` Wahlkreis            Stimmen Stimmenanteil `+/- (Stimmenanteil)` Stimmenzusatz Wähler Wähleranteil
#>    <chr>             <int>        <int> <chr>           <int> <chr>                  <int>         <dbl>                 <dbl>         <int>  <int>        <dbl>
#>  1 Adlikon              21            1 SVP                16 Wahlkreis XVI, Ande…     468         56.0                  -6.21             8    117        56.0 
#>  2 Adlikon              21            2 SP                 16 Wahlkreis XVI, Ande…      39          4.67                 -0.19             0     10         4.67
#>  3 Adlikon              21            3 FDP                16 Wahlkreis XVI, Ande…     106         12.7                   1.73             0     27        12.7 
#>  4 Adlikon              21            4 GLP                16 Wahlkreis XVI, Ande…      36          4.31                  2.94             0      9         4.31
#>  5 Adlikon              21            5 Grüne              16 Wahlkreis XVI, Ande…      54          6.46                  0.86             0     14         6.46
#>  6 Adlikon              21            6 CVP                16 Wahlkreis XVI, Ande…       6          0.72                 -0.03             0      2         0.72
#>  7 Adlikon              21            7 EVP                16 Wahlkreis XVI, Ande…      52          6.22                  3.61             0     13         6.22
#>  8 Adlikon              21            8 AL                 16 Wahlkreis XVI, Ande…       7          0.84                 -0.28             0      2         0.84
#>  9 Adlikon              21            9 BDP                16 Wahlkreis XVI, Ande…      45          5.38                 -1.46             4     11         5.38
#> 10 Adlikon              21           10 EDU                16 Wahlkreis XVI, Ande…      23          2.75                 -0.98             0      6         2.75
#> # ℹ 1,760 more rows
#> # ℹ 3 more variables: `+/- (Wähleranteil)` <dbl>, `Stimmenanteil 2015` <dbl>, `Wähleranteil 2015` <dbl>