Join a table with a string column by a regular expression column in another table

regex_join(x, y, by = NULL, mode = "inner", ignore_case = FALSE)

regex_inner_join(x, y, by = NULL, ignore_case = FALSE)

regex_left_join(x, y, by = NULL, ignore_case = FALSE)

regex_right_join(x, y, by = NULL, ignore_case = FALSE)

regex_full_join(x, y, by = NULL, ignore_case = FALSE)

regex_semi_join(x, y, by = NULL, ignore_case = FALSE)

regex_anti_join(x, y, by = NULL, ignore_case = FALSE)

Arguments

x

A tbl

y

A tbl

by

Columns by which to join the two tables

mode

One of "inner", "left", "right", "full", "semi", or "anti"

ignore_case

Whether the regular expression matching should be case-insensitive (default FALSE)

See also

str_detect

Examples

library(dplyr)
library(ggplot2)
data(diamonds)
diamonds <- tbl_df(diamonds)
#> Warning: `tbl_df()` is deprecated as of dplyr 1.0.0. #> Please use `tibble::as_tibble()` instead. #> This warning is displayed once every 8 hours. #> Call `lifecycle::last_warnings()` to see where this warning was generated.
d <- data_frame(regex_name = c("^Idea", "mium", "Good"), type = 1:3)

# When they are inner_joined, only Good<->Good matches
diamonds %>% inner_join(d, by = c(cut = "regex_name"))
#> # A tibble: 4,906 x 11 #> carat cut color clarity depth table price x y z type #> <dbl> <chr> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl> <int> #> 1 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31 3 #> 2 0.31 Good J SI2 63.3 58 335 4.34 4.35 2.75 3 #> 3 0.3 Good J SI1 64 55 339 4.25 4.28 2.73 3 #> 4 0.3 Good J SI1 63.4 54 351 4.23 4.29 2.7 3 #> 5 0.3 Good J SI1 63.8 56 351 4.23 4.26 2.71 3 #> 6 0.3 Good I SI2 63.3 56 351 4.26 4.3 2.71 3 #> 7 0.23 Good F VS1 58.2 59 402 4.06 4.08 2.37 3 #> 8 0.23 Good E VS1 64.1 59 402 3.83 3.85 2.46 3 #> 9 0.31 Good H SI1 64 54 402 4.29 4.31 2.75 3 #> 10 0.26 Good D VS2 65.2 56 403 3.99 4.02 2.61 3 #> # … with 4,896 more rows
# but we can regex match them
diamonds %>% regex_inner_join(d, by = c(cut = "regex_name"))
#> # A tibble: 52,330 x 12 #> carat cut color clarity depth table price x y z regex_name #> <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl> <chr> #> 1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43 ^Idea #> 2 0.21 Prem… E SI1 59.8 61 326 3.89 3.84 2.31 mium #> 3 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31 Good #> 4 0.290 Prem… I VS2 62.4 58 334 4.2 4.23 2.63 mium #> 5 0.31 Good J SI2 63.3 58 335 4.34 4.35 2.75 Good #> 6 0.24 Very… J VVS2 62.8 57 336 3.94 3.96 2.48 Good #> 7 0.24 Very… I VVS1 62.3 57 336 3.95 3.98 2.47 Good #> 8 0.26 Very… H SI1 61.9 55 337 4.07 4.11 2.53 Good #> 9 0.23 Very… H VS1 59.4 61 338 4 4.05 2.39 Good #> 10 0.3 Good J SI1 64 55 339 4.25 4.28 2.73 Good #> # … with 52,320 more rows, and 1 more variable: type <int>