Joining Data

ISI-BUDS 2023

Image from R for Data Science book licensed under the Creative Commons Attribution-NonCommercial-NoDerivs 3.0 License

Inner Join

Image from R for Data Science book licensed under the Creative Commons Attribution-NonCommercial-NoDerivs 3.0 License

Outer Joins

Image from R for Data Science book licensed under the Creative Commons Attribution-NonCommercial-NoDerivs 3.0 License

Data Joins

Image from R for Data Science book licensed under the Creative Commons Attribution-NonCommercial-NoDerivs 3.0 License

artists
# A tibble: 4 × 2
  name          followers
  <chr>             <dbl>
1 Beyoncé        24757958
2 Taylor Swift   33098116
3 Ariana Grande  51807131
4 Drake          50252529
songs
# A tibble: 5 × 4
  name          song_name                            album_name  song_popularity
  <chr>         <chr>                                <chr>                 <dbl>
1 Beyoncé       Savage Remix (feat. Beyoncé)         Savage Rem…              83
2 Taylor Swift  cardigan                             folklore                 85
3 Drake         Laugh Now Cry Later (feat. Lil Durk) Laugh Now …              95
4 Beyoncé       Halo                                 I AM…SASHA…              NA
5 Ariana Grande Stuck with U (with Justin Bieber)    Stuck with…              NA
albums
# A tibble: 4 × 2
  album_name                           album_release_date
  <chr>                                <date>            
1 Savage Remix (feat. Beyoncé)         2020-04-29        
2 I AM…SASHA FIERCE                    2008-11-14        
3 Stuck with U                         2020-05-08        
4 Laugh Now Cry Later (feat. Lil Durk) 2020-08-14        

Join Functions

something_join(x, y) All the join functions in dplyr package are in this fashion where x represents the first data frame and y represents the second data frame.

songs
# A tibble: 5 × 4
  name          song_name                            album_name  song_popularity
  <chr>         <chr>                                <chr>                 <dbl>
1 Beyoncé       Savage Remix (feat. Beyoncé)         Savage Rem…              83
2 Taylor Swift  cardigan                             folklore                 85
3 Drake         Laugh Now Cry Later (feat. Lil Durk) Laugh Now …              95
4 Beyoncé       Halo                                 I AM…SASHA…              NA
5 Ariana Grande Stuck with U (with Justin Bieber)    Stuck with…              NA
albums
# A tibble: 4 × 2
  album_name                           album_release_date
  <chr>                                <date>            
1 Savage Remix (feat. Beyoncé)         2020-04-29        
2 I AM…SASHA FIERCE                    2008-11-14        
3 Stuck with U                         2020-05-08        
4 Laugh Now Cry Later (feat. Lil Durk) 2020-08-14        
left_join(songs, albums, by = "album_name")
# A tibble: 5 × 5
  name          song_name          album_name song_popularity album_release_date
  <chr>         <chr>              <chr>                <dbl> <date>            
1 Beyoncé       Savage Remix (fea… Savage Re…              83 2020-04-29        
2 Taylor Swift  cardigan           folklore                85 NA                
3 Drake         Laugh Now Cry Lat… Laugh Now…              95 2020-08-14        
4 Beyoncé       Halo               I AM…SASH…              NA 2008-11-14        
5 Ariana Grande Stuck with U (wit… Stuck wit…              NA 2020-05-08        

left_join() includes all rows from x

songs
# A tibble: 5 × 4
  name          song_name                            album_name  song_popularity
  <chr>         <chr>                                <chr>                 <dbl>
1 Beyoncé       Savage Remix (feat. Beyoncé)         Savage Rem…              83
2 Taylor Swift  cardigan                             folklore                 85
3 Drake         Laugh Now Cry Later (feat. Lil Durk) Laugh Now …              95
4 Beyoncé       Halo                                 I AM…SASHA…              NA
5 Ariana Grande Stuck with U (with Justin Bieber)    Stuck with…              NA
albums
# A tibble: 4 × 2
  album_name                           album_release_date
  <chr>                                <date>            
1 Savage Remix (feat. Beyoncé)         2020-04-29        
2 I AM…SASHA FIERCE                    2008-11-14        
3 Stuck with U                         2020-05-08        
4 Laugh Now Cry Later (feat. Lil Durk) 2020-08-14        
right_join(songs, albums, by = "album_name")
# A tibble: 4 × 5
  name          song_name          album_name song_popularity album_release_date
  <chr>         <chr>              <chr>                <dbl> <date>            
1 Beyoncé       Savage Remix (fea… Savage Re…              83 2020-04-29        
2 Drake         Laugh Now Cry Lat… Laugh Now…              95 2020-08-14        
3 Beyoncé       Halo               I AM…SASH…              NA 2008-11-14        
4 Ariana Grande Stuck with U (wit… Stuck wit…              NA 2020-05-08        

right_join() includes all rows from y

songs
# A tibble: 5 × 4
  name          song_name                            album_name  song_popularity
  <chr>         <chr>                                <chr>                 <dbl>
1 Beyoncé       Savage Remix (feat. Beyoncé)         Savage Rem…              83
2 Taylor Swift  cardigan                             folklore                 85
3 Drake         Laugh Now Cry Later (feat. Lil Durk) Laugh Now …              95
4 Beyoncé       Halo                                 I AM…SASHA…              NA
5 Ariana Grande Stuck with U (with Justin Bieber)    Stuck with…              NA
albums
# A tibble: 4 × 2
  album_name                           album_release_date
  <chr>                                <date>            
1 Savage Remix (feat. Beyoncé)         2020-04-29        
2 I AM…SASHA FIERCE                    2008-11-14        
3 Stuck with U                         2020-05-08        
4 Laugh Now Cry Later (feat. Lil Durk) 2020-08-14        
inner_join(songs, albums, by = "album_name")
# A tibble: 4 × 5
  name          song_name          album_name song_popularity album_release_date
  <chr>         <chr>              <chr>                <dbl> <date>            
1 Beyoncé       Savage Remix (fea… Savage Re…              83 2020-04-29        
2 Drake         Laugh Now Cry Lat… Laugh Now…              95 2020-08-14        
3 Beyoncé       Halo               I AM…SASH…              NA 2008-11-14        
4 Ariana Grande Stuck with U (wit… Stuck wit…              NA 2020-05-08        

inner_join() includes all rows that are in x and y

songs
# A tibble: 5 × 4
  name          song_name                            album_name  song_popularity
  <chr>         <chr>                                <chr>                 <dbl>
1 Beyoncé       Savage Remix (feat. Beyoncé)         Savage Rem…              83
2 Taylor Swift  cardigan                             folklore                 85
3 Drake         Laugh Now Cry Later (feat. Lil Durk) Laugh Now …              95
4 Beyoncé       Halo                                 I AM…SASHA…              NA
5 Ariana Grande Stuck with U (with Justin Bieber)    Stuck with…              NA
albums
# A tibble: 4 × 2
  album_name                           album_release_date
  <chr>                                <date>            
1 Savage Remix (feat. Beyoncé)         2020-04-29        
2 I AM…SASHA FIERCE                    2008-11-14        
3 Stuck with U                         2020-05-08        
4 Laugh Now Cry Later (feat. Lil Durk) 2020-08-14        
full_join(songs, albums, by = "album_name")
# A tibble: 5 × 5
  name          song_name          album_name song_popularity album_release_date
  <chr>         <chr>              <chr>                <dbl> <date>            
1 Beyoncé       Savage Remix (fea… Savage Re…              83 2020-04-29        
2 Taylor Swift  cardigan           folklore                85 NA                
3 Drake         Laugh Now Cry Lat… Laugh Now…              95 2020-08-14        
4 Beyoncé       Halo               I AM…SASH…              NA 2008-11-14        
5 Ariana Grande Stuck with U (wit… Stuck wit…              NA 2020-05-08        

full_join() includes all rows that are in x or y

artists
# A tibble: 4 × 2
  name          followers
  <chr>             <dbl>
1 Beyoncé        24757958
2 Taylor Swift   33098116
3 Ariana Grande  51807131
4 Drake          50252529
songs
# A tibble: 5 × 4
  name          song_name                            album_name  song_popularity
  <chr>         <chr>                                <chr>                 <dbl>
1 Beyoncé       Savage Remix (feat. Beyoncé)         Savage Rem…              83
2 Taylor Swift  cardigan                             folklore                 85
3 Drake         Laugh Now Cry Later (feat. Lil Durk) Laugh Now …              95
4 Beyoncé       Halo                                 I AM…SASHA…              NA
5 Ariana Grande Stuck with U (with Justin Bieber)    Stuck with…              NA
albums
# A tibble: 4 × 2
  album_name                           album_release_date
  <chr>                                <date>            
1 Savage Remix (feat. Beyoncé)         2020-04-29        
2 I AM…SASHA FIERCE                    2008-11-14        
3 Stuck with U                         2020-05-08        
4 Laugh Now Cry Later (feat. Lil Durk) 2020-08-14        
full_join(artists, songs, by = "name") %>% 
  full_join(albums, by = "album_name")
# A tibble: 5 × 6
  name         followers song_name album_name song_popularity album_release_date
  <chr>            <dbl> <chr>     <chr>                <dbl> <date>            
1 Beyoncé       24757958 Savage R… Savage Re…              83 2020-04-29        
2 Beyoncé       24757958 Halo      I AM…SASH…              NA 2008-11-14        
3 Taylor Swift  33098116 cardigan  folklore                85 NA                
4 Ariana Gran…  51807131 Stuck wi… Stuck wit…              NA 2020-05-08        
5 Drake         50252529 Laugh No… Laugh Now…              95 2020-08-14