C12. Logical Vectors
library(tidyverse)
library(nycflights13)
library(dplyr)
12.2. Comparisions
x <- c(1, 2, 3, 5, 7, 11, 13)
x * 2
df <- tibble(x)
df |>
mutate(y = x * 2)
<ol class=list-inline><li>2</li><li>4</li><li>6</li><li>10</li><li>14</li><li>22</li><li>26</li></ol>
x | y |
---|---|
<dbl> | <dbl> |
1 | 2 |
2 | 4 |
3 | 6 |
5 | 10 |
7 | 14 |
11 | 22 |
13 | 26 |
flights |>
mutate(
daytime = dep_time > 600 & dep_time < 2000,
approx_ontime = abs(arr_delay) < 20,
.keep = "used"
)
dep_time | arr_delay | daytime | approx_ontime |
---|---|---|---|
<int> | <dbl> | <lgl> | <lgl> |
517 | 11 | FALSE | TRUE |
533 | 20 | FALSE | FALSE |
542 | 33 | FALSE | FALSE |
544 | -18 | FALSE | TRUE |
554 | -25 | FALSE | FALSE |
554 | 12 | FALSE | TRUE |
555 | 19 | FALSE | TRUE |
557 | -14 | FALSE | TRUE |
557 | -8 | FALSE | TRUE |
558 | 8 | FALSE | TRUE |
558 | -2 | FALSE | TRUE |
558 | -3 | FALSE | TRUE |
558 | 7 | FALSE | TRUE |
558 | -14 | FALSE | TRUE |
559 | 31 | FALSE | FALSE |
559 | -4 | FALSE | TRUE |
559 | -8 | FALSE | TRUE |
600 | -7 | FALSE | TRUE |
600 | 12 | FALSE | TRUE |
601 | -6 | TRUE | TRUE |
602 | -8 | TRUE | TRUE |
602 | 16 | TRUE | TRUE |
606 | -12 | TRUE | TRUE |
606 | -8 | TRUE | TRUE |
607 | -17 | TRUE | TRUE |
608 | 32 | TRUE | FALSE |
611 | 14 | TRUE | TRUE |
613 | 4 | TRUE | TRUE |
615 | -21 | TRUE | FALSE |
615 | -9 | TRUE | TRUE |
... | ... | ... | ... |
2123 | -24 | FALSE | FALSE |
2127 | -9 | FALSE | TRUE |
2128 | -31 | FALSE | FALSE |
2129 | -2 | FALSE | TRUE |
2131 | -30 | FALSE | FALSE |
2140 | -30 | FALSE | FALSE |
2142 | 11 | FALSE | TRUE |
2145 | -25 | FALSE | FALSE |
2147 | 3 | FALSE | TRUE |
2149 | -23 | FALSE | FALSE |
2150 | -16 | FALSE | TRUE |
2159 | 194 | FALSE | FALSE |
2203 | 8 | FALSE | TRUE |
2207 | 7 | FALSE | TRUE |
2211 | 57 | FALSE | FALSE |
2231 | -21 | FALSE | FALSE |
2233 | 42 | FALSE | FALSE |
2235 | 130 | FALSE | FALSE |
2237 | -8 | FALSE | TRUE |
2240 | -17 | FALSE | TRUE |
2240 | -20 | FALSE | FALSE |
2241 | -16 | FALSE | TRUE |
2307 | 1 | FALSE | TRUE |
2349 | -25 | FALSE | FALSE |
NA | NA | NA | NA |
NA | NA | NA | NA |
NA | NA | NA | NA |
NA | NA | NA | NA |
NA | NA | NA | NA |
NA | NA | NA | NA |
flights |>
mutate(
daytime = dep_time > 600 & dep_time < 2000,
approx_ontime = abs(arr_delay) < 20,
) |>
filter(daytime & approx_ontime)
year | month | day | dep_time | sched_dep_time | dep_delay | arr_time | sched_arr_time | arr_delay | carrier | ... | tailnum | origin | dest | air_time | distance | hour | minute | time_hour | daytime | approx_ontime |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
<int> | <int> | <int> | <int> | <int> | <dbl> | <int> | <int> | <dbl> | <chr> | ... | <chr> | <chr> | <chr> | <dbl> | <dbl> | <dbl> | <dbl> | <dttm> | <lgl> | <lgl> |
2013 | 1 | 1 | 601 | 600 | 1 | 844 | 850 | -6 | B6 | ... | N644JB | EWR | PBI | 147 | 1023 | 6 | 0 | 2013-01-01 06:00:00 | TRUE | TRUE |
2013 | 1 | 1 | 602 | 610 | -8 | 812 | 820 | -8 | DL | ... | N971DL | LGA | MSP | 170 | 1020 | 6 | 10 | 2013-01-01 06:00:00 | TRUE | TRUE |
2013 | 1 | 1 | 602 | 605 | -3 | 821 | 805 | 16 | MQ | ... | N730MQ | LGA | DTW | 105 | 502 | 6 | 5 | 2013-01-01 06:00:00 | TRUE | TRUE |
2013 | 1 | 1 | 606 | 610 | -4 | 858 | 910 | -12 | AA | ... | N633AA | EWR | MIA | 152 | 1085 | 6 | 10 | 2013-01-01 06:00:00 | TRUE | TRUE |
2013 | 1 | 1 | 606 | 610 | -4 | 837 | 845 | -8 | DL | ... | N3739P | JFK | ATL | 128 | 760 | 6 | 10 | 2013-01-01 06:00:00 | TRUE | TRUE |
2013 | 1 | 1 | 607 | 607 | 0 | 858 | 915 | -17 | UA | ... | N53442 | EWR | MIA | 157 | 1085 | 6 | 7 | 2013-01-01 06:00:00 | TRUE | TRUE |
2013 | 1 | 1 | 611 | 600 | 11 | 945 | 931 | 14 | UA | ... | N532UA | JFK | SFO | 366 | 2586 | 6 | 0 | 2013-01-01 06:00:00 | TRUE | TRUE |
2013 | 1 | 1 | 613 | 610 | 3 | 925 | 921 | 4 | B6 | ... | N635JB | JFK | RSW | 175 | 1074 | 6 | 10 | 2013-01-01 06:00:00 | TRUE | TRUE |
2013 | 1 | 1 | 615 | 615 | 0 | 833 | 842 | -9 | DL | ... | N326NB | EWR | ATL | 120 | 746 | 6 | 15 | 2013-01-01 06:00:00 | TRUE | TRUE |
2013 | 1 | 1 | 622 | 630 | -8 | 1017 | 1014 | 3 | US | ... | N807AW | EWR | PHX | 342 | 2133 | 6 | 30 | 2013-01-01 06:00:00 | TRUE | TRUE |
2013 | 1 | 1 | 623 | 610 | 13 | 920 | 915 | 5 | AA | ... | N3EMAA | LGA | MIA | 153 | 1096 | 6 | 10 | 2013-01-01 06:00:00 | TRUE | TRUE |
2013 | 1 | 1 | 623 | 627 | -4 | 933 | 932 | 1 | UA | ... | N459UA | LGA | IAH | 229 | 1416 | 6 | 27 | 2013-01-01 06:00:00 | TRUE | TRUE |
2013 | 1 | 1 | 624 | 630 | -6 | 840 | 830 | 10 | MQ | ... | N518MQ | LGA | MSP | 166 | 1020 | 6 | 30 | 2013-01-01 06:00:00 | TRUE | TRUE |
2013 | 1 | 1 | 627 | 630 | -3 | 1018 | 1018 | 0 | US | ... | N535UW | JFK | PHX | 330 | 2153 | 6 | 30 | 2013-01-01 06:00:00 | TRUE | TRUE |
2013 | 1 | 1 | 628 | 630 | -2 | 1137 | 1140 | -3 | AA | ... | N3BAAA | JFK | SJU | 192 | 1598 | 6 | 30 | 2013-01-01 06:00:00 | TRUE | TRUE |
2013 | 1 | 1 | 629 | 630 | -1 | 824 | 810 | 14 | AA | ... | N3CYAA | LGA | ORD | 140 | 733 | 6 | 30 | 2013-01-01 06:00:00 | TRUE | TRUE |
2013 | 1 | 1 | 629 | 630 | -1 | 721 | 740 | -19 | WN | ... | N273WN | LGA | BWI | 40 | 185 | 6 | 30 | 2013-01-01 06:00:00 | TRUE | TRUE |
2013 | 1 | 1 | 629 | 630 | -1 | 824 | 833 | -9 | US | ... | N426US | EWR | CLT | 91 | 529 | 6 | 30 | 2013-01-01 06:00:00 | TRUE | TRUE |
2013 | 1 | 1 | 632 | 608 | 24 | 740 | 728 | 12 | EV | ... | N13553 | EWR | IAD | 52 | 212 | 6 | 8 | 2013-01-01 06:00:00 | TRUE | TRUE |
2013 | 1 | 1 | 637 | 645 | -8 | 930 | 935 | -5 | B6 | ... | N709JB | LGA | MCO | 144 | 950 | 6 | 45 | 2013-01-01 06:00:00 | TRUE | TRUE |
2013 | 1 | 1 | 639 | 640 | -1 | 739 | 749 | -10 | B6 | ... | N805JB | JFK | BOS | 41 | 187 | 6 | 40 | 2013-01-01 06:00:00 | TRUE | TRUE |
2013 | 1 | 1 | 643 | 646 | -3 | 922 | 940 | -18 | UA | ... | N497UA | EWR | PBI | 146 | 1023 | 6 | 46 | 2013-01-01 06:00:00 | TRUE | TRUE |
2013 | 1 | 1 | 643 | 645 | -2 | 837 | 848 | -11 | US | ... | N178US | EWR | CLT | 91 | 529 | 6 | 45 | 2013-01-01 06:00:00 | TRUE | TRUE |
2013 | 1 | 1 | 644 | 636 | 8 | 931 | 940 | -9 | UA | ... | N75435 | EWR | FLL | 151 | 1065 | 6 | 36 | 2013-01-01 06:00:00 | TRUE | TRUE |
2013 | 1 | 1 | 645 | 647 | -2 | 815 | 810 | 5 | B6 | ... | N796JB | JFK | BUF | 63 | 301 | 6 | 47 | 2013-01-01 06:00:00 | TRUE | TRUE |
2013 | 1 | 1 | 646 | 645 | 1 | 910 | 916 | -6 | UA | ... | N569UA | LGA | DEN | 243 | 1620 | 6 | 45 | 2013-01-01 06:00:00 | TRUE | TRUE |
2013 | 1 | 1 | 646 | 645 | 1 | 1023 | 1030 | -7 | UA | ... | N38727 | EWR | SNA | 380 | 2434 | 6 | 45 | 2013-01-01 06:00:00 | TRUE | TRUE |
2013 | 1 | 1 | 651 | 655 | -4 | 936 | 942 | -6 | B6 | ... | N558JB | JFK | LAS | 323 | 2248 | 6 | 55 | 2013-01-01 06:00:00 | TRUE | TRUE |
2013 | 1 | 1 | 652 | 655 | -3 | 932 | 921 | 11 | B6 | ... | N178JB | JFK | MSY | 191 | 1182 | 6 | 55 | 2013-01-01 06:00:00 | TRUE | TRUE |
2013 | 1 | 1 | 655 | 655 | 0 | 1021 | 1030 | -9 | DL | ... | N3763D | JFK | SLC | 294 | 1990 | 6 | 55 | 2013-01-01 06:00:00 | TRUE | TRUE |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | |
2013 | 9 | 30 | 1908 | 1911 | -3 | 2107 | 2110 | -3 | EV | ... | N16987 | EWR | CLT | 74 | 529 | 19 | 11 | 2013-09-30 19:00:00 | TRUE | TRUE |
2013 | 9 | 30 | 1909 | 1905 | 4 | 2055 | 2057 | -2 | 9E | ... | N937XJ | JFK | ORD | 108 | 740 | 19 | 5 | 2013-09-30 19:00:00 | TRUE | TRUE |
2013 | 9 | 30 | 1909 | 1901 | 8 | 2212 | 2211 | 1 | UA | ... | N513UA | EWR | SFO | 307 | 2565 | 19 | 1 | 2013-09-30 19:00:00 | TRUE | TRUE |
2013 | 9 | 30 | 1915 | 1900 | 15 | 2138 | 2151 | -13 | B6 | ... | N563JB | JFK | MCO | 120 | 944 | 19 | 0 | 2013-09-30 19:00:00 | TRUE | TRUE |
2013 | 9 | 30 | 1915 | 1900 | 15 | 2025 | 2015 | 10 | WN | ... | N719SW | LGA | MKE | 104 | 738 | 19 | 0 | 2013-09-30 19:00:00 | TRUE | TRUE |
2013 | 9 | 30 | 1917 | 1920 | -3 | 2053 | 2055 | -2 | 9E | ... | N600LR | LGA | BUF | 50 | 292 | 19 | 20 | 2013-09-30 19:00:00 | TRUE | TRUE |
2013 | 9 | 30 | 1925 | 1930 | -5 | 2033 | 2049 | -16 | EV | ... | N825AS | LGA | IAD | 43 | 229 | 19 | 30 | 2013-09-30 19:00:00 | TRUE | TRUE |
2013 | 9 | 30 | 1926 | 1930 | -4 | 2146 | 2201 | -15 | DL | ... | N303DQ | EWR | ATL | 95 | 746 | 19 | 30 | 2013-09-30 19:00:00 | TRUE | TRUE |
2013 | 9 | 30 | 1927 | 1930 | -3 | 2054 | 2056 | -2 | EV | ... | N13956 | EWR | MKE | 110 | 725 | 19 | 30 | 2013-09-30 19:00:00 | TRUE | TRUE |
2013 | 9 | 30 | 1928 | 1932 | -4 | 2137 | 2143 | -6 | EV | ... | N18556 | EWR | TYS | 82 | 631 | 19 | 32 | 2013-09-30 19:00:00 | TRUE | TRUE |
2013 | 9 | 30 | 1929 | 1934 | -5 | 2146 | 2136 | 10 | EV | ... | N18557 | EWR | CHS | 92 | 628 | 19 | 34 | 2013-09-30 19:00:00 | TRUE | TRUE |
2013 | 9 | 30 | 1930 | 1930 | 0 | 2217 | 2231 | -14 | DL | ... | N365NB | LGA | TPA | 133 | 1010 | 19 | 30 | 2013-09-30 19:00:00 | TRUE | TRUE |
2013 | 9 | 30 | 1930 | 1940 | -10 | 2120 | 2125 | -5 | MQ | ... | N839MQ | JFK | RDU | 70 | 427 | 19 | 40 | 2013-09-30 19:00:00 | TRUE | TRUE |
2013 | 9 | 30 | 1931 | 1905 | 26 | 2049 | 2033 | 16 | EV | ... | N748EV | LGA | ROC | 41 | 254 | 19 | 5 | 2013-09-30 19:00:00 | TRUE | TRUE |
2013 | 9 | 30 | 1932 | 1935 | -3 | 2221 | 2232 | -11 | DL | ... | N927DA | LGA | MCO | 132 | 950 | 19 | 35 | 2013-09-30 19:00:00 | TRUE | TRUE |
2013 | 9 | 30 | 1934 | 1930 | 4 | 2236 | 2250 | -14 | AA | ... | N3EXAA | JFK | SEA | 340 | 2422 | 19 | 30 | 2013-09-30 19:00:00 | TRUE | TRUE |
2013 | 9 | 30 | 1934 | 1935 | -1 | 2238 | 2250 | -12 | AA | ... | N3JFAA | LGA | MIA | 150 | 1096 | 19 | 35 | 2013-09-30 19:00:00 | TRUE | TRUE |
2013 | 9 | 30 | 1935 | 1935 | 0 | 2211 | 2229 | -18 | B6 | ... | N589JB | JFK | TPA | 132 | 1005 | 19 | 35 | 2013-09-30 19:00:00 | TRUE | TRUE |
2013 | 9 | 30 | 1935 | 1930 | 5 | 2230 | 2233 | -3 | UA | ... | N38446 | EWR | TPA | 132 | 997 | 19 | 30 | 2013-09-30 19:00:00 | TRUE | TRUE |
2013 | 9 | 30 | 1936 | 1910 | 26 | 2204 | 2203 | 1 | DL | ... | N905DL | JFK | MCO | 126 | 944 | 19 | 10 | 2013-09-30 19:00:00 | TRUE | TRUE |
2013 | 9 | 30 | 1939 | 1940 | -1 | 2041 | 2100 | -19 | EV | ... | N829AS | JFK | IAD | 42 | 228 | 19 | 40 | 2013-09-30 19:00:00 | TRUE | TRUE |
2013 | 9 | 30 | 1939 | 1950 | -11 | 2129 | 2140 | -11 | MQ | ... | N735MQ | LGA | CMH | 73 | 479 | 19 | 50 | 2013-09-30 19:00:00 | TRUE | TRUE |
2013 | 9 | 30 | 1940 | 1900 | 40 | 2228 | 2232 | -4 | DL | ... | N723TW | JFK | SFO | 323 | 2586 | 19 | 0 | 2013-09-30 19:00:00 | TRUE | TRUE |
2013 | 9 | 30 | 1944 | 1950 | -6 | 2208 | 2215 | -7 | MQ | ... | N507MQ | LGA | ATL | 100 | 762 | 19 | 50 | 2013-09-30 19:00:00 | TRUE | TRUE |
2013 | 9 | 30 | 1953 | 2000 | -7 | 2119 | 2132 | -13 | UA | ... | N853UA | LGA | ORD | 107 | 733 | 20 | 0 | 2013-09-30 20:00:00 | TRUE | TRUE |
2013 | 9 | 30 | 1955 | 2000 | -5 | 2219 | 2230 | -11 | DL | ... | N992DL | LGA | ATL | 99 | 762 | 20 | 0 | 2013-09-30 20:00:00 | TRUE | TRUE |
2013 | 9 | 30 | 1955 | 1935 | 20 | 2141 | 2159 | -18 | 9E | ... | N928XJ | JFK | CVG | 86 | 589 | 19 | 35 | 2013-09-30 19:00:00 | TRUE | TRUE |
2013 | 9 | 30 | 1955 | 1942 | 13 | 2300 | 2250 | 10 | B6 | ... | N623JB | LGA | FLL | 141 | 1076 | 19 | 42 | 2013-09-30 19:00:00 | TRUE | TRUE |
2013 | 9 | 30 | 1955 | 2000 | -5 | 2112 | 2114 | -2 | US | ... | N957UW | LGA | BOS | 35 | 184 | 20 | 0 | 2013-09-30 20:00:00 | TRUE | TRUE |
2013 | 9 | 30 | 1958 | 2005 | -7 | 2119 | 2130 | -11 | MQ | ... | N511MQ | EWR | ORD | 102 | 719 | 20 | 5 | 2013-09-30 20:00:00 | TRUE | TRUE |
Floating point comparision
x <- c(1 / 49 * 49, sqrt(2) ^ 2)
x
<ol class=list-inline><li>1</li><li>2</li></ol>
x == c(1, 2)
<ol class=list-inline><li>FALSE</li><li>FALSE</li></ol>
print(x, digits = 16)
[1] 0.9999999999999999 2.0000000000000004
near(x, c(1, 2))
<ol class=list-inline><li>TRUE</li><li>TRUE</li></ol>
Missing values
NA > 5
10 == NA
NA == NA
<NA>
<NA>
<NA>
# so this code doesn't work
flights |>
filter(dep_time == NA)
year | month | day | dep_time | sched_dep_time | dep_delay | arr_time | sched_arr_time | arr_delay | carrier | flight | tailnum | origin | dest | air_time | distance | hour | minute | time_hour |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
<int> | <int> | <int> | <int> | <int> | <dbl> | <int> | <int> | <dbl> | <chr> | <int> | <chr> | <chr> | <chr> | <dbl> | <dbl> | <dbl> | <dbl> | <dttm> |
is.na()
flights |>
filter(is.na(dep_time))
year | month | day | dep_time | sched_dep_time | dep_delay | arr_time | sched_arr_time | arr_delay | carrier | flight | tailnum | origin | dest | air_time | distance | hour | minute | time_hour |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
<int> | <int> | <int> | <int> | <int> | <dbl> | <int> | <int> | <dbl> | <chr> | <int> | <chr> | <chr> | <chr> | <dbl> | <dbl> | <dbl> | <dbl> | <dttm> |
2013 | 1 | 1 | NA | 1630 | NA | NA | 1815 | NA | EV | 4308 | N18120 | EWR | RDU | NA | 416 | 16 | 30 | 2013-01-01 16:00:00 |
2013 | 1 | 1 | NA | 1935 | NA | NA | 2240 | NA | AA | 791 | N3EHAA | LGA | DFW | NA | 1389 | 19 | 35 | 2013-01-01 19:00:00 |
2013 | 1 | 1 | NA | 1500 | NA | NA | 1825 | NA | AA | 1925 | N3EVAA | LGA | MIA | NA | 1096 | 15 | 0 | 2013-01-01 15:00:00 |
2013 | 1 | 1 | NA | 600 | NA | NA | 901 | NA | B6 | 125 | N618JB | JFK | FLL | NA | 1069 | 6 | 0 | 2013-01-01 06:00:00 |
2013 | 1 | 2 | NA | 1540 | NA | NA | 1747 | NA | EV | 4352 | N10575 | EWR | CVG | NA | 569 | 15 | 40 | 2013-01-02 15:00:00 |
2013 | 1 | 2 | NA | 1620 | NA | NA | 1746 | NA | EV | 4406 | N13949 | EWR | PIT | NA | 319 | 16 | 20 | 2013-01-02 16:00:00 |
2013 | 1 | 2 | NA | 1355 | NA | NA | 1459 | NA | EV | 4434 | N10575 | EWR | MHT | NA | 209 | 13 | 55 | 2013-01-02 13:00:00 |
2013 | 1 | 2 | NA | 1420 | NA | NA | 1644 | NA | EV | 4935 | N759EV | EWR | ATL | NA | 746 | 14 | 20 | 2013-01-02 14:00:00 |
2013 | 1 | 2 | NA | 1321 | NA | NA | 1536 | NA | EV | 3849 | N13550 | EWR | IND | NA | 645 | 13 | 21 | 2013-01-02 13:00:00 |
2013 | 1 | 2 | NA | 1545 | NA | NA | 1910 | NA | AA | 133 | NA | JFK | LAX | NA | 2475 | 15 | 45 | 2013-01-02 15:00:00 |
2013 | 1 | 2 | NA | 1330 | NA | NA | 1640 | NA | AA | 753 | N3FBAA | LGA | DFW | NA | 1389 | 13 | 30 | 2013-01-02 13:00:00 |
2013 | 1 | 2 | NA | 1601 | NA | NA | 1735 | NA | UA | 623 | NA | EWR | ORD | NA | 719 | 16 | 1 | 2013-01-02 16:00:00 |
2013 | 1 | 3 | NA | 645 | NA | NA | 757 | NA | EV | 4241 | N14972 | EWR | DCA | NA | 199 | 6 | 45 | 2013-01-03 06:00:00 |
2013 | 1 | 3 | NA | 1030 | NA | NA | 1210 | NA | AA | 321 | N487AA | LGA | ORD | NA | 733 | 10 | 30 | 2013-01-03 10:00:00 |
2013 | 1 | 3 | NA | 1125 | NA | NA | 1305 | NA | AA | 327 | N3AMAA | LGA | ORD | NA | 733 | 11 | 25 | 2013-01-03 11:00:00 |
2013 | 1 | 3 | NA | 835 | NA | NA | 1150 | NA | AA | 717 | N3GXAA | LGA | DFW | NA | 1389 | 8 | 35 | 2013-01-03 08:00:00 |
2013 | 1 | 3 | NA | 920 | NA | NA | 1245 | NA | AA | 721 | N201AA | LGA | DFW | NA | 1389 | 9 | 20 | 2013-01-03 09:00:00 |
2013 | 1 | 3 | NA | 1020 | NA | NA | 1330 | NA | AA | 731 | N3FVAA | LGA | DFW | NA | 1389 | 10 | 20 | 2013-01-03 10:00:00 |
2013 | 1 | 3 | NA | 1220 | NA | NA | 1415 | NA | AA | 1757 | N573AA | LGA | STL | NA | 888 | 12 | 20 | 2013-01-03 12:00:00 |
2013 | 1 | 3 | NA | 630 | NA | NA | 830 | NA | MQ | 4599 | N500MQ | LGA | MSP | NA | 1020 | 6 | 30 | 2013-01-03 06:00:00 |
2013 | 1 | 3 | NA | 857 | NA | NA | 1209 | NA | UA | 714 | NA | EWR | MIA | NA | 1085 | 8 | 57 | 2013-01-03 08:00:00 |
2013 | 1 | 3 | NA | 645 | NA | NA | 952 | NA | UA | 719 | NA | EWR | DFW | NA | 1372 | 6 | 45 | 2013-01-03 06:00:00 |
2013 | 1 | 4 | NA | 845 | NA | NA | 1015 | NA | 9E | 3405 | NA | JFK | DCA | NA | 213 | 8 | 45 | 2013-01-04 08:00:00 |
2013 | 1 | 4 | NA | 1830 | NA | NA | 2044 | NA | 9E | 3716 | NA | EWR | DTW | NA | 488 | 18 | 30 | 2013-01-04 18:00:00 |
2013 | 1 | 4 | NA | 920 | NA | NA | 1245 | NA | AA | 721 | N541AA | LGA | DFW | NA | 1389 | 9 | 20 | 2013-01-04 09:00:00 |
2013 | 1 | 4 | NA | 1245 | NA | NA | 1550 | NA | AA | 745 | N3BGAA | LGA | DFW | NA | 1389 | 12 | 45 | 2013-01-04 12:00:00 |
2013 | 1 | 4 | NA | 1430 | NA | NA | 1735 | NA | AA | 883 | N200AA | EWR | DFW | NA | 1372 | 14 | 30 | 2013-01-04 14:00:00 |
2013 | 1 | 4 | NA | 1530 | NA | NA | 1725 | NA | AA | 2223 | N569AA | LGA | STL | NA | 888 | 15 | 30 | 2013-01-04 15:00:00 |
2013 | 1 | 5 | NA | 1400 | NA | NA | 1518 | NA | EV | 5712 | N827AS | JFK | IAD | NA | 228 | 14 | 0 | 2013-01-05 14:00:00 |
2013 | 1 | 5 | NA | 840 | NA | NA | 1001 | NA | 9E | 3422 | NA | JFK | BOS | NA | 187 | 8 | 40 | 2013-01-05 08:00:00 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
2013 | 9 | 24 | NA | 1625 | NA | NA | 1750 | NA | MQ | 3622 | N524MQ | LGA | BNA | NA | 764 | 16 | 25 | 2013-09-24 16:00:00 |
2013 | 9 | 25 | NA | 1259 | NA | NA | 1507 | NA | EV | 5207 | N615QX | LGA | CLT | NA | 544 | 12 | 59 | 2013-09-25 12:00:00 |
2013 | 9 | 25 | NA | 845 | NA | NA | 1018 | NA | EV | 5286 | N615QX | LGA | BTV | NA | 258 | 8 | 45 | 2013-09-25 08:00:00 |
2013 | 9 | 25 | NA | 1755 | NA | NA | 1932 | NA | EV | 5287 | N722EV | LGA | MSN | NA | 812 | 17 | 55 | 2013-09-25 17:00:00 |
2013 | 9 | 25 | NA | 600 | NA | NA | 716 | NA | EV | 5716 | N877AS | JFK | IAD | NA | 228 | 6 | 0 | 2013-09-25 06:00:00 |
2013 | 9 | 25 | NA | 836 | NA | NA | 944 | NA | B6 | 2280 | N258JB | EWR | BOS | NA | 200 | 8 | 36 | 2013-09-25 08:00:00 |
2013 | 9 | 25 | NA | 1300 | NA | NA | 1409 | NA | US | 2148 | NA | LGA | BOS | NA | 184 | 13 | 0 | 2013-09-25 13:00:00 |
2013 | 9 | 25 | NA | 1900 | NA | NA | 2014 | NA | US | 2160 | NA | LGA | BOS | NA | 184 | 19 | 0 | 2013-09-25 19:00:00 |
2013 | 9 | 25 | NA | 1300 | NA | NA | 1450 | NA | MQ | 3388 | N817MQ | LGA | CMH | NA | 479 | 13 | 0 | 2013-09-25 13:00:00 |
2013 | 9 | 25 | NA | 1655 | NA | NA | 1840 | NA | MQ | 3411 | N735MQ | LGA | RDU | NA | 431 | 16 | 55 | 2013-09-25 16:00:00 |
2013 | 9 | 25 | NA | 1559 | NA | NA | 1719 | NA | MQ | 3748 | N530MQ | EWR | ORD | NA | 719 | 15 | 59 | 2013-09-25 15:00:00 |
2013 | 9 | 26 | NA | 915 | NA | NA | 1141 | NA | EV | 5109 | N748EV | LGA | CHS | NA | 641 | 9 | 15 | 2013-09-26 09:00:00 |
2013 | 9 | 26 | NA | 1400 | NA | NA | 1512 | NA | US | 2183 | NA | LGA | DCA | NA | 214 | 14 | 0 | 2013-09-26 14:00:00 |
2013 | 9 | 26 | NA | 1240 | NA | NA | 1525 | NA | WN | 4720 | N691WN | EWR | HOU | NA | 1411 | 12 | 40 | 2013-09-26 12:00:00 |
2013 | 9 | 27 | NA | 600 | NA | NA | 730 | NA | AA | 301 | N584AA | LGA | ORD | NA | 733 | 6 | 0 | 2013-09-27 06:00:00 |
2013 | 9 | 27 | NA | 2100 | NA | NA | 2211 | NA | US | 2164 | NA | LGA | BOS | NA | 184 | 21 | 0 | 2013-09-27 21:00:00 |
2013 | 9 | 27 | NA | 1329 | NA | NA | 1444 | NA | MQ | 3760 | N505MQ | EWR | ORD | NA | 719 | 13 | 29 | 2013-09-27 13:00:00 |
2013 | 9 | 27 | NA | 1600 | NA | NA | 1739 | NA | UA | 269 | NA | LGA | ORD | NA | 733 | 16 | 0 | 2013-09-27 16:00:00 |
2013 | 9 | 28 | NA | 1803 | NA | NA | 1927 | NA | EV | 5563 | N724EV | LGA | BTV | NA | 258 | 18 | 3 | 2013-09-28 18:00:00 |
2013 | 9 | 28 | NA | 910 | NA | NA | 1220 | NA | AA | 1 | N320AA | JFK | LAX | NA | 2475 | 9 | 10 | 2013-09-28 09:00:00 |
2013 | 9 | 28 | NA | 1635 | NA | NA | 1827 | NA | US | 581 | NA | EWR | CLT | NA | 529 | 16 | 35 | 2013-09-28 16:00:00 |
2013 | 9 | 29 | NA | 2054 | NA | NA | 2302 | NA | EV | 4536 | N13988 | EWR | CVG | NA | 569 | 20 | 54 | 2013-09-29 20:00:00 |
2013 | 9 | 29 | NA | 1830 | NA | NA | 2010 | NA | MQ | 3134 | N508MQ | EWR | ORD | NA | 719 | 18 | 30 | 2013-09-29 18:00:00 |
2013 | 9 | 29 | NA | 700 | NA | NA | 833 | NA | UA | 331 | NA | LGA | ORD | NA | 733 | 7 | 0 | 2013-09-29 07:00:00 |
2013 | 9 | 30 | NA | 1842 | NA | NA | 2019 | NA | EV | 5274 | N740EV | LGA | BNA | NA | 764 | 18 | 42 | 2013-09-30 18:00:00 |
2013 | 9 | 30 | NA | 1455 | NA | NA | 1634 | NA | 9E | 3393 | NA | JFK | DCA | NA | 213 | 14 | 55 | 2013-09-30 14:00:00 |
2013 | 9 | 30 | NA | 2200 | NA | NA | 2312 | NA | 9E | 3525 | NA | LGA | SYR | NA | 198 | 22 | 0 | 2013-09-30 22:00:00 |
2013 | 9 | 30 | NA | 1210 | NA | NA | 1330 | NA | MQ | 3461 | N535MQ | LGA | BNA | NA | 764 | 12 | 10 | 2013-09-30 12:00:00 |
2013 | 9 | 30 | NA | 1159 | NA | NA | 1344 | NA | MQ | 3572 | N511MQ | LGA | CLE | NA | 419 | 11 | 59 | 2013-09-30 11:00:00 |
2013 | 9 | 30 | NA | 840 | NA | NA | 1020 | NA | MQ | 3531 | N839MQ | LGA | RDU | NA | 431 | 8 | 40 | 2013-09-30 08:00:00 |
flights |>
filter(month == 1, day == 1) |>
arrange(desc(is.na(dep_time)), dep_time)
year | month | day | dep_time | sched_dep_time | dep_delay | arr_time | sched_arr_time | arr_delay | carrier | flight | tailnum | origin | dest | air_time | distance | hour | minute | time_hour |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
<int> | <int> | <int> | <int> | <int> | <dbl> | <int> | <int> | <dbl> | <chr> | <int> | <chr> | <chr> | <chr> | <dbl> | <dbl> | <dbl> | <dbl> | <dttm> |
2013 | 1 | 1 | NA | 1630 | NA | NA | 1815 | NA | EV | 4308 | N18120 | EWR | RDU | NA | 416 | 16 | 30 | 2013-01-01 16:00:00 |
2013 | 1 | 1 | NA | 1935 | NA | NA | 2240 | NA | AA | 791 | N3EHAA | LGA | DFW | NA | 1389 | 19 | 35 | 2013-01-01 19:00:00 |
2013 | 1 | 1 | NA | 1500 | NA | NA | 1825 | NA | AA | 1925 | N3EVAA | LGA | MIA | NA | 1096 | 15 | 0 | 2013-01-01 15:00:00 |
2013 | 1 | 1 | NA | 600 | NA | NA | 901 | NA | B6 | 125 | N618JB | JFK | FLL | NA | 1069 | 6 | 0 | 2013-01-01 06:00:00 |
2013 | 1 | 1 | 517 | 515 | 2 | 830 | 819 | 11 | UA | 1545 | N14228 | EWR | IAH | 227 | 1400 | 5 | 15 | 2013-01-01 05:00:00 |
2013 | 1 | 1 | 533 | 529 | 4 | 850 | 830 | 20 | UA | 1714 | N24211 | LGA | IAH | 227 | 1416 | 5 | 29 | 2013-01-01 05:00:00 |
2013 | 1 | 1 | 542 | 540 | 2 | 923 | 850 | 33 | AA | 1141 | N619AA | JFK | MIA | 160 | 1089 | 5 | 40 | 2013-01-01 05:00:00 |
2013 | 1 | 1 | 544 | 545 | -1 | 1004 | 1022 | -18 | B6 | 725 | N804JB | JFK | BQN | 183 | 1576 | 5 | 45 | 2013-01-01 05:00:00 |
2013 | 1 | 1 | 554 | 600 | -6 | 812 | 837 | -25 | DL | 461 | N668DN | LGA | ATL | 116 | 762 | 6 | 0 | 2013-01-01 06:00:00 |
2013 | 1 | 1 | 554 | 558 | -4 | 740 | 728 | 12 | UA | 1696 | N39463 | EWR | ORD | 150 | 719 | 5 | 58 | 2013-01-01 05:00:00 |
2013 | 1 | 1 | 555 | 600 | -5 | 913 | 854 | 19 | B6 | 507 | N516JB | EWR | FLL | 158 | 1065 | 6 | 0 | 2013-01-01 06:00:00 |
2013 | 1 | 1 | 557 | 600 | -3 | 709 | 723 | -14 | EV | 5708 | N829AS | LGA | IAD | 53 | 229 | 6 | 0 | 2013-01-01 06:00:00 |
2013 | 1 | 1 | 557 | 600 | -3 | 838 | 846 | -8 | B6 | 79 | N593JB | JFK | MCO | 140 | 944 | 6 | 0 | 2013-01-01 06:00:00 |
2013 | 1 | 1 | 558 | 600 | -2 | 753 | 745 | 8 | AA | 301 | N3ALAA | LGA | ORD | 138 | 733 | 6 | 0 | 2013-01-01 06:00:00 |
2013 | 1 | 1 | 558 | 600 | -2 | 849 | 851 | -2 | B6 | 49 | N793JB | JFK | PBI | 149 | 1028 | 6 | 0 | 2013-01-01 06:00:00 |
2013 | 1 | 1 | 558 | 600 | -2 | 853 | 856 | -3 | B6 | 71 | N657JB | JFK | TPA | 158 | 1005 | 6 | 0 | 2013-01-01 06:00:00 |
2013 | 1 | 1 | 558 | 600 | -2 | 924 | 917 | 7 | UA | 194 | N29129 | JFK | LAX | 345 | 2475 | 6 | 0 | 2013-01-01 06:00:00 |
2013 | 1 | 1 | 558 | 600 | -2 | 923 | 937 | -14 | UA | 1124 | N53441 | EWR | SFO | 361 | 2565 | 6 | 0 | 2013-01-01 06:00:00 |
2013 | 1 | 1 | 559 | 600 | -1 | 941 | 910 | 31 | AA | 707 | N3DUAA | LGA | DFW | 257 | 1389 | 6 | 0 | 2013-01-01 06:00:00 |
2013 | 1 | 1 | 559 | 559 | 0 | 702 | 706 | -4 | B6 | 1806 | N708JB | JFK | BOS | 44 | 187 | 5 | 59 | 2013-01-01 05:00:00 |
2013 | 1 | 1 | 559 | 600 | -1 | 854 | 902 | -8 | UA | 1187 | N76515 | EWR | LAS | 337 | 2227 | 6 | 0 | 2013-01-01 06:00:00 |
2013 | 1 | 1 | 600 | 600 | 0 | 851 | 858 | -7 | B6 | 371 | N595JB | LGA | FLL | 152 | 1076 | 6 | 0 | 2013-01-01 06:00:00 |
2013 | 1 | 1 | 600 | 600 | 0 | 837 | 825 | 12 | MQ | 4650 | N542MQ | LGA | ATL | 134 | 762 | 6 | 0 | 2013-01-01 06:00:00 |
2013 | 1 | 1 | 601 | 600 | 1 | 844 | 850 | -6 | B6 | 343 | N644JB | EWR | PBI | 147 | 1023 | 6 | 0 | 2013-01-01 06:00:00 |
2013 | 1 | 1 | 602 | 610 | -8 | 812 | 820 | -8 | DL | 1919 | N971DL | LGA | MSP | 170 | 1020 | 6 | 10 | 2013-01-01 06:00:00 |
2013 | 1 | 1 | 602 | 605 | -3 | 821 | 805 | 16 | MQ | 4401 | N730MQ | LGA | DTW | 105 | 502 | 6 | 5 | 2013-01-01 06:00:00 |
2013 | 1 | 1 | 606 | 610 | -4 | 858 | 910 | -12 | AA | 1895 | N633AA | EWR | MIA | 152 | 1085 | 6 | 10 | 2013-01-01 06:00:00 |
2013 | 1 | 1 | 606 | 610 | -4 | 837 | 845 | -8 | DL | 1743 | N3739P | JFK | ATL | 128 | 760 | 6 | 10 | 2013-01-01 06:00:00 |
2013 | 1 | 1 | 607 | 607 | 0 | 858 | 915 | -17 | UA | 1077 | N53442 | EWR | MIA | 157 | 1085 | 6 | 7 | 2013-01-01 06:00:00 |
2013 | 1 | 1 | 608 | 600 | 8 | 807 | 735 | 32 | MQ | 3768 | N9EAMQ | EWR | ORD | 139 | 719 | 6 | 0 | 2013-01-01 06:00:00 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
2013 | 1 | 1 | 2128 | 2125 | 3 | 2243 | 2240 | 3 | MQ | 4449 | N810MQ | JFK | DCA | 54 | 213 | 21 | 25 | 2013-01-01 21:00:00 |
2013 | 1 | 1 | 2129 | 2120 | 9 | 2342 | 2351 | -9 | B6 | 97 | N625JB | JFK | DEN | 223 | 1626 | 21 | 20 | 2013-01-01 21:00:00 |
2013 | 1 | 1 | 2134 | 2045 | 49 | 20 | 2352 | 28 | UA | 1106 | N27733 | EWR | FLL | 152 | 1065 | 20 | 45 | 2013-01-01 20:00:00 |
2013 | 1 | 1 | 2136 | 2145 | -9 | 25 | 39 | -14 | B6 | 515 | N198JB | EWR | FLL | 154 | 1065 | 21 | 45 | 2013-01-01 21:00:00 |
2013 | 1 | 1 | 2140 | 2135 | 5 | 210 | 224 | -14 | B6 | 701 | N284JB | JFK | SJU | 189 | 1598 | 21 | 35 | 2013-01-01 21:00:00 |
2013 | 1 | 1 | 2157 | 2155 | 2 | 43 | 41 | 2 | B6 | 43 | N537JB | JFK | MCO | 140 | 944 | 21 | 55 | 2013-01-01 21:00:00 |
2013 | 1 | 1 | 2158 | 2200 | -2 | 2254 | 2307 | -13 | EV | 4103 | N14998 | EWR | BWI | 36 | 169 | 22 | 0 | 2013-01-01 22:00:00 |
2013 | 1 | 1 | 2205 | 1720 | 285 | 46 | 2040 | 246 | AA | 1999 | N5DNAA | EWR | MIA | 146 | 1085 | 17 | 20 | 2013-01-01 17:00:00 |
2013 | 1 | 1 | 2209 | 2145 | 24 | 58 | 37 | 21 | B6 | 35 | N608JB | JFK | PBI | 143 | 1028 | 21 | 45 | 2013-01-01 21:00:00 |
2013 | 1 | 1 | 2209 | 2155 | 14 | 2400 | 2337 | 23 | B6 | 1109 | N216JB | JFK | RDU | 86 | 427 | 21 | 55 | 2013-01-01 21:00:00 |
2013 | 1 | 1 | 2211 | 2145 | 26 | 2339 | 2311 | 28 | B6 | 104 | N228JB | JFK | BUF | 64 | 301 | 21 | 45 | 2013-01-01 21:00:00 |
2013 | 1 | 1 | 2217 | 2229 | -12 | 249 | 315 | -26 | B6 | 713 | N547JB | JFK | SJU | 191 | 1598 | 22 | 29 | 2013-01-01 22:00:00 |
2013 | 1 | 1 | 2217 | 2130 | 47 | 140 | 27 | 73 | B6 | 21 | N516JB | JFK | TPA | 163 | 1005 | 21 | 30 | 2013-01-01 21:00:00 |
2013 | 1 | 1 | 2221 | 2000 | 141 | 2331 | 2124 | 127 | EV | 4462 | N13566 | EWR | BUF | 56 | 282 | 20 | 0 | 2013-01-01 20:00:00 |
2013 | 1 | 1 | 2224 | 2200 | 24 | 2324 | 2316 | 8 | EV | 4206 | N16561 | EWR | PWM | 47 | 284 | 22 | 0 | 2013-01-01 22:00:00 |
2013 | 1 | 1 | 2229 | 2159 | 30 | 149 | 100 | 49 | B6 | 11 | N531JB | JFK | FLL | 153 | 1069 | 21 | 59 | 2013-01-01 21:00:00 |
2013 | 1 | 1 | 2240 | 2245 | -5 | 2340 | 2356 | -16 | B6 | 608 | N279JB | JFK | PWM | 44 | 273 | 22 | 45 | 2013-01-01 22:00:00 |
2013 | 1 | 1 | 2250 | 2255 | -5 | 2352 | 2359 | -7 | B6 | 1018 | N521JB | JFK | BOS | 37 | 187 | 22 | 55 | 2013-01-01 22:00:00 |
2013 | 1 | 1 | 2302 | 2200 | 62 | 2342 | 2253 | 49 | EV | 4276 | N13903 | EWR | BDL | 24 | 116 | 22 | 0 | 2013-01-01 22:00:00 |
2013 | 1 | 1 | 2306 | 2245 | 21 | 28 | 5 | 23 | B6 | 30 | N281JB | JFK | ROC | 59 | 264 | 22 | 45 | 2013-01-01 22:00:00 |
2013 | 1 | 1 | 2307 | 2245 | 22 | 32 | 2357 | 35 | B6 | 128 | N178JB | JFK | BTV | 59 | 266 | 22 | 45 | 2013-01-01 22:00:00 |
2013 | 1 | 1 | 2310 | 2255 | 15 | 24 | 15 | 9 | B6 | 112 | N646JB | JFK | BUF | 57 | 301 | 22 | 55 | 2013-01-01 22:00:00 |
2013 | 1 | 1 | 2312 | 2000 | 192 | 21 | 2110 | 191 | EV | 4312 | N13958 | EWR | DCA | 44 | 199 | 20 | 0 | 2013-01-01 20:00:00 |
2013 | 1 | 1 | 2323 | 2200 | 83 | 22 | 2313 | 69 | EV | 4257 | N13538 | EWR | BTV | 44 | 266 | 22 | 0 | 2013-01-01 22:00:00 |
2013 | 1 | 1 | 2326 | 2130 | 116 | 131 | 18 | 73 | B6 | 199 | N594JB | JFK | LAS | 290 | 2248 | 21 | 30 | 2013-01-01 21:00:00 |
2013 | 1 | 1 | 2327 | 2250 | 37 | 32 | 2359 | 33 | B6 | 22 | N639JB | JFK | SYR | 45 | 209 | 22 | 50 | 2013-01-01 22:00:00 |
2013 | 1 | 1 | 2343 | 1724 | 379 | 314 | 1938 | 456 | EV | 4321 | N21197 | EWR | MCI | 222 | 1092 | 17 | 24 | 2013-01-01 17:00:00 |
2013 | 1 | 1 | 2353 | 2359 | -6 | 425 | 445 | -20 | B6 | 739 | N591JB | JFK | PSE | 195 | 1617 | 23 | 59 | 2013-01-01 23:00:00 |
2013 | 1 | 1 | 2353 | 2359 | -6 | 418 | 442 | -24 | B6 | 707 | N794JB | JFK | SJU | 185 | 1598 | 23 | 59 | 2013-01-01 23:00:00 |
2013 | 1 | 1 | 2356 | 2359 | -3 | 425 | 437 | -12 | B6 | 727 | N588JB | JFK | BQN | 186 | 1576 | 23 | 59 | 2013-01-01 23:00:00 |
Exercises:
- Use mutate(), is.na(), and count() together to describe how the missing values in dep_time, sched_dep_time and dep_delay are connected.
12.3 Boolean Algebra
!, &, xor, |
df <- tibble(x = c(TRUE, FALSE, NA))
df |>
mutate(
and = x & NA,
or = x | NA
)
x | and | or |
---|---|---|
<lgl> | <lgl> | <lgl> |
TRUE | NA | TRUE |
FALSE | FALSE | NA |
NA | NA | NA |
flights |>
filter(month %in% c(11, 12))
year | month | day | dep_time | sched_dep_time | dep_delay | arr_time | sched_arr_time | arr_delay | carrier | flight | tailnum | origin | dest | air_time | distance | hour | minute | time_hour |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
<int> | <int> | <int> | <int> | <int> | <dbl> | <int> | <int> | <dbl> | <chr> | <int> | <chr> | <chr> | <chr> | <dbl> | <dbl> | <dbl> | <dbl> | <dttm> |
2013 | 11 | 1 | 5 | 2359 | 6 | 352 | 345 | 7 | B6 | 745 | N568JB | JFK | PSE | 205 | 1617 | 23 | 59 | 2013-11-01 23:00:00 |
2013 | 11 | 1 | 35 | 2250 | 105 | 123 | 2356 | 87 | B6 | 1816 | N353JB | JFK | SYR | 36 | 209 | 22 | 50 | 2013-11-01 22:00:00 |
2013 | 11 | 1 | 455 | 500 | -5 | 641 | 651 | -10 | US | 1895 | N192UW | EWR | CLT | 88 | 529 | 5 | 0 | 2013-11-01 05:00:00 |
2013 | 11 | 1 | 539 | 545 | -6 | 856 | 827 | 29 | UA | 1714 | N38727 | LGA | IAH | 229 | 1416 | 5 | 45 | 2013-11-01 05:00:00 |
2013 | 11 | 1 | 542 | 545 | -3 | 831 | 855 | -24 | AA | 2243 | N5CLAA | JFK | MIA | 147 | 1089 | 5 | 45 | 2013-11-01 05:00:00 |
2013 | 11 | 1 | 549 | 600 | -11 | 912 | 923 | -11 | UA | 303 | N595UA | JFK | SFO | 359 | 2586 | 6 | 0 | 2013-11-01 06:00:00 |
2013 | 11 | 1 | 550 | 600 | -10 | 705 | 659 | 6 | US | 2167 | N748UW | LGA | DCA | 57 | 214 | 6 | 0 | 2013-11-01 06:00:00 |
2013 | 11 | 1 | 554 | 600 | -6 | 659 | 701 | -2 | US | 2134 | N742PS | LGA | BOS | 40 | 184 | 6 | 0 | 2013-11-01 06:00:00 |
2013 | 11 | 1 | 554 | 600 | -6 | 826 | 827 | -1 | DL | 563 | N912DE | LGA | ATL | 126 | 762 | 6 | 0 | 2013-11-01 06:00:00 |
2013 | 11 | 1 | 554 | 600 | -6 | 749 | 751 | -2 | DL | 731 | N315NB | LGA | DTW | 93 | 502 | 6 | 0 | 2013-11-01 06:00:00 |
2013 | 11 | 1 | 555 | 600 | -5 | 847 | 854 | -7 | B6 | 605 | N640JB | EWR | FLL | 149 | 1065 | 6 | 0 | 2013-11-01 06:00:00 |
2013 | 11 | 1 | 555 | 600 | -5 | 839 | 846 | -7 | B6 | 583 | N661JB | JFK | MCO | 136 | 944 | 6 | 0 | 2013-11-01 06:00:00 |
2013 | 11 | 1 | 555 | 600 | -5 | 929 | 943 | -14 | B6 | 1403 | N746JB | JFK | SJU | 196 | 1598 | 6 | 0 | 2013-11-01 06:00:00 |
2013 | 11 | 1 | 556 | 600 | -4 | 834 | 851 | -17 | UA | 407 | N834UA | EWR | TPA | 142 | 997 | 6 | 0 | 2013-11-01 06:00:00 |
2013 | 11 | 1 | 558 | 600 | -2 | 727 | 730 | -3 | UA | 279 | N459UA | EWR | ORD | 118 | 719 | 6 | 0 | 2013-11-01 06:00:00 |
2013 | 11 | 1 | 558 | 600 | -2 | 650 | 658 | -8 | US | 1909 | N950UW | LGA | PHL | 38 | 96 | 6 | 0 | 2013-11-01 06:00:00 |
2013 | 11 | 1 | 558 | 600 | -2 | 914 | 905 | 9 | AA | 1175 | N3GHAA | LGA | MIA | 155 | 1096 | 6 | 0 | 2013-11-01 06:00:00 |
2013 | 11 | 1 | 558 | 600 | -2 | 720 | 715 | 5 | WN | 464 | N390SW | EWR | MDW | 117 | 711 | 6 | 0 | 2013-11-01 06:00:00 |
2013 | 11 | 1 | 559 | 600 | -1 | 756 | 730 | 26 | AA | 301 | N4YHAA | LGA | ORD | 119 | 733 | 6 | 0 | 2013-11-01 06:00:00 |
2013 | 11 | 1 | 600 | 600 | 0 | 709 | 716 | -7 | EV | 5716 | N820AS | JFK | IAD | 49 | 228 | 6 | 0 | 2013-11-01 06:00:00 |
2013 | 11 | 1 | 600 | 600 | 0 | 725 | 721 | 4 | UA | 1198 | N23707 | LGA | ORD | 121 | 733 | 6 | 0 | 2013-11-01 06:00:00 |
2013 | 11 | 1 | 601 | 600 | 1 | 853 | 856 | -3 | B6 | 371 | N570JB | LGA | FLL | 150 | 1076 | 6 | 0 | 2013-11-01 06:00:00 |
2013 | 11 | 1 | 601 | 610 | -9 | 803 | 813 | -10 | DL | 1919 | N922DL | LGA | MSP | 153 | 1020 | 6 | 10 | 2013-11-01 06:00:00 |
2013 | 11 | 1 | 602 | 600 | 2 | 843 | 815 | 28 | FL | 345 | N353AT | LGA | ATL | 131 | 762 | 6 | 0 | 2013-11-01 06:00:00 |
2013 | 11 | 1 | 603 | 600 | 3 | 717 | 711 | 6 | EV | 4533 | N11109 | EWR | BUF | 53 | 282 | 6 | 0 | 2013-11-01 06:00:00 |
2013 | 11 | 1 | 604 | 610 | -6 | 855 | 855 | 0 | AA | 1103 | N3FTAA | LGA | DFW | 205 | 1389 | 6 | 10 | 2013-11-01 06:00:00 |
2013 | 11 | 1 | 606 | 615 | -9 | 746 | 750 | -4 | MQ | 3525 | N834MQ | LGA | RDU | 74 | 431 | 6 | 15 | 2013-11-01 06:00:00 |
2013 | 11 | 1 | 606 | 615 | -9 | 807 | 817 | -10 | US | 1899 | N199UW | JFK | CLT | 93 | 541 | 6 | 15 | 2013-11-01 06:00:00 |
2013 | 11 | 1 | 606 | 610 | -4 | 752 | 745 | 7 | WN | 2609 | N440LV | LGA | STL | 142 | 888 | 6 | 10 | 2013-11-01 06:00:00 |
2013 | 11 | 1 | 607 | 611 | -4 | 857 | 912 | -15 | B6 | 601 | N597JB | JFK | FLL | 149 | 1069 | 6 | 11 | 2013-11-01 06:00:00 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
2013 | 12 | 31 | 2155 | 2039 | 76 | 253 | 2355 | NA | B6 | 1205 | N627JB | JFK | PDX | NA | 2454 | 20 | 39 | 2013-12-31 20:00:00 |
2013 | 12 | 31 | 2155 | 2150 | 5 | 110 | 51 | 19 | B6 | 1901 | N729JB | JFK | FLL | 164 | 1069 | 21 | 50 | 2013-12-31 21:00:00 |
2013 | 12 | 31 | 2159 | 2155 | 4 | 55 | 46 | 9 | B6 | 2053 | N593JB | JFK | PBI | 155 | 1028 | 21 | 55 | 2013-12-31 21:00:00 |
2013 | 12 | 31 | 2206 | 2110 | 56 | 44 | 2339 | 65 | B6 | 775 | N184JB | JFK | MSY | 195 | 1182 | 21 | 10 | 2013-12-31 21:00:00 |
2013 | 12 | 31 | 2211 | 2159 | 12 | 100 | 45 | 15 | B6 | 1183 | N715JB | JFK | MCO | 148 | 944 | 21 | 59 | 2013-12-31 21:00:00 |
2013 | 12 | 31 | 2218 | 2219 | -1 | 315 | 304 | 11 | B6 | 1203 | N625JB | JFK | SJU | 202 | 1598 | 22 | 19 | 2013-12-31 22:00:00 |
2013 | 12 | 31 | 2235 | 2245 | -10 | 2351 | 2355 | -4 | B6 | 234 | N355JB | JFK | BTV | 49 | 266 | 22 | 45 | 2013-12-31 22:00:00 |
2013 | 12 | 31 | 2245 | 2250 | -5 | 2359 | 2356 | 3 | B6 | 1816 | N318JB | JFK | SYR | 51 | 209 | 22 | 50 | 2013-12-31 22:00:00 |
2013 | 12 | 31 | 2310 | 2255 | 15 | 7 | 2356 | 11 | B6 | 718 | N279JB | JFK | BOS | 40 | 187 | 22 | 55 | 2013-12-31 22:00:00 |
2013 | 12 | 31 | 2321 | 2250 | 31 | 46 | 8 | 38 | B6 | 2002 | N179JB | JFK | BUF | 66 | 301 | 22 | 50 | 2013-12-31 22:00:00 |
2013 | 12 | 31 | 2328 | 2330 | -2 | 412 | 409 | 3 | B6 | 1389 | N651JB | EWR | SJU | 198 | 1608 | 23 | 30 | 2013-12-31 23:00:00 |
2013 | 12 | 31 | 2332 | 2245 | 47 | 58 | 3 | 55 | B6 | 486 | N334JB | JFK | ROC | 60 | 264 | 22 | 45 | 2013-12-31 22:00:00 |
2013 | 12 | 31 | 2355 | 2359 | -4 | 430 | 440 | -10 | B6 | 1503 | N509JB | JFK | SJU | 195 | 1598 | 23 | 59 | 2013-12-31 23:00:00 |
2013 | 12 | 31 | 2356 | 2359 | -3 | 436 | 445 | -9 | B6 | 745 | N665JB | JFK | PSE | 200 | 1617 | 23 | 59 | 2013-12-31 23:00:00 |
2013 | 12 | 31 | NA | 1520 | NA | NA | 1705 | NA | AA | 341 | N568AA | LGA | ORD | NA | 733 | 15 | 20 | 2013-12-31 15:00:00 |
2013 | 12 | 31 | NA | 2025 | NA | NA | 2205 | NA | AA | 371 | N482AA | LGA | ORD | NA | 733 | 20 | 25 | 2013-12-31 20:00:00 |
2013 | 12 | 31 | NA | 1932 | NA | NA | 2305 | NA | B6 | 161 | N516JB | JFK | SMF | NA | 2521 | 19 | 32 | 2013-12-31 19:00:00 |
2013 | 12 | 31 | NA | 1505 | NA | NA | 1725 | NA | EV | 4181 | N24103 | EWR | MCI | NA | 1092 | 15 | 5 | 2013-12-31 15:00:00 |
2013 | 12 | 31 | NA | 1000 | NA | NA | 1252 | NA | UA | 1124 | NA | EWR | EGE | NA | 1725 | 10 | 0 | 2013-12-31 10:00:00 |
2013 | 12 | 31 | NA | 840 | NA | NA | 1205 | NA | UA | 1151 | NA | EWR | SEA | NA | 2402 | 8 | 40 | 2013-12-31 08:00:00 |
2013 | 12 | 31 | NA | 754 | NA | NA | 1118 | NA | UA | 1455 | NA | EWR | LAX | NA | 2454 | 7 | 54 | 2013-12-31 07:00:00 |
2013 | 12 | 31 | NA | 2000 | NA | NA | 2146 | NA | UA | 1482 | NA | EWR | ORD | NA | 719 | 20 | 0 | 2013-12-31 20:00:00 |
2013 | 12 | 31 | NA | 1500 | NA | NA | 1817 | NA | UA | 1483 | NA | EWR | AUS | NA | 1504 | 15 | 0 | 2013-12-31 15:00:00 |
2013 | 12 | 31 | NA | 1430 | NA | NA | 1750 | NA | UA | 1493 | NA | EWR | LAX | NA | 2454 | 14 | 30 | 2013-12-31 14:00:00 |
2013 | 12 | 31 | NA | 855 | NA | NA | 1142 | NA | UA | 1506 | NA | EWR | JAC | NA | 1874 | 8 | 55 | 2013-12-31 08:00:00 |
2013 | 12 | 31 | NA | 705 | NA | NA | 931 | NA | UA | 1729 | NA | EWR | DEN | NA | 1605 | 7 | 5 | 2013-12-31 07:00:00 |
2013 | 12 | 31 | NA | 825 | NA | NA | 1029 | NA | US | 1831 | NA | JFK | CLT | NA | 541 | 8 | 25 | 2013-12-31 08:00:00 |
2013 | 12 | 31 | NA | 1615 | NA | NA | 1800 | NA | MQ | 3301 | N844MQ | LGA | RDU | NA | 431 | 16 | 15 | 2013-12-31 16:00:00 |
2013 | 12 | 31 | NA | 600 | NA | NA | 735 | NA | UA | 219 | NA | EWR | ORD | NA | 719 | 6 | 0 | 2013-12-31 06:00:00 |
2013 | 12 | 31 | NA | 830 | NA | NA | 1154 | NA | UA | 443 | NA | JFK | LAX | NA | 2475 | 8 | 30 | 2013-12-31 08:00:00 |
Exercises
- Find all flights where
arr_delay
is missing but dep_delay is not. Find all flights where neitherarr_time
norsched_arr_time
are missing, but arr_delay is. - How many flights have a missing
dep_time
? What other variables are missing in these rows? What might these rows represent? - Assuming that a missing dep_time implies that a flight is cancelled, look at the number of cancelled flights per day. Is there a pattern? Is there a connection between the proportion of cancelled flights and the average delay of non-cancelled flights?
12.4 Summarizes
Logical summarizes
Nummeric summaries of logical vectors
Logical subsetting
Exercises
12.5 Conditional transformations
if_else()
case_when()
Compatible types
Exercises
12.6 Summary
C13. Numbers
13.2 Making numbers
13.3 Counts
13.4 Numeric transformations
13.5 General transformations
13.6 Numeric summaries
13.7 Summary
C14. Strings
14.2 Creating a string
14.3 Creating many strings from data
14.4 Extracting data from strings
14.5 Letters
14.6 Non-English text
14.7 Summary
C15 Regular Expressions
library(tidyverse)
library(babynames)
15.2 Pattern basics
15.3 Key functions
15.4 Pattern details
15.5 Pattern control
15.6 Practice
15.7 Regular expressions in other places
15.8 Summary
C16. Factors
16.2 Factor basics
month_levels <- c(
"Jan", "Feb", "Mar", "Apr", "May", "Jun",
"Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
)
x1 <- c("Dec", "Apr", "Jan", "Mar")
y1 <- factor(x1, levels = month_levels)
y1
sort(y1)
<ol class=list-inline><li>Dec</li><li>Apr</li><li>Jan</li><li>Mar</li></ol>
<ol class=list-inline><li>Jan</li><li>Mar</li><li>Apr</li><li>Dec</li></ol>
x2 <- c("Dec", "Apr", "Jam", "Mar")
y2 <- factor(x2, levels = month_levels)
y2
sort(y2)
<ol class=list-inline><li>Dec</li><li>Apr</li><li><NA></li><li>Mar</li></ol>
<ol class=list-inline><li>Mar</li><li>Apr</li><li>Dec</li></ol>
levels(y2)
<ol class=list-inline><li>‘Jan’</li><li>‘Feb’</li><li>‘Mar’</li><li>‘Apr’</li><li>‘May’</li><li>‘Jun’</li><li>‘Jul’</li><li>‘Aug’</li><li>‘Sep’</li><li>‘Oct’</li><li>‘Nov’</li><li>‘Dec’</li></ol>
library(readr)
csv <- "
month,value
Jan,12
Feb,56
Mar,12"
df <- read_csv(csv, col_types = cols(month = col_factor(month_levels)))
df$month
<ol class=list-inline><li>Jan</li><li>Feb</li><li>Mar</li></ol>
16.3 General Social Survey
library(tidyverse)
library(gridExtra)
gss_cat
year | marital | age | race | rincome | partyid | relig | denom | tvhours |
---|---|---|---|---|---|---|---|---|
<int> | <fct> | <int> | <fct> | <fct> | <fct> | <fct> | <fct> | <int> |
2000 | Never married | 26 | White | $8000 to 9999 | Ind,near rep | Protestant | Southern baptist | 12 |
2000 | Divorced | 48 | White | $8000 to 9999 | Not str republican | Protestant | Baptist-dk which | NA |
2000 | Widowed | 67 | White | Not applicable | Independent | Protestant | No denomination | 2 |
2000 | Never married | 39 | White | Not applicable | Ind,near rep | Orthodox-christian | Not applicable | 4 |
2000 | Divorced | 25 | White | Not applicable | Not str democrat | None | Not applicable | 1 |
2000 | Married | 25 | White | $20000 - 24999 | Strong democrat | Protestant | Southern baptist | NA |
2000 | Never married | 36 | White | $25000 or more | Not str republican | Christian | Not applicable | 3 |
2000 | Divorced | 44 | White | $7000 to 7999 | Ind,near dem | Protestant | Lutheran-mo synod | NA |
2000 | Married | 44 | White | $25000 or more | Not str democrat | Protestant | Other | 0 |
2000 | Married | 47 | White | $25000 or more | Strong republican | Protestant | Southern baptist | 3 |
2000 | Married | 53 | White | $25000 or more | Not str democrat | Protestant | Other | 2 |
2000 | Married | 52 | White | $25000 or more | Ind,near rep | None | Not applicable | NA |
2000 | Married | 52 | White | $25000 or more | Strong democrat | Protestant | Southern baptist | 1 |
2000 | Married | 51 | White | $25000 or more | Strong republican | Protestant | United methodist | NA |
2000 | Divorced | 52 | White | $25000 or more | Ind,near dem | None | Not applicable | 1 |
2000 | Married | 40 | Black | $25000 or more | Strong democrat | Protestant | Baptist-dk which | 7 |
2000 | Widowed | 77 | White | Not applicable | Strong republican | Jewish | Not applicable | NA |
2000 | Never married | 44 | White | $25000 or more | Independent | None | Not applicable | 3 |
2000 | Married | 40 | White | $10000 - 14999 | Not str democrat | Catholic | Not applicable | 3 |
2000 | Married | 45 | Black | Not applicable | Independent | Protestant | United methodist | NA |
2000 | Married | 48 | White | $25000 or more | Ind,near dem | Catholic | Not applicable | 1 |
2000 | Married | 49 | White | Refused | Strong republican | Protestant | United methodist | 2 |
2000 | Never married | 19 | White | Not applicable | Independent | None | Not applicable | 2 |
2000 | Widowed | 54 | White | $25000 or more | Ind,near rep | Christian | Not applicable | 1 |
2000 | Widowed | 82 | White | Not applicable | Not str democrat | Protestant | Other | 3 |
2000 | Widowed | 83 | White | Not applicable | Strong democrat | Protestant | Episcopal | NA |
2000 | Widowed | 89 | White | Not applicable | Not str democrat | Protestant | Other lutheran | 4 |
2000 | Widowed | 88 | White | Not applicable | Strong republican | Protestant | Afr meth ep zion | NA |
2000 | Divorced | 72 | White | Not applicable | Strong democrat | Protestant | Southern baptist | 7 |
2000 | Widowed | 82 | White | Not applicable | Independent | Protestant | Am bapt ch in usa | NA |
... | ... | ... | ... | ... | ... | ... | ... | ... |
2014 | Divorced | 38 | White | $3000 to 3999 | Not str republican | Protestant | Other | 1 |
2014 | Widowed | 46 | White | $25000 or more | Strong democrat | None | Not applicable | 2 |
2014 | Married | 49 | White | Not applicable | Ind,near rep | Protestant | Other | 6 |
2014 | Never married | 34 | White | $25000 or more | Independent | Protestant | United methodist | 2 |
2014 | Married | 54 | White | Not applicable | Independent | Protestant | Other | NA |
2014 | Married | 34 | White | $15000 - 19999 | Ind,near dem | Buddhism | Not applicable | 1 |
2014 | Married | 69 | White | Not applicable | Ind,near dem | Jewish | Not applicable | 3 |
2014 | Divorced | 36 | White | Not applicable | Independent | None | Not applicable | 0 |
2014 | Married | 65 | White | $25000 or more | Not str democrat | None | Not applicable | 2 |
2014 | Married | 48 | White | $20000 - 24999 | Strong democrat | Protestant | Other | 0 |
2014 | Married | 38 | White | $10000 - 14999 | Not str democrat | Protestant | No denomination | 2 |
2014 | Never married | 30 | White | $4000 to 4999 | Ind,near dem | None | Not applicable | 2 |
2014 | Married | 48 | White | $8000 to 9999 | Not str republican | Catholic | Not applicable | 0 |
2014 | Divorced | 49 | White | $25000 or more | Ind,near rep | Other | Not applicable | 2 |
2014 | Married | 54 | White | $25000 or more | Ind,near dem | Protestant | Other | NA |
2014 | Married | 49 | White | $25000 or more | Not str republican | Catholic | Not applicable | NA |
2014 | Married | 53 | White | $25000 or more | Not str democrat | None | Not applicable | 0 |
2014 | Married | 52 | White | $25000 or more | Not str democrat | None | Not applicable | 1 |
2014 | Widowed | 82 | White | Not applicable | Strong democrat | Protestant | Other | 2 |
2014 | Married | 63 | White | Not applicable | Ind,near dem | No answer | No answer | 2 |
2014 | Divorced | 54 | White | $25000 or more | Ind,near rep | Catholic | Not applicable | 3 |
2014 | Married | 62 | White | $25000 or more | Ind,near rep | Protestant | Other | NA |
2014 | Never married | 40 | White | $1000 to 2999 | Not str republican | None | Not applicable | 2 |
2014 | Married | 33 | White | Not applicable | Independent | Christian | No denomination | 0 |
2014 | Widowed | 75 | White | Don't know | Strong republican | Protestant | Baptist-dk which | 4 |
2014 | Widowed | 89 | White | Not applicable | Not str republican | Protestant | United methodist | 3 |
2014 | Divorced | 56 | White | $25000 or more | Independent | None | Not applicable | 4 |
2014 | Never married | 24 | White | $10000 - 14999 | Ind,near dem | None | Not applicable | 4 |
2014 | Never married | 27 | White | $25000 or more | Not str democrat | Catholic | Not applicable | NA |
2014 | Widowed | 71 | White | $20000 - 24999 | Ind,near rep | Protestant | Other | 2 |
gss_cat |>
count(race)
race | n |
---|---|
<fct> | <int> |
Other | 1959 |
Black | 3129 |
White | 16395 |
Exercises
-
Explore the distribution of rincome (reported income). What makes the default bar chart hard to understand? How could you improve the plot?
-
What is the most common relig in this survey? What’s the most common partyid?
-
Which relig does denom (denomination) apply to? How can you find out with a table? How can you find out with a visualization?
16.4 Modifying factor order
relig_summary <- gss_cat |>
group_by(relig) |>
summarize(
tvhours = mean(tvhours, na.rm = TRUE),
n = n()
)
ggplot(relig_summary, aes(x = tvhours, y = relig)) +
geom_point()
ggplot(relig_summary, aes(x = tvhours, y = fct_reorder(relig, tvhours))) +
geom_point()
relig_summary |>
mutate(
relig = fct_reorder(relig, tvhours)
) |>
ggplot(aes(x = tvhours, y = relig)) +
geom_point()
rincome_summary <- gss_cat |>
group_by(rincome) |>
summarize(
age = mean(age, na.rm = TRUE),
n = n()
)
ggplot(rincome_summary, aes(x = age, y = fct_reorder(rincome, age))) +
geom_point()
ggplot(rincome_summary, aes(x = age, y = fct_relevel(rincome, "Not applicable"))) +
geom_point()
by_age <- gss_cat |>
filter(!is.na(age)) |>
count(age, marital) |>
group_by(age) |>
mutate(
prop = n / sum(n)
)
p1 <- ggplot(by_age, aes(x = age, y = prop, color = marital)) +
geom_line(linewidth = 1) +
scale_color_brewer(palette = "Set1")
p2 <- ggplot(by_age, aes(x = age, y = prop, color = fct_reorder2(marital, age, prop))) +
geom_line(linewidth = 1) +
scale_color_brewer(palette = "Set1") +
labs(color = "marital")
options(repr.plot.width = 20, repr.plot.height = 5)
grid.arrange(p1, p2, ncol=2)
gss_cat |>
mutate(marital = marital |> fct_infreq() |> fct_rev()) |>
ggplot(aes(x = marital)) +
geom_bar()
options(repr.plot.width = 10, repr.plot.height = 5)
-
There are some suspiciously high numbers in tvhours. Is the mean a good summary?
-
For each factor in gss_cat identify whether the order of the levels is arbitrary or principled.
-
Why did moving “Not applicable” to the front of the levels move it to the bottom of the plot?
16.5 Modifying factor levels
gss_cat |> count(partyid)
partyid | n |
---|---|
<fct> | <int> |
No answer | 154 |
Don't know | 1 |
Other party | 393 |
Strong republican | 2314 |
Not str republican | 3032 |
Ind,near rep | 1791 |
Independent | 4119 |
Ind,near dem | 2499 |
Not str democrat | 3690 |
Strong democrat | 3490 |
gss_cat |>
mutate(
partyid = fct_recode(partyid,
"Republican, strong" = "Strong republican",
"Republican, weak" = "Not str republican",
"Independent, near rep" = "Ind,near rep",
"Independent, near dem" = "Ind,near dem",
"Democrat, weak" = "Not str democrat",
"Democrat, strong" = "Strong democrat"
)
) |>
count(partyid)
partyid | n |
---|---|
<fct> | <int> |
No answer | 154 |
Don't know | 1 |
Other party | 393 |
Republican, strong | 2314 |
Republican, weak | 3032 |
Independent, near rep | 1791 |
Independent | 4119 |
Independent, near dem | 2499 |
Democrat, weak | 3690 |
Democrat, strong | 3490 |
gss_cat |>
mutate(
partyid = fct_recode(partyid,
"Republican, strong" = "Strong republican",
"Republican, weak" = "Not str republican",
"Independent, near rep" = "Ind,near rep",
"Independent, near dem" = "Ind,near dem",
"Democrat, weak" = "Not str democrat",
"Democrat, strong" = "Strong democrat",
"Other" = "No answer",
"Other" = "Don't know",
"Other" = "Other party"
)
)
year | marital | age | race | rincome | partyid | relig | denom | tvhours |
---|---|---|---|---|---|---|---|---|
<int> | <fct> | <int> | <fct> | <fct> | <fct> | <fct> | <fct> | <int> |
2000 | Never married | 26 | White | $8000 to 9999 | Independent, near rep | Protestant | Southern baptist | 12 |
2000 | Divorced | 48 | White | $8000 to 9999 | Republican, weak | Protestant | Baptist-dk which | NA |
2000 | Widowed | 67 | White | Not applicable | Independent | Protestant | No denomination | 2 |
2000 | Never married | 39 | White | Not applicable | Independent, near rep | Orthodox-christian | Not applicable | 4 |
2000 | Divorced | 25 | White | Not applicable | Democrat, weak | None | Not applicable | 1 |
2000 | Married | 25 | White | $20000 - 24999 | Democrat, strong | Protestant | Southern baptist | NA |
2000 | Never married | 36 | White | $25000 or more | Republican, weak | Christian | Not applicable | 3 |
2000 | Divorced | 44 | White | $7000 to 7999 | Independent, near dem | Protestant | Lutheran-mo synod | NA |
2000 | Married | 44 | White | $25000 or more | Democrat, weak | Protestant | Other | 0 |
2000 | Married | 47 | White | $25000 or more | Republican, strong | Protestant | Southern baptist | 3 |
2000 | Married | 53 | White | $25000 or more | Democrat, weak | Protestant | Other | 2 |
2000 | Married | 52 | White | $25000 or more | Independent, near rep | None | Not applicable | NA |
2000 | Married | 52 | White | $25000 or more | Democrat, strong | Protestant | Southern baptist | 1 |
2000 | Married | 51 | White | $25000 or more | Republican, strong | Protestant | United methodist | NA |
2000 | Divorced | 52 | White | $25000 or more | Independent, near dem | None | Not applicable | 1 |
2000 | Married | 40 | Black | $25000 or more | Democrat, strong | Protestant | Baptist-dk which | 7 |
2000 | Widowed | 77 | White | Not applicable | Republican, strong | Jewish | Not applicable | NA |
2000 | Never married | 44 | White | $25000 or more | Independent | None | Not applicable | 3 |
2000 | Married | 40 | White | $10000 - 14999 | Democrat, weak | Catholic | Not applicable | 3 |
2000 | Married | 45 | Black | Not applicable | Independent | Protestant | United methodist | NA |
2000 | Married | 48 | White | $25000 or more | Independent, near dem | Catholic | Not applicable | 1 |
2000 | Married | 49 | White | Refused | Republican, strong | Protestant | United methodist | 2 |
2000 | Never married | 19 | White | Not applicable | Independent | None | Not applicable | 2 |
2000 | Widowed | 54 | White | $25000 or more | Independent, near rep | Christian | Not applicable | 1 |
2000 | Widowed | 82 | White | Not applicable | Democrat, weak | Protestant | Other | 3 |
2000 | Widowed | 83 | White | Not applicable | Democrat, strong | Protestant | Episcopal | NA |
2000 | Widowed | 89 | White | Not applicable | Democrat, weak | Protestant | Other lutheran | 4 |
2000 | Widowed | 88 | White | Not applicable | Republican, strong | Protestant | Afr meth ep zion | NA |
2000 | Divorced | 72 | White | Not applicable | Democrat, strong | Protestant | Southern baptist | 7 |
2000 | Widowed | 82 | White | Not applicable | Independent | Protestant | Am bapt ch in usa | NA |
... | ... | ... | ... | ... | ... | ... | ... | ... |
2014 | Divorced | 38 | White | $3000 to 3999 | Republican, weak | Protestant | Other | 1 |
2014 | Widowed | 46 | White | $25000 or more | Democrat, strong | None | Not applicable | 2 |
2014 | Married | 49 | White | Not applicable | Independent, near rep | Protestant | Other | 6 |
2014 | Never married | 34 | White | $25000 or more | Independent | Protestant | United methodist | 2 |
2014 | Married | 54 | White | Not applicable | Independent | Protestant | Other | NA |
2014 | Married | 34 | White | $15000 - 19999 | Independent, near dem | Buddhism | Not applicable | 1 |
2014 | Married | 69 | White | Not applicable | Independent, near dem | Jewish | Not applicable | 3 |
2014 | Divorced | 36 | White | Not applicable | Independent | None | Not applicable | 0 |
2014 | Married | 65 | White | $25000 or more | Democrat, weak | None | Not applicable | 2 |
2014 | Married | 48 | White | $20000 - 24999 | Democrat, strong | Protestant | Other | 0 |
2014 | Married | 38 | White | $10000 - 14999 | Democrat, weak | Protestant | No denomination | 2 |
2014 | Never married | 30 | White | $4000 to 4999 | Independent, near dem | None | Not applicable | 2 |
2014 | Married | 48 | White | $8000 to 9999 | Republican, weak | Catholic | Not applicable | 0 |
2014 | Divorced | 49 | White | $25000 or more | Independent, near rep | Other | Not applicable | 2 |
2014 | Married | 54 | White | $25000 or more | Independent, near dem | Protestant | Other | NA |
2014 | Married | 49 | White | $25000 or more | Republican, weak | Catholic | Not applicable | NA |
2014 | Married | 53 | White | $25000 or more | Democrat, weak | None | Not applicable | 0 |
2014 | Married | 52 | White | $25000 or more | Democrat, weak | None | Not applicable | 1 |
2014 | Widowed | 82 | White | Not applicable | Democrat, strong | Protestant | Other | 2 |
2014 | Married | 63 | White | Not applicable | Independent, near dem | No answer | No answer | 2 |
2014 | Divorced | 54 | White | $25000 or more | Independent, near rep | Catholic | Not applicable | 3 |
2014 | Married | 62 | White | $25000 or more | Independent, near rep | Protestant | Other | NA |
2014 | Never married | 40 | White | $1000 to 2999 | Republican, weak | None | Not applicable | 2 |
2014 | Married | 33 | White | Not applicable | Independent | Christian | No denomination | 0 |
2014 | Widowed | 75 | White | Don't know | Republican, strong | Protestant | Baptist-dk which | 4 |
2014 | Widowed | 89 | White | Not applicable | Republican, weak | Protestant | United methodist | 3 |
2014 | Divorced | 56 | White | $25000 or more | Independent | None | Not applicable | 4 |
2014 | Never married | 24 | White | $10000 - 14999 | Independent, near dem | None | Not applicable | 4 |
2014 | Never married | 27 | White | $25000 or more | Democrat, weak | Catholic | Not applicable | NA |
2014 | Widowed | 71 | White | $20000 - 24999 | Independent, near rep | Protestant | Other | 2 |
gss_cat |>
mutate(
partyid = fct_collapse(partyid,
"other" = c("No answer", "Don't know", "Other party"),
"rep" = c("Strong republican", "Not str republican"),
"ind" = c("Ind,near rep", "Independent", "Ind,near dem"),
"dem" = c("Not str democrat", "Strong democrat")
)
) |>
count(partyid)
partyid | n |
---|---|
<fct> | <int> |
other | 548 |
rep | 5346 |
ind | 8409 |
dem | 7180 |
gss_cat |>
mutate(relig = fct_lump_lowfreq(relig)) |>
count(relig)
relig | n |
---|---|
<fct> | <int> |
Protestant | 10846 |
Other | 10637 |
gss_cat |>
mutate(relig = fct_lump_n(relig, n = 10)) |>
count(relig, sort = TRUE)
relig | n |
---|---|
<fct> | <int> |
Protestant | 10846 |
Catholic | 5124 |
None | 3523 |
Christian | 689 |
Other | 458 |
Jewish | 388 |
Buddhism | 147 |
Inter-nondenominational | 109 |
Moslem/islam | 104 |
Orthodox-christian | 95 |
Exercises
-
How have the proportions of people identifying as Democrat, Republican, and Independent changed over time?
-
How could you collapse rincome into a small set of categories?
-
Notice there are 9 groups (excluding other) in the fct_lump example above. Why not 10? (Hint: type ?fct_lump, and find the default for the argument other_level is “Other”.)
16.6 Ordered factors
ordered(c("a", "b", "c"))
<ol class=list-inline><li>a</li><li>b</li><li>c</li></ol>
16.7 Summary
C17. Dates and Times
library(tidyverse)
library(nycflights13)
17.2 Creating date/times
Year:
%Y
4 digit year%y
2 digit year
Month:
%m
Number%b
Abbreviated name%B
Full name
Day:
%d
One or two digits%e
Two digits
Time:
%H
24-hour hour%I
12-hour hour%p
AM/PM%M
Minutes%S
Seconds%OS
Seconds with decimal component%Z
Time zone name%z
Offset from UTC
Others:
%.
Skip one non-digit%*
Skip any number of non-digits
today()
now()
[1] "2024-03-12 11:34:12 AEST"
During import
csv <- "
date
01/02/15
"
read_csv(csv, col_types = cols(date = col_date("%m/%d/%y")))
read_csv(csv, col_types = cols(date = col_date("%d/%m/%y")))
read_csv(csv, col_types = cols(date = col_date("%y/%m/%d")))
date |
---|
<date> |
2015-01-02 |
date |
---|
<date> |
2015-02-01 |
date |
---|
<date> |
2001-02-15 |
From strings
ymd("2017-01-31")
mdy("January 31st, 2017")
dmy("31-Jan-2017")
ymd("2017-01-31", tz = "UTC")
[1] "2017-01-31 UTC"
From individual components
make_date()
make_datetime()
flights |>
select(year, month, day, hour, minute) |>
mutate(departure = make_datetime(year, month, day, hour, minute))
year | month | day | hour | minute | departure |
---|---|---|---|---|---|
<int> | <int> | <int> | <dbl> | <dbl> | <dttm> |
2013 | 1 | 1 | 5 | 15 | 2013-01-01 05:15:00 |
2013 | 1 | 1 | 5 | 29 | 2013-01-01 05:29:00 |
2013 | 1 | 1 | 5 | 40 | 2013-01-01 05:40:00 |
2013 | 1 | 1 | 5 | 45 | 2013-01-01 05:45:00 |
2013 | 1 | 1 | 6 | 0 | 2013-01-01 06:00:00 |
2013 | 1 | 1 | 5 | 58 | 2013-01-01 05:58:00 |
2013 | 1 | 1 | 6 | 0 | 2013-01-01 06:00:00 |
2013 | 1 | 1 | 6 | 0 | 2013-01-01 06:00:00 |
2013 | 1 | 1 | 6 | 0 | 2013-01-01 06:00:00 |
2013 | 1 | 1 | 6 | 0 | 2013-01-01 06:00:00 |
2013 | 1 | 1 | 6 | 0 | 2013-01-01 06:00:00 |
2013 | 1 | 1 | 6 | 0 | 2013-01-01 06:00:00 |
2013 | 1 | 1 | 6 | 0 | 2013-01-01 06:00:00 |
2013 | 1 | 1 | 6 | 0 | 2013-01-01 06:00:00 |
2013 | 1 | 1 | 6 | 0 | 2013-01-01 06:00:00 |
2013 | 1 | 1 | 5 | 59 | 2013-01-01 05:59:00 |
2013 | 1 | 1 | 6 | 0 | 2013-01-01 06:00:00 |
2013 | 1 | 1 | 6 | 0 | 2013-01-01 06:00:00 |
2013 | 1 | 1 | 6 | 0 | 2013-01-01 06:00:00 |
2013 | 1 | 1 | 6 | 0 | 2013-01-01 06:00:00 |
2013 | 1 | 1 | 6 | 10 | 2013-01-01 06:10:00 |
2013 | 1 | 1 | 6 | 5 | 2013-01-01 06:05:00 |
2013 | 1 | 1 | 6 | 10 | 2013-01-01 06:10:00 |
2013 | 1 | 1 | 6 | 10 | 2013-01-01 06:10:00 |
2013 | 1 | 1 | 6 | 7 | 2013-01-01 06:07:00 |
2013 | 1 | 1 | 6 | 0 | 2013-01-01 06:00:00 |
2013 | 1 | 1 | 6 | 0 | 2013-01-01 06:00:00 |
2013 | 1 | 1 | 6 | 10 | 2013-01-01 06:10:00 |
2013 | 1 | 1 | 6 | 15 | 2013-01-01 06:15:00 |
2013 | 1 | 1 | 6 | 15 | 2013-01-01 06:15:00 |
... | ... | ... | ... | ... | ... |
2013 | 9 | 30 | 21 | 25 | 2013-09-30 21:25:00 |
2013 | 9 | 30 | 21 | 29 | 2013-09-30 21:29:00 |
2013 | 9 | 30 | 21 | 30 | 2013-09-30 21:30:00 |
2013 | 9 | 30 | 20 | 59 | 2013-09-30 20:59:00 |
2013 | 9 | 30 | 21 | 40 | 2013-09-30 21:40:00 |
2013 | 9 | 30 | 21 | 40 | 2013-09-30 21:40:00 |
2013 | 9 | 30 | 21 | 29 | 2013-09-30 21:29:00 |
2013 | 9 | 30 | 21 | 45 | 2013-09-30 21:45:00 |
2013 | 9 | 30 | 21 | 37 | 2013-09-30 21:37:00 |
2013 | 9 | 30 | 21 | 56 | 2013-09-30 21:56:00 |
2013 | 9 | 30 | 21 | 59 | 2013-09-30 21:59:00 |
2013 | 9 | 30 | 18 | 45 | 2013-09-30 18:45:00 |
2013 | 9 | 30 | 22 | 5 | 2013-09-30 22:05:00 |
2013 | 9 | 30 | 21 | 40 | 2013-09-30 21:40:00 |
2013 | 9 | 30 | 20 | 59 | 2013-09-30 20:59:00 |
2013 | 9 | 30 | 22 | 45 | 2013-09-30 22:45:00 |
2013 | 9 | 30 | 21 | 13 | 2013-09-30 21:13:00 |
2013 | 9 | 30 | 20 | 1 | 2013-09-30 20:01:00 |
2013 | 9 | 30 | 22 | 45 | 2013-09-30 22:45:00 |
2013 | 9 | 30 | 22 | 45 | 2013-09-30 22:45:00 |
2013 | 9 | 30 | 22 | 50 | 2013-09-30 22:50:00 |
2013 | 9 | 30 | 22 | 46 | 2013-09-30 22:46:00 |
2013 | 9 | 30 | 22 | 55 | 2013-09-30 22:55:00 |
2013 | 9 | 30 | 23 | 59 | 2013-09-30 23:59:00 |
2013 | 9 | 30 | 18 | 42 | 2013-09-30 18:42:00 |
2013 | 9 | 30 | 14 | 55 | 2013-09-30 14:55:00 |
2013 | 9 | 30 | 22 | 0 | 2013-09-30 22:00:00 |
2013 | 9 | 30 | 12 | 10 | 2013-09-30 12:10:00 |
2013 | 9 | 30 | 11 | 59 | 2013-09-30 11:59:00 |
2013 | 9 | 30 | 8 | 40 | 2013-09-30 08:40:00 |
make_datetime_100 <- function(year, month, day, time) {
make_datetime(year, month, day, time %/% 100, time %% 100)
}
flights_dt <- flights |>
filter(!is.na(dep_time), !is.na(arr_time)) |>
mutate(
dep_time = make_datetime_100(year, month, day, dep_time),
arr_time = make_datetime_100(year, month, day, arr_time),
sched_dep_time = make_datetime_100(year, month, day, sched_dep_time),
sched_arr_time = make_datetime_100(year, month, day, sched_arr_time)
) |>
select(origin, dest, ends_with("delay"), ends_with("time"))
flights_dt
origin | dest | dep_delay | arr_delay | dep_time | sched_dep_time | arr_time | sched_arr_time | air_time |
---|---|---|---|---|---|---|---|---|
<chr> | <chr> | <dbl> | <dbl> | <dttm> | <dttm> | <dttm> | <dttm> | <dbl> |
EWR | IAH | 2 | 11 | 2013-01-01 05:17:00 | 2013-01-01 05:15:00 | 2013-01-01 08:30:00 | 2013-01-01 08:19:00 | 227 |
LGA | IAH | 4 | 20 | 2013-01-01 05:33:00 | 2013-01-01 05:29:00 | 2013-01-01 08:50:00 | 2013-01-01 08:30:00 | 227 |
JFK | MIA | 2 | 33 | 2013-01-01 05:42:00 | 2013-01-01 05:40:00 | 2013-01-01 09:23:00 | 2013-01-01 08:50:00 | 160 |
JFK | BQN | -1 | -18 | 2013-01-01 05:44:00 | 2013-01-01 05:45:00 | 2013-01-01 10:04:00 | 2013-01-01 10:22:00 | 183 |
LGA | ATL | -6 | -25 | 2013-01-01 05:54:00 | 2013-01-01 06:00:00 | 2013-01-01 08:12:00 | 2013-01-01 08:37:00 | 116 |
EWR | ORD | -4 | 12 | 2013-01-01 05:54:00 | 2013-01-01 05:58:00 | 2013-01-01 07:40:00 | 2013-01-01 07:28:00 | 150 |
EWR | FLL | -5 | 19 | 2013-01-01 05:55:00 | 2013-01-01 06:00:00 | 2013-01-01 09:13:00 | 2013-01-01 08:54:00 | 158 |
LGA | IAD | -3 | -14 | 2013-01-01 05:57:00 | 2013-01-01 06:00:00 | 2013-01-01 07:09:00 | 2013-01-01 07:23:00 | 53 |
JFK | MCO | -3 | -8 | 2013-01-01 05:57:00 | 2013-01-01 06:00:00 | 2013-01-01 08:38:00 | 2013-01-01 08:46:00 | 140 |
LGA | ORD | -2 | 8 | 2013-01-01 05:58:00 | 2013-01-01 06:00:00 | 2013-01-01 07:53:00 | 2013-01-01 07:45:00 | 138 |
JFK | PBI | -2 | -2 | 2013-01-01 05:58:00 | 2013-01-01 06:00:00 | 2013-01-01 08:49:00 | 2013-01-01 08:51:00 | 149 |
JFK | TPA | -2 | -3 | 2013-01-01 05:58:00 | 2013-01-01 06:00:00 | 2013-01-01 08:53:00 | 2013-01-01 08:56:00 | 158 |
JFK | LAX | -2 | 7 | 2013-01-01 05:58:00 | 2013-01-01 06:00:00 | 2013-01-01 09:24:00 | 2013-01-01 09:17:00 | 345 |
EWR | SFO | -2 | -14 | 2013-01-01 05:58:00 | 2013-01-01 06:00:00 | 2013-01-01 09:23:00 | 2013-01-01 09:37:00 | 361 |
LGA | DFW | -1 | 31 | 2013-01-01 05:59:00 | 2013-01-01 06:00:00 | 2013-01-01 09:41:00 | 2013-01-01 09:10:00 | 257 |
JFK | BOS | 0 | -4 | 2013-01-01 05:59:00 | 2013-01-01 05:59:00 | 2013-01-01 07:02:00 | 2013-01-01 07:06:00 | 44 |
EWR | LAS | -1 | -8 | 2013-01-01 05:59:00 | 2013-01-01 06:00:00 | 2013-01-01 08:54:00 | 2013-01-01 09:02:00 | 337 |
LGA | FLL | 0 | -7 | 2013-01-01 06:00:00 | 2013-01-01 06:00:00 | 2013-01-01 08:51:00 | 2013-01-01 08:58:00 | 152 |
LGA | ATL | 0 | 12 | 2013-01-01 06:00:00 | 2013-01-01 06:00:00 | 2013-01-01 08:37:00 | 2013-01-01 08:25:00 | 134 |
EWR | PBI | 1 | -6 | 2013-01-01 06:01:00 | 2013-01-01 06:00:00 | 2013-01-01 08:44:00 | 2013-01-01 08:50:00 | 147 |
LGA | MSP | -8 | -8 | 2013-01-01 06:02:00 | 2013-01-01 06:10:00 | 2013-01-01 08:12:00 | 2013-01-01 08:20:00 | 170 |
LGA | DTW | -3 | 16 | 2013-01-01 06:02:00 | 2013-01-01 06:05:00 | 2013-01-01 08:21:00 | 2013-01-01 08:05:00 | 105 |
EWR | MIA | -4 | -12 | 2013-01-01 06:06:00 | 2013-01-01 06:10:00 | 2013-01-01 08:58:00 | 2013-01-01 09:10:00 | 152 |
JFK | ATL | -4 | -8 | 2013-01-01 06:06:00 | 2013-01-01 06:10:00 | 2013-01-01 08:37:00 | 2013-01-01 08:45:00 | 128 |
EWR | MIA | 0 | -17 | 2013-01-01 06:07:00 | 2013-01-01 06:07:00 | 2013-01-01 08:58:00 | 2013-01-01 09:15:00 | 157 |
EWR | ORD | 8 | 32 | 2013-01-01 06:08:00 | 2013-01-01 06:00:00 | 2013-01-01 08:07:00 | 2013-01-01 07:35:00 | 139 |
JFK | SFO | 11 | 14 | 2013-01-01 06:11:00 | 2013-01-01 06:00:00 | 2013-01-01 09:45:00 | 2013-01-01 09:31:00 | 366 |
JFK | RSW | 3 | 4 | 2013-01-01 06:13:00 | 2013-01-01 06:10:00 | 2013-01-01 09:25:00 | 2013-01-01 09:21:00 | 175 |
JFK | SJU | 0 | -21 | 2013-01-01 06:15:00 | 2013-01-01 06:15:00 | 2013-01-01 10:39:00 | 2013-01-01 11:00:00 | 182 |
EWR | ATL | 0 | -9 | 2013-01-01 06:15:00 | 2013-01-01 06:15:00 | 2013-01-01 08:33:00 | 2013-01-01 08:42:00 | 120 |
... | ... | ... | ... | ... | ... | ... | ... | ... |
LGA | DTW | 5 | -9 | 2013-09-30 21:15:00 | 2013-09-30 21:10:00 | 2013-09-30 22:46:00 | 2013-09-30 22:55:00 | 72 |
EWR | SDF | -8 | -20 | 2013-09-30 21:16:00 | 2013-09-30 21:24:00 | 2013-09-30 23:17:00 | 2013-09-30 23:37:00 | 94 |
EWR | MCI | 74 | 58 | 2013-09-30 21:19:00 | 2013-09-30 20:05:00 | 2013-09-30 23:10:00 | 2013-09-30 22:12:00 | 147 |
JFK | JAX | -1 | -24 | 2013-09-30 21:19:00 | 2013-09-30 21:20:00 | 2013-09-30 23:30:00 | 2013-09-30 23:54:00 | 113 |
JFK | LAX | 21 | -25 | 2013-09-30 21:21:00 | 2013-09-30 21:00:00 | 2013-09-30 23:49:00 | 2013-09-30 00:14:00 | 296 |
EWR | DCA | -5 | -11 | 2013-09-30 21:22:00 | 2013-09-30 21:27:00 | 2013-09-30 22:26:00 | 2013-09-30 22:37:00 | 35 |
LGA | CHO | -2 | -24 | 2013-09-30 21:23:00 | 2013-09-30 21:25:00 | 2013-09-30 22:23:00 | 2013-09-30 22:47:00 | 45 |
EWR | CLT | -2 | -9 | 2013-09-30 21:27:00 | 2013-09-30 21:29:00 | 2013-09-30 23:14:00 | 2013-09-30 23:23:00 | 72 |
JFK | DEN | -2 | -31 | 2013-09-30 21:28:00 | 2013-09-30 21:30:00 | 2013-09-30 23:28:00 | 2013-09-30 23:59:00 | 213 |
LGA | RIC | 30 | -2 | 2013-09-30 21:29:00 | 2013-09-30 20:59:00 | 2013-09-30 22:30:00 | 2013-09-30 22:32:00 | 45 |
JFK | DCA | -9 | -30 | 2013-09-30 21:31:00 | 2013-09-30 21:40:00 | 2013-09-30 22:25:00 | 2013-09-30 22:55:00 | 36 |
JFK | LAX | 0 | -30 | 2013-09-30 21:40:00 | 2013-09-30 21:40:00 | 2013-09-30 00:10:00 | 2013-09-30 00:40:00 | 298 |
EWR | PWM | 13 | 11 | 2013-09-30 21:42:00 | 2013-09-30 21:29:00 | 2013-09-30 22:50:00 | 2013-09-30 22:39:00 | 47 |
JFK | SJU | 0 | -25 | 2013-09-30 21:45:00 | 2013-09-30 21:45:00 | 2013-09-30 01:15:00 | 2013-09-30 01:40:00 | 192 |
LGA | FLL | 10 | 3 | 2013-09-30 21:47:00 | 2013-09-30 21:37:00 | 2013-09-30 00:30:00 | 2013-09-30 00:27:00 | 139 |
EWR | BOS | -7 | -23 | 2013-09-30 21:49:00 | 2013-09-30 21:56:00 | 2013-09-30 22:45:00 | 2013-09-30 23:08:00 | 37 |
EWR | MHT | -9 | -16 | 2013-09-30 21:50:00 | 2013-09-30 21:59:00 | 2013-09-30 22:50:00 | 2013-09-30 23:06:00 | 39 |
JFK | BUF | 194 | 194 | 2013-09-30 21:59:00 | 2013-09-30 18:45:00 | 2013-09-30 23:44:00 | 2013-09-30 20:30:00 | 50 |
LGA | BGR | -2 | 8 | 2013-09-30 22:03:00 | 2013-09-30 22:05:00 | 2013-09-30 23:39:00 | 2013-09-30 23:31:00 | 61 |
LGA | BNA | 27 | 7 | 2013-09-30 22:07:00 | 2013-09-30 21:40:00 | 2013-09-30 22:57:00 | 2013-09-30 22:50:00 | 97 |
EWR | STL | 72 | 57 | 2013-09-30 22:11:00 | 2013-09-30 20:59:00 | 2013-09-30 23:39:00 | 2013-09-30 22:42:00 | 120 |
JFK | PWM | -14 | -21 | 2013-09-30 22:31:00 | 2013-09-30 22:45:00 | 2013-09-30 23:35:00 | 2013-09-30 23:56:00 | 48 |
EWR | SFO | 80 | 42 | 2013-09-30 22:33:00 | 2013-09-30 21:13:00 | 2013-09-30 01:12:00 | 2013-09-30 00:30:00 | 318 |
JFK | MCO | 154 | 130 | 2013-09-30 22:35:00 | 2013-09-30 20:01:00 | 2013-09-30 00:59:00 | 2013-09-30 22:49:00 | 123 |
JFK | BTV | -8 | -8 | 2013-09-30 22:37:00 | 2013-09-30 22:45:00 | 2013-09-30 23:45:00 | 2013-09-30 23:53:00 | 43 |
JFK | SYR | -5 | -17 | 2013-09-30 22:40:00 | 2013-09-30 22:45:00 | 2013-09-30 23:34:00 | 2013-09-30 23:51:00 | 41 |
JFK | BUF | -10 | -20 | 2013-09-30 22:40:00 | 2013-09-30 22:50:00 | 2013-09-30 23:47:00 | 2013-09-30 00:07:00 | 52 |
JFK | ROC | -5 | -16 | 2013-09-30 22:41:00 | 2013-09-30 22:46:00 | 2013-09-30 23:45:00 | 2013-09-30 00:01:00 | 47 |
JFK | BOS | 12 | 1 | 2013-09-30 23:07:00 | 2013-09-30 22:55:00 | 2013-09-30 23:59:00 | 2013-09-30 23:58:00 | 33 |
JFK | PSE | -10 | -25 | 2013-09-30 23:49:00 | 2013-09-30 23:59:00 | 2013-09-30 03:25:00 | 2013-09-30 03:50:00 | 196 |
flights_dt |>
ggplot(aes(x = dep_time)) +
geom_freqpoly(binwidth = 86400) # 86400 seconds = 1 day
# or within a single day
flights_dt |>
filter(dep_time < ymd(20130102)) |>
ggplot(aes(x = dep_time)) +
geom_freqpoly(binwidth = 600) # 600 s = 10 minutes
From other types
as_date()
as_datetime()
as_datetime(today())
as_date(now())
[1] "2024-03-12 UTC"
as_datetime(86300)
as_date(365 * 10 + 2)
[1] "1970-01-01 23:58:20 UTC"
Exercises
today(tzone = "UTC")
- For each of the following date-times, show how you’d parse it using a readr column specification and a lubridate function.
d1 <- "January 1, 2010"
d2 <- "2015-Mar-07"
d3 <- "06-Jun-2017"
d4 <- c("August 19 (2015)", "July 1 (2015)")
d5 <- "12/30/14" # Dec 30, 2014
t1 <- "1705"
t2 <- "11:15:10.12 PM"
mdy(d1)
as_date(d2)
dmy(d3)
mdy(d4)
mdy(d5)
as_datetime(as.numeric(t1))
hms(t2)
<ol class=list-inline><li></li><li></li></ol>
[1] "1970-01-01 00:28:25 UTC"
11H 15M 10.12S
17.3 Date-time components
datetime <- ymd_hms(now()) # "2026-07-08 12:34:56"
year(datetime)
#> 2026
month(datetime)
#> 7
mday(datetime)
#> 8
yday(datetime)
#> 189
wday(datetime)
#> 4
2024
3
12
72
3
flights_dt |>
mutate(wday = wday(dep_time, label = TRUE)) |>
ggplot(aes(x = wday)) +
geom_bar()
flights_dt |>
mutate(minute = minute(dep_time)) |>
group_by(minute) |>
summarize(
avg_delay = mean(dep_delay, na.rm = TRUE),
n = n()
) |>
ggplot(aes(x = minute, y = avg_delay)) +
geom_line()
sched_dep <- flights_dt |>
mutate(minute = minute(sched_dep_time)) |>
group_by(minute) |>
summarize(
avg_delay = mean(arr_delay, na.rm = TRUE),
n = n()
)
ggplot(sched_dep, aes(x = minute, y = avg_delay)) +
geom_line()
flights_dt |>
mutate(dep_hour = hms::as_hms(dep_time - floor_date(dep_time, "day"))) |>
ggplot(aes(x = dep_hour)) +
geom_freqpoly(binwidth = 60 * 30)
Modify components
datetime <- ymd_hms("2026-07-08 12:34:56")
datetime
year(datetime) <- 2030
datetime
month(datetime) <- 01
datetime
hour(datetime) <- hour(datetime) + 1
datetime
update(datetime, year = 2030, month = 2, mday = 2, hour = 2)
datetime
[1] "2026-07-08 12:34:56 UTC"
[1] "2030-07-08 12:34:56 UTC"
[1] "2030-01-08 12:34:56 UTC"
[1] "2030-01-08 13:34:56 UTC"
[1] "2030-02-02 02:34:56 UTC"
[1] "2030-01-08 13:34:56 UTC"
Exercises
- How does the distribution of flight times within a day change over the course of the year?
- Compare dep_time,
sched_dep_time
anddep_delay
. Are they consistent? Explain your findings. - Compare air_time with the duration between the departure and arrival. Explain your findings. (Hint: consider the location of the airport.)
- How does the average delay time change over the course of a day? Should you use
dep_time
orsched_dep_time
? Why? - On what day of the week should you leave if you want to minimise the chance of a delay?
- What makes the distribution of
diamonds$carat
andflights$sched_dep_time
similar? - Confirm our hypothesis that the early departures of flights in minutes 20-30 and 50-60 are caused by scheduled flights that leave early. Hint: create a binary variable that tells you whether or not a flight was delayed.
17.4 Time spans
- Durations, which represent an exact number of seconds.
- Periods, which represent human units like weeks and months.
- Intervals, which represent a starting and ending point.
Duration
# How old is Hadley?
h_age <- today() - ymd("1979-05-16")
h_age
Time difference of 16372 days
as.duration(h_age)
1414540800s (~44.82 years)
dseconds(15)
dminutes(10)
dhours(c(12, 24))
ddays(0:5)
dweeks(3)
dyears(1)
15s
600s (~10 minutes)
<ol class=list-inline><li>43200s (~12 hours)</li><li>86400s (~1 days)</li></ol>
<ol class=list-inline><li>0s</li><li>86400s (~1 days)</li><li>172800s (~2 days)</li><li>259200s (~3 days)</li><li>345600s (~4 days)</li><li>432000s (~5 days)</li></ol>
1814400s (~3 weeks)
31557600s (~1 years)
# you can add and multiply durations:
2 * dyears(1)
dyears(1) + dweeks(12) + dhours(15)
## add and subtract durations to and from days
tomorrow <- today() + ddays(1)
last_year <- today() - dyears(1)
63115200s (~2 years)
38869200s (~1.23 years)
one_am <- ymd_hms("2026-03-08 01:00:00", tz = "America/New_York")
one_am
# be careful with time zone (on that day timezone has changed)
one_am + ddays(1)
[1] "2026-03-08 01:00:00 EST"
[1] "2026-03-09 02:00:00 EDT"
Periods
one_am <- ymd_hms("2026-03-08 01:00:00", tz = "America/New_York")
hours(c(12, 24))
days(7)
months(1:6)
<ol class=list-inline><li>12H 0M 0S</li><li>24H 0M 0S</li></ol>
7d 0H 0M 0S
<ol class=list-inline><li>1m 0d 0H 0M 0S</li><li>2m 0d 0H 0M 0S</li><li>3m 0d 0H 0M 0S</li><li>4m 0d 0H 0M 0S</li><li>5m 0d 0H 0M 0S</li><li>6m 0d 0H 0M 0S</li></ol>
10 * (months(6) + days(1))
days(50) + hours(25) + minutes(2)
60m 10d 0H 0M 0S
50d 25H 2M 0S
# A leap year
ymd("2024-01-01") + dyears(1)
ymd("2024-01-01") + years(1)
# Daylight saving time
one_am + ddays(1)
one_am + days(1)
[1] "2024-12-31 06:00:00 UTC"
[1] "2026-03-09 02:00:00 EDT"
[1] "2026-03-09 01:00:00 EDT"
flights_dt |>
filter(arr_time < dep_time)
origin | dest | dep_delay | arr_delay | dep_time | sched_dep_time | arr_time | sched_arr_time | air_time |
---|---|---|---|---|---|---|---|---|
<chr> | <chr> | <dbl> | <dbl> | <dttm> | <dttm> | <dttm> | <dttm> | <dbl> |
EWR | BQN | 9 | -4 | 2013-01-01 19:29:00 | 2013-01-01 19:20:00 | 2013-01-01 00:03:00 | 2013-01-01 00:07:00 | 192 |
JFK | DFW | 59 | NA | 2013-01-01 19:39:00 | 2013-01-01 18:40:00 | 2013-01-01 00:29:00 | 2013-01-01 21:51:00 | NA |
EWR | TPA | -2 | 9 | 2013-01-01 20:58:00 | 2013-01-01 21:00:00 | 2013-01-01 00:08:00 | 2013-01-01 23:59:00 | 159 |
EWR | SJU | -6 | -12 | 2013-01-01 21:02:00 | 2013-01-01 21:08:00 | 2013-01-01 01:46:00 | 2013-01-01 01:58:00 | 199 |
EWR | SFO | 11 | -14 | 2013-01-01 21:08:00 | 2013-01-01 20:57:00 | 2013-01-01 00:25:00 | 2013-01-01 00:39:00 | 354 |
LGA | FLL | -10 | -2 | 2013-01-01 21:20:00 | 2013-01-01 21:30:00 | 2013-01-01 00:16:00 | 2013-01-01 00:18:00 | 160 |
EWR | MCO | 41 | 43 | 2013-01-01 21:21:00 | 2013-01-01 20:40:00 | 2013-01-01 00:06:00 | 2013-01-01 23:23:00 | 143 |
JFK | LAX | -7 | -24 | 2013-01-01 21:28:00 | 2013-01-01 21:35:00 | 2013-01-01 00:26:00 | 2013-01-01 00:50:00 | 338 |
EWR | FLL | 49 | 28 | 2013-01-01 21:34:00 | 2013-01-01 20:45:00 | 2013-01-01 00:20:00 | 2013-01-01 23:52:00 | 152 |
EWR | FLL | -9 | -14 | 2013-01-01 21:36:00 | 2013-01-01 21:45:00 | 2013-01-01 00:25:00 | 2013-01-01 00:39:00 | 154 |
JFK | SJU | 5 | -14 | 2013-01-01 21:40:00 | 2013-01-01 21:35:00 | 2013-01-01 02:10:00 | 2013-01-01 02:24:00 | 189 |
JFK | MCO | 2 | 2 | 2013-01-01 21:57:00 | 2013-01-01 21:55:00 | 2013-01-01 00:43:00 | 2013-01-01 00:41:00 | 140 |
EWR | MIA | 285 | 246 | 2013-01-01 22:05:00 | 2013-01-01 17:20:00 | 2013-01-01 00:46:00 | 2013-01-01 20:40:00 | 146 |
JFK | PBI | 24 | 21 | 2013-01-01 22:09:00 | 2013-01-01 21:45:00 | 2013-01-01 00:58:00 | 2013-01-01 00:37:00 | 143 |
JFK | SJU | -12 | -26 | 2013-01-01 22:17:00 | 2013-01-01 22:29:00 | 2013-01-01 02:49:00 | 2013-01-01 03:15:00 | 191 |
JFK | TPA | 47 | 73 | 2013-01-01 22:17:00 | 2013-01-01 21:30:00 | 2013-01-01 01:40:00 | 2013-01-01 00:27:00 | 163 |
JFK | FLL | 30 | 49 | 2013-01-01 22:29:00 | 2013-01-01 21:59:00 | 2013-01-01 01:49:00 | 2013-01-01 01:00:00 | 153 |
JFK | ROC | 21 | 23 | 2013-01-01 23:06:00 | 2013-01-01 22:45:00 | 2013-01-01 00:28:00 | 2013-01-01 00:05:00 | 59 |
JFK | BTV | 22 | 35 | 2013-01-01 23:07:00 | 2013-01-01 22:45:00 | 2013-01-01 00:32:00 | 2013-01-01 23:57:00 | 59 |
JFK | BUF | 15 | 9 | 2013-01-01 23:10:00 | 2013-01-01 22:55:00 | 2013-01-01 00:24:00 | 2013-01-01 00:15:00 | 57 |
EWR | DCA | 192 | 191 | 2013-01-01 23:12:00 | 2013-01-01 20:00:00 | 2013-01-01 00:21:00 | 2013-01-01 21:10:00 | 44 |
EWR | BTV | 83 | 69 | 2013-01-01 23:23:00 | 2013-01-01 22:00:00 | 2013-01-01 00:22:00 | 2013-01-01 23:13:00 | 44 |
JFK | LAS | 116 | 73 | 2013-01-01 23:26:00 | 2013-01-01 21:30:00 | 2013-01-01 01:31:00 | 2013-01-01 00:18:00 | 290 |
JFK | SYR | 37 | 33 | 2013-01-01 23:27:00 | 2013-01-01 22:50:00 | 2013-01-01 00:32:00 | 2013-01-01 23:59:00 | 45 |
EWR | MCI | 379 | 456 | 2013-01-01 23:43:00 | 2013-01-01 17:24:00 | 2013-01-01 03:14:00 | 2013-01-01 19:38:00 | 222 |
JFK | PSE | -6 | -20 | 2013-01-01 23:53:00 | 2013-01-01 23:59:00 | 2013-01-01 04:25:00 | 2013-01-01 04:45:00 | 195 |
JFK | SJU | -6 | -24 | 2013-01-01 23:53:00 | 2013-01-01 23:59:00 | 2013-01-01 04:18:00 | 2013-01-01 04:42:00 | 185 |
JFK | BQN | -3 | -12 | 2013-01-01 23:56:00 | 2013-01-01 23:59:00 | 2013-01-01 04:25:00 | 2013-01-01 04:37:00 | 186 |
JFK | AUS | 5 | 32 | 2013-01-02 20:30:00 | 2013-01-02 20:25:00 | 2013-01-02 00:13:00 | 2013-01-02 23:41:00 | 259 |
EWR | AUS | 2 | 17 | 2013-01-02 20:30:00 | 2013-01-02 20:28:00 | 2013-01-02 00:08:00 | 2013-01-02 23:51:00 | 252 |
... | ... | ... | ... | ... | ... | ... | ... | ... |
JFK | SJU | 32 | 25 | 2013-09-28 22:17:00 | 2013-09-28 21:45:00 | 2013-09-28 02:05:00 | 2013-09-28 01:40:00 | 213 |
JFK | BTV | 10 | 15 | 2013-09-28 22:55:00 | 2013-09-28 22:45:00 | 2013-09-28 00:08:00 | 2013-09-28 23:53:00 | 48 |
JFK | PSE | -8 | 17 | 2013-09-28 23:51:00 | 2013-09-28 23:59:00 | 2013-09-28 04:07:00 | 2013-09-28 03:50:00 | 218 |
JFK | SJU | -9 | -21 | 2013-09-29 20:36:00 | 2013-09-29 20:45:00 | 2013-09-29 00:32:00 | 2013-09-29 00:53:00 | 212 |
JFK | PDX | -1 | 28 | 2013-09-29 20:54:00 | 2013-09-29 20:55:00 | 2013-09-29 00:27:00 | 2013-09-29 23:59:00 | 346 |
JFK | LAX | -2 | 3 | 2013-09-29 20:58:00 | 2013-09-29 21:00:00 | 2013-09-29 00:17:00 | 2013-09-29 00:14:00 | 326 |
JFK | AUS | 5 | 3 | 2013-09-29 20:59:00 | 2013-09-29 20:54:00 | 2013-09-29 00:01:00 | 2013-09-29 23:58:00 | 202 |
JFK | SEA | -3 | -11 | 2013-09-29 21:02:00 | 2013-09-29 21:05:00 | 2013-09-29 00:06:00 | 2013-09-29 00:17:00 | 336 |
EWR | BQN | 37 | 21 | 2013-09-29 21:32:00 | 2013-09-29 20:55:00 | 2013-09-29 01:17:00 | 2013-09-29 00:56:00 | 206 |
JFK | LAX | -5 | -34 | 2013-09-29 21:35:00 | 2013-09-29 21:40:00 | 2013-09-29 00:06:00 | 2013-09-29 00:40:00 | 295 |
JFK | SJU | 9 | 11 | 2013-09-29 21:54:00 | 2013-09-29 21:45:00 | 2013-09-29 01:51:00 | 2013-09-29 01:40:00 | 208 |
LGA | MCO | 64 | 39 | 2013-09-29 22:04:00 | 2013-09-29 21:00:00 | 2013-09-29 00:23:00 | 2013-09-29 23:44:00 | 117 |
JFK | DEN | 39 | 3 | 2013-09-29 22:09:00 | 2013-09-29 21:30:00 | 2013-09-29 00:02:00 | 2013-09-29 23:59:00 | 208 |
LGA | CVG | 73 | 54 | 2013-09-29 22:23:00 | 2013-09-29 21:10:00 | 2013-09-29 00:13:00 | 2013-09-29 23:19:00 | 87 |
LGA | FLL | 47 | 20 | 2013-09-29 22:24:00 | 2013-09-29 21:37:00 | 2013-09-29 00:47:00 | 2013-09-29 00:27:00 | 128 |
EWR | MSP | 131 | 110 | 2013-09-29 22:45:00 | 2013-09-29 20:34:00 | 2013-09-29 00:27:00 | 2013-09-29 22:37:00 | 141 |
JFK | LAX | 173 | 141 | 2013-09-29 22:53:00 | 2013-09-29 20:00:00 | 2013-09-29 01:21:00 | 2013-09-29 23:00:00 | 303 |
JFK | DCA | 94 | 76 | 2013-09-29 23:14:00 | 2013-09-29 21:40:00 | 2013-09-29 00:11:00 | 2013-09-29 22:55:00 | 39 |
JFK | PWM | 33 | 19 | 2013-09-29 23:18:00 | 2013-09-29 22:45:00 | 2013-09-29 00:15:00 | 2013-09-29 23:56:00 | 44 |
LGA | RDU | 144 | 124 | 2013-09-29 23:24:00 | 2013-09-29 21:00:00 | 2013-09-29 00:39:00 | 2013-09-29 22:35:00 | 61 |
LGA | FLL | 225 | 183 | 2013-09-29 23:27:00 | 2013-09-29 19:42:00 | 2013-09-29 01:53:00 | 2013-09-29 22:50:00 | 129 |
JFK | PSE | -3 | -9 | 2013-09-29 23:56:00 | 2013-09-29 23:59:00 | 2013-09-29 03:41:00 | 2013-09-29 03:50:00 | 204 |
JFK | SJU | 5 | -33 | 2013-09-30 20:50:00 | 2013-09-30 20:45:00 | 2013-09-30 00:20:00 | 2013-09-30 00:53:00 | 188 |
JFK | PDX | 15 | 52 | 2013-09-30 21:10:00 | 2013-09-30 20:55:00 | 2013-09-30 00:51:00 | 2013-09-30 23:59:00 | 361 |
JFK | LAX | 0 | -30 | 2013-09-30 21:40:00 | 2013-09-30 21:40:00 | 2013-09-30 00:10:00 | 2013-09-30 00:40:00 | 298 |
JFK | SJU | 0 | -25 | 2013-09-30 21:45:00 | 2013-09-30 21:45:00 | 2013-09-30 01:15:00 | 2013-09-30 01:40:00 | 192 |
LGA | FLL | 10 | 3 | 2013-09-30 21:47:00 | 2013-09-30 21:37:00 | 2013-09-30 00:30:00 | 2013-09-30 00:27:00 | 139 |
EWR | SFO | 80 | 42 | 2013-09-30 22:33:00 | 2013-09-30 21:13:00 | 2013-09-30 01:12:00 | 2013-09-30 00:30:00 | 318 |
JFK | MCO | 154 | 130 | 2013-09-30 22:35:00 | 2013-09-30 20:01:00 | 2013-09-30 00:59:00 | 2013-09-30 22:49:00 | 123 |
JFK | PSE | -10 | -25 | 2013-09-30 23:49:00 | 2013-09-30 23:59:00 | 2013-09-30 03:25:00 | 2013-09-30 03:50:00 | 196 |
flights_dt <- flights_dt |>
mutate(
overnight = arr_time < dep_time,
arr_time = arr_time + days(overnight),
sched_arr_time = sched_arr_time + days(overnight)
)
flights_dt |>
filter(arr_time < dep_time)
origin | dest | dep_delay | arr_delay | dep_time | sched_dep_time | arr_time | sched_arr_time | air_time | overnight |
---|---|---|---|---|---|---|---|---|---|
<chr> | <chr> | <dbl> | <dbl> | <dttm> | <dttm> | <dttm> | <dttm> | <dbl> | <lgl> |
Intervals
dyears(1) / ddays(365) # 1 dyear is 365.25days
years(1) / days(1)
1.00068493150685
365.25
y2023 <- ymd("2023-01-01") %--% ymd("2024-01-01")
y2024 <- ymd("2024-01-01") %--% ymd("2025-01-01")
y2023 / days(1)
y2024 / days(1)
365
366
Exercises
-
Explain days(!overnight) and days(overnight) to someone who has just started learning R. What is the key fact you need to know?
-
Create a vector of dates giving the first day of every month in 2015. Create a vector of dates giving the first day of every month in the current year.
-
Write a function that given your birthday (as a date), returns how old you are in years.
-
Why can’t
(today() %--% (today() + years(1))) / months(1)
work?
(today() %--% (today() + years(1))) / months(1)
12
17.5 Time zones
Sys.timezone() # {area}/{location}: {continent}/{city} or {ocian}/{city}
‘Australia/Brisbane’
length(OlsonNames())
head(OlsonNames())
597
<ol class=list-inline><li>‘Africa/Abidjan’</li><li>‘Africa/Accra’</li><li>‘Africa/Addis_Ababa’</li><li>‘Africa/Algiers’</li><li>‘Africa/Asmara’</li><li>‘Africa/Asmera’</li></ol>
x1 <- ymd_hms("2024-06-01 12:00:00", tz = "America/New_York")
x1
x2 <- ymd_hms("2024-06-01 18:00:00", tz = "Europe/Copenhagen")
x2
x3 <- ymd_hms("2024-06-02 04:00:00", tz = "Pacific/Auckland")
x3
x1 - x2
#> Time difference of 0 secs
x1 - x3
#> Time difference of 0 secs
[1] "2024-06-01 12:00:00 EDT"
[1] "2024-06-01 18:00:00 CEST"
[1] "2024-06-02 04:00:00 NZST"
Time difference of 0 secs
Time difference of 0 secs
You can change the time zone in two ways:
- Keep the instant in time the same, and change how it’s displayed. Use this when the instant is correct, but you want a more natural display.
- Change the underlying instant in time. Use this when you have an instant that has been labelled with the incorrect time zone, and you need to fix it.
x4 <- c(x1, x2, x3)
x4
x4a <- with_tz(x4, tzone = "Australia/Lord_Howe")
x4a
x4a - x4
[1] "2024-06-01 12:00:00 EDT" "2024-06-01 12:00:00 EDT"
[3] "2024-06-01 12:00:00 EDT"
[1] "2024-06-02 02:30:00 +1030" "2024-06-02 02:30:00 +1030"
[3] "2024-06-02 02:30:00 +1030"
Time differences in secs
[1] 0 0 0
x4b <- force_tz(x4, tzone = "Australia/Lord_Howe")
x4b
x4b - x4
[1] "2024-06-01 12:00:00 +1030" "2024-06-01 12:00:00 +1030"
[3] "2024-06-01 12:00:00 +1030"
Time differences in hours
[1] -14.5 -14.5 -14.5
C18. Missing Values
library(tidyverse)
18.2 Explicit missing values
- Last observation carried forward (repeated)
- Fixed values
treatment <- tribble(
~person, ~treatment, ~response,
"Derrick Whitmore", 1, 7,
NA, 2, 10,
NA, 3, NA,
"Katherine Burke", 1, 4
)
treatment |>
fill(everything()) # method: last observation carried forward!
person | treatment | response |
---|---|---|
<chr> | <dbl> | <dbl> |
Derrick Whitmore | 1 | 7 |
Derrick Whitmore | 2 | 10 |
Derrick Whitmore | 3 | 10 |
Katherine Burke | 1 | 4 |
x <- c(1, 4, 5, 7, NA)
coalesce(x, 0)
<ol class=list-inline><li>1</li><li>4</li><li>5</li><li>7</li><li>0</li></ol>
18.3 Implicit missing values
An explicit missing value is the presence of an absence.
An implicit missing value is the absence of a presence.
stocks <- tibble(
year = c(2020, 2020, 2020, 2020, 2021, 2021, 2021),
qtr = c( 1, 2, 3, 4, 2, 3, 4),
price = c(1.88, 0.59, 0.35, NA, 0.92, 0.17, 2.66)
)
Pivoting
Making data wider can make implicit missing values explicit because every combination of the rows and new columns must have some value.
stocks |>
pivot_wider(
names_from = qtr,
values_from = price
)
year | 1 | 2 | 3 | 4 |
---|---|---|---|---|
<dbl> | <dbl> | <dbl> | <dbl> | <dbl> |
2020 | 1.88 | 0.59 | 0.35 | NA |
2021 | NA | 0.92 | 0.17 | 2.66 |
Complete
tidyr::complete() allows you to generate explicit missing values by providing a set of variables that define the combination of rows that should exist.
stocks |>
complete(year, qtr)
year | qtr | price |
---|---|---|
<dbl> | <dbl> | <dbl> |
2020 | 1 | 1.88 |
2020 | 2 | 0.59 |
2020 | 3 | 0.35 |
2020 | 4 | NA |
2021 | 1 | NA |
2021 | 2 | 0.92 |
2021 | 3 | 0.17 |
2021 | 4 | 2.66 |
stocks |>
complete(year = 2019:2021, qtr)
year | qtr | price |
---|---|---|
<dbl> | <dbl> | <dbl> |
2019 | 1 | NA |
2019 | 2 | NA |
2019 | 3 | NA |
2019 | 4 | NA |
2020 | 1 | 1.88 |
2020 | 2 | 0.59 |
2020 | 3 | 0.35 |
2020 | 4 | NA |
2021 | 1 | NA |
2021 | 2 | 0.92 |
2021 | 3 | 0.17 |
2021 | 4 | 2.66 |
Joins
In some cases, the complete set of observations can’t be generated by a simple combination of variables. In that case, you can do manually what complete() does for you: create a data frame that contains all the rows that should exist (using whatever combination of techniques you need), then combine it with your original dataset with dplyr::full_join()
.
dplyr::anti_join(x, y)
is a particularly useful tool , it selects only the rows in x that don’t have a match in y
flights |>
distinct(faa = dest) |>
anti_join(airports)
[1m[22mJoining with `by = join_by(faa)`
faa |
---|
<chr> |
BQN |
SJU |
STT |
PSE |
flights |>
distinct(tailnum) |>
anti_join(planes)
[1m[22mJoining with `by = join_by(tailnum)`
tailnum |
---|
<chr> |
N3ALAA |
N3DUAA |
N542MQ |
N730MQ |
N9EAMQ |
N532UA |
N3EMAA |
N518MQ |
N3BAAA |
N3CYAA |
N426US |
N3GKAA |
N4WNAA |
N5FMAA |
N722MQ |
N3EKAA |
N3ETAA |
N541AA |
N4WRAA |
N4WPAA |
N508MQ |
N3HMAA |
N828MQ |
N3GEAA |
N739MQ |
N531MQ |
N527JB |
N846MQ |
N3GVAA |
N4YCAA |
... |
N7BAAA |
N7BVAA |
N626MQ |
N675MQ |
N580AA |
N717MQ |
N738MQ |
N720MQ |
N7ASAA |
N328AT |
N735MQ |
N5EDAA |
N5DJAA |
N7ALAA |
N721MQ |
N7BGAA |
N5ESAA |
N456UW |
N838MQ |
N442US |
N502SW |
N451UW |
N7BKAA |
N800MQ |
N7CAAA |
N823MQ |
N5FCAA |
N5ERAA |
N654MQ |
N647MQ |
Exercises
- Can you find any relationship between the carrier and the rows that appear to be missing from planes?
18.4 Factors and empty groups
health <- tibble(
name = c("Ikaia", "Oletta", "Leriah", "Dashay", "Tresaun"),
smoker = factor(c("no", "no", "no", "no", "no"), levels = c("yes", "no")),
age = c(34, 88, 75, 47, 56),
)
health |> count(smoker)
smoker | n |
---|---|
<fct> | <int> |
no | 5 |
# We can request count() to keep all the groups, even those not seen in the data by using .drop = FALSE:
health |> count(smoker, .drop = FALSE)
smoker | n |
---|---|
<fct> | <int> |
yes | 0 |
no | 5 |
p1 <- ggplot(health, aes(x = smoker)) +
geom_bar() +
scale_x_discrete()
p2 <- ggplot(health, aes(x = smoker)) +
geom_bar() +
scale_x_discrete(drop = FALSE)
options(repr.plot.width = 10, repr.plot.height = 5)
grid.arrange(p1, p2, ncol=2)
health |>
group_by(smoker, .drop = FALSE) |>
summarize(
n = n(),
mean_age = mean(age),
min_age = min(age),
max_age = max(age),
sd_age = sd(age)
)
Warning message:
"[1m[22mThere were 2 warnings in `summarize()`.
The first warning was:
[1m[22m[36mi[39m In argument: `min_age = min(age)`.
[36mi[39m In group 1: `smoker = yes`.
Caused by warning in `min()`:
[33m![39m no non-missing arguments to min; returning Inf
[1m[22m[36mi[39m Run `dplyr::last_dplyr_warnings()` to see the 1 remaining warning."
smoker | n | mean_age | min_age | max_age | sd_age |
---|---|---|---|---|---|
<fct> | <int> | <dbl> | <dbl> | <dbl> | <dbl> |
yes | 0 | NaN | Inf | -Inf | NA |
no | 5 | 60 | 34 | 88 | 21.62175 |
health |>
group_by(smoker) |>
summarize(
n = n(),
mean_age = mean(age),
min_age = min(age),
max_age = max(age),
sd_age = sd(age)
) |>
complete(smoker)
smoker | n | mean_age | min_age | max_age | sd_age |
---|---|---|---|---|---|
<fct> | <int> | <dbl> | <dbl> | <dbl> | <dbl> |
yes | NA | NA | NA | NA | NA |
no | 5 | 60 | 34 | 88 | 21.62175 |
C19. Joins
Typically you have many data frames, and you must join them together to answer the questions that you’re interested in. This chapter will introduce you to two important types of joins:
- Mutating joins, which add new variables to one data frame from matching observations in another.
- Filtering joins, which filter observations from one data frame based on whether or not they match an observation in another.
library(tidyverse)
library(nycflights13)
19.2 Keys
Primary and foreign keys
- A primary key is a variable or set of variables that uniquely identifies each observation. When more than one variable is needed, the key is called a compound key.
- A foreign key is a variable (or set of variables) that corresponds to a primary key in another table.
For examples:
carrier
andfaa
,tailnum
are primary key of corresponding tablesairlines
,airports
,planes
origin
andtime_hour
are the compound primary key of the tableweather
flights$tailnum
is a foreign key that corresponds to the primary keyplanes$tailnum
.flights$carrier
is a foreign key that corresponds to the primary keyairlines$carrier
.flights$origin
is a foreign key that corresponds to the primary keyairports$faa
.flights$dest
is a foreign key that corresponds to the primary keyairports$faa
.flights$origin-flights$time_hour
is a compound foreign key that corresponds to the compound primary keyweather$origin-weather$time_hour
.
airlines
carrier | name |
---|---|
<chr> | <chr> |
9E | Endeavor Air Inc. |
AA | American Airlines Inc. |
AS | Alaska Airlines Inc. |
B6 | JetBlue Airways |
DL | Delta Air Lines Inc. |
EV | ExpressJet Airlines Inc. |
F9 | Frontier Airlines Inc. |
FL | AirTran Airways Corporation |
HA | Hawaiian Airlines Inc. |
MQ | Envoy Air |
OO | SkyWest Airlines Inc. |
UA | United Air Lines Inc. |
US | US Airways Inc. |
VX | Virgin America |
WN | Southwest Airlines Co. |
YV | Mesa Airlines Inc. |
airports
faa | name | lat | lon | alt | tz | dst | tzone |
---|---|---|---|---|---|---|---|
<chr> | <chr> | <dbl> | <dbl> | <dbl> | <dbl> | <chr> | <chr> |
04G | Lansdowne Airport | 41.13047 | -80.61958 | 1044 | -5 | A | America/New_York |
06A | Moton Field Municipal Airport | 32.46057 | -85.68003 | 264 | -6 | A | America/Chicago |
06C | Schaumburg Regional | 41.98934 | -88.10124 | 801 | -6 | A | America/Chicago |
06N | Randall Airport | 41.43191 | -74.39156 | 523 | -5 | A | America/New_York |
09J | Jekyll Island Airport | 31.07447 | -81.42778 | 11 | -5 | A | America/New_York |
0A9 | Elizabethton Municipal Airport | 36.37122 | -82.17342 | 1593 | -5 | A | America/New_York |
0G6 | Williams County Airport | 41.46731 | -84.50678 | 730 | -5 | A | America/New_York |
0G7 | Finger Lakes Regional Airport | 42.88356 | -76.78123 | 492 | -5 | A | America/New_York |
0P2 | Shoestring Aviation Airfield | 39.79482 | -76.64719 | 1000 | -5 | U | America/New_York |
0S9 | Jefferson County Intl | 48.05381 | -122.81064 | 108 | -8 | A | America/Los_Angeles |
0W3 | Harford County Airport | 39.56684 | -76.20240 | 409 | -5 | A | America/New_York |
10C | Galt Field Airport | 42.40289 | -88.37511 | 875 | -6 | U | America/Chicago |
17G | Port Bucyrus-Crawford County Airport | 40.78156 | -82.97481 | 1003 | -5 | A | America/New_York |
19A | Jackson County Airport | 34.17586 | -83.56160 | 951 | -5 | U | America/New_York |
1A3 | Martin Campbell Field Airport | 35.01581 | -84.34683 | 1789 | -5 | A | America/New_York |
1B9 | Mansfield Municipal | 42.00013 | -71.19677 | 122 | -5 | A | America/New_York |
1C9 | Frazier Lake Airpark | 54.01333 | -124.76833 | 152 | -8 | A | America/Vancouver |
1CS | Clow International Airport | 41.69597 | -88.12923 | 670 | -6 | U | America/Chicago |
1G3 | Kent State Airport | 41.15139 | -81.41511 | 1134 | -5 | A | America/New_York |
1G4 | Grand Canyon West Airport | 35.89990 | -113.81567 | 4813 | -7 | A | America/Phoenix |
1H2 | Effingham Memorial Airport | 39.07000 | -88.53400 | 585 | -6 | A | America/Chicago |
1OH | Fortman Airport | 40.55533 | -84.38662 | 885 | -5 | U | America/New_York |
1RL | Point Roberts Airpark | 48.97972 | -123.07889 | 10 | -8 | A | America/Los_Angeles |
23M | Clarke CO | 32.05170 | -88.44340 | 320 | -6 | A | America/Chicago |
24C | Lowell City Airport | 42.95392 | -85.34391 | 681 | -5 | A | America/New_York |
24J | Suwannee County Airport | 30.30013 | -83.02469 | 104 | -5 | A | America/New_York |
25D | Forest Lake Airport | 45.24775 | -92.99439 | 925 | -6 | A | America/Chicago |
29D | Grove City Airport | 41.14603 | -80.16775 | 1371 | -5 | A | America/New_York |
2A0 | Mark Anton Airport | 35.48625 | -84.93108 | 718 | -5 | A | America/New_York |
2B2 | Plum Island Airport | 42.79536 | -70.83944 | 11 | -5 | A | America/New_York |
... | ... | ... | ... | ... | ... | ... | ... |
X59 | Valkaria Municipal | 27.96086 | -80.55833 | 26 | -5 | A | America/New_York |
XFL | Flagler County Airport | 29.28210 | -81.12120 | 33 | -5 | A | America/New_York |
XNA | NW Arkansas Regional | 36.28187 | -94.30681 | 1287 | -6 | A | America/Chicago |
XZK | Amherst Amtrak Station AMM | 42.37500 | -72.51139 | 258 | -5 | A | America/New_York |
Y51 | Municipal Airport | 43.57936 | -90.89647 | 1292 | -6 | A | America/Chicago |
Y72 | Bloyer Field | 43.97622 | -90.48061 | 966 | -6 | A | America/Chicago |
YAK | Yakutat | 59.30120 | -139.39370 | 33 | -9 | A | NA |
YIP | Willow Run | 42.23793 | -83.53041 | 716 | -5 | A | America/New_York |
YKM | Yakima Air Terminal McAllister Field | 46.56820 | -120.54400 | 1095 | -8 | A | America/Los_Angeles |
YKN | Chan Gurney | 42.87110 | -97.39690 | 1200 | -6 | A | America/Chicago |
YNG | Youngstown Warren Rgnl | 41.26074 | -80.67910 | 1196 | -5 | A | America/New_York |
YUM | Yuma Mcas Yuma Intl | 32.65658 | -114.60598 | 216 | -7 | N | America/Phoenix |
Z84 | Clear | 64.30120 | -149.12014 | 552 | -9 | A | America/Anchorage |
ZBP | Penn Station | 39.30722 | -76.61556 | 66 | -5 | A | America/New_York |
ZFV | Philadelphia 30th St Station | 39.95570 | -75.18200 | 0 | -5 | A | America/New_York |
ZPH | Municipal Airport | 28.22806 | -82.15583 | 90 | -5 | A | America/New_York |
ZRA | Atlantic City Rail Terminal | 39.36650 | -74.44200 | 8 | -5 | A | America/New_York |
ZRD | Train Station | 37.53430 | -77.42945 | 26 | -5 | A | America/New_York |
ZRP | Newark Penn Station | 40.73472 | -74.16417 | 0 | -5 | A | America/New_York |
ZRT | Hartford Union Station | 41.76888 | -72.68150 | 0 | -5 | A | America/New_York |
ZRZ | New Carrollton Rail Station | 38.94800 | -76.87190 | 39 | -5 | A | America/New_York |
ZSF | Springfield Amtrak Station | 42.10600 | -72.59305 | 65 | -5 | A | America/New_York |
ZSY | Scottsdale Airport | 33.62289 | -111.91053 | 1519 | -7 | A | America/Phoenix |
ZTF | Stamford Amtrak Station | 41.04694 | -73.54149 | 0 | -5 | A | America/New_York |
ZTY | Boston Back Bay Station | 42.34780 | -71.07500 | 20 | -5 | A | America/New_York |
ZUN | Black Rock | 35.08323 | -108.79178 | 6454 | -7 | A | America/Denver |
ZVE | New Haven Rail Station | 41.29867 | -72.92599 | 7 | -5 | A | America/New_York |
ZWI | Wilmington Amtrak Station | 39.73667 | -75.55167 | 0 | -5 | A | America/New_York |
ZWU | Washington Union Station | 38.89746 | -77.00643 | 76 | -5 | A | America/New_York |
ZYP | Penn Station | 40.75050 | -73.99350 | 35 | -5 | A | America/New_York |
planes
tailnum | year | type | manufacturer | model | engines | seats | speed | engine |
---|---|---|---|---|---|---|---|---|
<chr> | <int> | <chr> | <chr> | <chr> | <int> | <int> | <int> | <chr> |
N10156 | 2004 | Fixed wing multi engine | EMBRAER | EMB-145XR | 2 | 55 | NA | Turbo-fan |
N102UW | 1998 | Fixed wing multi engine | AIRBUS INDUSTRIE | A320-214 | 2 | 182 | NA | Turbo-fan |
N103US | 1999 | Fixed wing multi engine | AIRBUS INDUSTRIE | A320-214 | 2 | 182 | NA | Turbo-fan |
N104UW | 1999 | Fixed wing multi engine | AIRBUS INDUSTRIE | A320-214 | 2 | 182 | NA | Turbo-fan |
N10575 | 2002 | Fixed wing multi engine | EMBRAER | EMB-145LR | 2 | 55 | NA | Turbo-fan |
N105UW | 1999 | Fixed wing multi engine | AIRBUS INDUSTRIE | A320-214 | 2 | 182 | NA | Turbo-fan |
N107US | 1999 | Fixed wing multi engine | AIRBUS INDUSTRIE | A320-214 | 2 | 182 | NA | Turbo-fan |
N108UW | 1999 | Fixed wing multi engine | AIRBUS INDUSTRIE | A320-214 | 2 | 182 | NA | Turbo-fan |
N109UW | 1999 | Fixed wing multi engine | AIRBUS INDUSTRIE | A320-214 | 2 | 182 | NA | Turbo-fan |
N110UW | 1999 | Fixed wing multi engine | AIRBUS INDUSTRIE | A320-214 | 2 | 182 | NA | Turbo-fan |
N11106 | 2002 | Fixed wing multi engine | EMBRAER | EMB-145XR | 2 | 55 | NA | Turbo-fan |
N11107 | 2002 | Fixed wing multi engine | EMBRAER | EMB-145XR | 2 | 55 | NA | Turbo-fan |
N11109 | 2002 | Fixed wing multi engine | EMBRAER | EMB-145XR | 2 | 55 | NA | Turbo-fan |
N11113 | 2002 | Fixed wing multi engine | EMBRAER | EMB-145XR | 2 | 55 | NA | Turbo-fan |
N11119 | 2002 | Fixed wing multi engine | EMBRAER | EMB-145XR | 2 | 55 | NA | Turbo-fan |
N11121 | 2003 | Fixed wing multi engine | EMBRAER | EMB-145XR | 2 | 55 | NA | Turbo-fan |
N11127 | 2003 | Fixed wing multi engine | EMBRAER | EMB-145XR | 2 | 55 | NA | Turbo-fan |
N11137 | 2003 | Fixed wing multi engine | EMBRAER | EMB-145XR | 2 | 55 | NA | Turbo-fan |
N11140 | 2003 | Fixed wing multi engine | EMBRAER | EMB-145XR | 2 | 55 | NA | Turbo-fan |
N11150 | 2003 | Fixed wing multi engine | EMBRAER | EMB-145XR | 2 | 55 | NA | Turbo-fan |
N11155 | 2004 | Fixed wing multi engine | EMBRAER | EMB-145XR | 2 | 55 | NA | Turbo-fan |
N11164 | 2004 | Fixed wing multi engine | EMBRAER | EMB-145XR | 2 | 55 | NA | Turbo-fan |
N11165 | 2004 | Fixed wing multi engine | EMBRAER | EMB-145XR | 2 | 55 | NA | Turbo-fan |
N11176 | 2004 | Fixed wing multi engine | EMBRAER | EMB-145XR | 2 | 55 | NA | Turbo-fan |
N11181 | 2005 | Fixed wing multi engine | EMBRAER | EMB-145XR | 2 | 55 | NA | Turbo-fan |
N11184 | 2005 | Fixed wing multi engine | EMBRAER | EMB-145XR | 2 | 55 | NA | Turbo-fan |
N11187 | 2005 | Fixed wing multi engine | EMBRAER | EMB-145XR | 2 | 55 | NA | Turbo-fan |
N11189 | 2005 | Fixed wing multi engine | EMBRAER | EMB-145XR | 2 | 55 | NA | Turbo-fan |
N11191 | 2005 | Fixed wing multi engine | EMBRAER | EMB-145XR | 2 | 55 | NA | Turbo-fan |
N11192 | 2005 | Fixed wing multi engine | EMBRAER | EMB-145XR | 2 | 55 | NA | Turbo-fan |
... | ... | ... | ... | ... | ... | ... | ... | ... |
N984DL | 1991 | Fixed wing multi engine | MCDONNELL DOUGLAS AIRCRAFT CO | MD-88 | 2 | 142 | NA | Turbo-fan |
N985AT | 2001 | Fixed wing multi engine | BOEING | 717-200 | 2 | 100 | NA | Turbo-fan |
N985DL | 1991 | Fixed wing multi engine | MCDONNELL DOUGLAS AIRCRAFT CO | MD-88 | 2 | 142 | NA | Turbo-fan |
N986AT | 2001 | Fixed wing multi engine | BOEING | 717-200 | 2 | 100 | NA | Turbo-fan |
N986DL | 1991 | Fixed wing multi engine | MCDONNELL DOUGLAS AIRCRAFT CO | MD-88 | 2 | 142 | NA | Turbo-fan |
N987AT | 2001 | Fixed wing multi engine | BOEING | 717-200 | 2 | 100 | NA | Turbo-fan |
N987DL | 1991 | Fixed wing multi engine | MCDONNELL DOUGLAS AIRCRAFT CO | MD-88 | 2 | 142 | NA | Turbo-fan |
N988AT | 2001 | Fixed wing multi engine | BOEING | 717-200 | 2 | 100 | NA | Turbo-fan |
N988DL | 1991 | Fixed wing multi engine | MCDONNELL DOUGLAS AIRCRAFT CO | MD-88 | 2 | 142 | NA | Turbo-fan |
N989AT | 2001 | Fixed wing multi engine | BOEING | 717-200 | 2 | 100 | NA | Turbo-fan |
N989DL | 1991 | Fixed wing multi engine | MCDONNELL DOUGLAS AIRCRAFT CO | MD-88 | 2 | 142 | NA | Turbo-fan |
N990AT | 2001 | Fixed wing multi engine | BOEING | 717-200 | 2 | 100 | NA | Turbo-fan |
N990DL | 1991 | Fixed wing multi engine | MCDONNELL DOUGLAS AIRCRAFT CO | MD-88 | 2 | 142 | NA | Turbo-fan |
N991AT | NA | Fixed wing multi engine | BOEING | 717-200 | 2 | 100 | NA | Turbo-fan |
N991DL | 1991 | Fixed wing multi engine | MCDONNELL DOUGLAS AIRCRAFT CO | MD-88 | 2 | 142 | NA | Turbo-fan |
N992AT | 2002 | Fixed wing multi engine | BOEING | 717-200 | 2 | 100 | NA | Turbo-fan |
N992DL | 1991 | Fixed wing multi engine | MCDONNELL DOUGLAS AIRCRAFT CO | MD-88 | 2 | 142 | NA | Turbo-fan |
N993AT | 2002 | Fixed wing multi engine | BOEING | 717-200 | 2 | 100 | NA | Turbo-fan |
N993DL | 1991 | Fixed wing multi engine | MCDONNELL DOUGLAS AIRCRAFT CO | MD-88 | 2 | 142 | NA | Turbo-fan |
N994AT | 2002 | Fixed wing multi engine | BOEING | 717-200 | 2 | 100 | NA | Turbo-fan |
N994DL | 1991 | Fixed wing multi engine | MCDONNELL DOUGLAS CORPORATION | MD-88 | 2 | 142 | NA | Turbo-jet |
N995AT | 2002 | Fixed wing multi engine | BOEING | 717-200 | 2 | 100 | NA | Turbo-fan |
N995DL | 1991 | Fixed wing multi engine | MCDONNELL DOUGLAS AIRCRAFT CO | MD-88 | 2 | 142 | NA | Turbo-fan |
N996AT | 2002 | Fixed wing multi engine | BOEING | 717-200 | 2 | 100 | NA | Turbo-fan |
N996DL | 1991 | Fixed wing multi engine | MCDONNELL DOUGLAS AIRCRAFT CO | MD-88 | 2 | 142 | NA | Turbo-fan |
N997AT | 2002 | Fixed wing multi engine | BOEING | 717-200 | 2 | 100 | NA | Turbo-fan |
N997DL | 1992 | Fixed wing multi engine | MCDONNELL DOUGLAS AIRCRAFT CO | MD-88 | 2 | 142 | NA | Turbo-fan |
N998AT | 2002 | Fixed wing multi engine | BOEING | 717-200 | 2 | 100 | NA | Turbo-fan |
N998DL | 1992 | Fixed wing multi engine | MCDONNELL DOUGLAS CORPORATION | MD-88 | 2 | 142 | NA | Turbo-jet |
N999DN | 1992 | Fixed wing multi engine | MCDONNELL DOUGLAS CORPORATION | MD-88 | 2 | 142 | NA | Turbo-jet |
{width=75%}
Checking primary keys
planes |>
count(tailnum) |>
filter(n > 1)
weather |>
count(time_hour, origin) |>
filter(n > 1)
tailnum | n |
---|---|
<chr> | <int> |
time_hour | origin | n |
---|---|---|
<dttm> | <chr> | <int> |
planes |>
filter(is.na(tailnum))
weather |>
filter(is.na(time_hour) | is.na(origin))
tailnum | year | type | manufacturer | model | engines | seats | speed | engine |
---|---|---|---|---|---|---|---|---|
<chr> | <int> | <chr> | <chr> | <chr> | <int> | <int> | <int> | <chr> |
origin | year | month | day | hour | temp | dewp | humid | wind_dir | wind_speed | wind_gust | precip | pressure | visib | time_hour |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
<chr> | <int> | <int> | <int> | <int> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dttm> |
Surrogate keys
flights |>
count(time_hour, carrier, flight) |>
filter(n > 1)
time_hour | carrier | flight | n |
---|---|---|---|
<dttm> | <chr> | <int> | <int> |
flights2 <- flights |>
mutate(id = row_number(), .before = 1)
flights2
id | year | month | day | dep_time | sched_dep_time | dep_delay | arr_time | sched_arr_time | arr_delay | carrier | flight | tailnum | origin | dest | air_time | distance | hour | minute | time_hour |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
<int> | <int> | <int> | <int> | <int> | <int> | <dbl> | <int> | <int> | <dbl> | <chr> | <int> | <chr> | <chr> | <chr> | <dbl> | <dbl> | <dbl> | <dbl> | <dttm> |
1 | 2013 | 1 | 1 | 517 | 515 | 2 | 830 | 819 | 11 | UA | 1545 | N14228 | EWR | IAH | 227 | 1400 | 5 | 15 | 2013-01-01 05:00:00 |
2 | 2013 | 1 | 1 | 533 | 529 | 4 | 850 | 830 | 20 | UA | 1714 | N24211 | LGA | IAH | 227 | 1416 | 5 | 29 | 2013-01-01 05:00:00 |
3 | 2013 | 1 | 1 | 542 | 540 | 2 | 923 | 850 | 33 | AA | 1141 | N619AA | JFK | MIA | 160 | 1089 | 5 | 40 | 2013-01-01 05:00:00 |
4 | 2013 | 1 | 1 | 544 | 545 | -1 | 1004 | 1022 | -18 | B6 | 725 | N804JB | JFK | BQN | 183 | 1576 | 5 | 45 | 2013-01-01 05:00:00 |
5 | 2013 | 1 | 1 | 554 | 600 | -6 | 812 | 837 | -25 | DL | 461 | N668DN | LGA | ATL | 116 | 762 | 6 | 0 | 2013-01-01 06:00:00 |
6 | 2013 | 1 | 1 | 554 | 558 | -4 | 740 | 728 | 12 | UA | 1696 | N39463 | EWR | ORD | 150 | 719 | 5 | 58 | 2013-01-01 05:00:00 |
7 | 2013 | 1 | 1 | 555 | 600 | -5 | 913 | 854 | 19 | B6 | 507 | N516JB | EWR | FLL | 158 | 1065 | 6 | 0 | 2013-01-01 06:00:00 |
8 | 2013 | 1 | 1 | 557 | 600 | -3 | 709 | 723 | -14 | EV | 5708 | N829AS | LGA | IAD | 53 | 229 | 6 | 0 | 2013-01-01 06:00:00 |
9 | 2013 | 1 | 1 | 557 | 600 | -3 | 838 | 846 | -8 | B6 | 79 | N593JB | JFK | MCO | 140 | 944 | 6 | 0 | 2013-01-01 06:00:00 |
10 | 2013 | 1 | 1 | 558 | 600 | -2 | 753 | 745 | 8 | AA | 301 | N3ALAA | LGA | ORD | 138 | 733 | 6 | 0 | 2013-01-01 06:00:00 |
11 | 2013 | 1 | 1 | 558 | 600 | -2 | 849 | 851 | -2 | B6 | 49 | N793JB | JFK | PBI | 149 | 1028 | 6 | 0 | 2013-01-01 06:00:00 |
12 | 2013 | 1 | 1 | 558 | 600 | -2 | 853 | 856 | -3 | B6 | 71 | N657JB | JFK | TPA | 158 | 1005 | 6 | 0 | 2013-01-01 06:00:00 |
13 | 2013 | 1 | 1 | 558 | 600 | -2 | 924 | 917 | 7 | UA | 194 | N29129 | JFK | LAX | 345 | 2475 | 6 | 0 | 2013-01-01 06:00:00 |
14 | 2013 | 1 | 1 | 558 | 600 | -2 | 923 | 937 | -14 | UA | 1124 | N53441 | EWR | SFO | 361 | 2565 | 6 | 0 | 2013-01-01 06:00:00 |
15 | 2013 | 1 | 1 | 559 | 600 | -1 | 941 | 910 | 31 | AA | 707 | N3DUAA | LGA | DFW | 257 | 1389 | 6 | 0 | 2013-01-01 06:00:00 |
16 | 2013 | 1 | 1 | 559 | 559 | 0 | 702 | 706 | -4 | B6 | 1806 | N708JB | JFK | BOS | 44 | 187 | 5 | 59 | 2013-01-01 05:00:00 |
17 | 2013 | 1 | 1 | 559 | 600 | -1 | 854 | 902 | -8 | UA | 1187 | N76515 | EWR | LAS | 337 | 2227 | 6 | 0 | 2013-01-01 06:00:00 |
18 | 2013 | 1 | 1 | 600 | 600 | 0 | 851 | 858 | -7 | B6 | 371 | N595JB | LGA | FLL | 152 | 1076 | 6 | 0 | 2013-01-01 06:00:00 |
19 | 2013 | 1 | 1 | 600 | 600 | 0 | 837 | 825 | 12 | MQ | 4650 | N542MQ | LGA | ATL | 134 | 762 | 6 | 0 | 2013-01-01 06:00:00 |
20 | 2013 | 1 | 1 | 601 | 600 | 1 | 844 | 850 | -6 | B6 | 343 | N644JB | EWR | PBI | 147 | 1023 | 6 | 0 | 2013-01-01 06:00:00 |
21 | 2013 | 1 | 1 | 602 | 610 | -8 | 812 | 820 | -8 | DL | 1919 | N971DL | LGA | MSP | 170 | 1020 | 6 | 10 | 2013-01-01 06:00:00 |
22 | 2013 | 1 | 1 | 602 | 605 | -3 | 821 | 805 | 16 | MQ | 4401 | N730MQ | LGA | DTW | 105 | 502 | 6 | 5 | 2013-01-01 06:00:00 |
23 | 2013 | 1 | 1 | 606 | 610 | -4 | 858 | 910 | -12 | AA | 1895 | N633AA | EWR | MIA | 152 | 1085 | 6 | 10 | 2013-01-01 06:00:00 |
24 | 2013 | 1 | 1 | 606 | 610 | -4 | 837 | 845 | -8 | DL | 1743 | N3739P | JFK | ATL | 128 | 760 | 6 | 10 | 2013-01-01 06:00:00 |
25 | 2013 | 1 | 1 | 607 | 607 | 0 | 858 | 915 | -17 | UA | 1077 | N53442 | EWR | MIA | 157 | 1085 | 6 | 7 | 2013-01-01 06:00:00 |
26 | 2013 | 1 | 1 | 608 | 600 | 8 | 807 | 735 | 32 | MQ | 3768 | N9EAMQ | EWR | ORD | 139 | 719 | 6 | 0 | 2013-01-01 06:00:00 |
27 | 2013 | 1 | 1 | 611 | 600 | 11 | 945 | 931 | 14 | UA | 303 | N532UA | JFK | SFO | 366 | 2586 | 6 | 0 | 2013-01-01 06:00:00 |
28 | 2013 | 1 | 1 | 613 | 610 | 3 | 925 | 921 | 4 | B6 | 135 | N635JB | JFK | RSW | 175 | 1074 | 6 | 10 | 2013-01-01 06:00:00 |
29 | 2013 | 1 | 1 | 615 | 615 | 0 | 1039 | 1100 | -21 | B6 | 709 | N794JB | JFK | SJU | 182 | 1598 | 6 | 15 | 2013-01-01 06:00:00 |
30 | 2013 | 1 | 1 | 615 | 615 | 0 | 833 | 842 | -9 | DL | 575 | N326NB | EWR | ATL | 120 | 746 | 6 | 15 | 2013-01-01 06:00:00 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
336747 | 2013 | 9 | 30 | 2123 | 2125 | -2 | 2223 | 2247 | -24 | EV | 5489 | N712EV | LGA | CHO | 45 | 305 | 21 | 25 | 2013-09-30 21:00:00 |
336748 | 2013 | 9 | 30 | 2127 | 2129 | -2 | 2314 | 2323 | -9 | EV | 3833 | N16546 | EWR | CLT | 72 | 529 | 21 | 29 | 2013-09-30 21:00:00 |
336749 | 2013 | 9 | 30 | 2128 | 2130 | -2 | 2328 | 2359 | -31 | B6 | 97 | N807JB | JFK | DEN | 213 | 1626 | 21 | 30 | 2013-09-30 21:00:00 |
336750 | 2013 | 9 | 30 | 2129 | 2059 | 30 | 2230 | 2232 | -2 | EV | 5048 | N751EV | LGA | RIC | 45 | 292 | 20 | 59 | 2013-09-30 20:00:00 |
336751 | 2013 | 9 | 30 | 2131 | 2140 | -9 | 2225 | 2255 | -30 | MQ | 3621 | N807MQ | JFK | DCA | 36 | 213 | 21 | 40 | 2013-09-30 21:00:00 |
336752 | 2013 | 9 | 30 | 2140 | 2140 | 0 | 10 | 40 | -30 | AA | 185 | N335AA | JFK | LAX | 298 | 2475 | 21 | 40 | 2013-09-30 21:00:00 |
336753 | 2013 | 9 | 30 | 2142 | 2129 | 13 | 2250 | 2239 | 11 | EV | 4509 | N12957 | EWR | PWM | 47 | 284 | 21 | 29 | 2013-09-30 21:00:00 |
336754 | 2013 | 9 | 30 | 2145 | 2145 | 0 | 115 | 140 | -25 | B6 | 1103 | N633JB | JFK | SJU | 192 | 1598 | 21 | 45 | 2013-09-30 21:00:00 |
336755 | 2013 | 9 | 30 | 2147 | 2137 | 10 | 30 | 27 | 3 | B6 | 1371 | N627JB | LGA | FLL | 139 | 1076 | 21 | 37 | 2013-09-30 21:00:00 |
336756 | 2013 | 9 | 30 | 2149 | 2156 | -7 | 2245 | 2308 | -23 | UA | 523 | N813UA | EWR | BOS | 37 | 200 | 21 | 56 | 2013-09-30 21:00:00 |
336757 | 2013 | 9 | 30 | 2150 | 2159 | -9 | 2250 | 2306 | -16 | EV | 3842 | N10575 | EWR | MHT | 39 | 209 | 21 | 59 | 2013-09-30 21:00:00 |
336758 | 2013 | 9 | 30 | 2159 | 1845 | 194 | 2344 | 2030 | 194 | 9E | 3320 | N906XJ | JFK | BUF | 50 | 301 | 18 | 45 | 2013-09-30 18:00:00 |
336759 | 2013 | 9 | 30 | 2203 | 2205 | -2 | 2339 | 2331 | 8 | EV | 5311 | N722EV | LGA | BGR | 61 | 378 | 22 | 5 | 2013-09-30 22:00:00 |
336760 | 2013 | 9 | 30 | 2207 | 2140 | 27 | 2257 | 2250 | 7 | MQ | 3660 | N532MQ | LGA | BNA | 97 | 764 | 21 | 40 | 2013-09-30 21:00:00 |
336761 | 2013 | 9 | 30 | 2211 | 2059 | 72 | 2339 | 2242 | 57 | EV | 4672 | N12145 | EWR | STL | 120 | 872 | 20 | 59 | 2013-09-30 20:00:00 |
336762 | 2013 | 9 | 30 | 2231 | 2245 | -14 | 2335 | 2356 | -21 | B6 | 108 | N193JB | JFK | PWM | 48 | 273 | 22 | 45 | 2013-09-30 22:00:00 |
336763 | 2013 | 9 | 30 | 2233 | 2113 | 80 | 112 | 30 | 42 | UA | 471 | N578UA | EWR | SFO | 318 | 2565 | 21 | 13 | 2013-09-30 21:00:00 |
336764 | 2013 | 9 | 30 | 2235 | 2001 | 154 | 59 | 2249 | 130 | B6 | 1083 | N804JB | JFK | MCO | 123 | 944 | 20 | 1 | 2013-09-30 20:00:00 |
336765 | 2013 | 9 | 30 | 2237 | 2245 | -8 | 2345 | 2353 | -8 | B6 | 234 | N318JB | JFK | BTV | 43 | 266 | 22 | 45 | 2013-09-30 22:00:00 |
336766 | 2013 | 9 | 30 | 2240 | 2245 | -5 | 2334 | 2351 | -17 | B6 | 1816 | N354JB | JFK | SYR | 41 | 209 | 22 | 45 | 2013-09-30 22:00:00 |
336767 | 2013 | 9 | 30 | 2240 | 2250 | -10 | 2347 | 7 | -20 | B6 | 2002 | N281JB | JFK | BUF | 52 | 301 | 22 | 50 | 2013-09-30 22:00:00 |
336768 | 2013 | 9 | 30 | 2241 | 2246 | -5 | 2345 | 1 | -16 | B6 | 486 | N346JB | JFK | ROC | 47 | 264 | 22 | 46 | 2013-09-30 22:00:00 |
336769 | 2013 | 9 | 30 | 2307 | 2255 | 12 | 2359 | 2358 | 1 | B6 | 718 | N565JB | JFK | BOS | 33 | 187 | 22 | 55 | 2013-09-30 22:00:00 |
336770 | 2013 | 9 | 30 | 2349 | 2359 | -10 | 325 | 350 | -25 | B6 | 745 | N516JB | JFK | PSE | 196 | 1617 | 23 | 59 | 2013-09-30 23:00:00 |
336771 | 2013 | 9 | 30 | NA | 1842 | NA | NA | 2019 | NA | EV | 5274 | N740EV | LGA | BNA | NA | 764 | 18 | 42 | 2013-09-30 18:00:00 |
336772 | 2013 | 9 | 30 | NA | 1455 | NA | NA | 1634 | NA | 9E | 3393 | NA | JFK | DCA | NA | 213 | 14 | 55 | 2013-09-30 14:00:00 |
336773 | 2013 | 9 | 30 | NA | 2200 | NA | NA | 2312 | NA | 9E | 3525 | NA | LGA | SYR | NA | 198 | 22 | 0 | 2013-09-30 22:00:00 |
336774 | 2013 | 9 | 30 | NA | 1210 | NA | NA | 1330 | NA | MQ | 3461 | N535MQ | LGA | BNA | NA | 764 | 12 | 10 | 2013-09-30 12:00:00 |
336775 | 2013 | 9 | 30 | NA | 1159 | NA | NA | 1344 | NA | MQ | 3572 | N511MQ | LGA | CLE | NA | 419 | 11 | 59 | 2013-09-30 11:00:00 |
336776 | 2013 | 9 | 30 | NA | 840 | NA | NA | 1020 | NA | MQ | 3531 | N839MQ | LGA | RDU | NA | 431 | 8 | 40 | 2013-09-30 08:00:00 |
Exercises
-
We forgot to draw the relationship between weather and airports in Figure 19.1. What is the relationship and how should it appear in the diagram?
-
weather only contains information for the three origin airports in NYC. If it contained weather records for all airports in the USA, what additional connection would it make to flights?
-
The year, month, day, hour, and origin variables almost form a compound key for weather, but there’s one hour that has duplicate observations. Can you figure out what’s special about that hour?
-
We know that some days of the year are special and fewer people than usual fly on them (e.g., Christmas eve and Christmas day). How might you represent that data as a data frame? What would be the primary key? How would it connect to the existing data frames?
-
Draw a diagram illustrating the connections between the Batting, People, and Salaries data frames in the Lahman package. Draw another diagram that shows the relationship between People, Managers, AwardsManagers. How would you characterize the relationship between the Batting, Pitching, and Fielding data frames?
19.3 Basic joins
dplyr
provides six join functions:
left_join()
, to add in additional metadata.inner_join()
,right_join()
,full_join()
,semi_join()
,anti_join()
Mutating joins:
A mutating join allows you to combine variables from two data frames: it first matches observations by their keys, then copies across variables from one data frame to the other.
inner_join()
, right_join()
, full_join()
have the same interface as left_join()
. The difference is which rows they keep: left join keeps all the rows in x
, the right join keeps all rows in y
, the full join keeps all rows in either x
or y
, and the inner join only keeps rows that occur in both x
and y
.
flights2 <- flights |>
select(year, time_hour, origin, dest, tailnum, carrier)
flights2
year | time_hour | origin | dest | tailnum | carrier |
---|---|---|---|---|---|
<int> | <dttm> | <chr> | <chr> | <chr> | <chr> |
2013 | 2013-01-01 05:00:00 | EWR | IAH | N14228 | UA |
2013 | 2013-01-01 05:00:00 | LGA | IAH | N24211 | UA |
2013 | 2013-01-01 05:00:00 | JFK | MIA | N619AA | AA |
2013 | 2013-01-01 05:00:00 | JFK | BQN | N804JB | B6 |
2013 | 2013-01-01 06:00:00 | LGA | ATL | N668DN | DL |
2013 | 2013-01-01 05:00:00 | EWR | ORD | N39463 | UA |
2013 | 2013-01-01 06:00:00 | EWR | FLL | N516JB | B6 |
2013 | 2013-01-01 06:00:00 | LGA | IAD | N829AS | EV |
2013 | 2013-01-01 06:00:00 | JFK | MCO | N593JB | B6 |
2013 | 2013-01-01 06:00:00 | LGA | ORD | N3ALAA | AA |
2013 | 2013-01-01 06:00:00 | JFK | PBI | N793JB | B6 |
2013 | 2013-01-01 06:00:00 | JFK | TPA | N657JB | B6 |
2013 | 2013-01-01 06:00:00 | JFK | LAX | N29129 | UA |
2013 | 2013-01-01 06:00:00 | EWR | SFO | N53441 | UA |
2013 | 2013-01-01 06:00:00 | LGA | DFW | N3DUAA | AA |
2013 | 2013-01-01 05:00:00 | JFK | BOS | N708JB | B6 |
2013 | 2013-01-01 06:00:00 | EWR | LAS | N76515 | UA |
2013 | 2013-01-01 06:00:00 | LGA | FLL | N595JB | B6 |
2013 | 2013-01-01 06:00:00 | LGA | ATL | N542MQ | MQ |
2013 | 2013-01-01 06:00:00 | EWR | PBI | N644JB | B6 |
2013 | 2013-01-01 06:00:00 | LGA | MSP | N971DL | DL |
2013 | 2013-01-01 06:00:00 | LGA | DTW | N730MQ | MQ |
2013 | 2013-01-01 06:00:00 | EWR | MIA | N633AA | AA |
2013 | 2013-01-01 06:00:00 | JFK | ATL | N3739P | DL |
2013 | 2013-01-01 06:00:00 | EWR | MIA | N53442 | UA |
2013 | 2013-01-01 06:00:00 | EWR | ORD | N9EAMQ | MQ |
2013 | 2013-01-01 06:00:00 | JFK | SFO | N532UA | UA |
2013 | 2013-01-01 06:00:00 | JFK | RSW | N635JB | B6 |
2013 | 2013-01-01 06:00:00 | JFK | SJU | N794JB | B6 |
2013 | 2013-01-01 06:00:00 | EWR | ATL | N326NB | DL |
... | ... | ... | ... | ... | ... |
2013 | 2013-09-30 21:00:00 | LGA | CHO | N712EV | EV |
2013 | 2013-09-30 21:00:00 | EWR | CLT | N16546 | EV |
2013 | 2013-09-30 21:00:00 | JFK | DEN | N807JB | B6 |
2013 | 2013-09-30 20:00:00 | LGA | RIC | N751EV | EV |
2013 | 2013-09-30 21:00:00 | JFK | DCA | N807MQ | MQ |
2013 | 2013-09-30 21:00:00 | JFK | LAX | N335AA | AA |
2013 | 2013-09-30 21:00:00 | EWR | PWM | N12957 | EV |
2013 | 2013-09-30 21:00:00 | JFK | SJU | N633JB | B6 |
2013 | 2013-09-30 21:00:00 | LGA | FLL | N627JB | B6 |
2013 | 2013-09-30 21:00:00 | EWR | BOS | N813UA | UA |
2013 | 2013-09-30 21:00:00 | EWR | MHT | N10575 | EV |
2013 | 2013-09-30 18:00:00 | JFK | BUF | N906XJ | 9E |
2013 | 2013-09-30 22:00:00 | LGA | BGR | N722EV | EV |
2013 | 2013-09-30 21:00:00 | LGA | BNA | N532MQ | MQ |
2013 | 2013-09-30 20:00:00 | EWR | STL | N12145 | EV |
2013 | 2013-09-30 22:00:00 | JFK | PWM | N193JB | B6 |
2013 | 2013-09-30 21:00:00 | EWR | SFO | N578UA | UA |
2013 | 2013-09-30 20:00:00 | JFK | MCO | N804JB | B6 |
2013 | 2013-09-30 22:00:00 | JFK | BTV | N318JB | B6 |
2013 | 2013-09-30 22:00:00 | JFK | SYR | N354JB | B6 |
2013 | 2013-09-30 22:00:00 | JFK | BUF | N281JB | B6 |
2013 | 2013-09-30 22:00:00 | JFK | ROC | N346JB | B6 |
2013 | 2013-09-30 22:00:00 | JFK | BOS | N565JB | B6 |
2013 | 2013-09-30 23:00:00 | JFK | PSE | N516JB | B6 |
2013 | 2013-09-30 18:00:00 | LGA | BNA | N740EV | EV |
2013 | 2013-09-30 14:00:00 | JFK | DCA | NA | 9E |
2013 | 2013-09-30 22:00:00 | LGA | SYR | NA | 9E |
2013 | 2013-09-30 12:00:00 | LGA | BNA | N535MQ | MQ |
2013 | 2013-09-30 11:00:00 | LGA | CLE | N511MQ | MQ |
2013 | 2013-09-30 08:00:00 | LGA | RDU | N839MQ | MQ |
# add the full airline name to the flights2 data
flights2 |>
left_join(airlines)
[1m[22mJoining with `by = join_by(carrier)`
year | time_hour | origin | dest | tailnum | carrier | name |
---|---|---|---|---|---|---|
<int> | <dttm> | <chr> | <chr> | <chr> | <chr> | <chr> |
2013 | 2013-01-01 05:00:00 | EWR | IAH | N14228 | UA | United Air Lines Inc. |
2013 | 2013-01-01 05:00:00 | LGA | IAH | N24211 | UA | United Air Lines Inc. |
2013 | 2013-01-01 05:00:00 | JFK | MIA | N619AA | AA | American Airlines Inc. |
2013 | 2013-01-01 05:00:00 | JFK | BQN | N804JB | B6 | JetBlue Airways |
2013 | 2013-01-01 06:00:00 | LGA | ATL | N668DN | DL | Delta Air Lines Inc. |
2013 | 2013-01-01 05:00:00 | EWR | ORD | N39463 | UA | United Air Lines Inc. |
2013 | 2013-01-01 06:00:00 | EWR | FLL | N516JB | B6 | JetBlue Airways |
2013 | 2013-01-01 06:00:00 | LGA | IAD | N829AS | EV | ExpressJet Airlines Inc. |
2013 | 2013-01-01 06:00:00 | JFK | MCO | N593JB | B6 | JetBlue Airways |
2013 | 2013-01-01 06:00:00 | LGA | ORD | N3ALAA | AA | American Airlines Inc. |
2013 | 2013-01-01 06:00:00 | JFK | PBI | N793JB | B6 | JetBlue Airways |
2013 | 2013-01-01 06:00:00 | JFK | TPA | N657JB | B6 | JetBlue Airways |
2013 | 2013-01-01 06:00:00 | JFK | LAX | N29129 | UA | United Air Lines Inc. |
2013 | 2013-01-01 06:00:00 | EWR | SFO | N53441 | UA | United Air Lines Inc. |
2013 | 2013-01-01 06:00:00 | LGA | DFW | N3DUAA | AA | American Airlines Inc. |
2013 | 2013-01-01 05:00:00 | JFK | BOS | N708JB | B6 | JetBlue Airways |
2013 | 2013-01-01 06:00:00 | EWR | LAS | N76515 | UA | United Air Lines Inc. |
2013 | 2013-01-01 06:00:00 | LGA | FLL | N595JB | B6 | JetBlue Airways |
2013 | 2013-01-01 06:00:00 | LGA | ATL | N542MQ | MQ | Envoy Air |
2013 | 2013-01-01 06:00:00 | EWR | PBI | N644JB | B6 | JetBlue Airways |
2013 | 2013-01-01 06:00:00 | LGA | MSP | N971DL | DL | Delta Air Lines Inc. |
2013 | 2013-01-01 06:00:00 | LGA | DTW | N730MQ | MQ | Envoy Air |
2013 | 2013-01-01 06:00:00 | EWR | MIA | N633AA | AA | American Airlines Inc. |
2013 | 2013-01-01 06:00:00 | JFK | ATL | N3739P | DL | Delta Air Lines Inc. |
2013 | 2013-01-01 06:00:00 | EWR | MIA | N53442 | UA | United Air Lines Inc. |
2013 | 2013-01-01 06:00:00 | EWR | ORD | N9EAMQ | MQ | Envoy Air |
2013 | 2013-01-01 06:00:00 | JFK | SFO | N532UA | UA | United Air Lines Inc. |
2013 | 2013-01-01 06:00:00 | JFK | RSW | N635JB | B6 | JetBlue Airways |
2013 | 2013-01-01 06:00:00 | JFK | SJU | N794JB | B6 | JetBlue Airways |
2013 | 2013-01-01 06:00:00 | EWR | ATL | N326NB | DL | Delta Air Lines Inc. |
... | ... | ... | ... | ... | ... | ... |
2013 | 2013-09-30 21:00:00 | LGA | CHO | N712EV | EV | ExpressJet Airlines Inc. |
2013 | 2013-09-30 21:00:00 | EWR | CLT | N16546 | EV | ExpressJet Airlines Inc. |
2013 | 2013-09-30 21:00:00 | JFK | DEN | N807JB | B6 | JetBlue Airways |
2013 | 2013-09-30 20:00:00 | LGA | RIC | N751EV | EV | ExpressJet Airlines Inc. |
2013 | 2013-09-30 21:00:00 | JFK | DCA | N807MQ | MQ | Envoy Air |
2013 | 2013-09-30 21:00:00 | JFK | LAX | N335AA | AA | American Airlines Inc. |
2013 | 2013-09-30 21:00:00 | EWR | PWM | N12957 | EV | ExpressJet Airlines Inc. |
2013 | 2013-09-30 21:00:00 | JFK | SJU | N633JB | B6 | JetBlue Airways |
2013 | 2013-09-30 21:00:00 | LGA | FLL | N627JB | B6 | JetBlue Airways |
2013 | 2013-09-30 21:00:00 | EWR | BOS | N813UA | UA | United Air Lines Inc. |
2013 | 2013-09-30 21:00:00 | EWR | MHT | N10575 | EV | ExpressJet Airlines Inc. |
2013 | 2013-09-30 18:00:00 | JFK | BUF | N906XJ | 9E | Endeavor Air Inc. |
2013 | 2013-09-30 22:00:00 | LGA | BGR | N722EV | EV | ExpressJet Airlines Inc. |
2013 | 2013-09-30 21:00:00 | LGA | BNA | N532MQ | MQ | Envoy Air |
2013 | 2013-09-30 20:00:00 | EWR | STL | N12145 | EV | ExpressJet Airlines Inc. |
2013 | 2013-09-30 22:00:00 | JFK | PWM | N193JB | B6 | JetBlue Airways |
2013 | 2013-09-30 21:00:00 | EWR | SFO | N578UA | UA | United Air Lines Inc. |
2013 | 2013-09-30 20:00:00 | JFK | MCO | N804JB | B6 | JetBlue Airways |
2013 | 2013-09-30 22:00:00 | JFK | BTV | N318JB | B6 | JetBlue Airways |
2013 | 2013-09-30 22:00:00 | JFK | SYR | N354JB | B6 | JetBlue Airways |
2013 | 2013-09-30 22:00:00 | JFK | BUF | N281JB | B6 | JetBlue Airways |
2013 | 2013-09-30 22:00:00 | JFK | ROC | N346JB | B6 | JetBlue Airways |
2013 | 2013-09-30 22:00:00 | JFK | BOS | N565JB | B6 | JetBlue Airways |
2013 | 2013-09-30 23:00:00 | JFK | PSE | N516JB | B6 | JetBlue Airways |
2013 | 2013-09-30 18:00:00 | LGA | BNA | N740EV | EV | ExpressJet Airlines Inc. |
2013 | 2013-09-30 14:00:00 | JFK | DCA | NA | 9E | Endeavor Air Inc. |
2013 | 2013-09-30 22:00:00 | LGA | SYR | NA | 9E | Endeavor Air Inc. |
2013 | 2013-09-30 12:00:00 | LGA | BNA | N535MQ | MQ | Envoy Air |
2013 | 2013-09-30 11:00:00 | LGA | CLE | N511MQ | MQ | Envoy Air |
2013 | 2013-09-30 08:00:00 | LGA | RDU | N839MQ | MQ | Envoy Air |
# find out the temperature and wind speed when each plane departed:
flights2 |>
left_join(weather |> select(origin, time_hour, temp, wind_speed))
[1m[22mJoining with `by = join_by(time_hour, origin)`
year | time_hour | origin | dest | tailnum | carrier | temp | wind_speed |
---|---|---|---|---|---|---|---|
<int> | <dttm> | <chr> | <chr> | <chr> | <chr> | <dbl> | <dbl> |
2013 | 2013-01-01 05:00:00 | EWR | IAH | N14228 | UA | 39.02 | 12.65858 |
2013 | 2013-01-01 05:00:00 | LGA | IAH | N24211 | UA | 39.92 | 14.96014 |
2013 | 2013-01-01 05:00:00 | JFK | MIA | N619AA | AA | 39.02 | 14.96014 |
2013 | 2013-01-01 05:00:00 | JFK | BQN | N804JB | B6 | 39.02 | 14.96014 |
2013 | 2013-01-01 06:00:00 | LGA | ATL | N668DN | DL | 39.92 | 16.11092 |
2013 | 2013-01-01 05:00:00 | EWR | ORD | N39463 | UA | 39.02 | 12.65858 |
2013 | 2013-01-01 06:00:00 | EWR | FLL | N516JB | B6 | 37.94 | 11.50780 |
2013 | 2013-01-01 06:00:00 | LGA | IAD | N829AS | EV | 39.92 | 16.11092 |
2013 | 2013-01-01 06:00:00 | JFK | MCO | N593JB | B6 | 37.94 | 13.80936 |
2013 | 2013-01-01 06:00:00 | LGA | ORD | N3ALAA | AA | 39.92 | 16.11092 |
2013 | 2013-01-01 06:00:00 | JFK | PBI | N793JB | B6 | 37.94 | 13.80936 |
2013 | 2013-01-01 06:00:00 | JFK | TPA | N657JB | B6 | 37.94 | 13.80936 |
2013 | 2013-01-01 06:00:00 | JFK | LAX | N29129 | UA | 37.94 | 13.80936 |
2013 | 2013-01-01 06:00:00 | EWR | SFO | N53441 | UA | 37.94 | 11.50780 |
2013 | 2013-01-01 06:00:00 | LGA | DFW | N3DUAA | AA | 39.92 | 16.11092 |
2013 | 2013-01-01 05:00:00 | JFK | BOS | N708JB | B6 | 39.02 | 14.96014 |
2013 | 2013-01-01 06:00:00 | EWR | LAS | N76515 | UA | 37.94 | 11.50780 |
2013 | 2013-01-01 06:00:00 | LGA | FLL | N595JB | B6 | 39.92 | 16.11092 |
2013 | 2013-01-01 06:00:00 | LGA | ATL | N542MQ | MQ | 39.92 | 16.11092 |
2013 | 2013-01-01 06:00:00 | EWR | PBI | N644JB | B6 | 37.94 | 11.50780 |
2013 | 2013-01-01 06:00:00 | LGA | MSP | N971DL | DL | 39.92 | 16.11092 |
2013 | 2013-01-01 06:00:00 | LGA | DTW | N730MQ | MQ | 39.92 | 16.11092 |
2013 | 2013-01-01 06:00:00 | EWR | MIA | N633AA | AA | 37.94 | 11.50780 |
2013 | 2013-01-01 06:00:00 | JFK | ATL | N3739P | DL | 37.94 | 13.80936 |
2013 | 2013-01-01 06:00:00 | EWR | MIA | N53442 | UA | 37.94 | 11.50780 |
2013 | 2013-01-01 06:00:00 | EWR | ORD | N9EAMQ | MQ | 37.94 | 11.50780 |
2013 | 2013-01-01 06:00:00 | JFK | SFO | N532UA | UA | 37.94 | 13.80936 |
2013 | 2013-01-01 06:00:00 | JFK | RSW | N635JB | B6 | 37.94 | 13.80936 |
2013 | 2013-01-01 06:00:00 | JFK | SJU | N794JB | B6 | 37.94 | 13.80936 |
2013 | 2013-01-01 06:00:00 | EWR | ATL | N326NB | DL | 37.94 | 11.50780 |
... | ... | ... | ... | ... | ... | ... | ... |
2013 | 2013-09-30 21:00:00 | LGA | CHO | N712EV | EV | 64.94 | 8.05546 |
2013 | 2013-09-30 21:00:00 | EWR | CLT | N16546 | EV | 62.96 | 3.45234 |
2013 | 2013-09-30 21:00:00 | JFK | DEN | N807JB | B6 | 62.06 | 9.20624 |
2013 | 2013-09-30 20:00:00 | LGA | RIC | N751EV | EV | 64.94 | 6.90468 |
2013 | 2013-09-30 21:00:00 | JFK | DCA | N807MQ | MQ | 62.06 | 9.20624 |
2013 | 2013-09-30 21:00:00 | JFK | LAX | N335AA | AA | 62.06 | 9.20624 |
2013 | 2013-09-30 21:00:00 | EWR | PWM | N12957 | EV | 62.96 | 3.45234 |
2013 | 2013-09-30 21:00:00 | JFK | SJU | N633JB | B6 | 62.06 | 9.20624 |
2013 | 2013-09-30 21:00:00 | LGA | FLL | N627JB | B6 | 64.94 | 8.05546 |
2013 | 2013-09-30 21:00:00 | EWR | BOS | N813UA | UA | 62.96 | 3.45234 |
2013 | 2013-09-30 21:00:00 | EWR | MHT | N10575 | EV | 62.96 | 3.45234 |
2013 | 2013-09-30 18:00:00 | JFK | BUF | N906XJ | 9E | 64.04 | 6.90468 |
2013 | 2013-09-30 22:00:00 | LGA | BGR | N722EV | EV | 64.94 | 6.90468 |
2013 | 2013-09-30 21:00:00 | LGA | BNA | N532MQ | MQ | 64.94 | 8.05546 |
2013 | 2013-09-30 20:00:00 | EWR | STL | N12145 | EV | 64.94 | 3.45234 |
2013 | 2013-09-30 22:00:00 | JFK | PWM | N193JB | B6 | 60.98 | 9.20624 |
2013 | 2013-09-30 21:00:00 | EWR | SFO | N578UA | UA | 62.96 | 3.45234 |
2013 | 2013-09-30 20:00:00 | JFK | MCO | N804JB | B6 | 62.06 | 8.05546 |
2013 | 2013-09-30 22:00:00 | JFK | BTV | N318JB | B6 | 60.98 | 9.20624 |
2013 | 2013-09-30 22:00:00 | JFK | SYR | N354JB | B6 | 60.98 | 9.20624 |
2013 | 2013-09-30 22:00:00 | JFK | BUF | N281JB | B6 | 60.98 | 9.20624 |
2013 | 2013-09-30 22:00:00 | JFK | ROC | N346JB | B6 | 60.98 | 9.20624 |
2013 | 2013-09-30 22:00:00 | JFK | BOS | N565JB | B6 | 60.98 | 9.20624 |
2013 | 2013-09-30 23:00:00 | JFK | PSE | N516JB | B6 | 60.08 | 9.20624 |
2013 | 2013-09-30 18:00:00 | LGA | BNA | N740EV | EV | 66.92 | 9.20624 |
2013 | 2013-09-30 14:00:00 | JFK | DCA | NA | 9E | 68.00 | 11.50780 |
2013 | 2013-09-30 22:00:00 | LGA | SYR | NA | 9E | 64.94 | 6.90468 |
2013 | 2013-09-30 12:00:00 | LGA | BNA | N535MQ | MQ | 69.08 | 5.75390 |
2013 | 2013-09-30 11:00:00 | LGA | CLE | N511MQ | MQ | 66.92 | 8.05546 |
2013 | 2013-09-30 08:00:00 | LGA | RDU | N839MQ | MQ | 60.98 | 5.75390 |
# what size of plane was flying:
flights2 |>
left_join(planes |> select(tailnum, type, engines, seats))
[1m[22mJoining with `by = join_by(tailnum)`
year | time_hour | origin | dest | tailnum | carrier | type | engines | seats |
---|---|---|---|---|---|---|---|---|
<int> | <dttm> | <chr> | <chr> | <chr> | <chr> | <chr> | <int> | <int> |
2013 | 2013-01-01 05:00:00 | EWR | IAH | N14228 | UA | Fixed wing multi engine | 2 | 149 |
2013 | 2013-01-01 05:00:00 | LGA | IAH | N24211 | UA | Fixed wing multi engine | 2 | 149 |
2013 | 2013-01-01 05:00:00 | JFK | MIA | N619AA | AA | Fixed wing multi engine | 2 | 178 |
2013 | 2013-01-01 05:00:00 | JFK | BQN | N804JB | B6 | Fixed wing multi engine | 2 | 200 |
2013 | 2013-01-01 06:00:00 | LGA | ATL | N668DN | DL | Fixed wing multi engine | 2 | 178 |
2013 | 2013-01-01 05:00:00 | EWR | ORD | N39463 | UA | Fixed wing multi engine | 2 | 191 |
2013 | 2013-01-01 06:00:00 | EWR | FLL | N516JB | B6 | Fixed wing multi engine | 2 | 200 |
2013 | 2013-01-01 06:00:00 | LGA | IAD | N829AS | EV | Fixed wing multi engine | 2 | 55 |
2013 | 2013-01-01 06:00:00 | JFK | MCO | N593JB | B6 | Fixed wing multi engine | 2 | 200 |
2013 | 2013-01-01 06:00:00 | LGA | ORD | N3ALAA | AA | NA | NA | NA |
2013 | 2013-01-01 06:00:00 | JFK | PBI | N793JB | B6 | Fixed wing multi engine | 2 | 200 |
2013 | 2013-01-01 06:00:00 | JFK | TPA | N657JB | B6 | Fixed wing multi engine | 2 | 200 |
2013 | 2013-01-01 06:00:00 | JFK | LAX | N29129 | UA | Fixed wing multi engine | 2 | 178 |
2013 | 2013-01-01 06:00:00 | EWR | SFO | N53441 | UA | Fixed wing multi engine | 2 | 191 |
2013 | 2013-01-01 06:00:00 | LGA | DFW | N3DUAA | AA | NA | NA | NA |
2013 | 2013-01-01 05:00:00 | JFK | BOS | N708JB | B6 | Fixed wing multi engine | 2 | 200 |
2013 | 2013-01-01 06:00:00 | EWR | LAS | N76515 | UA | Fixed wing multi engine | 2 | 149 |
2013 | 2013-01-01 06:00:00 | LGA | FLL | N595JB | B6 | Fixed wing multi engine | 2 | 200 |
2013 | 2013-01-01 06:00:00 | LGA | ATL | N542MQ | MQ | NA | NA | NA |
2013 | 2013-01-01 06:00:00 | EWR | PBI | N644JB | B6 | Fixed wing multi engine | 2 | 200 |
2013 | 2013-01-01 06:00:00 | LGA | MSP | N971DL | DL | Fixed wing multi engine | 2 | 142 |
2013 | 2013-01-01 06:00:00 | LGA | DTW | N730MQ | MQ | NA | NA | NA |
2013 | 2013-01-01 06:00:00 | EWR | MIA | N633AA | AA | Fixed wing multi engine | 2 | 178 |
2013 | 2013-01-01 06:00:00 | JFK | ATL | N3739P | DL | Fixed wing multi engine | 2 | 189 |
2013 | 2013-01-01 06:00:00 | EWR | MIA | N53442 | UA | Fixed wing multi engine | 2 | 191 |
2013 | 2013-01-01 06:00:00 | EWR | ORD | N9EAMQ | MQ | NA | NA | NA |
2013 | 2013-01-01 06:00:00 | JFK | SFO | N532UA | UA | NA | NA | NA |
2013 | 2013-01-01 06:00:00 | JFK | RSW | N635JB | B6 | Fixed wing multi engine | 2 | 200 |
2013 | 2013-01-01 06:00:00 | JFK | SJU | N794JB | B6 | Fixed wing multi engine | 2 | 200 |
2013 | 2013-01-01 06:00:00 | EWR | ATL | N326NB | DL | Fixed wing multi engine | 2 | 145 |
... | ... | ... | ... | ... | ... | ... | ... | ... |
2013 | 2013-09-30 21:00:00 | LGA | CHO | N712EV | EV | Fixed wing multi engine | 2 | 80 |
2013 | 2013-09-30 21:00:00 | EWR | CLT | N16546 | EV | Fixed wing multi engine | 2 | 55 |
2013 | 2013-09-30 21:00:00 | JFK | DEN | N807JB | B6 | Fixed wing multi engine | 2 | 200 |
2013 | 2013-09-30 20:00:00 | LGA | RIC | N751EV | EV | Fixed wing multi engine | 2 | 80 |
2013 | 2013-09-30 21:00:00 | JFK | DCA | N807MQ | MQ | NA | NA | NA |
2013 | 2013-09-30 21:00:00 | JFK | LAX | N335AA | AA | Fixed wing multi engine | 2 | 255 |
2013 | 2013-09-30 21:00:00 | EWR | PWM | N12957 | EV | Fixed wing multi engine | 2 | 55 |
2013 | 2013-09-30 21:00:00 | JFK | SJU | N633JB | B6 | Fixed wing multi engine | 2 | 200 |
2013 | 2013-09-30 21:00:00 | LGA | FLL | N627JB | B6 | Fixed wing multi engine | 2 | 200 |
2013 | 2013-09-30 21:00:00 | EWR | BOS | N813UA | UA | Fixed wing multi engine | 2 | 179 |
2013 | 2013-09-30 21:00:00 | EWR | MHT | N10575 | EV | Fixed wing multi engine | 2 | 55 |
2013 | 2013-09-30 18:00:00 | JFK | BUF | N906XJ | 9E | Fixed wing multi engine | 2 | 95 |
2013 | 2013-09-30 22:00:00 | LGA | BGR | N722EV | EV | Fixed wing multi engine | 2 | 80 |
2013 | 2013-09-30 21:00:00 | LGA | BNA | N532MQ | MQ | NA | NA | NA |
2013 | 2013-09-30 20:00:00 | EWR | STL | N12145 | EV | Fixed wing multi engine | 2 | 55 |
2013 | 2013-09-30 22:00:00 | JFK | PWM | N193JB | B6 | Fixed wing multi engine | 2 | 20 |
2013 | 2013-09-30 21:00:00 | EWR | SFO | N578UA | UA | Fixed wing multi engine | 2 | 178 |
2013 | 2013-09-30 20:00:00 | JFK | MCO | N804JB | B6 | Fixed wing multi engine | 2 | 200 |
2013 | 2013-09-30 22:00:00 | JFK | BTV | N318JB | B6 | Fixed wing multi engine | 2 | 20 |
2013 | 2013-09-30 22:00:00 | JFK | SYR | N354JB | B6 | Fixed wing multi engine | 2 | 20 |
2013 | 2013-09-30 22:00:00 | JFK | BUF | N281JB | B6 | Fixed wing multi engine | 2 | 20 |
2013 | 2013-09-30 22:00:00 | JFK | ROC | N346JB | B6 | Fixed wing multi engine | 2 | 20 |
2013 | 2013-09-30 22:00:00 | JFK | BOS | N565JB | B6 | Fixed wing multi engine | 2 | 200 |
2013 | 2013-09-30 23:00:00 | JFK | PSE | N516JB | B6 | Fixed wing multi engine | 2 | 200 |
2013 | 2013-09-30 18:00:00 | LGA | BNA | N740EV | EV | Fixed wing multi engine | 2 | 80 |
2013 | 2013-09-30 14:00:00 | JFK | DCA | NA | 9E | NA | NA | NA |
2013 | 2013-09-30 22:00:00 | LGA | SYR | NA | 9E | NA | NA | NA |
2013 | 2013-09-30 12:00:00 | LGA | BNA | N535MQ | MQ | NA | NA | NA |
2013 | 2013-09-30 11:00:00 | LGA | CLE | N511MQ | MQ | NA | NA | NA |
2013 | 2013-09-30 08:00:00 | LGA | RDU | N839MQ | MQ | NA | NA | NA |
# When left_join() fails to find a match for a row in x, it fills in the new variables with missing values.
flights2 |>
filter(tailnum == "N535MQ") |>
left_join(planes |> select(tailnum, type, engines, seats))
[1m[22mJoining with `by = join_by(tailnum)`
year | time_hour | origin | dest | tailnum | carrier | type | engines | seats |
---|---|---|---|---|---|---|---|---|
<int> | <dttm> | <chr> | <chr> | <chr> | <chr> | <chr> | <int> | <int> |
2013 | 2013-01-02 16:00:00 | LGA | BNA | N535MQ | MQ | NA | NA | NA |
2013 | 2013-01-03 06:00:00 | LGA | CLT | N535MQ | MQ | NA | NA | NA |
2013 | 2013-01-03 11:00:00 | LGA | MSP | N535MQ | MQ | NA | NA | NA |
2013 | 2013-01-03 18:00:00 | LGA | MSP | N535MQ | MQ | NA | NA | NA |
2013 | 2013-01-11 16:00:00 | LGA | ATL | N535MQ | MQ | NA | NA | NA |
2013 | 2013-01-12 12:00:00 | LGA | BNA | N535MQ | MQ | NA | NA | NA |
2013 | 2013-01-12 18:00:00 | LGA | CLE | N535MQ | MQ | NA | NA | NA |
2013 | 2013-01-13 15:00:00 | LGA | CLT | N535MQ | MQ | NA | NA | NA |
2013 | 2013-01-13 20:00:00 | LGA | ATL | N535MQ | MQ | NA | NA | NA |
2013 | 2013-01-14 08:00:00 | LGA | ATL | N535MQ | MQ | NA | NA | NA |
2013 | 2013-01-14 20:00:00 | LGA | ATL | N535MQ | MQ | NA | NA | NA |
2013 | 2013-01-18 11:00:00 | JFK | DCA | N535MQ | MQ | NA | NA | NA |
2013 | 2013-01-19 15:00:00 | JFK | DCA | N535MQ | MQ | NA | NA | NA |
2013 | 2013-01-19 19:00:00 | JFK | CMH | N535MQ | MQ | NA | NA | NA |
2013 | 2013-01-20 08:00:00 | LGA | ATL | N535MQ | MQ | NA | NA | NA |
2013 | 2013-01-20 14:00:00 | LGA | MSP | N535MQ | MQ | NA | NA | NA |
2013 | 2013-01-20 21:00:00 | LGA | BNA | N535MQ | MQ | NA | NA | NA |
2013 | 2013-01-21 09:00:00 | LGA | ATL | N535MQ | MQ | NA | NA | NA |
2013 | 2013-01-23 18:00:00 | EWR | ORD | N535MQ | MQ | NA | NA | NA |
2013 | 2013-01-25 06:00:00 | LGA | MSP | N535MQ | MQ | NA | NA | NA |
2013 | 2013-01-26 08:00:00 | LGA | ATL | N535MQ | MQ | NA | NA | NA |
2013 | 2013-01-26 15:00:00 | LGA | CLT | N535MQ | MQ | NA | NA | NA |
2013 | 2013-01-27 11:00:00 | LGA | MSP | N535MQ | MQ | NA | NA | NA |
2013 | 2013-01-28 09:00:00 | LGA | CLT | N535MQ | MQ | NA | NA | NA |
2013 | 2013-01-28 14:00:00 | LGA | ATL | N535MQ | MQ | NA | NA | NA |
2013 | 2013-01-31 12:00:00 | EWR | ORD | N535MQ | MQ | NA | NA | NA |
2013 | 2013-10-01 09:00:00 | LGA | CLT | N535MQ | MQ | NA | NA | NA |
2013 | 2013-10-01 14:00:00 | LGA | MSP | N535MQ | MQ | NA | NA | NA |
2013 | 2013-10-01 21:00:00 | LGA | BNA | N535MQ | MQ | NA | NA | NA |
2013 | 2013-10-02 15:00:00 | LGA | CLT | N535MQ | MQ | NA | NA | NA |
... | ... | ... | ... | ... | ... | ... | ... | ... |
2013 | 2013-08-30 10:00:00 | EWR | ORD | N535MQ | MQ | NA | NA | NA |
2013 | 2013-08-30 15:00:00 | EWR | ORD | N535MQ | MQ | NA | NA | NA |
2013 | 2013-08-31 13:00:00 | EWR | ORD | N535MQ | MQ | NA | NA | NA |
2013 | 2013-09-03 13:00:00 | EWR | ORD | N535MQ | MQ | NA | NA | NA |
2013 | 2013-09-05 15:00:00 | EWR | ORD | N535MQ | MQ | NA | NA | NA |
2013 | 2013-09-13 13:00:00 | EWR | ORD | N535MQ | MQ | NA | NA | NA |
2013 | 2013-09-18 10:00:00 | LGA | ATL | N535MQ | MQ | NA | NA | NA |
2013 | 2013-09-18 16:00:00 | LGA | BNA | N535MQ | MQ | NA | NA | NA |
2013 | 2013-09-19 06:00:00 | LGA | ATL | N535MQ | MQ | NA | NA | NA |
2013 | 2013-09-19 12:00:00 | LGA | MSP | N535MQ | MQ | NA | NA | NA |
2013 | 2013-09-20 13:00:00 | LGA | CLE | N535MQ | MQ | NA | NA | NA |
2013 | 2013-09-20 18:00:00 | LGA | CLE | N535MQ | MQ | NA | NA | NA |
2013 | 2013-09-21 11:00:00 | LGA | MSP | N535MQ | MQ | NA | NA | NA |
2013 | 2013-09-23 11:00:00 | JFK | DCA | N535MQ | MQ | NA | NA | NA |
2013 | 2013-09-25 06:00:00 | EWR | ORD | N535MQ | MQ | NA | NA | NA |
2013 | 2013-09-25 20:00:00 | JFK | CMH | N535MQ | MQ | NA | NA | NA |
2013 | 2013-09-26 11:00:00 | JFK | DCA | N535MQ | MQ | NA | NA | NA |
2013 | 2013-09-26 15:00:00 | JFK | DCA | N535MQ | MQ | NA | NA | NA |
2013 | 2013-09-27 11:00:00 | LGA | CLE | N535MQ | MQ | NA | NA | NA |
2013 | 2013-09-27 15:00:00 | LGA | RDU | N535MQ | MQ | NA | NA | NA |
2013 | 2013-09-27 21:00:00 | LGA | CLT | N535MQ | MQ | NA | NA | NA |
2013 | 2013-09-28 11:00:00 | LGA | CLE | N535MQ | MQ | NA | NA | NA |
2013 | 2013-09-29 08:00:00 | LGA | DTW | N535MQ | MQ | NA | NA | NA |
2013 | 2013-09-29 13:00:00 | LGA | RDU | N535MQ | MQ | NA | NA | NA |
2013 | 2013-09-29 17:00:00 | LGA | RDU | N535MQ | MQ | NA | NA | NA |
2013 | 2013-09-29 21:00:00 | LGA | BNA | N535MQ | MQ | NA | NA | NA |
2013 | 2013-09-30 11:00:00 | LGA | RDU | N535MQ | MQ | NA | NA | NA |
2013 | 2013-09-30 15:00:00 | LGA | RDU | N535MQ | MQ | NA | NA | NA |
2013 | 2013-09-30 21:00:00 | LGA | RDU | N535MQ | MQ | NA | NA | NA |
2013 | 2013-09-30 12:00:00 | LGA | BNA | N535MQ | MQ | NA | NA | NA |
Specifying join keys
By default, left_join() will use all variables that appear in both data frames as the join key, the so called natural join. This is a useful heuristic, but it doesn’t always work, for example, both dataframes have the same column’s name (but with different meanings)
flights2 |>
left_join(airports, join_by(origin == faa))
year | time_hour | origin | dest | tailnum | carrier | name | lat | lon | alt | tz | dst | tzone |
---|---|---|---|---|---|---|---|---|---|---|---|---|
<int> | <dttm> | <chr> | <chr> | <chr> | <chr> | <chr> | <dbl> | <dbl> | <dbl> | <dbl> | <chr> | <chr> |
2013 | 2013-01-01 05:00:00 | EWR | IAH | N14228 | UA | Newark Liberty Intl | 40.69250 | -74.16867 | 18 | -5 | A | America/New_York |
2013 | 2013-01-01 05:00:00 | LGA | IAH | N24211 | UA | La Guardia | 40.77725 | -73.87261 | 22 | -5 | A | America/New_York |
2013 | 2013-01-01 05:00:00 | JFK | MIA | N619AA | AA | John F Kennedy Intl | 40.63975 | -73.77893 | 13 | -5 | A | America/New_York |
2013 | 2013-01-01 05:00:00 | JFK | BQN | N804JB | B6 | John F Kennedy Intl | 40.63975 | -73.77893 | 13 | -5 | A | America/New_York |
2013 | 2013-01-01 06:00:00 | LGA | ATL | N668DN | DL | La Guardia | 40.77725 | -73.87261 | 22 | -5 | A | America/New_York |
2013 | 2013-01-01 05:00:00 | EWR | ORD | N39463 | UA | Newark Liberty Intl | 40.69250 | -74.16867 | 18 | -5 | A | America/New_York |
2013 | 2013-01-01 06:00:00 | EWR | FLL | N516JB | B6 | Newark Liberty Intl | 40.69250 | -74.16867 | 18 | -5 | A | America/New_York |
2013 | 2013-01-01 06:00:00 | LGA | IAD | N829AS | EV | La Guardia | 40.77725 | -73.87261 | 22 | -5 | A | America/New_York |
2013 | 2013-01-01 06:00:00 | JFK | MCO | N593JB | B6 | John F Kennedy Intl | 40.63975 | -73.77893 | 13 | -5 | A | America/New_York |
2013 | 2013-01-01 06:00:00 | LGA | ORD | N3ALAA | AA | La Guardia | 40.77725 | -73.87261 | 22 | -5 | A | America/New_York |
2013 | 2013-01-01 06:00:00 | JFK | PBI | N793JB | B6 | John F Kennedy Intl | 40.63975 | -73.77893 | 13 | -5 | A | America/New_York |
2013 | 2013-01-01 06:00:00 | JFK | TPA | N657JB | B6 | John F Kennedy Intl | 40.63975 | -73.77893 | 13 | -5 | A | America/New_York |
2013 | 2013-01-01 06:00:00 | JFK | LAX | N29129 | UA | John F Kennedy Intl | 40.63975 | -73.77893 | 13 | -5 | A | America/New_York |
2013 | 2013-01-01 06:00:00 | EWR | SFO | N53441 | UA | Newark Liberty Intl | 40.69250 | -74.16867 | 18 | -5 | A | America/New_York |
2013 | 2013-01-01 06:00:00 | LGA | DFW | N3DUAA | AA | La Guardia | 40.77725 | -73.87261 | 22 | -5 | A | America/New_York |
2013 | 2013-01-01 05:00:00 | JFK | BOS | N708JB | B6 | John F Kennedy Intl | 40.63975 | -73.77893 | 13 | -5 | A | America/New_York |
2013 | 2013-01-01 06:00:00 | EWR | LAS | N76515 | UA | Newark Liberty Intl | 40.69250 | -74.16867 | 18 | -5 | A | America/New_York |
2013 | 2013-01-01 06:00:00 | LGA | FLL | N595JB | B6 | La Guardia | 40.77725 | -73.87261 | 22 | -5 | A | America/New_York |
2013 | 2013-01-01 06:00:00 | LGA | ATL | N542MQ | MQ | La Guardia | 40.77725 | -73.87261 | 22 | -5 | A | America/New_York |
2013 | 2013-01-01 06:00:00 | EWR | PBI | N644JB | B6 | Newark Liberty Intl | 40.69250 | -74.16867 | 18 | -5 | A | America/New_York |
2013 | 2013-01-01 06:00:00 | LGA | MSP | N971DL | DL | La Guardia | 40.77725 | -73.87261 | 22 | -5 | A | America/New_York |
2013 | 2013-01-01 06:00:00 | LGA | DTW | N730MQ | MQ | La Guardia | 40.77725 | -73.87261 | 22 | -5 | A | America/New_York |
2013 | 2013-01-01 06:00:00 | EWR | MIA | N633AA | AA | Newark Liberty Intl | 40.69250 | -74.16867 | 18 | -5 | A | America/New_York |
2013 | 2013-01-01 06:00:00 | JFK | ATL | N3739P | DL | John F Kennedy Intl | 40.63975 | -73.77893 | 13 | -5 | A | America/New_York |
2013 | 2013-01-01 06:00:00 | EWR | MIA | N53442 | UA | Newark Liberty Intl | 40.69250 | -74.16867 | 18 | -5 | A | America/New_York |
2013 | 2013-01-01 06:00:00 | EWR | ORD | N9EAMQ | MQ | Newark Liberty Intl | 40.69250 | -74.16867 | 18 | -5 | A | America/New_York |
2013 | 2013-01-01 06:00:00 | JFK | SFO | N532UA | UA | John F Kennedy Intl | 40.63975 | -73.77893 | 13 | -5 | A | America/New_York |
2013 | 2013-01-01 06:00:00 | JFK | RSW | N635JB | B6 | John F Kennedy Intl | 40.63975 | -73.77893 | 13 | -5 | A | America/New_York |
2013 | 2013-01-01 06:00:00 | JFK | SJU | N794JB | B6 | John F Kennedy Intl | 40.63975 | -73.77893 | 13 | -5 | A | America/New_York |
2013 | 2013-01-01 06:00:00 | EWR | ATL | N326NB | DL | Newark Liberty Intl | 40.69250 | -74.16867 | 18 | -5 | A | America/New_York |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
2013 | 2013-09-30 21:00:00 | LGA | CHO | N712EV | EV | La Guardia | 40.77725 | -73.87261 | 22 | -5 | A | America/New_York |
2013 | 2013-09-30 21:00:00 | EWR | CLT | N16546 | EV | Newark Liberty Intl | 40.69250 | -74.16867 | 18 | -5 | A | America/New_York |
2013 | 2013-09-30 21:00:00 | JFK | DEN | N807JB | B6 | John F Kennedy Intl | 40.63975 | -73.77893 | 13 | -5 | A | America/New_York |
2013 | 2013-09-30 20:00:00 | LGA | RIC | N751EV | EV | La Guardia | 40.77725 | -73.87261 | 22 | -5 | A | America/New_York |
2013 | 2013-09-30 21:00:00 | JFK | DCA | N807MQ | MQ | John F Kennedy Intl | 40.63975 | -73.77893 | 13 | -5 | A | America/New_York |
2013 | 2013-09-30 21:00:00 | JFK | LAX | N335AA | AA | John F Kennedy Intl | 40.63975 | -73.77893 | 13 | -5 | A | America/New_York |
2013 | 2013-09-30 21:00:00 | EWR | PWM | N12957 | EV | Newark Liberty Intl | 40.69250 | -74.16867 | 18 | -5 | A | America/New_York |
2013 | 2013-09-30 21:00:00 | JFK | SJU | N633JB | B6 | John F Kennedy Intl | 40.63975 | -73.77893 | 13 | -5 | A | America/New_York |
2013 | 2013-09-30 21:00:00 | LGA | FLL | N627JB | B6 | La Guardia | 40.77725 | -73.87261 | 22 | -5 | A | America/New_York |
2013 | 2013-09-30 21:00:00 | EWR | BOS | N813UA | UA | Newark Liberty Intl | 40.69250 | -74.16867 | 18 | -5 | A | America/New_York |
2013 | 2013-09-30 21:00:00 | EWR | MHT | N10575 | EV | Newark Liberty Intl | 40.69250 | -74.16867 | 18 | -5 | A | America/New_York |
2013 | 2013-09-30 18:00:00 | JFK | BUF | N906XJ | 9E | John F Kennedy Intl | 40.63975 | -73.77893 | 13 | -5 | A | America/New_York |
2013 | 2013-09-30 22:00:00 | LGA | BGR | N722EV | EV | La Guardia | 40.77725 | -73.87261 | 22 | -5 | A | America/New_York |
2013 | 2013-09-30 21:00:00 | LGA | BNA | N532MQ | MQ | La Guardia | 40.77725 | -73.87261 | 22 | -5 | A | America/New_York |
2013 | 2013-09-30 20:00:00 | EWR | STL | N12145 | EV | Newark Liberty Intl | 40.69250 | -74.16867 | 18 | -5 | A | America/New_York |
2013 | 2013-09-30 22:00:00 | JFK | PWM | N193JB | B6 | John F Kennedy Intl | 40.63975 | -73.77893 | 13 | -5 | A | America/New_York |
2013 | 2013-09-30 21:00:00 | EWR | SFO | N578UA | UA | Newark Liberty Intl | 40.69250 | -74.16867 | 18 | -5 | A | America/New_York |
2013 | 2013-09-30 20:00:00 | JFK | MCO | N804JB | B6 | John F Kennedy Intl | 40.63975 | -73.77893 | 13 | -5 | A | America/New_York |
2013 | 2013-09-30 22:00:00 | JFK | BTV | N318JB | B6 | John F Kennedy Intl | 40.63975 | -73.77893 | 13 | -5 | A | America/New_York |
2013 | 2013-09-30 22:00:00 | JFK | SYR | N354JB | B6 | John F Kennedy Intl | 40.63975 | -73.77893 | 13 | -5 | A | America/New_York |
2013 | 2013-09-30 22:00:00 | JFK | BUF | N281JB | B6 | John F Kennedy Intl | 40.63975 | -73.77893 | 13 | -5 | A | America/New_York |
2013 | 2013-09-30 22:00:00 | JFK | ROC | N346JB | B6 | John F Kennedy Intl | 40.63975 | -73.77893 | 13 | -5 | A | America/New_York |
2013 | 2013-09-30 22:00:00 | JFK | BOS | N565JB | B6 | John F Kennedy Intl | 40.63975 | -73.77893 | 13 | -5 | A | America/New_York |
2013 | 2013-09-30 23:00:00 | JFK | PSE | N516JB | B6 | John F Kennedy Intl | 40.63975 | -73.77893 | 13 | -5 | A | America/New_York |
2013 | 2013-09-30 18:00:00 | LGA | BNA | N740EV | EV | La Guardia | 40.77725 | -73.87261 | 22 | -5 | A | America/New_York |
2013 | 2013-09-30 14:00:00 | JFK | DCA | NA | 9E | John F Kennedy Intl | 40.63975 | -73.77893 | 13 | -5 | A | America/New_York |
2013 | 2013-09-30 22:00:00 | LGA | SYR | NA | 9E | La Guardia | 40.77725 | -73.87261 | 22 | -5 | A | America/New_York |
2013 | 2013-09-30 12:00:00 | LGA | BNA | N535MQ | MQ | La Guardia | 40.77725 | -73.87261 | 22 | -5 | A | America/New_York |
2013 | 2013-09-30 11:00:00 | LGA | CLE | N511MQ | MQ | La Guardia | 40.77725 | -73.87261 | 22 | -5 | A | America/New_York |
2013 | 2013-09-30 08:00:00 | LGA | RDU | N839MQ | MQ | La Guardia | 40.77725 | -73.87261 | 22 | -5 | A | America/New_York |
Filtering joins
The primary action of a filtering join is to filter the rows.
semi_join()
keep all rows inx
that have a match iny
.anti_join()
return all rows inx
that don’t have a match iny
. They’re useful for finding missing values that are implicit in the data.
airports |>
semi_join(flights2, join_by(faa == origin))
faa | name | lat | lon | alt | tz | dst | tzone |
---|---|---|---|---|---|---|---|
<chr> | <chr> | <dbl> | <dbl> | <dbl> | <dbl> | <chr> | <chr> |
EWR | Newark Liberty Intl | 40.69250 | -74.16867 | 18 | -5 | A | America/New_York |
JFK | John F Kennedy Intl | 40.63975 | -73.77893 | 13 | -5 | A | America/New_York |
LGA | La Guardia | 40.77725 | -73.87261 | 22 | -5 | A | America/New_York |
airports |>
semi_join(flights2, join_by(faa == dest))
faa | name | lat | lon | alt | tz | dst | tzone |
---|---|---|---|---|---|---|---|
<chr> | <chr> | <dbl> | <dbl> | <dbl> | <dbl> | <chr> | <chr> |
ABQ | Albuquerque International Sunport | 35.04022 | -106.60919 | 5355 | -7 | A | America/Denver |
ACK | Nantucket Mem | 41.25305 | -70.06018 | 48 | -5 | A | America/New_York |
ALB | Albany Intl | 42.74827 | -73.80169 | 285 | -5 | A | America/New_York |
ANC | Ted Stevens Anchorage Intl | 61.17436 | -149.99636 | 152 | -9 | A | America/Anchorage |
ATL | Hartsfield Jackson Atlanta Intl | 33.63672 | -84.42807 | 1026 | -5 | A | America/New_York |
AUS | Austin Bergstrom Intl | 30.19453 | -97.66989 | 542 | -6 | A | America/Chicago |
AVL | Asheville Regional Airport | 35.43619 | -82.54181 | 2165 | -5 | A | America/New_York |
BDL | Bradley Intl | 41.93889 | -72.68322 | 173 | -5 | A | America/New_York |
BGR | Bangor Intl | 44.80744 | -68.82814 | 192 | -5 | A | America/New_York |
BHM | Birmingham Intl | 33.56294 | -86.75355 | 644 | -6 | A | America/Chicago |
BNA | Nashville Intl | 36.12447 | -86.67819 | 599 | -6 | A | America/Chicago |
BOS | General Edward Lawrence Logan Intl | 42.36435 | -71.00518 | 19 | -5 | A | America/New_York |
BTV | Burlington Intl | 44.47186 | -73.15328 | 335 | -5 | A | America/New_York |
BUF | Buffalo Niagara Intl | 42.94053 | -78.73217 | 724 | -5 | A | America/New_York |
BUR | Bob Hope | 34.20067 | -118.35867 | 778 | -8 | A | America/Los_Angeles |
BWI | Baltimore Washington Intl | 39.17536 | -76.66833 | 146 | -5 | A | America/New_York |
BZN | Gallatin Field | 45.77764 | -111.16015 | 4500 | -7 | A | America/Denver |
CAE | Columbia Metropolitan | 33.93883 | -81.11953 | 236 | -5 | A | America/New_York |
CAK | Akron Canton Regional Airport | 40.91608 | -81.44219 | 1228 | -5 | A | America/New_York |
CHO | Charlottesville-Albemarle | 38.13864 | -78.45286 | 639 | -5 | A | America/New_York |
CHS | Charleston Afb Intl | 32.89865 | -80.04053 | 45 | -5 | A | America/New_York |
CLE | Cleveland Hopkins Intl | 41.41169 | -81.84979 | 791 | -5 | A | America/New_York |
CLT | Charlotte Douglas Intl | 35.21400 | -80.94314 | 748 | -5 | A | America/New_York |
CMH | Port Columbus Intl | 39.99797 | -82.89189 | 815 | -5 | A | America/New_York |
CRW | Yeager | 38.37315 | -81.59319 | 981 | -5 | A | America/New_York |
CVG | Cincinnati Northern Kentucky Intl | 39.04884 | -84.66782 | 896 | -5 | A | America/New_York |
DAY | James M Cox Dayton Intl | 39.90237 | -84.21937 | 1009 | -5 | A | America/New_York |
DCA | Ronald Reagan Washington Natl | 38.85208 | -77.03772 | 15 | -5 | A | America/New_York |
DEN | Denver Intl | 39.86166 | -104.67318 | 5431 | -7 | A | America/Denver |
DFW | Dallas Fort Worth Intl | 32.89683 | -97.03800 | 607 | -6 | A | America/Chicago |
... | ... | ... | ... | ... | ... | ... | ... |
PDX | Portland Intl | 45.58872 | -122.59750 | 30 | -8 | A | America/Los_Angeles |
PHL | Philadelphia Intl | 39.87194 | -75.24114 | 36 | -5 | A | America/New_York |
PHX | Phoenix Sky Harbor Intl | 33.43428 | -112.01158 | 1135 | -7 | N | America/Phoenix |
PIT | Pittsburgh Intl | 40.49147 | -80.23287 | 1204 | -5 | A | America/New_York |
PSP | Palm Springs Intl | 33.82967 | -116.50669 | 477 | -8 | A | America/Los_Angeles |
PVD | Theodore Francis Green State | 41.73258 | -71.42038 | 55 | -5 | A | America/New_York |
PWM | Portland Intl Jetport | 43.64616 | -70.30928 | 77 | -5 | A | America/New_York |
RDU | Raleigh Durham Intl | 35.87764 | -78.78747 | 435 | -5 | A | America/New_York |
RIC | Richmond Intl | 37.50517 | -77.31967 | 167 | -5 | A | America/New_York |
ROC | Greater Rochester Intl | 43.11887 | -77.67239 | 559 | -5 | A | America/New_York |
RSW | Southwest Florida Intl | 26.53617 | -81.75517 | 30 | -5 | A | America/New_York |
SAN | San Diego Intl | 32.73356 | -117.18967 | 17 | -8 | A | America/Los_Angeles |
SAT | San Antonio Intl | 29.53369 | -98.46978 | 809 | -6 | A | America/Chicago |
SAV | Savannah Hilton Head Intl | 32.12758 | -81.20214 | 51 | -5 | A | America/New_York |
SBN | South Bend Rgnl | 41.70866 | -86.31725 | 799 | -5 | A | America/New_York |
SDF | Louisville International Airport | 38.17409 | -85.73650 | 501 | -5 | A | America/New_York |
SEA | Seattle Tacoma Intl | 47.44900 | -122.30931 | 433 | -8 | A | America/Los_Angeles |
SFO | San Francisco Intl | 37.61897 | -122.37489 | 13 | -8 | A | America/Los_Angeles |
SJC | Norman Y Mineta San Jose Intl | 37.36260 | -121.92902 | 62 | -8 | A | America/Los_Angeles |
SLC | Salt Lake City Intl | 40.78839 | -111.97777 | 4227 | -7 | A | America/Denver |
SMF | Sacramento Intl | 38.69542 | -121.59078 | 27 | -8 | A | America/Los_Angeles |
SNA | John Wayne Arpt Orange Co | 33.67567 | -117.86822 | 56 | -8 | A | America/Los_Angeles |
SRQ | Sarasota Bradenton Intl | 27.39544 | -82.55439 | 30 | -5 | A | America/New_York |
STL | Lambert St Louis Intl | 38.74870 | -90.37003 | 618 | -6 | A | America/Chicago |
SYR | Syracuse Hancock Intl | 43.11119 | -76.10631 | 421 | -5 | A | America/New_York |
TPA | Tampa Intl | 27.97547 | -82.53325 | 26 | -5 | A | America/New_York |
TUL | Tulsa Intl | 36.19839 | -95.88811 | 677 | -6 | A | America/Chicago |
TVC | Cherry Capital Airport | 44.74144 | -85.58223 | 624 | -5 | A | America/New_York |
TYS | Mc Ghee Tyson | 35.81097 | -83.99403 | 981 | -5 | A | America/New_York |
XNA | NW Arkansas Regional | 36.28187 | -94.30681 | 1287 | -6 | A | America/Chicago |
# find rows that are missing from airports by looking for flights that don’t have a matching destination airport.
flights2 |>
anti_join(airports, join_by(dest == faa)) |>
distinct(dest)
dest |
---|
<chr> |
BQN |
SJU |
STT |
PSE |
# find which tailnums are missing from planes
flights2 |>
anti_join(planes, join_by(tailnum)) |>
distinct(tailnum)
tailnum |
---|
<chr> |
N3ALAA |
N3DUAA |
N542MQ |
N730MQ |
N9EAMQ |
N532UA |
N3EMAA |
N518MQ |
N3BAAA |
N3CYAA |
N426US |
N3GKAA |
N4WNAA |
N5FMAA |
N722MQ |
N3EKAA |
N3ETAA |
N541AA |
N4WRAA |
N4WPAA |
N508MQ |
N3HMAA |
N828MQ |
N3GEAA |
N739MQ |
N531MQ |
N527JB |
N846MQ |
N3GVAA |
N4YCAA |
... |
N7BAAA |
N7BVAA |
N626MQ |
N675MQ |
N580AA |
N717MQ |
N738MQ |
N720MQ |
N7ASAA |
N328AT |
N735MQ |
N5EDAA |
N5DJAA |
N7ALAA |
N721MQ |
N7BGAA |
N5ESAA |
N456UW |
N838MQ |
N442US |
N502SW |
N451UW |
N7BKAA |
N800MQ |
N7CAAA |
N823MQ |
N5FCAA |
N5ERAA |
N654MQ |
N647MQ |
Exercises
-
Find the 48 hours (over the course of the whole year) that have the worst delays. Cross-reference it with the weather data. Can you see any patterns?
-
Imagine you’ve found the top 10 most popular destinations using this code:
top_dest <- flights2 |>
count(dest, sort = TRUE) |>
head(10)
How can you find all flights to those destinations?
-
Does every departing flight have corresponding weather data for that hour?
-
What do the tail numbers that don’t have a matching record in planes have in common? (Hint: one variable explains ~90% of the problems.)
-
Add a column to planes that lists every carrier that has flown that plane. You might expect that there’s an implicit relationship between plane and airline, because each plane is flown by a single airline. Confirm or reject this hypothesis using the tools you’ve learned in previous chapters.
-
Add the latitude and the longitude of the origin and destination airport to flights. Is it easier to rename the columns before or after the join?
- Compute the average delay by destination, then join on the airports data frame so you can show the spatial distribution of delays. Here’s an easy way to draw a map of the United States:
airports |> semi_join(flights, join_by(faa == dest)) |> ggplot(aes(x = lon, y = lat)) + borders("state") + geom_point() + coord_quickmap()
You might want to use the size or color of the points to display the average delay for each airport.
- What happened on June 13 2013? Draw a map of the delays, and then use Google to cross-reference with the weather.
19.4 How do joins work?
{width=30%} {width=30%} {width=30%}
{width=30%} {width=30%}
x <- tribble(
~key, ~val_x,
1, "x1",
2, "x2",
3, "x3"
)
y <- tribble(
~key, ~val_y,
1, "y1",
2, "y2",
4, "y3"
)
x |>
inner_join(y)
[1m[22mJoining with `by = join_by(key)`
key | val_x | val_y |
---|---|---|
<dbl> | <chr> | <chr> |
1 | x1 | y1 |
2 | x2 | y2 |
x |>
left_join(y)
[1m[22mJoining with `by = join_by(key)`
key | val_x | val_y |
---|---|---|
<dbl> | <chr> | <chr> |
1 | x1 | y1 |
2 | x2 | y2 |
3 | x3 | NA |
x |>
right_join(y)
[1m[22mJoining with `by = join_by(key)`
key | val_x | val_y |
---|---|---|
<dbl> | <chr> | <chr> |
1 | x1 | y1 |
2 | x2 | y2 |
4 | NA | y3 |
full_join(x,y)
[1m[22mJoining with `by = join_by(key)`
key | val_x | val_y |
---|---|---|
<dbl> | <chr> | <chr> |
1 | x1 | y1 |
2 | x2 | y2 |
3 | x3 | NA |
4 | NA | y3 |
Row matching
{width=30%}
one particularly dangerous case which can cause a combinatorial explosion of rows.
Filterring joins
{width=30%} {width=30%}
- In a semi-join it only matters that there is a match; otherwise values in y don’t affect the output.
- An anti-join is the inverse of a semi-join, dropping rows from x that have a match in y.
semi_join(x, y)
anti_join(x, y)
[1m[22mJoining with `by = join_by(key)`
key | val_x |
---|---|
<dbl> | <chr> |
1 | x1 |
2 | x2 |
[1m[22mJoining with `by = join_by(key)`
key | val_x |
---|---|
<dbl> | <chr> |
3 | x3 |
19.5 Non-equi joins
- Cross joins match every pair of rows.
- Inequality joins use <, <=, >, and >= instead of ==.
- Rolling joins are similar to inequality joins but only find the closest match.
- Overlap joins are a special type of inequality join designed to work with ranges.
# equi join (where the rows match if the x key equals the y key)
x |>
inner_join(y, join_by(key == key), keep = TRUE)
key.x | val_x | key.y | val_y |
---|---|---|---|
<dbl> | <chr> | <dbl> | <chr> |
1 | x1 | 1 | y1 |
2 | x2 | 2 | y2 |
# non-equi join
x |>
inner_join(y, join_by(key >= key), keep = TRUE)
key.x | val_x | key.y | val_y |
---|---|---|---|
<dbl> | <chr> | <dbl> | <chr> |
1 | x1 | 1 | y1 |
2 | x2 | 1 | y1 |
2 | x2 | 2 | y2 |
3 | x3 | 1 | y1 |
3 | x3 | 2 | y2 |
{width=30%} {width=30%} {width=30%}
cross_join(x,y)
key.x | val_x | key.y | val_y |
---|---|---|---|
<dbl> | <chr> | <dbl> | <chr> |
1 | x1 | 1 | y1 |
1 | x1 | 2 | y2 |
1 | x1 | 4 | y3 |
2 | x2 | 1 | y1 |
2 | x2 | 2 | y2 |
2 | x2 | 4 | y3 |
3 | x3 | 1 | y1 |
3 | x3 | 2 | y2 |
3 | x3 | 4 | y3 |
inner_join(x, y, join_by(key < key))
key.x | val_x | key.y | val_y |
---|---|---|---|
<dbl> | <chr> | <dbl> | <chr> |
1 | x1 | 2 | y2 |
1 | x1 | 4 | y3 |
2 | x2 | 4 | y3 |
3 | x3 | 4 | y3 |
left_join(x, y, join_by(closest(key >= key)))
key.x | val_x | key.y | val_y |
---|---|---|---|
<dbl> | <chr> | <dbl> | <chr> |
1 | x1 | 1 | y1 |
2 | x2 | 2 | y2 |
3 | x3 | 2 | y2 |
anti_join(x, y, join_by(closest(key >= key)))
key | val_x |
---|---|
<dbl> | <chr> |
Overlap join
parties <- tibble(
q = 1:4,
party = ymd(c("2022-01-10", "2022-04-04", "2022-07-11", "2022-10-03")),
start = ymd(c("2022-01-01", "2022-04-04", "2022-07-11", "2022-10-03")),
end = ymd(c("2022-04-03", "2022-07-11", "2022-10-02", "2022-12-31"))
)
parties
q | party | start | end |
---|---|---|---|
<int> | <date> | <date> | <date> |
1 | 2022-01-10 | 2022-01-01 | 2022-04-03 |
2 | 2022-04-04 | 2022-04-04 | 2022-07-11 |
3 | 2022-07-11 | 2022-07-11 | 2022-10-02 |
4 | 2022-10-03 | 2022-10-03 | 2022-12-31 |
parties |>
inner_join(parties, join_by(overlaps(start, end, start, end), q < q)) |>
select(start.x, end.x, start.y, end.y)
start.x | end.x | start.y | end.y |
---|---|---|---|
<date> | <date> | <date> | <date> |
2022-04-04 | 2022-07-11 | 2022-07-11 | 2022-10-02 |
parties <- tibble(
q = 1:4,
party = ymd(c("2022-01-10", "2022-04-04", "2022-07-11", "2022-10-03")),
start = ymd(c("2022-01-01", "2022-04-04", "2022-07-11", "2022-10-03")),
end = ymd(c("2022-04-03", "2022-07-10", "2022-10-02", "2022-12-31"))
)
set.seed(123)
employees <- tibble(
name = sample(babynames::babynames$name, 100),
birthday = ymd("2022-01-01") + (sample(365, 100, replace = TRUE) - 1)
)
employees
name | birthday |
---|---|
<chr> | <date> |
Kemba | 2022-01-22 |
Orean | 2022-06-26 |
Kirstyn | 2022-02-11 |
Amparo | 2022-11-11 |
Belen | 2022-03-25 |
Rayshaun | 2022-01-11 |
Brazil | 2022-05-01 |
Chaston | 2022-10-29 |
Reyn | 2022-03-26 |
Ogechi | 2022-12-31 |
Raylin | 2022-07-13 |
Elisha | 2022-04-17 |
Francico | 2022-03-18 |
Theoplis | 2022-07-17 |
Ashea | 2022-09-06 |
Angela | 2022-07-19 |
Essie | 2022-06-09 |
Allyn | 2022-09-07 |
Tanita | 2022-10-19 |
Sherriann | 2022-01-16 |
Raven | 2022-02-02 |
Glenys | 2022-02-09 |
Li | 2022-01-10 |
Miana | 2022-07-19 |
Sewell | 2022-05-05 |
Wayland | 2022-09-22 |
Nadean | 2022-09-20 |
Gregoria | 2022-07-05 |
Sumiye | 2022-03-02 |
Norvell | 2022-09-09 |
... | ... |
Liora | 2022-07-31 |
Andrina | 2022-08-10 |
Gay | 2022-06-08 |
Dustyn | 2022-12-15 |
Nasiya | 2022-05-25 |
Belma | 2022-02-26 |
Thena | 2022-05-28 |
Cooper | 2022-06-12 |
Adele | 2022-08-26 |
Atiana | 2022-06-10 |
Jamil | 2022-03-07 |
Nalani | 2022-01-04 |
Nathalie | 2022-11-26 |
Duriel | 2022-08-13 |
Yesenia | 2022-04-27 |
Sherlie | 2022-01-25 |
Brooksley | 2022-05-16 |
Kristi | 2022-02-24 |
Alethea | 2022-08-05 |
Teandre | 2022-03-26 |
Mohamedali | 2022-02-14 |
Audray | 2022-05-26 |
Elta | 2022-06-19 |
Suzannah | 2022-05-14 |
Cleve | 2022-07-18 |
Promise | 2022-12-27 |
Ian | 2022-06-25 |
Shelva | 2022-09-02 |
Aedan | 2022-11-05 |
Dorianna | 2022-04-14 |
employees |>
inner_join(parties, join_by(between(birthday, start, end)), unmatched = "error")
name | birthday | q | party | start | end |
---|---|---|---|---|---|
<chr> | <date> | <int> | <date> | <date> | <date> |
Kemba | 2022-01-22 | 1 | 2022-01-10 | 2022-01-01 | 2022-04-03 |
Orean | 2022-06-26 | 2 | 2022-04-04 | 2022-04-04 | 2022-07-10 |
Kirstyn | 2022-02-11 | 1 | 2022-01-10 | 2022-01-01 | 2022-04-03 |
Amparo | 2022-11-11 | 4 | 2022-10-03 | 2022-10-03 | 2022-12-31 |
Belen | 2022-03-25 | 1 | 2022-01-10 | 2022-01-01 | 2022-04-03 |
Rayshaun | 2022-01-11 | 1 | 2022-01-10 | 2022-01-01 | 2022-04-03 |
Brazil | 2022-05-01 | 2 | 2022-04-04 | 2022-04-04 | 2022-07-10 |
Chaston | 2022-10-29 | 4 | 2022-10-03 | 2022-10-03 | 2022-12-31 |
Reyn | 2022-03-26 | 1 | 2022-01-10 | 2022-01-01 | 2022-04-03 |
Ogechi | 2022-12-31 | 4 | 2022-10-03 | 2022-10-03 | 2022-12-31 |
Raylin | 2022-07-13 | 3 | 2022-07-11 | 2022-07-11 | 2022-10-02 |
Elisha | 2022-04-17 | 2 | 2022-04-04 | 2022-04-04 | 2022-07-10 |
Francico | 2022-03-18 | 1 | 2022-01-10 | 2022-01-01 | 2022-04-03 |
Theoplis | 2022-07-17 | 3 | 2022-07-11 | 2022-07-11 | 2022-10-02 |
Ashea | 2022-09-06 | 3 | 2022-07-11 | 2022-07-11 | 2022-10-02 |
Angela | 2022-07-19 | 3 | 2022-07-11 | 2022-07-11 | 2022-10-02 |
Essie | 2022-06-09 | 2 | 2022-04-04 | 2022-04-04 | 2022-07-10 |
Allyn | 2022-09-07 | 3 | 2022-07-11 | 2022-07-11 | 2022-10-02 |
Tanita | 2022-10-19 | 4 | 2022-10-03 | 2022-10-03 | 2022-12-31 |
Sherriann | 2022-01-16 | 1 | 2022-01-10 | 2022-01-01 | 2022-04-03 |
Raven | 2022-02-02 | 1 | 2022-01-10 | 2022-01-01 | 2022-04-03 |
Glenys | 2022-02-09 | 1 | 2022-01-10 | 2022-01-01 | 2022-04-03 |
Li | 2022-01-10 | 1 | 2022-01-10 | 2022-01-01 | 2022-04-03 |
Miana | 2022-07-19 | 3 | 2022-07-11 | 2022-07-11 | 2022-10-02 |
Sewell | 2022-05-05 | 2 | 2022-04-04 | 2022-04-04 | 2022-07-10 |
Wayland | 2022-09-22 | 3 | 2022-07-11 | 2022-07-11 | 2022-10-02 |
Nadean | 2022-09-20 | 3 | 2022-07-11 | 2022-07-11 | 2022-10-02 |
Gregoria | 2022-07-05 | 2 | 2022-04-04 | 2022-04-04 | 2022-07-10 |
Sumiye | 2022-03-02 | 1 | 2022-01-10 | 2022-01-01 | 2022-04-03 |
Norvell | 2022-09-09 | 3 | 2022-07-11 | 2022-07-11 | 2022-10-02 |
... | ... | ... | ... | ... | ... |
Liora | 2022-07-31 | 3 | 2022-07-11 | 2022-07-11 | 2022-10-02 |
Andrina | 2022-08-10 | 3 | 2022-07-11 | 2022-07-11 | 2022-10-02 |
Gay | 2022-06-08 | 2 | 2022-04-04 | 2022-04-04 | 2022-07-10 |
Dustyn | 2022-12-15 | 4 | 2022-10-03 | 2022-10-03 | 2022-12-31 |
Nasiya | 2022-05-25 | 2 | 2022-04-04 | 2022-04-04 | 2022-07-10 |
Belma | 2022-02-26 | 1 | 2022-01-10 | 2022-01-01 | 2022-04-03 |
Thena | 2022-05-28 | 2 | 2022-04-04 | 2022-04-04 | 2022-07-10 |
Cooper | 2022-06-12 | 2 | 2022-04-04 | 2022-04-04 | 2022-07-10 |
Adele | 2022-08-26 | 3 | 2022-07-11 | 2022-07-11 | 2022-10-02 |
Atiana | 2022-06-10 | 2 | 2022-04-04 | 2022-04-04 | 2022-07-10 |
Jamil | 2022-03-07 | 1 | 2022-01-10 | 2022-01-01 | 2022-04-03 |
Nalani | 2022-01-04 | 1 | 2022-01-10 | 2022-01-01 | 2022-04-03 |
Nathalie | 2022-11-26 | 4 | 2022-10-03 | 2022-10-03 | 2022-12-31 |
Duriel | 2022-08-13 | 3 | 2022-07-11 | 2022-07-11 | 2022-10-02 |
Yesenia | 2022-04-27 | 2 | 2022-04-04 | 2022-04-04 | 2022-07-10 |
Sherlie | 2022-01-25 | 1 | 2022-01-10 | 2022-01-01 | 2022-04-03 |
Brooksley | 2022-05-16 | 2 | 2022-04-04 | 2022-04-04 | 2022-07-10 |
Kristi | 2022-02-24 | 1 | 2022-01-10 | 2022-01-01 | 2022-04-03 |
Alethea | 2022-08-05 | 3 | 2022-07-11 | 2022-07-11 | 2022-10-02 |
Teandre | 2022-03-26 | 1 | 2022-01-10 | 2022-01-01 | 2022-04-03 |
Mohamedali | 2022-02-14 | 1 | 2022-01-10 | 2022-01-01 | 2022-04-03 |
Audray | 2022-05-26 | 2 | 2022-04-04 | 2022-04-04 | 2022-07-10 |
Elta | 2022-06-19 | 2 | 2022-04-04 | 2022-04-04 | 2022-07-10 |
Suzannah | 2022-05-14 | 2 | 2022-04-04 | 2022-04-04 | 2022-07-10 |
Cleve | 2022-07-18 | 3 | 2022-07-11 | 2022-07-11 | 2022-10-02 |
Promise | 2022-12-27 | 4 | 2022-10-03 | 2022-10-03 | 2022-12-31 |
Ian | 2022-06-25 | 2 | 2022-04-04 | 2022-04-04 | 2022-07-10 |
Shelva | 2022-09-02 | 3 | 2022-07-11 | 2022-07-11 | 2022-10-02 |
Aedan | 2022-11-05 | 4 | 2022-10-03 | 2022-10-03 | 2022-12-31 |
Dorianna | 2022-04-14 | 2 | 2022-04-04 | 2022-04-04 | 2022-07-10 |
Exercises
x |> full_join(y, join_by(key == key))
key | val_x | val_y |
---|---|---|
<dbl> | <chr> | <chr> |
1 | x1 | y1 |
2 | x2 | y2 |
3 | x3 | NA |
4 | NA | y3 |
x |> full_join(y, join_by(key == key), keep = TRUE)
key.x | val_x | key.y | val_y |
---|---|---|---|
<dbl> | <chr> | <dbl> | <chr> |
1 | x1 | 1 | y1 |
2 | x2 | 2 | y2 |
3 | x3 | NA | NA |
NA | NA | 4 | y3 |
- When finding if any party period overlapped with another party period we used q < q in the join_by()? Why? What happens if you remove this inequality?`