Grundläggande dplyr

arrange ordnar raderna i data

# Turn the row names of mtcars into a regular column named "model".
mtcars <- tibble::rownames_to_column(mtcars, var = "model")
# Sort the rows by ascending mpg and show the first rows of the result.
head(arrange(mtcars, mpg))
##                 model  mpg cyl disp  hp drat    wt  qsec vs am gear carb
## 1  Cadillac Fleetwood 10.4   8  472 205 2.93 5.250 17.98  0  0    3    4
## 2 Lincoln Continental 10.4   8  460 215 3.00 5.424 17.82  0  0    3    4
## 3          Camaro Z28 13.3   8  350 245 3.73 3.840 15.41  0  0    3    4
## 4          Duster 360 14.3   8  360 245 3.21 3.570 15.84  0  0    3    4
## 5   Chrysler Imperial 14.7   8  440 230 3.23 5.345 17.42  0  0    3    4
## 6       Maserati Bora 15.0   8  301 335 3.54 3.570 14.60  0  1    5    8

arrange ordnar raderna i data

# Sort by mpg first; rows with equal mpg are then ordered by disp.
head(arrange(mtcars, mpg, disp))
##                 model  mpg cyl disp  hp drat    wt  qsec vs am gear carb
## 1 Lincoln Continental 10.4   8  460 215 3.00 5.424 17.82  0  0    3    4
## 2  Cadillac Fleetwood 10.4   8  472 205 2.93 5.250 17.98  0  0    3    4
## 3          Camaro Z28 13.3   8  350 245 3.73 3.840 15.41  0  0    3    4
## 4          Duster 360 14.3   8  360 245 3.21 3.570 15.84  0  0    3    4
## 5   Chrysler Imperial 14.7   8  440 230 3.23 5.345 17.42  0  0    3    4
## 6       Maserati Bora 15.0   8  301 335 3.54 3.570 14.60  0  1    5    8

filter väljer ut rader (observationer)

# Keep only the rows that satisfy the condition am == 1.
head(filter(mtcars, am == 1)) # only those with manual transmission
##            model  mpg cyl  disp  hp drat    wt  qsec vs am gear carb
## 1      Mazda RX4 21.0   6 160.0 110 3.90 2.620 16.46  0  1    4    4
## 2  Mazda RX4 Wag 21.0   6 160.0 110 3.90 2.875 17.02  0  1    4    4
## 3     Datsun 710 22.8   4 108.0  93 3.85 2.320 18.61  1  1    4    1
## 4       Fiat 128 32.4   4  78.7  66 4.08 2.200 19.47  1  1    4    1
## 5    Honda Civic 30.4   4  75.7  52 4.93 1.615 18.52  1  1    4    2
## 6 Toyota Corolla 33.9   4  71.1  65 4.22 1.835 19.90  1  1    4    1

filter väljer ut rader (observationer)

# Keep only the rows with mpg below 30.
head(filter(mtcars, mpg < 30))
##               model  mpg cyl disp  hp drat    wt  qsec vs am gear carb
## 1         Mazda RX4 21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
## 2     Mazda RX4 Wag 21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
## 3        Datsun 710 22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
## 4    Hornet 4 Drive 21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
## 5 Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
## 6           Valiant 18.1   6  225 105 2.76 3.460 20.22  1  0    3    1

mutate inför ny/transformerar variabel

# Add a new column lpm computed from mpg; the existing columns are kept.
# NOTE(review): 235 / mpg looks like the miles-per-US-gallon -> litres per
# 100 km conversion; confirm the intended unit behind the name "lpm".
head(mutate(mtcars, lpm = 235 / mpg))
##               model  mpg cyl disp  hp drat    wt  qsec vs am gear carb
## 1         Mazda RX4 21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
## 2     Mazda RX4 Wag 21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
## 3        Datsun 710 22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
## 4    Hornet 4 Drive 21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
## 5 Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
## 6           Valiant 18.1   6  225 105 2.76 3.460 20.22  1  0    3    1
##        lpm
## 1 11.19048
## 2 11.19048
## 3 10.30702
## 4 10.98131
## 5 12.56684
## 6 12.98343

select väljer ut variabler (kolumner)

# Keep only the columns model and mpg.
head(select(mtcars, model, mpg))
##               model  mpg
## 1         Mazda RX4 21.0
## 2     Mazda RX4 Wag 21.0
## 3        Datsun 710 22.8
## 4    Hornet 4 Drive 21.4
## 5 Hornet Sportabout 18.7
## 6           Valiant 18.1

Pipe %>%

Pipe %>%

Bestäm \((h \circ g \circ f)(a) = h(g(f(a)))\)

Tre olika sätt att räkna detta i R:

# 1. Intermediate variables: store each step's result, then pass it on.
b <- f(a)
c <- g(b)
h(c)

# 2. Nested calls: read inside-out, the innermost function is applied first.
h(g(f(a)))

# 3. Pipe: read top-down, each result becomes the input of the next function.
a %>%
    f() %>%
    g() %>%
    h()

Pipe %>%

# 1. Intermediate variables (the original mtcars is left untouched so the
#    other two variants, and any later code, still see the full data set).
mtcars_lpm <- mutate(mtcars, lpm = 235 / mpg)
mtcars_manual <- filter(mtcars_lpm, am == 1)
ggplot(mtcars_manual, aes(x = hp, y = lpm)) + geom_point()

# 2. Nested calls: the same steps, read inside-out.
ggplot(
    filter(
        mutate(mtcars, lpm = 235 / mpg),
        am == 1
    ),
    aes(x = hp, y = lpm)
) + geom_point()

# 3. Pipe: the same steps, read top-down. %>% binds tighter than +, so the
#    piped data reaches ggplot() before geom_point() is added.
mtcars %>%
    mutate(lpm = 235 / mpg) %>%
    filter(am == 1) %>%
    ggplot(aes(x = hp, y = lpm)) +
    geom_point()
    

Grundläggande ggplot2

ggplot2

En statistisk plot har beståndsdelar (“satsdelar”)

  • data
  • geom: typ av geometriska objekt (punkter, linjer, …)
  • coord: koordinatsystem
  • mapping: binder data till koordinatsystemets dimensioner/“aesthetics” (läge, färg, form, storlek, …)

ggplot2

En scatterplot

  • data: mpg och hp för ett antal bilar
  • geom: punkter
  • coord: Kartesiska
  • mapping: binder hp till position på x-axeln och mpg till position på y-axeln

ggplot2

# Scatterplot: hp on the x-axis, mpg on the y-axis, drawn as points.
ggplot(
    data = mtcars,
    mapping = aes(x = hp, y = mpg)
) +
    geom_point()

ggplot2

# Scatterplot of mpg against hp, with point size bound to wt and point
# colour bound to cyl.
ggplot(
    data = mtcars,
    mapping = aes(x = hp, y = mpg, size = wt, color = cyl)
) +
    geom_point()

ggplot2

# Scatterplot of mpg against hp, with point size bound to wt; cyl is
# converted to a factor before being mapped to colour.
ggplot(
    data = mtcars,
    mapping = aes(x = hp, y = mpg, size = wt, color = as.factor(cyl))
) +
    geom_point()