1 Zakład Bioinformatyki, Instytut Informatyki, Uniwersytet w Białymstoku
✉ Correspondence: Jarosław Kotowicz <j.kotowicz@uwb.edu.pl>
R for Data Science (rozdział 28)
Rysunki w oparciu o kody z książki R for Data Science.
Wykonywanie map (prostych)
Rozciągnięcie mapy i zapobieganie rozciągnięciu
nz <- map_data("nz")
ggplot(nz, aes(long, lat, group = group)) +
geom_polygon(fill = "white", colour = "black")
ggplot(nz, aes(long, lat, group = group)) +
geom_polygon(fill = "white", colour = "black") +
coord_quickmap()
Mapa świata
world <- map_data("world")
ggplot(world, aes(long, lat, group = group)) +
geom_polygon(fill = "white", colour = "black") +
coord_quickmap()
Trochę wykresów ze statystyki
bar <- ggplot(data = diamonds) +
geom_bar(
mapping = aes(x = cut, fill = cut),
show.legend = FALSE,
width = 1
) +
theme(aspect.ratio = 1) +
labs(x = NULL, y = NULL)
Słupkowe
Kołowe
Estetyka wykresów (co dorobić do wykresu, aby był czytelny)
Tytuł
ggplot(mpg, aes(displ, hwy)) +
geom_point(aes(color = class)) +
geom_smooth(se = FALSE) +
labs(title = "Fuel efficiency generally decreases with engine size")
Podtytuł i podpis
ggplot(mpg, aes(displ, hwy)) +
geom_point(aes(color = class)) +
geom_smooth(se = FALSE) +
labs(
title = "Fuel efficiency generally decreases with engine size",
subtitle = "Two seaters (sports cars) are an exception because of their light weight",
caption = "Data from fueleconomy.gov"
)
Osie
ggplot(mpg, aes(displ, hwy)) +
geom_point(aes(colour = class)) +
geom_smooth(se = FALSE) +
labs(
title = "Fuel efficiency generally decreases with engine size",
subtitle = "Two seaters (sports cars) are an exception because of their light weight",
caption = "Data from fueleconomy.gov",
x = "Engine displacement (L)",
y = "Highway fuel economy (mpg)",
colour = "Car type"
)
Jak używać symboli i wzorów np. w nazwach osi
set.seed(2020)
df <- tibble(
x = runif(10),
y = runif(10)
)
ggplot(df, aes(x, y)) +
geom_point() +
labs(
x = quote(sum(x[i] ^ 2, i == 1, n)),
y = quote(alpha + beta + frac(delta, theta))
)
Umieszczanie tekstu na rysunku
best_in_class <- mpg %>%
group_by(class) %>%
filter(row_number(desc(hwy)) == 1)
ggplot(mpg, aes(displ, hwy)) +
geom_point(aes(colour = class)) +
geom_text(aes(label = model), data = best_in_class)
Lepszy sposób umieszczania tekstu na rysunku
ggplot(mpg, aes(displ, hwy)) +
geom_point(aes(colour = class)) +
geom_label(aes(label = model), data = best_in_class, nudge_y = 2, alpha = 0.5)
Pakiet ggrepel i jego warstwa geom_label_repel - jeszcze lepszy sposób umieszczania tekstu na rysunku
ggplot(mpg, aes(displ, hwy)) +
geom_point(aes(colour = class), position = "jitter") +
geom_point(size = 3, shape = 1, data = best_in_class) +
ggrepel::geom_label_repel(aes(label = model), data = best_in_class)
Tekst na rysunku zamiast legendy do niego plus funkcja theme
class_avg <- mpg %>%
group_by(class) %>%
summarise(
displ = median(displ),
hwy = median(hwy)
)
ggplot(mpg, aes(displ, hwy, colour = class)) +
ggrepel::geom_label_repel(aes(label = class),
data = class_avg,
size = 6,
label.size = 0,
segment.color = NA
) +
geom_point() +
theme(legend.position = "none")
Umieszczanie dłuższych napisów
label <- mpg %>%
summarise(
displ = max(displ),
hwy = max(hwy),
label = "Increasing engine size is \nrelated to decreasing fuel economy."
)
ggplot(mpg, aes(displ, hwy)) +
geom_point() +
geom_text(aes(label = label), data = label, vjust = "top", hjust = "right")
label <- tibble(
displ = Inf,
hwy = Inf,
label = "Increasing engine size is \nrelated to decreasing fuel economy."
)
ggplot(mpg, aes(displ, hwy)) +
geom_point() +
geom_text(aes(label = label), data = label, vjust = "top", hjust = "right")
Ustawianie parametrów osi
ggplot(mpg, aes(displ, hwy)) +
geom_point()
ggplot(mpg, aes(displ, hwy)) +
geom_point() +
scale_y_continuous(breaks = seq(15, 40, by = 5))
ggplot(mpg, aes(displ, hwy)) +
geom_point() +
scale_x_continuous(labels = NULL) +
scale_y_continuous(labels = NULL)
oraz geometria odcinków geom_segment
presidential %>% datatable
presidential %>%
mutate(id = 33 + row_number()) %>%
ggplot(aes(start, id)) +
geom_point() +
geom_segment(aes(xend = end, yend = id)) +
scale_x_date(NULL, breaks = presidential$start, date_labels = "'%y")
base <- ggplot(mpg, aes(displ, hwy)) +
geom_point(aes(colour = class))
Gdzie chcemy legendę?
base + theme(legend.position = "left")
base + theme(legend.position = "top")
base + theme(legend.position = "bottom")
base + theme(legend.position = "right") # the default
Funkcja guides do pracy z legendą
ggplot(mpg, aes(displ, hwy)) +
geom_point(aes(colour = class)) +
geom_smooth(se = FALSE) +
theme(legend.position = "bottom") +
guides(colour = guide_legend(nrow = 1, override.aes = list(size = 4)))
ggplot(mpg, aes(displ, hwy)) +
geom_point(aes(colour = class)) +
geom_smooth(se = FALSE) +
theme(legend.position = "bottom") +
guides(colour = guide_legend(nrow = 1))
Geometria geom_bin2d
ggplot(diamonds, aes(carat, price)) +
geom_bin2d()
ggplot(diamonds, aes(log10(carat), log10(price))) +
geom_bin2d()
wraz ze skalowaniem logarytmicznym osi
ggplot(diamonds, aes(carat, price)) +
geom_bin2d() +
scale_x_log10() +
scale_y_log10()
Zmiana wybieranych kolorów do mapowania zmiennych czynnikowych
ggplot(mpg, aes(displ, hwy)) +
geom_point(aes(color = drv))
Proszę zobaczyć stronę z lepszym obrazowaniem kolorów z pakietu RColorBrewer
ggplot(mpg, aes(displ, hwy)) +
geom_point(aes(color = drv)) +
scale_colour_brewer(palette = "Set2")
presidential %>%
mutate(id = 33 + row_number()) %>%
ggplot(aes(start, id, colour = party)) +
geom_point() +
geom_segment(aes(xend = end, yend = id)) +
scale_colour_manual(values = c(Republican = "red", Democratic = "blue"))
set.seed(2020)
df <- tibble(
x = rnorm(10000),
y = rnorm(10000)
)
Układ współrzędnych (nierówne skale na osiach i co z tym zrobić)
ggplot(df, aes(x, y)) +
geom_hex()
ggplot(df, aes(x, y)) +
geom_hex() +
coord_fixed()
Trochę poprawy kolorów wypełnienia z pakietem viridis
ggplot(df, aes(x, y)) +
geom_hex() +
viridis::scale_fill_viridis() +
coord_fixed()
Zoom na rysunek, czyli wyświetlamy fragment rysunku
ggplot(mpg, mapping = aes(displ, hwy)) +
geom_point(aes(color = class)) +
geom_smooth() +
coord_cartesian(xlim = c(5, 7), ylim = c(10, 30))
A to robi troszeczkę coś innego
mpg %>%
filter(displ >= 5, displ <= 7, hwy >= 10, hwy <= 30) %>%
ggplot(aes(displ, hwy)) +
geom_point(aes(color = class)) +
geom_smooth()
Zoom na dane, czyli ograniczenie wyświetlanych danych (filtrowanie na różne sposoby)
suv <- mpg %>% filter(class == "suv")
compact <- mpg %>% filter(class == "compact")
ggplot(suv, aes(displ, hwy, colour = drv)) +
geom_point()
ggplot(compact, aes(displ, hwy, colour = drv)) +
geom_point()
x_scale <- scale_x_continuous(limits = range(mpg$displ))
y_scale <- scale_y_continuous(limits = range(mpg$hwy))
col_scale <- scale_colour_discrete(limits = unique(mpg$drv))
ggplot(suv, aes(displ, hwy, colour = drv)) +
geom_point() +
x_scale +
y_scale +
col_scale
ggplot(compact, aes(displ, hwy, colour = drv)) +
geom_point() +
x_scale +
y_scale +
col_scale
Różne predefiniowane sposoby wyświetlania rysunków, czyli theme_XX
ggplot(mpg, aes(displ, hwy)) +
geom_point(aes(color = class)) +
geom_smooth(se = FALSE) +
theme_bw()
ggplot(mpg, aes(displ, hwy)) +
geom_point(aes(colour = class)) +
geom_smooth(se = FALSE) +
labs(
title = "Fuel efficiency generally decreases with engine size",
subtitle = "Two seaters (sports cars) are an exception because of their light weight",
caption = "Data from fueleconomy.gov",
x = "Engine displacement (L)",
y = "Highway fuel economy (mpg)",
colour = "Car type"
)
Zapisywanie rysunku do zmiennych globalnych
rysunek <- ggplot(mpg, aes(displ, hwy)) +
geom_point(aes(colour = class)) +
geom_smooth(se = FALSE) +
labs(
title = "Fuel efficiency generally decreases with engine size",
subtitle = "Two seaters (sports cars) are an exception because of their light weight",
caption = "Data from fueleconomy.gov",
x = "Engine displacement (L)",
y = "Highway fuel economy (mpg)",
colour = "Car type"
)
Jakich bibliotek możemy jeszcze użyć
- Predefiniowane tematy
- Pomoc w modyfikowaniu parametrów rysunku
- ggThemeAssist i ggThemeAssistGadget
- Dla map
itd.
Praca domowa z dnia 17 marca 2020r. (do wykładu)
Wypisz do 20 bibliotek ze strony projektu R, które rozszerzają działania pakietu ggplot2.
---
title: "Wprowadzenie do  środowiska *R* - grafika z ggplot2"
author:
- Jarosław Kotowicz:
    correspondence: no
    email: j.kotowicz@uwb.edu.pl
    institute: IIUwB
date: "17 marca 2020"
output:
  html_notebook:
    fig_caption: yes
    highlight: haddock
    number_sections: yes
    pandoc_args:
    - --lua-filter=scholarly-metadata.lua
    - --lua-filter=author-info-blocks.lua
    theme: cerulean
    toc: yes
bibliography: InfEko.bib
institute:
- IIUwB: Zakład Bioinformatyki, Instytut Informatyki, Uniwersytet w Białymstoku
csl: big-data-and-information-analytics.csl
always_allow_html: yes
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

# Książki i inne rzeczy

1. [R for Data Science](https://r4ds.had.co.nz/index.html)
2. [ggplot2: Elegant Graphics for Data Analysis](https://ggplot2-book.org/)
3. [Colors in R](http://www.stat.columbia.edu/~tzheng/files/Rcolor.pdf)

# Praca ggplot2

## Czyścimy środowisko
```{r czyszczenie_danych, echo=TRUE}
# Remove every object that ls() lists in the current environment so the
# notebook starts from an empty workspace.
# NOTE(review): this presumably leaves attached packages and options()
# untouched — confirm that a full session restart is not what is wanted here.
rm(list=ls())
```

# R for Data Science (rozdział 28)
```{r pakiet_tidyverse}
library(tidyverse)
```

Rysunki w oparciu o kody z książki **R for Data Science**.

## Wykonywanie map (prostych)

```{r message=FALSE, warning=FALSE}
# Attach the maps package ahead of the map_data() calls in the chunks below.
# NOTE(review): attaching maps after tidyverse presumably masks purrr::map();
# write purrr::map() explicitly if it is ever needed in this notebook — confirm.
library(maps)
```

### Rozciągnięcie mapy i zapobieganie rozciągnięciu
```{r}
# Load the "nz" map data used by this plot and the next one.
nz <- map_data("nz")

# Draw the outline with no map-aware coordinate system added.
ggplot(data = nz, mapping = aes(x = long, y = lat, group = group)) +
  geom_polygon(color = "black", fill = "white")
```

```{r}
# The same nz outline, this time with coord_quickmap() added.
ggplot(data = nz, mapping = aes(x = long, y = lat, group = group)) +
  geom_polygon(color = "black", fill = "white") +
  coord_quickmap()
```

### Mapa świata
```{r}
# Load the "world" map data.
world <- map_data("world")

# White polygons with black borders, drawn with coord_quickmap().
ggplot(data = world, mapping = aes(x = long, y = lat, group = group)) +
  geom_polygon(color = "black", fill = "white") +
  coord_quickmap()
```
## Trochę wykresów ze statystyki

```{r}
# Bar chart of the diamonds data with one bar per cut, filled by cut.
# Stored in `bar` (not printed here) so later chunks can add coordinate
# systems to it.
bar <- ggplot(diamonds) +
  geom_bar(aes(x = cut, fill = cut), width = 1, show.legend = FALSE) +
  labs(x = NULL, y = NULL) +
  theme(aspect.ratio = 1)
```

### Słupkowe
```{r}
bar
```

```{r}
# The stored bar chart with coord_flip() added.
bar + coord_flip()
```

### Kołowe
```{r}
# The stored bar chart with coord_polar() added.
bar + coord_polar()
```
## Estetyka wykresów (co dorobić do wykresu, aby był czytelny)

### Tytuł
```{r}
# Scatter plot of hwy against displ, coloured by class, with a smoother
# (no confidence band) and a plot title.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point(mapping = aes(colour = class)) +
  geom_smooth(se = FALSE) +
  labs(
    title = "Fuel efficiency generally decreases with engine size"
  )
```
### Podtytuł i podpis
```{r}
# Same scatter plot, now with a subtitle and a caption next to the title.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point(mapping = aes(colour = class)) +
  geom_smooth(se = FALSE) +
  labs(
    caption = "Data from fueleconomy.gov",
    subtitle = "Two seaters (sports cars) are an exception because of their light weight",
    title = "Fuel efficiency generally decreases with engine size"
  )
```

### Osie

```{r}
# Fully labelled scatter plot: title, subtitle, caption, both axis titles and
# the title of the colour legend.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point(mapping = aes(color = class)) +
  geom_smooth(se = FALSE) +
  labs(
    x = "Engine displacement (L)",
    y = "Highway fuel economy (mpg)",
    colour = "Car type",
    title = "Fuel efficiency generally decreases with engine size",
    subtitle = "Two seaters (sports cars) are an exception because of their light weight",
    caption = "Data from fueleconomy.gov"
  )
```
### Jak używać symboli i wzorów  np. w nazwach osi
```{r}
# Fixed seed so the ten uniform random (x, y) points are reproducible.
set.seed(2020)
df <- tibble(x = runif(10), y = runif(10))
```

```{r}
# Axis titles are passed as quoted expressions rather than strings, so that
# they can be shown as symbols and formulas.
ggplot(data = df, mapping = aes(x = x, y = y)) +
  geom_point() +
  labs(
    y = quote(alpha + beta + frac(delta, theta)),
    x = quote(sum(x[i] ^ 2, i == 1, n))
  )
```

### Umieszczanie tekstu na rysunku

```{r}
# For every car class keep the single row ranked first by descending hwy.
# ungroup() drops the grouping again, so `best_in_class` is a plain tibble
# when the following chunks reuse it and no later dplyr verb silently runs
# per class.
best_in_class <- mpg %>%
  group_by(class) %>%
  filter(row_number(desc(hwy)) == 1) %>%
  ungroup()

# Label those rows with their model name using plain text.
ggplot(mpg, aes(displ, hwy)) +
  geom_point(aes(colour = class)) +
  geom_text(aes(label = model), data = best_in_class)
```

#### Lepszy sposób umieszczania tekstu na rysunku

```{r}
# Model names drawn with geom_label(): semi-transparent (alpha = 0.5) and
# nudged by 2 on the y axis.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point(mapping = aes(color = class)) +
  geom_label(
    mapping = aes(label = model),
    data = best_in_class,
    alpha = 0.5,
    nudge_y = 2
  )
```

#### Pakiet **ggrepel** i jego warstwa *geom_label_repel* - jeszcze lepszy sposób umieszczania tekstu na rysunku

```{r}
# Jittered points, an extra hollow marker (shape 1, size 3) on each
# best_in_class row, and model-name labels placed by ggrepel.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point(mapping = aes(color = class), position = "jitter") +
  geom_point(data = best_in_class, shape = 1, size = 3) +
  ggrepel::geom_label_repel(
    mapping = aes(label = model),
    data = best_in_class
  )
```

#### Tekst na rysunku zamiast legendy do niego plus funkcja **theme**
```{r}
# Median displ and hwy per class; these medians are the label positions.
class_avg <- summarise(
  group_by(mpg, class),
  displ = median(displ),
  hwy = median(hwy)
)

# Write the class names directly on the plot and hide the legend.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy, color = class)) +
  ggrepel::geom_label_repel(
    mapping = aes(label = class),
    data = class_avg,
    size = 6,
    label.size = 0,
    segment.color = NA
  ) +
  geom_point() +
  theme(legend.position = "none")
```
#### Umieszczanie dłuższych napisów 
```{r}
# One-row table: the label text positioned at the maximum displ and hwy
# found in mpg.
label <- summarise(
  mpg,
  displ = max(displ),
  hwy = max(hwy),
  label = "Increasing engine size is \nrelated to decreasing fuel economy."
)

# Draw the text justified to the top and right of that position.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point() +
  geom_text(
    mapping = aes(label = label),
    data = label,
    hjust = "right",
    vjust = "top"
  )
```

```{r}
# Same label text, but positioned at Inf on both axes instead of at the data
# maxima.
label <- tibble(
  displ = Inf,
  hwy = Inf,
  label = "Increasing engine size is \nrelated to decreasing fuel economy."
)

ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point() +
  geom_text(
    mapping = aes(label = label),
    data = label,
    hjust = "right",
    vjust = "top"
  )
```
### Ustawianie parametrów osi

```{r}
# Baseline scatter plot with the default axis settings.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point()
```

```{r}
# y-axis breaks set explicitly: 15 to 40 in steps of 5.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point() +
  scale_y_continuous(breaks = seq(from = 15, to = 40, by = 5))
```

```{r}
# labels = NULL is passed to both the x and the y scale.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point() +
  scale_y_continuous(labels = NULL) +
  scale_x_continuous(labels = NULL)
```
####  oraz geometria odcinków *geom_segment* 

```{r message=FALSE, warning=FALSE}
library(DT)
```

```{r}
# Display the presidential data set with DT's datatable().
datatable(presidential)
```

```{r}
# One point at `start` and one horizontal segment from `start` to `end` per
# row; `id` is 33 plus the row number. The x axis has no title and is broken
# at every `start` date, labelled as an apostrophe plus two-digit year.
ggplot(
  data = mutate(presidential, id = 33 + row_number()),
  mapping = aes(x = start, y = id)
) +
  geom_point() +
  geom_segment(mapping = aes(xend = end, yend = id)) +
  scale_x_date(
    name = NULL,
    breaks = presidential$start,
    date_labels = "'%y"
  )
```

```{r}
# Base scatter plot coloured by class; the legend-position chunks below add
# a theme() to it.
base <- ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point(mapping = aes(color = class))
```

### Gdzie chcemy legendę?

```{r}
base + theme(legend.position = "left")
```

```{r}
base + theme(legend.position = "top")
```

```{r}
base + theme(legend.position = "bottom")
```

```{r}
base + theme(legend.position = "right") # the default
```

#### Funkcja *guides* do pracy z legendą
```{r}
# Legend at the bottom in a single row; override.aes sets size = 4 for the
# legend keys only.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point(mapping = aes(color = class)) +
  geom_smooth(se = FALSE) +
  guides(
    colour = guide_legend(override.aes = list(size = 4), nrow = 1)
  ) +
  theme(legend.position = "bottom")
```

```{r}
# Legend at the bottom in a single row, without overriding the key size.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point(mapping = aes(color = class)) +
  geom_smooth(se = FALSE) +
  guides(colour = guide_legend(nrow = 1)) +
  theme(legend.position = "bottom")
```

### Geometria *geom_bin2d*
```{r}
# geom_bin2d() layer of price against carat on the untransformed scales.
ggplot(data = diamonds, mapping = aes(x = carat, y = price)) +
  geom_bin2d()
```

```{r}
# log10 is applied inside aes(), i.e. to the variables themselves.
ggplot(
  data = diamonds,
  mapping = aes(x = log10(carat), y = log10(price))
) +
  geom_bin2d()
```
#### wraz ze skalowaniem logarytmicznym osi
```{r}
# The raw variables are mapped and log10 scales are added to both axes.
ggplot(data = diamonds, mapping = aes(x = carat, y = price)) +
  geom_bin2d() +
  scale_y_log10() +
  scale_x_log10()
```
### Zmiana wybieranych kolorów do mapowania zmiennych czynnikowych
```{r}
# Points coloured by drv with the default discrete colour scale.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point(mapping = aes(colour = drv))
```

```{r}
library(RColorBrewer)
```

```{r}
display.brewer.all()
```

[Proszę zobaczyć stronę z lepszym obrazowaniem kolorów z pakietu RColorBrewer](https://rdrr.io/cran/RColorBrewer/man/ColorBrewer.html "ColorBrewer: ColorBrewer palette")
```{r}
# Same plot with the ColorBrewer "Set2" palette.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point(mapping = aes(colour = drv)) +
  scale_color_brewer(palette = "Set2")
```

```{r}
# Segment plot of the presidential data coloured by party, with the two
# party colours assigned by name.
ggplot(
  data = mutate(presidential, id = 33 + row_number()),
  mapping = aes(x = start, y = id, color = party)
) +
  geom_point() +
  geom_segment(mapping = aes(xend = end, yend = id)) +
  scale_color_manual(
    values = c(Republican = "red", Democratic = "blue")
  )
```

```{r}
# Fixed seed, then 10000 normal random values each for x and y. This
# overwrites the earlier `df`.
set.seed(2020)
df <- tibble(x = rnorm(10000), y = rnorm(10000))
```

### Układ współrzędnych (nierówne skale na osiach i co z tym zrobić)

```{r}
# geom_hex() layer of df with the default coordinate system.
ggplot(data = df, mapping = aes(x = x, y = y)) +
  geom_hex()
```

```{r}
# Same hex plot with coord_fixed() added.
ggplot(data = df, mapping = aes(x = x, y = y)) +
  geom_hex() +
  coord_fixed()
```

#### Trochę poprawy kolorów wypełnienia z pakietem **viridis**
```{r}
# Hex plot with coord_fixed() and the viridis fill scale.
ggplot(data = df, mapping = aes(x = x, y = y)) +
  geom_hex() +
  coord_fixed() +
  viridis::scale_fill_viridis()
```
### Zoom na rysunek, czyli wyświetlamy fragment rysunku
```{r}
# Zoom via the coordinate system: the full mpg data is passed to the layers
# and coord_cartesian() sets the x and y limits.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point(mapping = aes(colour = class)) +
  geom_smooth() +
  coord_cartesian(ylim = c(10, 30), xlim = c(5, 7))
```
##### A to robi troszeczkę coś innego

```{r}
# Here the rows are filtered before plotting (5 <= displ <= 7 and
# 10 <= hwy <= 30), so the smoother only sees the remaining rows.
ggplot(
  data = filter(mpg, between(displ, 5, 7), between(hwy, 10, 30)),
  mapping = aes(x = displ, y = hwy)
) +
  geom_point(mapping = aes(colour = class)) +
  geom_smooth()
```
### Zoom na dane, czyli ograniczenie wyświetlanych danych (filtrowanie na różne sposoby)
```{r}
# Two subsets of mpg, one per car class, compared in the plots below.
suv <- filter(mpg, class == "suv")
compact <- filter(mpg, class == "compact")
```

```{r}
# SUV subset, coloured by drv, with no scales set explicitly.
ggplot(data = suv, mapping = aes(x = displ, y = hwy, color = drv)) +
  geom_point()
```

```{r}
# Compact subset, coloured by drv, with no scales set explicitly.
ggplot(data = compact, mapping = aes(x = displ, y = hwy, color = drv)) +
  geom_point()
```

```{r}
# Scales whose limits come from the full mpg data, so the same scales can be
# added to the plots of both subsets.
x_scale <- scale_x_continuous(limits = range(mpg[["displ"]]))
y_scale <- scale_y_continuous(limits = range(mpg[["hwy"]]))
col_scale <- scale_colour_discrete(limits = unique(mpg[["drv"]]))
```

```{r}
# SUV subset drawn with the shared x, y and colour scales.
ggplot(data = suv, mapping = aes(x = displ, y = hwy, color = drv)) +
  geom_point() +
  x_scale + y_scale + col_scale
```

```{r}
# Compact subset drawn with the shared x, y and colour scales.
ggplot(data = compact, mapping = aes(x = displ, y = hwy, color = drv)) +
  geom_point() +
  x_scale + y_scale + col_scale
```
### Różne predefiniowane sposoby wyświetlania rysunków, czyli **theme_XX**

```{r}
# Scatter plot with smoother, using the predefined theme_bw() theme.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point(mapping = aes(colour = class)) +
  geom_smooth(se = FALSE) +
  theme_bw()
```

```{r}
# Fully labelled scatter plot, printed directly.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point(mapping = aes(color = class)) +
  geom_smooth(se = FALSE) +
  labs(
    x = "Engine displacement (L)",
    y = "Highway fuel economy (mpg)",
    colour = "Car type",
    title = "Fuel efficiency generally decreases with engine size",
    subtitle = "Two seaters (sports cars) are an exception because of their light weight",
    caption = "Data from fueleconomy.gov"
  )
```

### Zapisywanie rysunku do zmiennych globalnych
```{r}
# The same labelled plot, stored in `rysunek` instead of being printed.
rysunek <- ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point(mapping = aes(color = class)) +
  geom_smooth(se = FALSE) +
  labs(
    x = "Engine displacement (L)",
    y = "Highway fuel economy (mpg)",
    colour = "Car type",
    title = "Fuel efficiency generally decreases with engine size",
    subtitle = "Two seaters (sports cars) are an exception because of their light weight",
    caption = "Data from fueleconomy.gov"
  )
```


# Jakich bibliotek możemy jeszcze użyć

1. Predefiniowane tematy
  - **ggthemes**
  - **hrbrthemes**
2. Pomoc w modyfikowaniu parametrów rysunku
  - **ggThemeAssist** i *ggThemeAssistGadget*
3. Dla map
  - **ggmap**

itd.


# Praca domowa z dnia 17 marca 2020r. (do wykładu)
Wypisz do 20 bibliotek ze [strony projektu R](https://cran.r-project.org), które rozszerzają działania pakietu **ggplot2**.
