1 Zakład Bioinformatyki, Instytut Informatyki, Uniwersytet w Białymstoku

Correspondence: Jarosław Kotowicz <j.kotowicz@uwb.edu.pl>

1 Książki i inne rzeczy

  1. R for Data Science
  2. ggplot2: Elegant Graphics for Data Analysis
  3. Colors in R

2 Praca ggplot2

2.1 Czyścimy środowisko

rm(list=ls())

3 R for Data Science (rozdział 28)

library(tidyverse)

Rysunki w oparciu o kody z książki R for Data Science.

3.1 Wykonywanie map (prostych)

library(maps)

3.1.1 Rozciągnięcie mapy i zapobieganie rozciągnięciu

nz <- map_data("nz")
ggplot(nz, aes(long, lat, group = group)) +
  geom_polygon(fill = "white", colour = "black")

ggplot(nz, aes(long, lat, group = group)) +
  geom_polygon(fill = "white", colour = "black") +
  coord_quickmap()

3.1.2 Mapa świata

world <- map_data("world")
ggplot(world, aes(long, lat, group = group)) +
  geom_polygon(fill = "white", colour = "black") +
  coord_quickmap()

3.2 Trochę wykresów ze statystyki

bar <- ggplot(data = diamonds) + 
  geom_bar(
    mapping = aes(x = cut, fill = cut), 
    show.legend = FALSE,
    width = 1
  ) + 
  theme(aspect.ratio = 1) +
  labs(x = NULL, y = NULL)

3.2.1 Słupkowe

bar

bar + coord_flip()

3.2.2 Kołowe

bar + coord_polar()

3.3 Estetyka wykresów (co dorobić do wykresu, aby był czytelny)

3.3.1 Tytuł

ggplot(mpg, aes(displ, hwy)) +
  geom_point(aes(color = class)) +
  geom_smooth(se = FALSE) +
  labs(title = "Fuel efficiency generally decreases with engine size")

3.3.2 Podtytuł i podpis

ggplot(mpg, aes(displ, hwy)) +
  geom_point(aes(color = class)) +
  geom_smooth(se = FALSE) +
  labs(
    title = "Fuel efficiency generally decreases with engine size",
    subtitle = "Two seaters (sports cars) are an exception because of their light weight",
    caption = "Data from fueleconomy.gov"
  )

3.3.3 Osie

ggplot(mpg, aes(displ, hwy)) +
  geom_point(aes(colour = class)) +
  geom_smooth(se = FALSE) +
  labs(
    title = "Fuel efficiency generally decreases with engine size",
    subtitle = "Two seaters (sports cars) are an exception because of their light weight",
    caption = "Data from fueleconomy.gov",
    x = "Engine displacement (L)",
    y = "Highway fuel economy (mpg)",
    colour = "Car type"
  )

3.3.4 Jak używać symboli i wzorów np. w nazwach osi

set.seed(2020)
df <- tibble(
  x = runif(10),
  y = runif(10)
)
ggplot(df, aes(x, y)) +
  geom_point() +
  labs(
    x = quote(sum(x[i] ^ 2, i == 1, n)),
    y = quote(alpha + beta + frac(delta, theta))
  )

3.3.5 Umieszczanie tekstu na rysunku

best_in_class <- mpg %>%
  group_by(class) %>%
  filter(row_number(desc(hwy)) == 1)
ggplot(mpg, aes(displ, hwy)) +
  geom_point(aes(colour = class)) +
  geom_text(aes(label = model), data = best_in_class)

3.3.5.1 Lepszy sposób umieszczania tekstu na rysunku

ggplot(mpg, aes(displ, hwy)) +
  geom_point(aes(colour = class)) +
  geom_label(aes(label = model), data = best_in_class, nudge_y = 2, alpha = 0.5)

3.3.5.2 Pakiet ggrepel i jego warstwa geom_label_repel - jeszcze lepszy sposób umieszczania tekstu na rysunku

ggplot(mpg, aes(displ, hwy)) +
  geom_point(aes(colour = class), position = "jitter") +
  geom_point(size = 3, shape = 1, data = best_in_class) +
  ggrepel::geom_label_repel(aes(label = model), data = best_in_class)

3.3.5.3 Tekst na rysunku zamiast legendy do niego plus funkcja theme

class_avg <- mpg %>%
  group_by(class) %>%
  summarise(
    displ = median(displ),
    hwy = median(hwy)
  )
ggplot(mpg, aes(displ, hwy, colour = class)) +
  ggrepel::geom_label_repel(aes(label = class),
                            data = class_avg,
                            size = 6,
                            label.size = 0,
                            segment.color = NA
  ) +
  geom_point() +
  theme(legend.position = "none")

3.3.5.4 Umieszczanie dłuższych napisów

label <- mpg %>%
  summarise(
    displ = max(displ),
    hwy = max(hwy),
    label = "Increasing engine size is \nrelated to decreasing fuel economy."
  )
ggplot(mpg, aes(displ, hwy)) +
  geom_point() +
  geom_text(aes(label = label), data = label, vjust = "top", hjust = "right")

label <- tibble(
  displ = Inf,
  hwy = Inf,
  label = "Increasing engine size is \nrelated to decreasing fuel economy."
)
ggplot(mpg, aes(displ, hwy)) +
  geom_point() +
  geom_text(aes(label = label), data = label, vjust = "top", hjust = "right")

3.3.6 Ustawianie parametrów osi

ggplot(mpg, aes(displ, hwy)) +
  geom_point()

ggplot(mpg, aes(displ, hwy)) +
  geom_point() +
  scale_y_continuous(breaks = seq(15, 40, by = 5))

ggplot(mpg, aes(displ, hwy)) +
  geom_point() +
  scale_x_continuous(labels = NULL) +
  scale_y_continuous(labels = NULL)

3.3.6.1 oraz geometria odcinków geom_segment

library(DT)
presidential %>% datatable
presidential %>%
  mutate(id = 33 + row_number()) %>%
  ggplot(aes(start, id)) +
  geom_point() +
  geom_segment(aes(xend = end, yend = id)) +
  scale_x_date(NULL, breaks = presidential$start, date_labels = "'%y")

base <- ggplot(mpg, aes(displ, hwy)) +
  geom_point(aes(colour = class))

3.3.7 Gdzie chcemy legendę?

base + theme(legend.position = "left")

base + theme(legend.position = "top")

base + theme(legend.position = "bottom")

base + theme(legend.position = "right") # the default

3.3.7.1 Funkcja guides do pracy z legendą

ggplot(mpg, aes(displ, hwy)) +
  geom_point(aes(colour = class)) +
  geom_smooth(se = FALSE) +
  theme(legend.position = "bottom") +
  guides(colour = guide_legend(nrow = 1, override.aes = list(size = 4)))

ggplot(mpg, aes(displ, hwy)) +
  geom_point(aes(colour = class)) +
  geom_smooth(se = FALSE) +
  theme(legend.position = "bottom") +
  guides(colour = guide_legend(nrow = 1))

3.3.8 Geometria geom_bin2d

ggplot(diamonds, aes(carat, price)) +
  geom_bin2d()

ggplot(diamonds, aes(log10(carat), log10(price))) +
  geom_bin2d()

3.3.8.1 wraz ze skalowaniem logarytmicznym osi

ggplot(diamonds, aes(carat, price)) +
  geom_bin2d() + 
  scale_x_log10() + 
  scale_y_log10()

3.3.9 Zmiana wybieranych kolorów do mapowania zmiennych czynnikowych

ggplot(mpg, aes(displ, hwy)) +
  geom_point(aes(color = drv))

library(RColorBrewer)
display.brewer.all()

Proszę zobaczyć stronę z lepszym obrazowaniem kolorów z pakietu RColorBrewer

ggplot(mpg, aes(displ, hwy)) +
  geom_point(aes(color = drv)) +
  scale_colour_brewer(palette = "Set2")

presidential %>%
  mutate(id = 33 + row_number()) %>%
  ggplot(aes(start, id, colour = party)) +
  geom_point() +
  geom_segment(aes(xend = end, yend = id)) +
  scale_colour_manual(values = c(Republican = "red", Democratic = "blue"))

set.seed(2020)
df <- tibble(
  x = rnorm(10000),
  y = rnorm(10000)
)

3.3.10 Układ współrzędnych (nierówne skale na osiach i co z tym zrobić)

ggplot(df, aes(x, y)) +
  geom_hex()

ggplot(df, aes(x, y)) +
  geom_hex() +
  coord_fixed()

3.3.10.1 Trochę poprawy kolorów wypełnienia z pakietem viridis

ggplot(df, aes(x, y)) +
  geom_hex() +
  viridis::scale_fill_viridis() +
  coord_fixed()

3.3.11 Zoom na rysunek, czyli wyświetlamy fragment rysunku

ggplot(mpg, mapping = aes(displ, hwy)) +
  geom_point(aes(color = class)) +
  geom_smooth() +
  coord_cartesian(xlim = c(5, 7), ylim = c(10, 30))

3.3.11.0.1 A to robi troszeczkę coś innego
mpg %>%
  filter(displ >= 5, displ <= 7, hwy >= 10, hwy <= 30) %>%
  ggplot(aes(displ, hwy)) +
  geom_point(aes(color = class)) +
  geom_smooth()

3.3.12 Zoom na dane, czyli ograniczenie wyświetlanych danych (filtrowanie na różne sposoby)

suv <- mpg %>% filter(class == "suv")
compact <- mpg %>% filter(class == "compact")
ggplot(suv, aes(displ, hwy, colour = drv)) +
  geom_point()

ggplot(compact, aes(displ, hwy, colour = drv)) +
  geom_point()

x_scale <- scale_x_continuous(limits = range(mpg$displ))
y_scale <- scale_y_continuous(limits = range(mpg$hwy))
col_scale <- scale_colour_discrete(limits = unique(mpg$drv))
ggplot(suv, aes(displ, hwy, colour = drv)) +
  geom_point() +
  x_scale +
  y_scale +
  col_scale

ggplot(compact, aes(displ, hwy, colour = drv)) +
  geom_point() +
  x_scale +
  y_scale +
  col_scale

3.3.13 Różne predefiniowane sposoby wyświetlania rysunków, czyli theme_XX

ggplot(mpg, aes(displ, hwy)) +
  geom_point(aes(color = class)) +
  geom_smooth(se = FALSE) +
  theme_bw()

ggplot(mpg, aes(displ, hwy)) +
  geom_point(aes(colour = class)) +
  geom_smooth(se = FALSE) +
  labs(
    title = "Fuel efficiency generally decreases with engine size",
    subtitle = "Two seaters (sports cars) are an exception because of their light weight",
    caption = "Data from fueleconomy.gov",
    x = "Engine displacement (L)",
    y = "Highway fuel economy (mpg)",
    colour = "Car type"
  )

3.3.14 Zapisywanie rysunku do zmiennych globalnych

rysunek <- ggplot(mpg, aes(displ, hwy)) +
  geom_point(aes(colour = class)) +
  geom_smooth(se = FALSE) +
  labs(
    title = "Fuel efficiency generally decreases with engine size",
    subtitle = "Two seaters (sports cars) are an exception because of their light weight",
    caption = "Data from fueleconomy.gov",
    x = "Engine displacement (L)",
    y = "Highway fuel economy (mpg)",
    colour = "Car type"
  )

4 Jakich bibliotek możemy jeszcze użyć

  1. Predefiniowane tematy
  2. Pomoc w modyfikowaniu parametrów rysunku
  3. Dla map

itd.

5 Praca domowa z dnia 17 marca 2020r. (do wykładu)

Wypisz do 20 bibliotek ze strony projektu R, które rozszerzają działanie pakietu ggplot2.

---
title: "Wprowadzenie do  środowiska *R* - grafika z ggplot2"
author:
- Jarosław Kotowicz:
    correspondence: no
    email: j.kotowicz@uwb.edu.pl
    institute: IIUwB
date: "17 marca 2020"
output:
  html_notebook:
    fig_caption: yes
    highlight: haddock
    number_sections: yes
    pandoc_args:
    - --lua-filter=scholarly-metadata.lua
    - --lua-filter=author-info-blocks.lua
    theme: cerulean
    toc: yes
bibliography: InfEko.bib
institute:
- IIUwB: Zakład Bioinformatyki, Instytut Informatyki, Uniwersytet w Białymstoku
csl: big-data-and-information-analytics.csl
always_allow_html: yes
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

# Książki i inne rzeczy

1. [R for Data Science](https://r4ds.had.co.nz/index.html)
2. [ggplot2: Elegant Graphics for Data Analysis](https://ggplot2-book.org/)
3. [Colors in R](http://www.stat.columbia.edu/~tzheng/files/Rcolor.pdf)

# Praca ggplot2

## Czyścimy środowisko
```{r czyszczenie_danych, echo=TRUE}
# Start from a clean slate: remove every object that ls() reports in the
# environment this chunk is evaluated in.
rm(list = ls())
```

# R for Data Science (rozdział 28)
```{r pakiet_tidyverse}
library(tidyverse)
```

Rysunki w oparciu o kody z książki **R for Data Science**.

## Wykonywanie map (prostych)

```{r message=FALSE, warning=FALSE}
library(maps)
```

### Rozciągnięcie mapy i zapobieganie rozciągnięciu
```{r}
# Outline data for the "nz" map; it supplies the long, lat and group
# columns used by the plot below.
nz <- map_data(map = "nz")

# Outline drawn without coord_quickmap(), for comparison with the next
# chunk, which adds it.
ggplot(data = nz, mapping = aes(x = long, y = lat, group = group)) +
  geom_polygon(colour = "black", fill = "white")
```

```{r}
# Same outline as in the previous chunk, now with coord_quickmap() added
# so that the map is not stretched.
ggplot(data = nz, mapping = aes(x = long, y = lat, group = group)) +
  geom_polygon(colour = "black", fill = "white") +
  coord_quickmap()
```

### Mapa świata
```{r}
world <- map_data("world")

ggplot(world, aes(long, lat, group = group)) +
  geom_polygon(fill = "white", colour = "black") +
  coord_quickmap()
```
## Trochę wykresów ze statystyki

```{r}
# Bar chart of the diamonds data with one bar per cut, filled by cut.
# Stored in `bar` so the following chunks can reuse it with different
# coordinate systems. Axis titles and the legend are suppressed, and the
# panel is forced to be square.
bar <- ggplot(diamonds) +
  geom_bar(aes(x = cut, fill = cut), show.legend = FALSE, width = 1) +
  labs(x = NULL, y = NULL) +
  theme(aspect.ratio = 1)
```

### Słupkowe
```{r}
bar
```

```{r}
bar + coord_flip()
```

### Kołowe
```{r}
bar + coord_polar()
```
## Estetyka wykresów (co dorobić do wykresu, aby był czytelny)

### Tytuł
```{r}
ggplot(mpg, aes(displ, hwy)) +
  geom_point(aes(color = class)) +
  geom_smooth(se = FALSE) +
  labs(title = "Fuel efficiency generally decreases with engine size")
```
### Podtytuł i podpis
```{r}
ggplot(mpg, aes(displ, hwy)) +
  geom_point(aes(color = class)) +
  geom_smooth(se = FALSE) +
  labs(
    title = "Fuel efficiency generally decreases with engine size",
    subtitle = "Two seaters (sports cars) are an exception because of their light weight",
    caption = "Data from fueleconomy.gov"
  )
```

### Osie

```{r}
ggplot(mpg, aes(displ, hwy)) +
  geom_point(aes(colour = class)) +
  geom_smooth(se = FALSE) +
  labs(
    title = "Fuel efficiency generally decreases with engine size",
    subtitle = "Two seaters (sports cars) are an exception because of their light weight",
    caption = "Data from fueleconomy.gov",
    x = "Engine displacement (L)",
    y = "Highway fuel economy (mpg)",
    colour = "Car type"
  )
```
### Jak używać symboli i wzorów  np. w nazwach osi
```{r}
set.seed(2020)
df <- tibble(
  x = runif(10),
  y = runif(10)
)
```

```{r}
# Axis titles are passed as unevaluated expressions (built with quote())
# instead of character strings, so they can carry symbols and formulas.
ggplot(data = df, mapping = aes(x = x, y = y)) +
  geom_point() +
  labs(
    y = quote(alpha + beta + frac(delta, theta)),
    x = quote(sum(x[i] ^ 2, i == 1, n))
  )
```

### Umieszczanie tekstu na rysunku

```{r}
# For each vehicle class keep the single row ranked first by descending
# hwy; row_number() assigns distinct ranks even to ties, so exactly one
# row per class survives.
# ungroup() drops the grouping afterwards, so best_in_class is a plain
# tibble and later code that reuses it does not silently work per class.
best_in_class <- mpg %>%
  group_by(class) %>%
  filter(row_number(desc(hwy)) == 1) %>%
  ungroup()

# Scatter plot of all cars, with the model name printed at each
# best-in-class point.
ggplot(mpg, aes(displ, hwy)) +
  geom_point(aes(colour = class)) +
  geom_text(aes(label = model), data = best_in_class)
```

#### Lepszy sposób umieszczania tekstu na rysunku

```{r}
ggplot(mpg, aes(displ, hwy)) +
  geom_point(aes(colour = class)) +
  geom_label(aes(label = model), data = best_in_class, nudge_y = 2, alpha = 0.5)
```

#### Pakiet **ggrepel** i jego warstwa *geom_label_repel* - jeszcze lepszy sposób umieszczania tekstu na rysunku

```{r}
ggplot(mpg, aes(displ, hwy)) +
  geom_point(aes(colour = class), position = "jitter") +
  geom_point(size = 3, shape = 1, data = best_in_class) +
  ggrepel::geom_label_repel(aes(label = model), data = best_in_class)
```

#### Tekst na rysunku zamiast legendy do niego plus funkcja **theme**
```{r}
# Median displ and median hwy per class: one label position per class.
class_avg <- mpg %>%
  group_by(class) %>%
  summarise(displ = median(displ), hwy = median(hwy))

# Write each class name on the plot at its median position and switch
# the legend off, so the labels take over the legend's role.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy, colour = class)) +
  ggrepel::geom_label_repel(
    mapping = aes(label = class),
    data = class_avg,
    size = 6,
    label.size = 0,
    segment.color = NA
  ) +
  geom_point() +
  theme(legend.position = "none")
```
#### Umieszczanie dłuższych napisów 
```{r}
# One-row data frame holding the annotation text together with the
# largest displ and the largest hwy found in mpg, which serve as the
# text's position.
label <- summarise(
  mpg,
  displ = max(displ),
  hwy = max(hwy),
  label = "Increasing engine size is \nrelated to decreasing fuel economy."
)

# The text is aligned by its top and right edges to that position.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point() +
  geom_text(
    mapping = aes(label = label),
    data = label,
    hjust = "right",
    vjust = "top"
  )
```

```{r}
label <- tibble(
  displ = Inf,
  hwy = Inf,
  label = "Increasing engine size is \nrelated to decreasing fuel economy."
)

ggplot(mpg, aes(displ, hwy)) +
  geom_point() +
  geom_text(aes(label = label), data = label, vjust = "top", hjust = "right")
```
### Ustawianie parametrów osi

```{r}
ggplot(mpg, aes(displ, hwy)) +
  geom_point()
```

```{r}
ggplot(mpg, aes(displ, hwy)) +
  geom_point() +
  scale_y_continuous(breaks = seq(15, 40, by = 5))
```

```{r}
ggplot(mpg, aes(displ, hwy)) +
  geom_point() +
  scale_x_continuous(labels = NULL) +
  scale_y_continuous(labels = NULL)
```
####  oraz geometria odcinków *geom_segment* 

```{r message=FALSE, warning=FALSE}
library(DT)
```

```{r}
# Render the presidential data with DT's datatable().
presidential %>% datatable()
```

```{r}
# One horizontal segment per row of presidential, running from start to
# end at height id, with a point at start. The x axis has no title, a
# break at every start date, and labels formatted as "'%y".
# NOTE(review): the offset 33 presumably makes id match the ordinal
# number of each presidency - confirm against the dataset.
presidential %>%
  mutate(id = 33 + row_number()) %>%
  ggplot(mapping = aes(x = start, y = id)) +
  geom_point() +
  geom_segment(mapping = aes(xend = end, yend = id)) +
  scale_x_date(
    name = NULL,
    breaks = presidential$start,
    date_labels = "'%y"
  )
```

```{r}
base <- ggplot(mpg, aes(displ, hwy)) +
  geom_point(aes(colour = class))
```

### Gdzie chcemy legendę?

```{r}
base + theme(legend.position = "left")
```

```{r}
base + theme(legend.position = "top")
```

```{r}
base + theme(legend.position = "bottom")
```

```{r}
base + theme(legend.position = "right") # the default
```

#### Funkcja *guides* do pracy z legendą
```{r}
# Legend placed at the bottom and laid out in a single row; override.aes
# sets size = 4 for the legend keys only.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point(mapping = aes(colour = class)) +
  geom_smooth(se = FALSE) +
  guides(
    colour = guide_legend(
      nrow = 1,
      override.aes = list(size = 4)
    )
  ) +
  theme(legend.position = "bottom")
```

```{r}
ggplot(mpg, aes(displ, hwy)) +
  geom_point(aes(colour = class)) +
  geom_smooth(se = FALSE) +
  theme(legend.position = "bottom") +
  guides(colour = guide_legend(nrow = 1))
```

### Geometria *geom_bin2d*
```{r}
ggplot(diamonds, aes(carat, price)) +
  geom_bin2d()
```

```{r}
ggplot(diamonds, aes(log10(carat), log10(price))) +
  geom_bin2d()
```
#### wraz ze skalowaniem logarytmicznym osi
```{r}
ggplot(diamonds, aes(carat, price)) +
  geom_bin2d() + 
  scale_x_log10() + 
  scale_y_log10()
```
### Zmiana wybieranych kolorów do mapowania zmiennych czynnikowych
```{r}
ggplot(mpg, aes(displ, hwy)) +
  geom_point(aes(color = drv))
```

```{r}
library(RColorBrewer)
```

```{r}
display.brewer.all()
```

[Proszę zobaczyć stronę z lepszym obrazowaniem kolorów z pakietu RColorBrewer](https://rdrr.io/cran/RColorBrewer/man/ColorBrewer.html "ColorBrewer: ColorBrewer palette")
```{r}
ggplot(mpg, aes(displ, hwy)) +
  geom_point(aes(color = drv)) +
  scale_colour_brewer(palette = "Set2")
```

```{r}
# Presidential terms coloured by party, with the party-to-colour
# assignment given explicitly by name instead of a default palette.
presidential %>%
  mutate(id = 33 + row_number()) %>%
  ggplot(mapping = aes(x = start, y = id, colour = party)) +
  geom_point() +
  geom_segment(mapping = aes(xend = end, yend = id)) +
  scale_colour_manual(
    values = c(Republican = "red", Democratic = "blue")
  )
```

```{r}
set.seed(2020)
df <- tibble(
  x = rnorm(10000),
  y = rnorm(10000)
)
```

### Układ współrzędnych (nierówne skale na osiach i co z tym zrobić)

```{r}
ggplot(df, aes(x, y)) +
  geom_hex()
```

```{r}
ggplot(df, aes(x, y)) +
  geom_hex() +
  coord_fixed()
```

#### Trochę poprawy kolorów wypełnienia z pakietem **viridis**
```{r}
ggplot(df, aes(x, y)) +
  geom_hex() +
  viridis::scale_fill_viridis() +
  coord_fixed()
```
### Zoom na rysunek, czyli wyświetlamy fragment rysunku
```{r}
# Zoom on the picture: the plot is built from the whole of mpg, and
# coord_cartesian() then restricts the displayed window to displ in
# [5, 7] and hwy in [10, 30].
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point(mapping = aes(color = class)) +
  geom_smooth() +
  coord_cartesian(xlim = c(5, 7), ylim = c(10, 30))
```
#### A to robi troszeczkę coś innego

```{r}
# Unlike the coord_cartesian() version, the rows are filtered before
# plotting, so geom_point() and geom_smooth() only receive the cars with
# displ in [5, 7] and hwy in [10, 30].
mpg %>%
  filter(displ >= 5 & displ <= 7 & hwy >= 10 & hwy <= 30) %>%
  ggplot(mapping = aes(x = displ, y = hwy)) +
  geom_point(mapping = aes(color = class)) +
  geom_smooth()
```
### Zoom na dane, czyli ograniczenie wyświetlanych danych (filtrowanie na różne sposoby)
```{r}
# Two subsets of mpg, one per vehicle class; each is plotted on its own
# in the chunks that follow.
suv <- filter(mpg, class == "suv")
compact <- filter(mpg, class == "compact")
```

```{r}
ggplot(suv, aes(displ, hwy, colour = drv)) +
  geom_point()
```

```{r}
ggplot(compact, aes(displ, hwy, colour = drv)) +
  geom_point()
```

```{r}
# Scales whose limits are taken from the full mpg data rather than from a
# subset, so they can be added to the suv and compact plots alike.
x_scale <- scale_x_continuous(limits = range(mpg[["displ"]]))
y_scale <- scale_y_continuous(limits = range(mpg[["hwy"]]))
col_scale <- scale_colour_discrete(limits = unique(mpg[["drv"]]))
```

```{r}
ggplot(suv, aes(displ, hwy, colour = drv)) +
  geom_point() +
  x_scale +
  y_scale +
  col_scale
```

```{r}
ggplot(compact, aes(displ, hwy, colour = drv)) +
  geom_point() +
  x_scale +
  y_scale +
  col_scale
```
### Różne predefiniowane sposoby wyświetlania rysunków, czyli **theme_XX**

```{r}
ggplot(mpg, aes(displ, hwy)) +
  geom_point(aes(color = class)) +
  geom_smooth(se = FALSE) +
  theme_bw()
```

```{r}
ggplot(mpg, aes(displ, hwy)) +
  geom_point(aes(colour = class)) +
  geom_smooth(se = FALSE) +
  labs(
    title = "Fuel efficiency generally decreases with engine size",
    subtitle = "Two seaters (sports cars) are an exception because of their light weight",
    caption = "Data from fueleconomy.gov",
    x = "Engine displacement (L)",
    y = "Highway fuel economy (mpg)",
    colour = "Car type"
  )
```

### Zapisywanie rysunku do zmiennych globalnych
```{r}
# Build the fully labelled plot and keep it in the variable `rysunek`
# instead of printing it; nothing is drawn by this chunk.
rysunek <- ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point(mapping = aes(colour = class)) +
  geom_smooth(se = FALSE) +
  labs(
    title = "Fuel efficiency generally decreases with engine size",
    subtitle = "Two seaters (sports cars) are an exception because of their light weight",
    caption = "Data from fueleconomy.gov",
    x = "Engine displacement (L)",
    y = "Highway fuel economy (mpg)",
    colour = "Car type"
  )
```


# Jakich bibliotek możemy jeszcze użyć

1. Predefiniowane tematy
    - **ggthemes**
    - **hrbrthemes**
2. Pomoc w modyfikowaniu parametrów rysunku
    - **ggThemeAssist** i *ggThemeAssistGadget*
3. Dla map
    - **ggmap**

itd.


# Praca domowa z dnia 17 marca 2020r. (do wykładu)
Wypisz do 20 bibliotek ze [strony projektu R](https://cran.r-project.org), które rozszerzają działanie pakietu **ggplot2**.
