1 Zakład Bioinformatyki, Instytut Informatyki, Uniwersytet w Białymstoku
✉ Correspondence: Jarosław Kotowicz <j.kotowicz@uwb.edu.pl>
R for Data Science (rozdział 28)
Rysunki w oparciu o kody z książki R for Data Science.
Wykonywanie map (prostych)
Rozciągnięcie mapy i zapobieganie rozciągnięciu
# Outline of New Zealand, first drawn in raw long/lat units (the map is
# stretched), then with coord_quickmap() to prevent the stretching.
nz <- map_data("nz")
ggplot(data = nz, mapping = aes(x = long, y = lat, group = group)) +
  geom_polygon(colour = "black", fill = "white")

ggplot(data = nz, mapping = aes(x = long, y = lat, group = group)) +
  geom_polygon(colour = "black", fill = "white") +
  coord_quickmap()

Mapa świata
# World map outline drawn as polygons, one polygon per `group`.
world <- map_data("world")
ggplot(data = world, mapping = aes(x = long, y = lat, group = group)) +
  geom_polygon(colour = "black", fill = "white") +
  coord_quickmap()

Trochę wykresów ze statystyki
# Base bar chart of diamond cut counts, stored in `bar` for reuse.
# The plot is only built here, not printed.
bar <- ggplot(data = diamonds) +
  geom_bar(
    mapping = aes(x = cut, fill = cut),
    width = 1,
    show.legend = FALSE
  ) +
  theme(aspect.ratio = 1) +
  labs(x = NULL, y = NULL)
Słupkowe


Kołowe

Estetyka wykresów (co dorobić do wykresu, aby był czytelny)
Tytuł
# Scatter plot of engine displacement vs highway mileage with a title.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point(mapping = aes(color = class)) +
  geom_smooth(se = FALSE) +
  labs(title = "Fuel efficiency generally decreases with engine size")

Podtytuł i podpis
# Same plot, now with a subtitle and a caption in addition to the title.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point(mapping = aes(color = class)) +
  geom_smooth(se = FALSE) +
  labs(
    title = "Fuel efficiency generally decreases with engine size",
    subtitle = "Two seaters (sports cars) are an exception because of their light weight",
    caption = "Data from fueleconomy.gov"
  )

Osie
# Same plot with human-readable axis and legend titles.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point(mapping = aes(colour = class)) +
  geom_smooth(se = FALSE) +
  labs(
    x = "Engine displacement (L)",
    y = "Highway fuel economy (mpg)",
    colour = "Car type",
    title = "Fuel efficiency generally decreases with engine size",
    subtitle = "Two seaters (sports cars) are an exception because of their light weight",
    caption = "Data from fueleconomy.gov"
  )

Jak używać symboli i wzorów np. w nazwach osi
# Ten uniform random points; the seed makes the sample reproducible.
set.seed(2020)
df <- tibble(
  x = runif(10),
  y = runif(10)
)

# Axis titles given as unevaluated expressions (quote()) instead of strings,
# so they can carry symbols and formulas.
ggplot(data = df, mapping = aes(x = x, y = y)) +
  geom_point() +
  labs(
    x = quote(sum(x[i]^2, i == 1, n)),
    y = quote(alpha + beta + frac(delta, theta))
  )

Umieszczanie tekstu na rysunku
# One car per class: the row with the highest highway mileage.
# row_number(desc(hwy)) ranks rows inside each class and breaks ties by order
# of appearance, so exactly one row per class passes the filter.
best_in_class <- mpg %>%
  group_by(class) %>%
  filter(row_number(desc(hwy)) == 1) %>%
  # Drop the grouping: best_in_class is reused by later plots and should not
  # carry a hidden per-class grouping with it.
  ungroup()

# Label the selected cars with their model name, directly at the data point.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point(mapping = aes(colour = class)) +
  geom_text(mapping = aes(label = model), data = best_in_class)

Lepszy sposób umieszczania tekstu na rysunku
# Labels drawn in semi-transparent boxes (alpha = 0.5), shifted up by 2 units
# on the y axis (nudge_y) relative to the points they describe.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point(mapping = aes(colour = class)) +
  geom_label(
    mapping = aes(label = model),
    data = best_in_class,
    nudge_y = 2,
    alpha = 0.5
  )

Pakiet ggrepel i jego warstwa geom_label_repel - jeszcze lepszy sposób umieszczania tekstu na rysunku
# Labels placed by ggrepel; the labelled cars are additionally marked with
# hollow circles (shape = 1).
# NOTE(review): only the first point layer is jittered, so the hollow markers
# and labels are drawn at the unjittered coordinates - confirm this is intended.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point(mapping = aes(colour = class), position = "jitter") +
  geom_point(data = best_in_class, shape = 1, size = 3) +
  ggrepel::geom_label_repel(
    mapping = aes(label = model),
    data = best_in_class
  )

Tekst na rysunku zamiast legendy do niego plus funkcja theme
# Median displacement and highway mileage per class; used as label anchors.
class_avg <- mpg %>%
  group_by(class) %>%
  summarise(
    displ = median(displ),
    hwy = median(hwy)
  )

# Class names are written on the plot at the per-class medians and the legend
# is switched off (legend.position = "none").
ggplot(data = mpg, mapping = aes(x = displ, y = hwy, colour = class)) +
  ggrepel::geom_label_repel(
    mapping = aes(label = class),
    data = class_avg,
    size = 6,
    label.size = 0,
    segment.color = NA
  ) +
  geom_point() +
  theme(legend.position = "none")

Umieszczanie dłuższych napisów
# One-row table holding the annotation text, anchored at the largest observed
# displ and hwy values.
label <- mpg %>%
  summarise(
    displ = max(displ),
    hwy = max(hwy),
    label = "Increasing engine size is \nrelated to decreasing fuel economy."
  )

# Text is aligned by its top-right corner to the anchor point.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point() +
  geom_text(
    mapping = aes(label = label),
    data = label,
    hjust = "right",
    vjust = "top"
  )

# Same annotation, anchored at Inf on both axes instead of the data maxima.
label <- tibble(
  displ = Inf,
  hwy = Inf,
  label = "Increasing engine size is \nrelated to decreasing fuel economy."
)

ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point() +
  geom_text(
    mapping = aes(label = label),
    data = label,
    hjust = "right",
    vjust = "top"
  )

Ustawianie parametrów osi
# Reference plot with default axes.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point()

# y-axis breaks set explicitly: 15, 20, ..., 40.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point() +
  scale_y_continuous(breaks = seq(from = 15, to = 40, by = 5))

# labels = NULL on both axis scales.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point() +
  scale_x_continuous(labels = NULL) +
  scale_y_continuous(labels = NULL)

oraz geometria odcinków geom_segment
# Show the presidential data set as a table.
# The call is namespace-qualified like the other non-tidyverse calls in this
# file (ggrepel::, viridis::), so it does not depend on DT being attached.
DT::datatable(presidential)
# One horizontal segment per row, running from `start` to `end`.
# id = 33 + row_number() numbers the rows 34, 35, ...
# (presumably the ordinal number of the presidency - confirm).
presidential %>%
  mutate(id = 33 + row_number()) %>%
  ggplot(mapping = aes(x = start, y = id)) +
  geom_point() +
  geom_segment(mapping = aes(xend = end, yend = id)) +
  # x axis: no title, one break per term start, labelled as 'yy.
  scale_x_date(
    name = NULL,
    breaks = presidential$start,
    date_labels = "'%y"
  )

# Base scatter plot stored in `base`; the plot is only built here, not printed.
base <- ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point(mapping = aes(colour = class))
Gdzie chcemy legendę?
# The same base plot with the legend on each of the four sides in turn.
base +
  theme(legend.position = "left")

base +
  theme(legend.position = "top")

base +
  theme(legend.position = "bottom")

base +
  theme(legend.position = "right") # the default

Funkcja guides do pracy z legendą
# Legend at the bottom in a single row; override.aes sets size = 4 for the
# legend keys only.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point(mapping = aes(colour = class)) +
  geom_smooth(se = FALSE) +
  theme(legend.position = "bottom") +
  guides(
    colour = guide_legend(nrow = 1, override.aes = list(size = 4))
  )

# Same layout without overriding the legend key size, for comparison.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point(mapping = aes(colour = class)) +
  geom_smooth(se = FALSE) +
  theme(legend.position = "bottom") +
  guides(
    colour = guide_legend(nrow = 1)
  )

Geometria geom_bin2d
# 2D binned counts of carat vs price on the raw scale.
ggplot(data = diamonds, mapping = aes(x = carat, y = price)) +
  geom_bin2d()

# Same data with log10 applied to the variables inside aes().
ggplot(data = diamonds, mapping = aes(x = log10(carat), y = log10(price))) +
  geom_bin2d()

wraz ze skalowaniem logarytmicznym osi
# Untransformed variables in aes(); the log10 transform is applied by the
# axis scales instead.
ggplot(data = diamonds, mapping = aes(x = carat, y = price)) +
  geom_bin2d() +
  scale_x_log10() +
  scale_y_log10()

Zmiana wybieranych kolorów do mapowania zmiennych czynnikowych
# Points coloured by drive type using the default discrete colour scale.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point(mapping = aes(color = drv))


Proszę zobaczyć stronę z lepszym obrazowaniem kolorów z pakietu RColorBrewer
# Same plot with the ColorBrewer "Set2" palette.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point(mapping = aes(color = drv)) +
  scale_colour_brewer(palette = "Set2")

# Presidential terms coloured by party, with colours assigned by hand through
# a named vector (party name -> colour).
presidential %>%
  mutate(id = 33 + row_number()) %>%
  ggplot(mapping = aes(x = start, y = id, colour = party)) +
  geom_point() +
  geom_segment(mapping = aes(xend = end, yend = id)) +
  scale_colour_manual(
    values = c(Republican = "red", Democratic = "blue")
  )

# 10000 points with independent standard normal coordinates; the seed makes
# the sample reproducible.
set.seed(2020)
df <- tibble(
  x = rnorm(10000),
  y = rnorm(10000)
)
Układ współrzędnych (nierówne skale na osiach i co z tym zrobić)
# Hexagonal binning with the default coordinate system (unequal axis scales).
ggplot(data = df, mapping = aes(x = x, y = y)) +
  geom_hex()

# Same plot with coord_fixed() added.
ggplot(data = df, mapping = aes(x = x, y = y)) +
  geom_hex() +
  coord_fixed()

Trochę poprawy kolorów wypełnienia z pakietem viridis
# Hexagonal binning with the viridis fill scale.
ggplot(data = df, mapping = aes(x = x, y = y)) +
  geom_hex() +
  viridis::scale_fill_viridis() +
  coord_fixed()

Zoom na rysunek, czyli wyświetlamy fragment rysunku
# Zoom on the drawing: all of mpg is passed to the layers, and
# coord_cartesian() restricts the displayed window.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point(mapping = aes(color = class)) +
  geom_smooth() +
  coord_cartesian(xlim = c(5, 7), ylim = c(10, 30))

A to robi troszeczkę coś innego
# Rows outside the window are removed before plotting, so geom_smooth() is
# fitted only to the remaining rows.
mpg %>%
  filter(displ >= 5 & displ <= 7 & hwy >= 10 & hwy <= 30) %>%
  ggplot(mapping = aes(x = displ, y = hwy)) +
  geom_point(mapping = aes(color = class)) +
  geom_smooth()

Zoom na dane, czyli ograniczenie wyświetlanych danych (filtrowanie na różne sposoby)
# Two subsets of mpg, each plotted separately with its own default scales.
suv <- filter(mpg, class == "suv")
compact <- filter(mpg, class == "compact")

ggplot(data = suv, mapping = aes(x = displ, y = hwy, colour = drv)) +
  geom_point()

ggplot(data = compact, mapping = aes(x = displ, y = hwy, colour = drv)) +
  geom_point()

# Scales whose limits are taken from the full mpg data, so both subset plots
# share the same axes and colour mapping.
x_scale <- scale_x_continuous(limits = range(mpg[["displ"]]))
y_scale <- scale_y_continuous(limits = range(mpg[["hwy"]]))
col_scale <- scale_colour_discrete(limits = unique(mpg[["drv"]]))

ggplot(data = suv, mapping = aes(x = displ, y = hwy, colour = drv)) +
  geom_point() +
  x_scale +
  y_scale +
  col_scale

ggplot(data = compact, mapping = aes(x = displ, y = hwy, colour = drv)) +
  geom_point() +
  x_scale +
  y_scale +
  col_scale

Różne predefiniowane sposoby wyświetlania rysunków, czyli theme_XX
# Scatter plot rendered with the predefined theme_bw() theme.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point(mapping = aes(color = class)) +
  geom_smooth(se = FALSE) +
  theme_bw()

# Fully labelled plot (title, subtitle, caption, axis and legend titles).
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point(mapping = aes(colour = class)) +
  geom_smooth(se = FALSE) +
  labs(
    x = "Engine displacement (L)",
    y = "Highway fuel economy (mpg)",
    colour = "Car type",
    title = "Fuel efficiency generally decreases with engine size",
    subtitle = "Two seaters (sports cars) are an exception because of their light weight",
    caption = "Data from fueleconomy.gov"
  )

Zapisywanie rysunku do zmiennych globalnych
# The fully labelled plot stored in the variable `rysunek`; it is only built
# here, not printed.
rysunek <- ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point(mapping = aes(colour = class)) +
  geom_smooth(se = FALSE) +
  labs(
    x = "Engine displacement (L)",
    y = "Highway fuel economy (mpg)",
    colour = "Car type",
    title = "Fuel efficiency generally decreases with engine size",
    subtitle = "Two seaters (sports cars) are an exception because of their light weight",
    caption = "Data from fueleconomy.gov"
  )
Jakich bibliotek możemy jeszcze użyć
- Predefiniowane tematy
- Pomoc w modyfikowaniu parametrów rysunku
- ggThemeAssist i ggThemeAssistGadget
- Dla map
itd.
Praca domowa z dnia 17 marca 2020r. (do wykładu)
Wypisz do 20 bibliotek ze strony projektu R, które rozszerzają działanie pakietu ggplot2.
