1 Zakład Bioinformatyki, Instytut Informatyki, Uniwersytet w Białymstoku

Correspondence: Jarosław Kotowicz <>

1 Bazy danych ekonomicznych i korzystanie z nich w środowisku R

  1. Unia Europejska
  1. OECD
  2. World Bank
  3. NArodowy Bank Polski - API
  4. Serwisy internetowe (dane finansowe)
  5. Inne bazy danych

2 Wybrane biblioteki w środowisku R

Wybrałem kilka bibiolotek dostępnych w środowisku R

load("Packages.RData")

Chwila na obejrzenie

packages %>% datatable()

2.1 Komisja Europejska

download.file(url = "http://ec.europa.eu/economy_finance/db_indicators/ameco/documents/ameco0.zip",
              destfile = "ameco0.zip" )
unzip("ameco0.zip")

Przeanalizujemy jedną z tabel z AMECO.

load("AMECO1.RData")
AMECO1.pl <- AMECO1 %>% filter(COUNTRY == "Poland") %>% select(-68)
AMECO1.pl
AMECO1.pl.pop <- AMECO1.pl %>% 
  slice(2:5) %>% 
  pivot_longer(
    -c(CODE, COUNTRY, `SUB-CHAPTER`, TITLE, UNIT),
    names_to = "Year",
    values_to = "Population"
  ) %>%
  select(-c(1:3, 5))
AMECO1.pl.pop %>% datatable()

Ilustracja graficzna

AMECO1.pl.pop %>% ggplot(aes(x = Year, y = Population)) +
  geom_point(aes(color = TITLE)) +
  geom_line(aes(x = Year, y = Population, group = TITLE, color = TITLE), stat="identity") +
  scale_x_discrete(breaks = seq(1960, 2022, 5)) +
  scale_y_continuous(breaks = seq(0, 40000, 5000), labels = c(0,paste(seq(5, 40, 5), "mln."))) +
  labs(title = "Populacja Polski w latach 1960 - 2020",
       colour = "Grupy wiekowe",
       x = "Rok",
       y = "Populacja Polski",
       caption = "Opracowanie własne na podstawie danych KE.") +
  theme(plot.title = element_text(hjust=0.5))

2.2 Eurostat

library(eurostat)

Podczytanie struktur magazynu danych

tabela.danych <- get_eurostat_toc()
tabela.danych %>% head(12)

Przeszukiwanie magazynu danych w poszukiwaniu danych związanych ze słowem kluczowym

wynik.education <- search_eurostat("education")
wynik.education %>% head(8)
wynik.population <- search_eurostat("Population")
wynik.population %>% head(8)

Importowanie konkretnego zbioru danych z magazynu danych Eurosatu

wynik.population[6,]
wynik.population.06 <- get_eurostat(wynik.population[6,2])
trying URL 'https://ec.europa.eu/eurostat/estat-navtree-portlet-prod/BulkDownloadListing?sort=1&file=data%2Fdemo_r_pjanind2.tsv.gz'
Content type 'application/octet-stream;charset=UTF-8' length 604613 bytes (590 KB)
downloaded 590 KB

Table demo_r_pjanind2 cached at C:\Users\user\AppData\Local\Temp\RtmpsnjNfc/eurostat/demo_r_pjanind2_date_code_TF.rds
wynik.population.06 %>% 
  filter(str_detect(geo, "PL"))

2.3 OECD

library(OECD)
# -------->> Get a data frame with information on all available datasets.
oecd_datasets <- get_datasets()
oecd_datasets %>% dim
[1] 1366    2
# -------->> Search codes and descriptions of available OECD series
oecd_employment <- search_dataset("employment", oecd_datasets)
oecd_employment %>% datatable()
# -------->> Get the data structure of a dataset.
oecd_employment_DUR_D <- get_data_structure("DUR_D")
oecd_employment_DUR_D %>% class
[1] "list"
oecd_employment_DUR_D %>% length()
[1] 12
oecd_employment_DUR_D %>% names
 [1] "VAR_DESC"        "COUNTRY"         "TIME"            "SEX"             "AGE"             "DURATION"       
 [7] "FREQUENCY"       "OBS_STATUS"      "UNIT"            "POWERCODE"       "REFERENCEPERIOD" "TIME_FORMAT"    
oecd_employment_DUR_D$POWERCODE
oecd_employment_DUR_D$DURATION
oecd_employment_DUR_D$AGE
oecd_employment_DUR_D$COUNTRY
oecd_employment_DUR_D$UNIT %>% datatable()

Można odnaleźć na stronie OECD metadane zbioru danych.

# -------->> Download OECD data sets
oecd_employment_DUR_D_df <- get_dataset("DUR_D",
                                        filter = list(c("POL"), c("MEN")),
                                        start_time = 2000, end_time = 2018)
oecd_employment_DUR_D_df_f <- oecd_employment_DUR_D_df %>%
  filter(DURATION == "UN", AGE != "1524", AGE !="6599", AGE != "900000")
oecd_employment_DUR_D_df_f %>% head(12)

Wykres

oecd_employment_DUR_D_df_f %>% 
  ggplot(aes(x = obsTime, y = obsValue)) +
  geom_point(aes(color = AGE)) +
  geom_line(aes(x = obsTime, y = obsValue, group = AGE, color = AGE), stat="identity") +
  scale_x_discrete(breaks = seq(2000, 2018, 3)) +
 scale_y_continuous(breaks = seq(0, 1250, 250), labels = c(0,paste(seq(250, 1250, 250), "tyś."))) +
  labs(title = "Bezrobotni meżczyźni w Polsce wg grup wiekowych",
       colour = "Grupy wiekowe",
       x = "Rok",
       y = "Liczba bezrobotnych",
       caption = "Opracowanie własne na podstawie danych OECD.") +
  theme(plot.title = element_text(hjust=0.5))

2.4 Dane finansowe (biblioteki quantmod i tidyquant)

load("WebUrls.RData")
web_urls %>% datatable

Strona quantmod

library(quantmod)
library(tidyquant)
== Need to Learn tidyquant? ====================================================
Business Science offers a 1-hour course - Learning Lab #9: Performance Analysis & Portfolio Optimization with tidyquant!
</> Learn more at: https://university.business-science.io/p/learning-labs-pro </>

2.4.1 Skąd można pobrać dane

  1. Strona Yahoo Finance
  2. Strona Google Finance
  3. St. Louis Federal Reserve Bank’s FRED system
  4. Currency and Metals Data from Oanda.com

Użycie

getSymbols("USD/EUR",src="oanda")
[1] "USD/EUR"
USDEUR %>% class
[1] "xts" "zoo"
USDEUR %>% dim
[1] 179   1
USDEUR %>% colnames()
[1] "USD.EUR"

2.4.2 Microsoft

getSymbols('MSFT')
[1] "MSFT"
MSFT %>% class
[1] "xts" "zoo"
MSFT %>% dim
[1] 3333    6
MSFT %>% colnames()
[1] "MSFT.Open"     "MSFT.High"     "MSFT.Low"      "MSFT.Close"    "MSFT.Volume"   "MSFT.Adjusted"
barChart(MSFT)

candleChart(MSFT, multi.col = TRUE, theme="white")

MSFT.20 <- MSFT[3313:3333,]
barChart(MSFT.20)

candleChart(MSFT.20, multi.col = TRUE, theme="white")

library(TTR)
MSFT.my <- tq_get("MSFT", from = "2017-01-01", to = "2019-12-31")
# Bar Chart
MSFT.my %>%
    ggplot(aes(x = date, y = close)) +
    geom_barchart(aes(open = open, high = high, low = low, close = close)) +
    geom_ma(color = "darkgreen") +
    coord_x_date(xlim = c("2019-10-01", "2019-12-31"),
                 ylim = c(130, 165)) +
    labs(title = "Wykres słupkowy kursu akcji Microsoft od 01.10 do 31.12 w roku 2019",
       x = "Data",
       y = "Cena zamknięcia",
       caption = "Opracowanie własne na podstawie danych Yahoo Finance.") +
  theme(plot.title = element_text(hjust=0.5))

MSFT.my %>%
    ggplot(aes(x = date, y = close)) +
    geom_candlestick(aes(open = open, high = high, low = low, close = close)) +
    geom_ma(color = "darkgreen") +
    coord_x_date(xlim = c("2019-10-01", "2019-12-31"),
                 ylim = c(130, 165)) +
  labs(title = "Wykres świecowy kursu akcji Microsoft od 01.10 do 31.12 w roku 2019",
       x = "Data",
       y = "Cena zamknięcia",
       caption = "Opracowanie własne na podstawie danych Yahoo Finance.") +
  theme(plot.title = element_text(hjust=0.5))

2.5 Narodowy Bank Polski i biblioteka rnbp

library(rnbp)

2.5.1 Kursy walut

waluta_EUR_A <- get_current_exchangerate("A", "EUR")
waluta_EUR_A %>% class
[1] "nbp_api_response"
waluta_EUR_A$content
$table
[1] "A"

$currency
[1] "euro"

$code
[1] "EUR"

$rates
NA
waluta_EUR_A <- get_exchangerate_from("A", "EUR", Sys.Date() - 180)
waluta_EUR_A$content
$table
[1] "A"

$currency
[1] "euro"

$code
[1] "EUR"

$rates
NA
waluta_EUR_A <- get_exchangerate_tables_from_interval(table = "A", from = Sys.Date() - 93, to = Sys.Date())
waluta_EUR_A$content
waluta_EUR_A <- get_exchangerate_from_interval(table = "A", currency_code = "EUR", 
                                                      from = Sys.Date() - 93, to = Sys.Date())
waluta_EUR_A$content
$table
[1] "A"

$currency
[1] "euro"

$code
[1] "EUR"

$rates
NA
waluta_EUR_A <- get_last_n_exchangerates("A", "EUR", 93)
waluta_EUR_A$content[[4]] %>% datatable

2.5.2 Ceny złota w NBP

2.5.2.1 Z ostatnich n dni

gold_93 <- get_last_n_goldprices(93)
gold_93$content
gold_93 <- get_goldprice_from_interval(Sys.Date() - 93, Sys.Date())
gold_93$content
---
title: "Informatyka ekonomiczna (kierunek informatyka i ekonometria) - wykład 4"
author:
- Jarosław Kotowicz:
    correspondence: no
    email: j.kotowicz@uwb.edu.pl
    institute: IIUwB
date: "31 marca 2020"
output:
  html_notebook:
    fig_caption: yes
    highlight: haddock
    number_sections: yes
    pandoc_args:
    - --lua-filter=scholarly-metadata.lua
    - --lua-filter=author-info-blocks.lua
    theme: cerulean
    toc: yes
institute:
- IIUwB: Zakład Bioinformatyki, Instytut Informatyki, Uniwersytet w Białymstoku
csl: big-data-and-information-analytics.csl
always_allow_html: yes
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

# Bazy danych ekonomicznych i korzystanie z nich w środowisku R

1. Unia Europejska 
  - [Komisja Europejska](https://ec.europa.eu/info/business-economy-euro/indicators-statistics/economic-databases_en)
    - [Macro-economic database AMECO](https://ec.europa.eu/info/business-economy-euro/indicators-statistics/economic-databases/macro-economic-database-ameco_en)
    - [Business and consumer surveys](https://ec.europa.eu/info/business-economy-euro/indicators-statistics/economic-databases/business-and-consumer-surveys_en)
    - [Price and Cost Competitiveness](https://ec.europa.eu/info/business-economy-euro/indicators-statistics/economic-databases/price-and-cost-competitiveness_en)
    - [KLEMS data](https://euklems.eu/)
  - [EBC](http://sdw.ecb.europa.eu/browse.do?node=9689727)
  - [Eurostat](https://ec.europa.eu/eurostat/data/database)
2. [OECD](https://data.oecd.org/)
3. [World Bank](https://data.worldbank.org/)
4. [NArodowy Bank Polski - API](http://api.nbp.pl/#kursyWalut)
5. Serwisy internetowe (dane finansowe)
6. Inne bazy danych
    
# Wybrane biblioteki w środowisku **R**

```{r message=TRUE, warning=TRUE, include=FALSE}
rm(list = ls())
library(tidyverse)
library(DT)
```

Wybrałem kilka bibiolotek dostępnych w środowisku **R**
```{r}
load("Packages.RData")
```

Chwila na obejrzenie
```{r}
packages %>% datatable()
```
## Komisja Europejska

```{r eval=FALSE, purl = TRUE}
download.file(url = "http://ec.europa.eu/economy_finance/db_indicators/ameco/documents/ameco0.zip",
              destfile = "ameco0.zip" )
unzip("ameco0.zip")
```

Przeanalizujemy jedną z tabel z *AMECO*.
```{r}
load("AMECO1.RData")
```

```{r}
AMECO1.pl <- AMECO1 %>% filter(COUNTRY == "Poland") %>% select(-68)
AMECO1.pl
```

```{r}
AMECO1.pl.pop <- AMECO1.pl %>% 
  slice(2:5) %>% 
  pivot_longer(
    -c(CODE, COUNTRY, `SUB-CHAPTER`, TITLE, UNIT),
    names_to = "Year",
    values_to = "Population"
  ) %>%
  select(-c(1:3, 5))
AMECO1.pl.pop %>% datatable()
```

Ilustracja graficzna
```{r fig.height=5, fig.width=10}
AMECO1.pl.pop %>% ggplot(aes(x = Year, y = Population)) +
  geom_point(aes(color = TITLE)) +
  geom_line(aes(x = Year, y = Population, group = TITLE, color = TITLE), stat="identity") +
  scale_x_discrete(breaks = seq(1960, 2022, 5)) +
  scale_y_continuous(breaks = seq(0, 40000, 5000), labels = c(0,paste(seq(5, 40, 5), "mln."))) +
  labs(title = "Populacja Polski w latach 1960 - 2020",
       colour = "Grupy wiekowe",
       x = "Rok",
       y = "Populacja Polski",
       caption = "Opracowanie własne na podstawie danych KE.") +
  theme(plot.title = element_text(hjust=0.5))
```

## Eurostat

```{r}
library(eurostat)
```

Podczytanie struktur magazynu danych
```{r}
tabela.danych <- get_eurostat_toc()
tabela.danych %>% head(12)
```

Przeszukiwanie magazynu danych w poszukiwaniu danych związanych ze słowem kluczowym
```{r}
wynik.education <- search_eurostat("education")
wynik.education %>% head(8)
```

```{r}
wynik.population <- search_eurostat("Population")
wynik.population %>% head(8)
```

Importowanie konkretnego zbioru danych z magazynu danych Eurosatu
```{r}
wynik.population[6,]
wynik.population.06 <- get_eurostat(wynik.population[6,2])
```

```{r}
wynik.population.06 %>% 
  filter(str_detect(geo, "PL"))
```

```{r message=FALSE, warning=FALSE, include=FALSE}
detach(package:eurostat)
```


## OECD

```{r}
library(OECD)
```

```{r}
# -------->> Get a data frame with information on all available datasets.
oecd_datasets <- get_datasets()
```

```{r}
oecd_datasets %>% dim
```

```{r}
# -------->> Search codes and descriptions of available OECD series
oecd_employment <- search_dataset("employment", oecd_datasets)
```

```{r}
oecd_employment %>% datatable()
```

```{r}
# -------->> Get the data structure of a dataset.
oecd_employment_DUR_D <- get_data_structure("DUR_D")
```

```{r}
oecd_employment_DUR_D %>% class
oecd_employment_DUR_D %>% length()
oecd_employment_DUR_D %>% names

```

```{r}
oecd_employment_DUR_D$POWERCODE
```

```{r}
oecd_employment_DUR_D$DURATION
```

```{r}
oecd_employment_DUR_D$AGE
```

```{r}
oecd_employment_DUR_D$COUNTRY
```

```{r}
oecd_employment_DUR_D$UNIT %>% datatable()
```
Można odnaleźć na stronie OECD *metadane* zbioru danych.
```{r eval=FALSE, include=FALSE, purl=TRUE}
# -------->>  Browse the metadata related to a series.
browse_metadata("DUR_D")
```

```{r}
# -------->> Download OECD data sets
oecd_employment_DUR_D_df <- get_dataset("DUR_D",
                                        filter = list(c("POL"), c("MEN")),
                                        start_time = 2000, end_time = 2018)
```

```{r}
oecd_employment_DUR_D_df_f <- oecd_employment_DUR_D_df %>%
  filter(DURATION == "UN", AGE != "1524", AGE !="6599", AGE != "900000")
```

```{r}
oecd_employment_DUR_D_df_f %>% head(12)
```
Wykres
```{r fig.height=5, fig.width=10}
oecd_employment_DUR_D_df_f %>% 
  ggplot(aes(x = obsTime, y = obsValue)) +
  geom_point(aes(color = AGE)) +
  geom_line(aes(x = obsTime, y = obsValue, group = AGE, color = AGE), stat="identity") +
  scale_x_discrete(breaks = seq(2000, 2018, 3)) +
 scale_y_continuous(breaks = seq(0, 1250, 250), labels = c(0,paste(seq(250, 1250, 250), "tyś."))) +
  labs(title = "Bezrobotni meżczyźni w Polsce wg grup wiekowych",
       colour = "Grupy wiekowe",
       x = "Rok",
       y = "Liczba bezrobotnych",
       caption = "Opracowanie własne na podstawie danych OECD.") +
  theme(plot.title = element_text(hjust=0.5))
```


```{r message=FALSE, warning=FALSE, include=FALSE}
detach(package:OECD)
```

## Dane finansowe (biblioteki **quantmod** i **tidyquant**)

```{r}
load("WebUrls.RData")
web_urls %>% datatable
```
[Strona quantmod](http://www.quantmod.com/)


```{r warning=FALSE}
library(quantmod)
library(tidyquant)
```

### Skąd można pobrać dane

1. [Strona Yahoo Finance](https://finance.yahoo.com/)
2. [Strona Google Finance](https://www.google.com/finance/)
3. [St. Louis Federal Reserve Bank's FRED system](https://fred.stlouisfed.org/)
4. [Currency and Metals Data from Oanda.com](https://www.oanda.com/)

Użycie
```{r}
getSymbols("USD/EUR",src="oanda")
```
```{r}
USDEUR %>% class
USDEUR %>% dim
USDEUR %>% colnames()
```


### Microsoft
 
```{r message=FALSE, warning=FALSE}
getSymbols('MSFT')
```

```{r}
MSFT %>% class
MSFT %>% dim
MSFT %>% colnames()
```

```{r}
barChart(MSFT)
```
```{r}
candleChart(MSFT, multi.col = TRUE, theme="white")
```

```{r}
MSFT.20 <- MSFT[3313:3333,]
```

```{r}
barChart(MSFT.20)
```

```{r}
candleChart(MSFT.20, multi.col = TRUE, theme="white")
```

```{r}
library(TTR)
```

```{r}
MSFT.my <- tq_get("MSFT", from = "2017-01-01", to = "2019-12-31")
```

```{r fig.height=5, fig.width=10}
# Bar Chart
MSFT.my %>%
    ggplot(aes(x = date, y = close)) +
    geom_barchart(aes(open = open, high = high, low = low, close = close)) +
    geom_ma(color = "darkgreen") +
    coord_x_date(xlim = c("2019-10-01", "2019-12-31"),
                 ylim = c(130, 165)) +
    labs(title = "Wykres słupkowy kursu akcji Microsoft od 01.10 do 31.12 w roku 2019",
       x = "Data",
       y = "Cena zamknięcia",
       caption = "Opracowanie własne na podstawie danych Yahoo Finance.") +
  theme(plot.title = element_text(hjust=0.5))

```

```{r fig.height=5, fig.width=10}
MSFT.my %>%
    ggplot(aes(x = date, y = close)) +
    geom_candlestick(aes(open = open, high = high, low = low, close = close)) +
    geom_ma(color = "darkgreen") +
    coord_x_date(xlim = c("2019-10-01", "2019-12-31"),
                 ylim = c(130, 165)) +
  labs(title = "Wykres świecowy kursu akcji Microsoft od 01.10 do 31.12 w roku 2019",
       x = "Data",
       y = "Cena zamknięcia",
       caption = "Opracowanie własne na podstawie danych Yahoo Finance.") +
  theme(plot.title = element_text(hjust=0.5))
```
## Narodowy Bank Polski i biblioteka **rnbp**

```{r}
library(rnbp)
```

### [Kursy walut](http://api.nbp.pl/#kursyWalut)
```{r}
waluta_EUR_A <- get_current_exchangerate("A", "EUR")
waluta_EUR_A %>% class
waluta_EUR_A$content
```
```{r}
waluta_EUR_A <- get_exchangerate_from("A", "EUR", Sys.Date() - 180)
waluta_EUR_A$content
```

```{r}
waluta_EUR_A <- get_exchangerate_tables_from_interval(table = "A", from = Sys.Date() - 93, to = Sys.Date())
waluta_EUR_A$content
```


```{r}
waluta_EUR_A <- get_exchangerate_from_interval(table = "A", currency_code = "EUR", 
                                                      from = Sys.Date() - 93, to = Sys.Date())
waluta_EUR_A$content
```

```{r}
waluta_EUR_A <- get_last_n_exchangerates("A", "EUR", 93)
waluta_EUR_A$content[[4]] %>% datatable
```
### [Ceny złota w NBP](http://api.nbp.pl/#cenyZlota)

#### Z ostatnich *n* dni
```{r}
gold_93 <- get_last_n_goldprices(93)
gold_93$content
```

```{r}
gold_93 <- get_goldprice_from_interval(Sys.Date() - 93, Sys.Date())
gold_93$content
```

```{r message=FALSE, warning=FALSE, include=FALSE}
detach(package:rnbp)
detach(package:tidyquant)
detach(package:BatchGetSymbols)
detach(package:quantmod)
detach(package:TTR)
detach(package:tidyverse)
detach(package:ggplot2)
detach(package:tibble)
detach(package:tidyr)
detach(package:readr)
detach(package:purrr)
detach(package:dplyr)
detach(package:stringr)
detach(package:forcats)

```

