1 Zakład Bioinformatyki, Instytut Informatyki, Uniwersytet w Białymstoku

Correspondence: Jarosław Kotowicz <>

1 Realizacja zadań z laboratorium 3

rm(list = ls())
download.file(url = "https://fueleconomy.gov/feg/epadata/vehicles.csv.zip",
              destfile = "vehicles.csv.zip")
próbowanie adresu URL 'https://fueleconomy.gov/feg/epadata/vehicles.csv.zip'
Content type 'application/x-zip-compressed' length 1597020 bytes (1.5 MB)
downloaded 1.5 MB
unzip("vehicles.csv.zip")
library(tidyverse)

2 Różnice we wczytywaniu plików csv przez funkcje readr::read_csv i base::read.csv

vehicles_csv <- read_csv("vehicles.csv")
Parsed with column specification:
cols(
  .default = col_double(),
  drive = col_character(),
  eng_dscr = col_character(),
  fuelType = col_character(),
  fuelType1 = col_character(),
  make = col_character(),
  model = col_character(),
  mpgData = col_character(),
  phevBlended = col_logical(),
  trany = col_character(),
  VClass = col_character(),
  guzzler = col_logical(),
  trans_dscr = col_character(),
  tCharger = col_logical(),
  sCharger = col_character(),
  atvType = col_character(),
  fuelType2 = col_logical(),
  rangeA = col_logical(),
  evMotor = col_logical(),
  mfrCode = col_logical(),
  c240Dscr = col_logical()
  # ... with 4 more columns
)
See spec(...) for full column specifications.
27932 parsing failures.
 row     col           expected actual           file
4430 guzzler 1/0/T/F/TRUE/FALSE      G 'vehicles.csv'
4431 guzzler 1/0/T/F/TRUE/FALSE      G 'vehicles.csv'
4432 guzzler 1/0/T/F/TRUE/FALSE      G 'vehicles.csv'
4433 guzzler 1/0/T/F/TRUE/FALSE      G 'vehicles.csv'
4442 guzzler 1/0/T/F/TRUE/FALSE      G 'vehicles.csv'
.... ....... .................. ...... ..............
See problems(...) for more details.
vehicles_csv <- vehicles_csv %>%
  select(cityE, cityUF, evMotor, highwayE)
vehicles.csv <- read.csv("vehicles.csv")

vehicles.csv <- vehicles.csv %>%
  select(cityE, cityUF, evMotor, highwayE)
vehicles_csv.test <- vehicles_csv %>% 
  filter(is.na(evMotor)!=TRUE ) %>% 
  select(evMotor)
vehicles.csv.test <- vehicles.csv %>% 
  filter(is.na(evMotor)!=TRUE  & evMotor!="") %>% 
  select(evMotor)
vehicles.evM <- vehicles_csv %>% select(evMotor)

vehicles.test <- vehicles.csv %>% select(evMotor) %>% bind_cols(vehicles.evM) %>% filter(evMotor != "")
vehicles.test %>% head(10)

2.1 Dystrybuanta empiryczna

vehicles.csv %>% 
  filter(cityE !=0) %>%
  ggplot(aes(x = cityE)) +
  stat_ecdf()

vehicles.csv %>% 
  filter(cityE !=0) %>%
  ggplot(aes(x = highwayE)) +
  stat_ecdf()

3 Grupy 2 i 6 realizowały w dniu 26 marca 2020 r., a grupy 1 i 5 w dniu 2 kwietnia 2020 r.

3.1 Kowariancja i korelacja. Macierz kowariancji i korelacji

set.seed(202003)
x <- rnorm(100, mean = 2, sd = 3)
x[1:10]
 [1] 3.452712 3.862487 6.762493 9.113025 4.964333 3.072050 1.886295 8.796903 1.889214 3.497148
set.seed(202003)
y <- rnorm(100, mean = 2, sd = 3)
cov(x, y)
[1] 9.619885
cor(x, y)
[1] 1
cor(x, -y)
[1] -1

Liczby pseudolosowe z rozkładów gamma i t-Studenta.

rgamma(10, 1, 3)
 [1] 0.141113702 0.011058933 0.004633047 0.134021354 0.201444871 0.221894552 0.414136641 0.123855898 0.012947569
[10] 0.092161444
rt(10, 20)
 [1]  0.07343334  1.37104862  1.04494751 -0.30928986  0.54942319  0.89946908  0.28351875 -0.59885696 -0.03943426
[10]  1.07777070
rt(10, 100)
 [1]  0.6152318 -0.6148539 -0.2797890 -0.9907686  0.7603531  0.1572298 -0.9681729 -0.3357833 -0.3689049 -1.6977490
macierz <- matrix(0, nrow = 10, ncol = 5)
macierz[,1] <- x[1:10]
macierz[,2] <- y[1:10]
set.seed(202003)
macierz[,3] <- rgamma(10, 1, 3)
set.seed(202003)
macierz[,4] <- rt(10, 20)
set.seed(202003)
macierz[,5] <- rt(10, 100)
macierz
          [,1]     [,2]      [,3]        [,4]        [,5]
 [1,] 3.452712 3.452712 0.3003430  0.45135970  0.46611172
 [2,] 3.862487 3.862487 0.3451166  1.17629995  1.36542137
 [3,] 6.762493 6.762493 0.7508558  0.95823194  0.96849329
 [4,] 9.113025 9.113025 1.1939920 -0.02843529 -0.03280980
 [5,] 4.964333 4.964333 0.4809303 -0.03505054 -0.03584358
 [6,] 3.072050 3.072050 0.2615365 -3.26642904 -2.64232707
 [7,] 1.886295 1.886295 0.1578529  0.22125619  0.21892872
 [8,] 8.796903 8.796903 0.2790636  0.50718021  0.45654820
 [9,] 1.889214 1.889214 1.0230030  1.20199293  1.20620569
[10,] 3.497148 3.497148 0.5648129 -0.46349670 -0.47004098
cov(macierz)
          [,1]      [,2]      [,3]      [,4]      [,5]
[1,] 6.9717810 6.9717810 0.3156483 0.4693202 0.3116449
[2,] 6.9717810 6.9717810 0.3156483 0.4693202 0.3116449
[3,] 0.3156483 0.3156483 0.1218292 0.1236594 0.1019312
[4,] 0.4693202 0.4693202 0.1236594 1.6767504 1.4685502
[5,] 0.3116449 0.3116449 0.1019312 1.4685502 1.3012110
cor(macierz)
          [,1]      [,2]      [,3]      [,4]      [,5]
[1,] 1.0000000 1.0000000 0.3424963 0.1372660 0.1034700
[2,] 1.0000000 1.0000000 0.3424963 0.1372660 0.1034700
[3,] 0.3424963 0.3424963 1.0000000 0.2736006 0.2560104
[4,] 0.1372660 0.1372660 0.2736006 1.0000000 0.9942160
[5,] 0.1034700 0.1034700 0.2560104 0.9942160 1.0000000

3.2 Testowanie hipotez

3.2.1 Test parametryczny dotyczący średniej lub średnich (test t)

t.test(macierz[,1], alternative = "two.side", mu = 2)

    One Sample t-test

data:  macierz[, 1]
t = 3.2692, df = 9, p-value = 0.009696
alternative hypothesis: true mean is not equal to 2
95 percent confidence interval:
 2.840828 6.618504
sample estimates:
mean of x 
 4.729666 
t.test(macierz[,1], alternative = "two.side", mu = 6)

    One Sample t-test

data:  macierz[, 1]
t = -1.5214, df = 9, p-value = 0.1625
alternative hypothesis: true mean is not equal to 6
95 percent confidence interval:
 2.840828 6.618504
sample estimates:
mean of x 
 4.729666 
t.test(macierz[,1], alternative = "two.side", mu = 4.73)

    One Sample t-test

data:  macierz[, 1]
t = -0.0004001, df = 9, p-value = 0.9997
alternative hypothesis: true mean is not equal to 4.73
95 percent confidence interval:
 2.840828 6.618504
sample estimates:
mean of x 
 4.729666 
t.test(macierz[,1], macierz[,2] )

    Welch Two Sample t-test

data:  macierz[, 1] and macierz[, 2]
t = 0, df = 18, p-value = 1
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -2.480829  2.480829
sample estimates:
mean of x mean of y 
 4.729666  4.729666 

3.2.2 Test nieparametryczny dotyczący postaci funkcyjnej (rozkład normalny)

shapiro.test(macierz[,1])

    Shapiro-Wilk normality test

data:  macierz[, 1]
W = 0.87504, p-value = 0.1144
shapiro.test(macierz[,3])

    Shapiro-Wilk normality test

data:  macierz[, 3]
W = 0.88306, p-value = 0.1415

4 Praca domowa

  1. Odszukać „wszystkie” testy w R do testowania, że próba pochodzi z rozkładu normalnego.
  2. Podać 3 inne testy niż służące do testowania normalności rozkładu.
LS0tDQp0aXRsZTogIk1ldG9keSBwcm9iYWJpbGlzdHljem5lIGkgc3RhdHlzdHlrYSAoa2llcnVuZWsgaW5mb3JtYXR5a2EpIC0gbGFib3JhdG9yaXVtIDMgKHJlYWxpemFjamEpIg0KYXV0aG9yOg0KLSBKYXJvc8WCYXcgS290b3dpY3o6DQogICAgY29ycmVzcG9uZGVuY2U6IG5vDQogICAgZW1haWw6IGoua290b3dpY3pAdXdiLmVkdS5wbA0KICAgIGluc3RpdHV0ZTogSUlVd0INCmRhdGU6ICIxOSBtYXJjYSAyMDIwci4gKGdydXB5IDEgaSA1KSAyNiBtYXJjYSAyMDIwci4gKGdydXB5IDIgaSA2KSINCm91dHB1dDoNCiAgaHRtbF9ub3RlYm9vazoNCiAgICBmaWdfY2FwdGlvbjogeWVzDQogICAgaGlnaGxpZ2h0OiBoYWRkb2NrDQogICAgbnVtYmVyX3NlY3Rpb25zOiB5ZXMNCiAgICBwYW5kb2NfYXJnczoNCiAgICAtIC0tbHVhLWZpbHRlcj1zY2hvbGFybHktbWV0YWRhdGEubHVhDQogICAgLSAtLWx1YS1maWx0ZXI9YXV0aG9yLWluZm8tYmxvY2tzLmx1YQ0KICAgIHRoZW1lOiBjZXJ1bGVhbg0KICAgIHRvYzogeWVzDQppbnN0aXR1dGU6DQotIElJVXdCOiBaYWvFgmFkIEJpb2luZm9ybWF0eWtpLCBJbnN0eXR1dCBJbmZvcm1hdHlraSwgVW5pd2Vyc3l0ZXQgdyBCaWHFgnltc3Rva3UNCmNzbDogYmlnLWRhdGEtYW5kLWluZm9ybWF0aW9uLWFuYWx5dGljcy5jc2wNCmFsd2F5c19hbGxvd19odG1sOiB5ZXMNCi0tLQ0KDQpgYGB7ciBzZXR1cCwgaW5jbHVkZT1GQUxTRX0NCmtuaXRyOjpvcHRzX2NodW5rJHNldChlY2hvID0gVFJVRSkNCmBgYA0KDQojIFJlYWxpemFjamEgemFkYcWEIHogbGFib3JhdG9yaXVtIDMNCg0KYGBge3J9DQpybShsaXN0ID0gbHMoKSkNCmBgYA0KDQpgYGB7cn0NCmRvd25sb2FkLmZpbGUodXJsID0gImh0dHBzOi8vZnVlbGVjb25vbXkuZ292L2ZlZy9lcGFkYXRhL3ZlaGljbGVzLmNzdi56aXAiLA0KICAgICAgICAgICAgICBkZXN0ZmlsZSA9ICJ2ZWhpY2xlcy5jc3YuemlwIikNCmBgYA0KDQpgYGB7cn0NCnVuemlwKCJ2ZWhpY2xlcy5jc3YuemlwIikNCmBgYA0KDQpgYGB7cn0NCmxpYnJhcnkodGlkeXZlcnNlKQ0KYGBgDQoNCiMgUsOzxbxuaWNlIHcgcG9kY3p5dHl3YW5pdSBwbGlrw7N3ICpjc3YqIHByemV6IGZ1bmtjamUgKipyZWFkcjo6cmVhZF9jc3YqKiBpICoqYmFzZTo6cmVhZC5jc3YqKg0KYGBge3J9DQp2ZWhpY2xlc19jc3YgPC0gcmVhZF9jc3YoInZlaGljbGVzLmNzdiIpDQoNCnZlaGljbGVzX2NzdiA8LSB2ZWhpY2xlc19jc3YgJT4lDQogIHNlbGVjdChjaXR5RSwgY2l0eVVGLCBldk1vdG9yLCBoaWdod2F5RSkNCmBgYA0KDQpgYGB7cn0NCnZlaGljbGVzLmNzdiA8LSByZWFkLmNzdigidmVoaWNsZXMuY3N2IikNCg0KdmVoaWNsZXMuY3N2IDwtIHZlaGljbGVzLmNzdiAlPiUNCiAgc2VsZWN0KGNpdHlFLCBjaXR5VUYsIGV2TW90b3IsIGhpZ2h3YXlFKQ0KYGBgDQoNCmBgYHtyfQ0KdmVoaWNsZXNfY3N2LnRlc3QgPC0gdmVoaWNsZXNfY3N2ICU+JSANCiAgZmlsdGVyKGlzLm5hKGV2TW90b3IpIT1U
UlVFICkgJT4lIA0KICBzZWxlY3QoZXZNb3RvcikNCmBgYA0KDQpgYGB7cn0NCnZlaGljbGVzLmNzdi50ZXN0IDwtIHZlaGljbGVzLmNzdiAlPiUgDQogIGZpbHRlcihpcy5uYShldk1vdG9yKSE9VFJVRSAgJiBldk1vdG9yIT0iIikgJT4lIA0KICBzZWxlY3QoZXZNb3RvcikNCmBgYA0KDQpgYGB7cn0NCnZlaGljbGVzLmV2TSA8LSB2ZWhpY2xlc19jc3YgJT4lIHNlbGVjdChldk1vdG9yKQ0KDQp2ZWhpY2xlcy50ZXN0IDwtIHZlaGljbGVzLmNzdiAlPiUgc2VsZWN0KGV2TW90b3IpICU+JSBiaW5kX2NvbHModmVoaWNsZXMuZXZNKSAlPiUgZmlsdGVyKGV2TW90b3IgIT0gIiIpDQpgYGANCg0KYGBge3J9DQp2ZWhpY2xlcy50ZXN0ICU+JSBoZWFkKDEwKQ0KYGBgDQoNCiMjIER5c3RyeWJ1YW50YSBlbXBpcnljem5hDQoNCmBgYHtyfQ0KdmVoaWNsZXMuY3N2ICU+JSANCiAgZmlsdGVyKGNpdHlFICE9MCkgJT4lDQogIGdncGxvdChhZXMoeCA9IGNpdHlFKSkgKw0KICBzdGF0X2VjZGYoKQ0KYGBgDQoNCmBgYHtyfQ0KdmVoaWNsZXMuY3N2ICU+JSANCiAgZmlsdGVyKGNpdHlFICE9MCkgJT4lDQogIGdncGxvdChhZXMoeCA9IGhpZ2h3YXlFKSkgKw0KICBzdGF0X2VjZGYoKQ0KYGBgDQojIEdydXBhIDIgaSA2IHJlYWxpem93YcWCeSB3IGRuaXUgMjYgbWFyY2EgMjAyMHIuLCBhIGdydXB5IDEgaSA1IHcgZG5pdSAyIGt3aWV0bmlhIDIwMjByLg0KDQojIyBLb3dhcmlhbmNqYSBpIGtvcmVsYWNqYS4gTWFjaWVyeiBrd2FyaWFuY2ppIGkga29yZWxhY2ppDQpgYGB7cn0NCnNldC5zZWVkKDIwMjAwMykNCmBgYA0KDQpgYGB7cn0NCnggPC0gcm5vcm0oMTAwLCBtZWFuID0gMiwgc2QgPSAzKQ0KeFsxOjEwXQ0KYGBgDQoNCmBgYHtyfQ0Kc2V0LnNlZWQoMjAyMDAzKQ0KeSA8LSBybm9ybSgxMDAsIG1lYW4gPSAyLCBzZCA9IDMpDQpgYGANCg0KYGBge3J9DQpjb3YoeCwgeSkNCmBgYA0KDQpgYGB7cn0NCmNvcih4LCB5KQ0KYGBgDQoNCmBgYHtyfQ0KY29yKHgsIC15KQ0KYGBgDQoNCkxpY3pieSBwc2V1ZG9sb3Nvd2UgeiByb3prxYJhZMOzdyBnYW1tYSBpIHQtU3R1ZGVudGEuDQpgYGB7cn0NCnJnYW1tYSgxMCwgMSwgMykNCnJ0KDEwLCAyMCkNCnJ0KDEwLCAxMDApDQpgYGANCg0KYGBge3J9DQptYWNpZXJ6IDwtIG1hdHJpeCgwLCBucm93ID0gMTAsIG5jb2wgPSA1KQ0KbWFjaWVyelssMV0gPC0geFsxOjEwXQ0KbWFjaWVyelssMl0gPC0geVsxOjEwXQ0KYGBgDQoNCmBgYHtyfQ0Kc2V0LnNlZWQoMjAyMDAzKQ0KbWFjaWVyelssM10gPC0gcmdhbW1hKDEwLCAxLCAzKQ0KYGBgDQoNCmBgYHtyfQ0Kc2V0LnNlZWQoMjAyMDAzKQ0KbWFjaWVyelssNF0gPC0gcnQoMTAsIDIwKQ0KYGBgDQoNCmBgYHtyfQ0Kc2V0LnNlZWQoMjAyMDAzKQ0KbWFjaWVyelssNV0gPC0gcnQoMTAsIDEwMCkNCmBgYA0KDQpgYGB7cn0NCm1hY2llcnoNCmBgYA0KDQpgYGB7cn0NCmNvdihtYWNpZXJ6KQ0KYGBgDQoNCmBgYHtyfQ0KY29yKG1hY2llcnopDQpgYGANCg0KIyMgVGVz
dG93YW5pZSBoaXBvdGV6IA0KDQojIyMgVGVzdCBwYXJhbWV0cnljem55IGRvdHljesSFY3kgxZtyZWRuaWVqIGx1YiDFm3JlZG5pY2ggKCp0ZXN0IHQqKQ0KYGBge3J9DQp0LnRlc3QobWFjaWVyelssMV0sIGFsdGVybmF0aXZlID0gInR3by5zaWRlIiwgbXUgPSAyKQ0KYGBgDQoNCmBgYHtyfQ0KdC50ZXN0KG1hY2llcnpbLDFdLCBhbHRlcm5hdGl2ZSA9ICJ0d28uc2lkZSIsIG11ID0gNikNCmBgYA0KDQpgYGB7cn0NCnQudGVzdChtYWNpZXJ6WywxXSwgYWx0ZXJuYXRpdmUgPSAidHdvLnNpZGUiLCBtdSA9IDQuNzMpDQpgYGANCg0KYGBge3J9DQp0LnRlc3QobWFjaWVyelssMV0sIG1hY2llcnpbLDJdICkNCmBgYA0KDQojIyMgTmllcGFyYW1ldHJ5Y3pueSBkb3R5Y3rEhWN5IHBvc3RhY2kgZnVua2N5am5laiAocm96a8WCYWQgbm9ybWFsbnkpDQpgYGB7cn0NCnNoYXBpcm8udGVzdChtYWNpZXJ6WywxXSkNCmBgYA0KDQpgYGB7cn0NCnNoYXBpcm8udGVzdChtYWNpZXJ6WywzXSkNCmBgYA0KDQojIFByYWNhIGRvbW93YQ0KDQoxLiBPZHN6dWthxIcgLCx3c3p5c3RraWUnJyB0ZXN0eSB3IFIgZG8gdGVzdG93YW5pYSwgxbxlIHByw7NiYSBwb2Nob2R6aSB6IHJvemvFgmFkdSBub3JtYWxuZWdvLg0KMi4gUG9kYcSHIDMgaW5uZSB0ZXN0eSBuacW8IHPFgnXFvMSFY2UgZG8gdGVzdG93YW5pYSBub3JtYWxub8WbY2kgcm96a8WCYWR1Lg0KDQpgYGB7cn0NCmRldGFjaChwYWNrYWdlOnRpZHl2ZXJzZSkNCmRldGFjaChwYWNrYWdlOmdncGxvdDIpDQpkZXRhY2gocGFja2FnZTp0aWJibGUpDQpkZXRhY2gocGFja2FnZTp0aWR5cikNCmRldGFjaChwYWNrYWdlOnJlYWRyKQ0KZGV0YWNoKHBhY2thZ2U6cHVycnIpDQpkZXRhY2gocGFja2FnZTpkcGx5cikNCmRldGFjaChwYWNrYWdlOnN0cmluZ3IpDQpkZXRhY2gocGFja2FnZTpmb3JjYXRzKQ0KYGBg