-
Notifications
You must be signed in to change notification settings - Fork 1
/
pisa.R
107 lines (88 loc) · 2.54 KB
/
pisa.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
# Raw data downloaded from the PISA Data Explorer. The export is in Excel 2004 format, so re-save it as xlsx before running this script.
library(tidyverse)
# Read one sheet of the PISA export workbook.
#
# sheet: index of the sheet to read.
# path:  location of the workbook (the export re-saved as xlsx).
#
# Every sheet is read the same way: the first 11 rows are skipped, the next row
# supplies the column names, the symbols '—' and '†' are treated as NA, and at
# most 234 - 11 rows are read (a limit chosen by inspecting the file).
#
# NOTE: this helper's name intentionally does not begin with 'pisa'. The raw
# tables are later collected by name with a pattern anchored on that prefix,
# and a function must not be swept up with them.
read_pisa_sheet <- function(sheet,
                            path = 'data-raw/pisa/PISAExport-Jun302019.xlsx') {
  readxl::read_excel(
    path,
    sheet = sheet,
    skip = 11,
    col_names = TRUE,
    na = c('—', '†'),
    n_max = 234 - 11 # based on inspection
  )
}

# One raw table per sheet; the object names are relied upon downstream.
pisa_math_raw         <- read_pisa_sheet(1)
pisa_reading_raw      <- read_pisa_sheet(2)
pisa_science_raw      <- read_pisa_sheet(3)
pisa_interest_sci_raw <- read_pisa_sheet(4)
pisa_support_sci_raw  <- read_pisa_sheet(5)
# Standardise a single raw PISA table.
#
# data: a data frame with the columns `Year`, `Jurisdiction`, `Average` and
#       `Standard Error`.
#
# Returns the same rows and column positions with:
#   * the four columns renamed to `year`, `country`, `average` and `SE`;
#   * missing `year` values filled downward from the last non-missing value;
#   * `year` coerced to integer, `average` and `SE` coerced to numeric;
#   * the country label 'Korea' replaced by 'South Korea'.
clean_pisa <- function(data) {
  renamed <- rename(
    data,
    SE = `Standard Error`,
    country = Jurisdiction,
    year = Year,
    average = Average
  )

  # Carry the year down over rows where it is missing.
  filled <- fill(renamed, year)

  mutate(
    filled,
    year = as.integer(year),
    average = as.numeric(average),
    SE = as.numeric(SE),
    country = if_else(country == 'Korea', 'South Korea', country)
  )
}
# Collect the per-sheet raw tables by name (pisa_<something>_raw). The pattern
# is anchored at both ends so that `pisa_raw` and `pisa`, which this script
# itself creates, are not picked up when the script is re-run in the same
# session.
pisa_raw <- mget(ls(pattern = '^pisa_.+_raw$'))
stopifnot(length(pisa_raw) > 0) # fail early if the raw tables are missing

# debugonce(clean_pisa)

# Basic cleaning, plus removal of the first row within each year, which just
# dupes the international average. The list names become the `variable` column.
pisa <- pisa_raw %>%
  map(clean_pisa) %>%
  map(function(x) {
    x %>%
      group_by(year) %>%
      slice(-1) %>%
      ungroup()
  }) %>%
  bind_rows(.id = 'variable')

# Reduce the object names to the measure names, e.g. pisa_math_raw -> math.
pisa <- pisa %>%
  mutate(variable = str_remove_all(variable, pattern = '^pisa_|_raw$'))

# One row per country and year, one column per measure.
pisa <- pisa %>%
  select(-SE) %>%
  pivot_wider(names_from = variable, values_from = average) %>%
  arrange(country, year)

# Join to gapminder; assumes that has already been processed with gapminder.R.
data("gapminder_2019")

# setdiff(pisa$country, gapminder_2019$country)
# intersect(pisa$country, gapminder_2019$country)

# No `by` is given, so the join uses every column name the two tables share.
# NOTE(review): confirm the intended keys and spell them out with `by =`.
pisa <- pisa %>%
  left_join(gapminder_2019)

# pisa %>%
# filter(is.na(continent) & !grepl(country, pattern = 'International')) %>%
# data.frame()

# Reorder: identifiers first, then the PISA measures, then everything else.
pisa <- pisa %>%
  select(country, continent, year,
         math, reading, science,
         interest_sci, support_sci,
         everything())

usethis::use_data(pisa, overwrite = TRUE)