From 873a23e148b96b47d64197554cf3f18de155e59f Mon Sep 17 00:00:00 2001 From: Gregor de Cillia Date: Tue, 27 Sep 2022 17:32:36 +0200 Subject: [PATCH 01/84] import {tibble}, {pillar} and {vctrs} {pillar} and {vctrs} are the backbone for customizing tibbles. They are dependencies of the {tibble} package and therefore "free" once {tibble} is used as a dependency package of {STATcubeR} --- DESCRIPTION | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index e8d2e4c4..82fbf301 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -18,10 +18,12 @@ Imports: cli, httr, jsonlite, - magrittr + magrittr, + pillar, + tibble, + vctrs Suggests: data.tree, - pillar, rappdirs, xml2 Encoding: UTF-8 From bc98e1d08f248e7b450b11ce8d052ecbdf8d9fb2 Mon Sep 17 00:00:00 2001 From: Gregor de Cillia Date: Tue, 27 Sep 2022 17:43:02 +0200 Subject: [PATCH 02/84] + custom vector class for schema uris try this class only with sc_table_saved_list() for now --- NAMESPACE | 2 ++ R/schema_uri.R | 67 +++++++++++++++++++++++++++++++++++++++++++++++++ R/table_saved.R | 9 ++++--- 3 files changed, 75 insertions(+), 3 deletions(-) create mode 100644 R/schema_uri.R diff --git a/NAMESPACE b/NAMESPACE index 5e0e91da..f26cdd94 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,6 +3,8 @@ S3method(as.character,od_json) S3method(as.character,sc_json) S3method(as.data.frame,sc_data) +S3method(format,sc_schema_uri) +S3method(pillar_shaft,sc_schema_uri) S3method(print,od_cache_file) S3method(print,od_json) S3method(print,od_revisions) diff --git a/R/schema_uri.R b/R/schema_uri.R new file mode 100644 index 00000000..0d649100 --- /dev/null +++ b/R/schema_uri.R @@ -0,0 +1,67 @@ +new_schema_uri <- function(label, uri) { + vctrs::vec_assert(label, character()) + vctrs::vec_assert(uri, character()) + vctrs::new_rcrd(list(label = label, uri = uri), class = "sc_schema_uri") +} + +#' @export +format.sc_schema_uri <- function(x, ...) { + format(vctrs::field(x, "label"), ...) +} + +sc_schema_run <- function(uri) { + run <- paste0("STATcubeR::sc_schema(\"", uri, "\")") + is_table <- grep("^str:table", uri) + run[is_table] <- paste0("STATcubeR::sc_table_saved(\"", uri[is_table], "\")") + run +} + +sc_schema_url <- function(uri) { + url <- rep(NA_character_, length(uri)) + is_database <- grep("^str:database", uri) + if (length(is_database) > 0) + url[is_database] <- uri[is_database] %>% + sub("^str:database:", "", .) %>% + sc_browse_database(server = "ext") %>% + as.character() + is_table <- grepl("^str:table", uri) & + !grepl("^([0-9a-f-])+$", sub("str:table:", "", uri)) + if (length(is_table) > 0) + url[is_table] <- uri[is_table] %>% + sub("^str:table:", "", .) %>% + sc_browse_table(server = "ext") %>% + as.character() + url +} + +#' @importFrom pillar pillar_shaft +#' @export +pillar_shaft.sc_schema_uri <- function(x, ...) { + label <- vctrs::field(x, "label") + formatted <- label + short_formatted <- substr(formatted, 1, 40) + uri <- vctrs::field(x, "uri") + if (cli::ansi_hyperlink_types()$run) { + run <- sc_schema_run(uri) + template <- cli::format_inline("{.run [%s](%s)}") %>% cli::style_underline() + formatted <- sprintf(template, run, formatted) + short_formatted <- sprintf(template, run, short_formatted) + } else if (cli::ansi_has_hyperlink_support()) { + url <- sc_schema_url(uri) + formatted[!is.na(url)] <- cli::style_hyperlink(formatted[!is.na(url)], + url[!is.na(url)]) + short_formatted[!is.na(url)] <- cli::style_hyperlink( + short_formatted[!is.na(url)], url[!is.na(url)]) + } + pillar::new_pillar_shaft_simple( + formatted, + width = max(nchar(label)), + min_width = 40, + type_sum = "chr", + short_formatted = short_formatted + ) +} + +as.character.sc_schema_uri <- function(x, ...) { + format(x) +} diff --git a/R/table_saved.R b/R/table_saved.R index 522413b0..ab47d0c1 100644 --- a/R/table_saved.R +++ b/R/table_saved.R @@ -9,10 +9,13 @@ sc_table_saved_list <- function(key = NULL, server = "ext") { tables <- schema %>% sapply(function(x) x$type == "TABLE") saved_tables <- schema[tables] - data.frame( + vctrs::new_data_frame(list( label = sapply(saved_tables, function(x) x$label), - id = sapply(saved_tables, function(x) x$id) - ) + id = new_schema_uri( + sapply(saved_tables, function(x) x$id), + sapply(saved_tables, function(x) x$id) + ) + ), class = c("tbl", "tbl_df")) } #' @param table_uri Identifier of a saved table as returned by From 1ca4d78bad3b4c12ccd291be29afe28897195265 Mon Sep 17 00:00:00 2001 From: Gregor de Cillia Date: Tue, 27 Sep 2022 18:12:52 +0200 Subject: [PATCH 03/84] sc_table_saved: normalize uri make sure the objects of class are compatible with sc_table_saved() --- R/table_saved.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/table_saved.R b/R/table_saved.R index ab47d0c1..832bcf62 100644 --- a/R/table_saved.R +++ b/R/table_saved.R @@ -24,6 +24,7 @@ sc_table_saved_list <- function(key = NULL, server = "ext") { #' @export sc_table_saved <- function(table_uri, language = NULL, key = NULL, server = "ext") { language <- sc_language(language) + table_uri <- as.character(table_uri) if (substr(table_uri, 1, 3) != "str") table_uri <- paste0("str:table:", table_uri) sc_with_cache(c("sc_table_saved", table_uri, language, key), function() { From 88d0173a61d3df2b04b0a0de7edb295356d0ca1a Mon Sep 17 00:00:00 2001 From: Gregor de Cillia Date: Tue, 27 Sep 2022 18:15:16 +0200 Subject: [PATCH 04/84] update namespaces - don't import {tibble} since currently, only {vctrs} and {pillar} is used - export as.character() for sc_schema_uri - re-roxygenize --- DESCRIPTION | 1 - NAMESPACE | 2 ++ R/schema_uri.R | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 82fbf301..6dd0e647 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -20,7 +20,6 @@ Imports: jsonlite, magrittr, pillar, - tibble, vctrs Suggests: data.tree, diff --git a/NAMESPACE b/NAMESPACE index f26cdd94..2adad9a5 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,6 +2,7 @@ S3method(as.character,od_json) S3method(as.character,sc_json) +S3method(as.character,sc_schema_uri) S3method(as.data.frame,sc_data) S3method(format,sc_schema_uri) S3method(pillar_shaft,sc_schema_uri) @@ -67,3 +68,4 @@ export(sc_tabulate) importFrom(magrittr,"%<>%") importFrom(magrittr,"%>%") importFrom(magrittr,"%T>%") +importFrom(pillar,pillar_shaft) diff --git a/R/schema_uri.R b/R/schema_uri.R index 0d649100..6763c621 100644 --- a/R/schema_uri.R +++ b/R/schema_uri.R @@ -62,6 +62,7 @@ pillar_shaft.sc_schema_uri <- function(x, ...) { ) } +#' @export as.character.sc_schema_uri <- function(x, ...) { format(x) } From a348d584b539cbd8f7c15817229972cdb3d70572 Mon Sep 17 00:00:00 2001 From: Gregor de Cillia Date: Tue, 27 Sep 2022 18:31:10 +0200 Subject: [PATCH 05/84] update language param to sc_headers(), sc_schema_catalogue() this is now handled as in sc_table(), od_table() and so on --- R/schema.R | 5 ++--- R/utils.R | 4 ++-- man/sc_schema.Rd | 7 +------ 3 files changed, 5 insertions(+), 11 deletions(-) diff --git a/R/schema.R b/R/schema.R index 25ddfe56..2391fda1 100644 --- a/R/schema.R +++ b/R/schema.R @@ -138,8 +138,7 @@ sc_schema_flatten_impl <- function(resp, type) { #' [catalogue explorer](`r sc_browse_catalogue()`) of the STATcube GUI and reurns #' a tree-type object containing all databases and tables. #' @export -sc_schema_catalogue <- function(depth = "folder", language = c("en", "de"), - key = NULL, server = "ext") { - sc_schema(depth = depth, language = language, key = key, server = server) +sc_schema_catalogue <- function(depth = "folder", ...) { + sc_schema(id = NULL, depth = depth, ...) } diff --git a/R/utils.R b/R/utils.R index 092bf3c7..42e23a79 100644 --- a/R/utils.R +++ b/R/utils.R @@ -14,11 +14,11 @@ sc_parse_time <- function(timestamp) { (as.numeric(timestamp) / 1000) %>% as.POSIXct(origin = "1970-01-01") } -sc_headers <- function(language = c("en", "de"), key = NULL, server = "ext", ...) { +sc_headers <- function(language = NULL, key = NULL, server = "ext", ...) { if (is.null(key)) key <- sc_key(server) httr::add_headers( - APIKey = key, `Accept-Language` = match.arg(language), ..., + APIKey = key, `Accept-Language` = sc_language(language), ..., `User-Agent` = paste0("STATcubeR/", sc_version(FALSE), " (http://github.com/statistikat/STATcubeR)", " httr/", utils::packageVersion("httr"), diff --git a/man/sc_schema.Rd b/man/sc_schema.Rd index 20798384..8059f3d5 100644 --- a/man/sc_schema.Rd +++ b/man/sc_schema.Rd @@ -14,12 +14,7 @@ sc_schema(id = NULL, depth = NULL, language = NULL, key = NULL, server = "ext") sc_schema_flatten(x, type) -sc_schema_catalogue( - depth = "folder", - language = c("en", "de"), - key = NULL, - server = "ext" -) +sc_schema_catalogue(depth = "folder", ...) sc_schema_db(id, depth = "valueset", language = c("en", "de"), key = NULL) } From 3b539ebf423584e3542bddec1fb255158bdba3a5 Mon Sep 17 00:00:00 2001 From: Gregor de Cillia Date: Tue, 27 Sep 2022 18:38:30 +0200 Subject: [PATCH 06/84] update links in docs if this package is roxygenized insie of the STAT firewall, the documentation links generated by sc_browse*() will point to the internal server re-roxygenize from the outside TODO: find a way to avoid this in the future. Maybe write a wrapper-function around devtools::document() which temporarily sets the env-var STATCUBER_IN_STAT --- man/sc_schema.Rd | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/man/sc_schema.Rd b/man/sc_schema.Rd index 8059f3d5..921e5c4e 100644 --- a/man/sc_schema.Rd +++ b/man/sc_schema.Rd @@ -56,8 +56,8 @@ as well as metadata about specific databases. The main function \code{sc_schema()} can be used with any resouce id. \code{\link[=sc_schema_catalogue]{sc_schema_catalogue()}} and \code{\link[=sc_schema_db]{sc_schema_db()}} are very simple wrapper functions around \code{\link[=sc_schema]{sc_schema()}} and are comparabable to the -\href{http://sdbext:8081/statistik.at/ext/statcube/jsf/dataCatalogueExplorer.xhtml}{catalogue explorer} or the -\href{http://sdbext:8081/statistik.at/ext/statcube/opendatabase?id=deake005}{table view} of the STATcube GUI. +\href{https://portal.statistik.at/statistik.at/ext/statcube/jsf/dataCatalogueExplorer.xhtml}{catalogue explorer} or the +\href{https://portal.statistik.at/statistik.at/ext/statcube/opendatabase?id=deake005}{table view} of the STATcube GUI. The responses of the API are tree-like data structures which are wrapped into a class called \code{sc_schema} to simplify the usage in R. @@ -67,11 +67,11 @@ are wrapped into a class called \code{sc_schema} to simplify the usage in R. \item \code{sc_schema_flatten()}: turns a \code{sc_schema} object into a \code{data.frame} \item \code{sc_schema_catalogue()}: is similar to the -\href{http://sdbext:8081/statistik.at/ext/statcube/jsf/dataCatalogueExplorer.xhtml}{catalogue explorer} of the STATcube GUI and reurns +\href{https://portal.statistik.at/statistik.at/ext/statcube/jsf/dataCatalogueExplorer.xhtml}{catalogue explorer} of the STATcube GUI and reurns a tree-type object containing all databases and tables. \item \code{sc_schema_db()}: is similar to the -\href{http://sdbext:8081/statistik.at/ext/statcube/opendatabase?id=deake005}{table view} +\href{https://portal.statistik.at/statistik.at/ext/statcube/opendatabase?id=deake005}{table view} of ths STATcube GUI and gives information about all measures and classification fields for a specific database From ff8ad8a872b0f71ddf18fa78173e16243da70ed5 Mon Sep 17 00:00:00 2001 From: Gregor de Cillia Date: Tue, 27 Sep 2022 18:40:18 +0200 Subject: [PATCH 07/84] prep NEWS for v0.5.1 --- NEWS.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/NEWS.md b/NEWS.md index 2073ecf3..39ebd897 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,8 @@ +# STATcubeR 0.5.1 + +* Update print methods with the `{tibble}` package (#32) +* Add global option `STATcubeR.language` to override the default language + # STATcubeR 0.5.0 * adapt `od_list()` to data.statistik.at update ([`2249b66`](https://github.com/statistikat/STATcubeR/commit/2249b6607cb822a4aac56c6258cbe967832171f1)) From 37d84606d8b14aadeacc40c701e46ab29a16bf4d Mon Sep 17 00:00:00 2001 From: Gregor de Cillia Date: Tue, 27 Sep 2022 18:48:00 +0200 Subject: [PATCH 08/84] don't use ide:run in docs another tweak for cli::style_hyperlink(). Hopefully, this will get easier once these features mature --- R/zzz.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/R/zzz.R b/R/zzz.R index 81fbdbc7..fe5a0499 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -12,6 +12,7 @@ } cli_theme_pkgdown <- function() { + options(cli.hyperlink_run = FALSE) options(cli.theme = list( ".field" = list("color" = "#0d0d73"), ".code" = list("color" = "blue"), @@ -27,6 +28,7 @@ cli_theme_pkgdown <- function() { } cli_theme_reset <- function() { + options(cli.hyperlink_run = TRUE) Sys.unsetenv("R_CLI_HYPERLINK_MODE") options(cli.theme = NULL) options(fansi.warn = NULL) From 0147dc58f604841889a4fd8c7cca9fb114edd8be Mon Sep 17 00:00:00 2001 From: Gregor de Cillia Date: Tue, 27 Sep 2022 19:02:35 +0200 Subject: [PATCH 09/84] add clickable links to print.sc_schema() --- NAMESPACE | 1 + R/schema.R | 47 +++++++++++++++++++++++++++++++---------------- R/schema_uri.R | 27 +++++++++++++++++++++++++++ 3 files changed, 59 insertions(+), 16 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 2adad9a5..bcfdc083 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -5,6 +5,7 @@ S3method(as.character,sc_json) S3method(as.character,sc_schema_uri) S3method(as.data.frame,sc_data) S3method(format,sc_schema_uri) +S3method(pillar_shaft,sc_schema_type) S3method(pillar_shaft,sc_schema_uri) S3method(print,od_cache_file) S3method(print,od_json) diff --git a/R/schema.R b/R/schema.R index 2391fda1..d4f48660 100644 --- a/R/schema.R +++ b/R/schema.R @@ -74,29 +74,44 @@ print.sc_schema <- function(x, tree = NULL, ..., limit = 30) { classes <- sapply(x, class) if (tree && any(classes == "sc_schema")) return(print_schema_with_tree(x, limit = limit, ...)) - cat(x$type, ": ", x$label, "\n", sep = "") - sc_schema_print_children(x, message_empty = switch( + style <- cli::make_ansi_style(sc_schema_colors()[[x$type]]) + cat(style(x$type), ": ", cli::style_bold(x$label), "\n", sep = "") + short_id <- strsplit(x$id, ":")[[1]][3] + message_empty <- switch( x$type, - DATABASE = "# Get more info with {.run STATcubeR::sc_schema_db('{x$id}')}", - TABLE = "Get the data with {.run STATcubeR::sc_table_saved('{x$id}')}", + DATABASE = c("# Get more metdata with {.run [sc_schema_db('{short_id}')]", + "(STATcubeR::sc_schema_db('{x$id}'))}"), + TABLE = c("# Get the data with {.run [sc_table_saved('{short_id}')]", + "(STATcubeR::sc_table_saved('{x$id}'))}"), NULL - )) + ) + sc_schema_print_children(x, message_empty = message_empty, ...) + invisible(x) } -sc_schema_print_children <- function(x, message_empty = NULL) { +sc_schema_print_children <- function(x, message_empty = NULL, ...) { classes <- sapply(x, class) - child_schemas <- names(x)[classes == "sc_schema"] + ind <- which(classes == "sc_schema") + child_schemas <- names(x)[ind] if (length(child_schemas) > 0) { - data.frame( - child = child_schemas, - type = sapply(x[child_schemas], function(x) x$type), - n_childs = sapply(x[child_schemas], function(x) { - sum(sapply(x, class) == "sc_schema") - }), - stringsAsFactors = FALSE - ) %>% `class<-`(c("tbl", "data.frame")) %>% `row.names<-`(NULL) %>% print() - } else if (!is.null(message_empty)) + children <- vctrs::new_data_frame(list( + child = new_schema_uri( + label = child_schemas, + uri = sapply(ind, function(i) x[[i]]$id) + ), + type = sc_schema_type(sapply(ind, function(i) x[[i]]$type)), + n = sapply(ind, function(i) { + sum(sapply(x[[i]], class) == "sc_schema") + }) + ), class = c("tbl_df", "tbl")) + if (all(children$n == 0)) + children$n <- NULL + formatted <- format(children, ...) + cat(formatted[seq(4, length(formatted))], sep = "\n") + } else if (!is.null(message_empty)) { + short_id <- strsplit(x$id, ":")[[1]][3] cat(cli::format_inline(message_empty), "\n") + } } sc_as_nested_list <- function(x) { diff --git a/R/schema_uri.R b/R/schema_uri.R index 6763c621..ccd8741e 100644 --- a/R/schema_uri.R +++ b/R/schema_uri.R @@ -66,3 +66,30 @@ pillar_shaft.sc_schema_uri <- function(x, ...) { as.character.sc_schema_uri <- function(x, ...) { format(x) } + +sc_schema_colors <- function() { + if (!is.null(getOption("STATcubeR.schema_colors"))) + return(getOption("STATcubeR.schema_colors")) + list( + "FOLDER" = "#8470FF", "DATABASE" = "cadetblue", "TABLE" = "peru", + "GROUP" = "#8470FF", "FIELD" = "cyan", "VALUESET" = "cadetblue", + "VALUE" = "#8470FF", "MEASURE" = "yellow", "STAT_FUNCTION" = "cadetblue", + "COUNT" = "cadetblue" + ) +} + +sc_schema_type <- function(type) { + stopifnot(is.character(type), all(type %in% names(sc_schema_colors()))) + vctrs::new_vctr(type, class = "sc_schema_type", inherit_base_type = TRUE) +} + +#' @export +pillar_shaft.sc_schema_type <- function(x, ...) { + type <- vctrs::vec_data(x) + stl <- sc_schema_colors() + formatted <- sapply(type, function(y) { + style <- cli::make_ansi_style(stl[[y]]) + style(y) + }) + pillar::new_pillar_shaft_simple(formatted, type_sum = "chr") +} From cc318c95e931eb8b420d9c4ec3c418368e45d180 Mon Sep 17 00:00:00 2001 From: Gregor de Cillia Date: Fri, 30 Sep 2022 09:23:11 +0200 Subject: [PATCH 10/84] mention COUNTs in docs for sc_table_custom() ad some notes that instead of VALUE and VALUESET it is also possible to use uris for COUNT resources in the "measures" parameter of sc_table_custom() --- R/table_custom.R | 4 ++-- man/sc_table_custom.Rd | 4 ++-- vignettes/sc_table_custom.Rmd | 11 +++++++++++ 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/R/table_custom.R b/R/table_custom.R index cb415811..2deedfdd 100644 --- a/R/table_custom.R +++ b/R/table_custom.R @@ -7,8 +7,8 @@ #' for more details. #' #' @param db The uid of a database -#' @param measures A character vector of uids for measures. Can be either of -#' type `MEASURE` or of type `STAT_FUNCTION` +#' @param measures A character vector of uids for measures. Each entry must be +#' of type `MEASURE`, `STAT_FUNCTION` or `COUNT`. #' @param dimensions A character vector of dimensions for the cube. Can be #' either of type `FIELD` or type `VALUESET`. Those entries are referred to #' as `fields` in the parsed API response diff --git a/man/sc_table_custom.Rd b/man/sc_table_custom.Rd index 72fb1cef..68f4ca29 100644 --- a/man/sc_table_custom.Rd +++ b/man/sc_table_custom.Rd @@ -16,8 +16,8 @@ sc_table_custom( \arguments{ \item{db}{The uid of a database} -\item{measures}{A character vector of uids for measures. Can be either of -type \code{MEASURE} or of type \code{STAT_FUNCTION}} +\item{measures}{A character vector of uids for measures. Each entry must be +of type \code{MEASURE}, \code{STAT_FUNCTION} or \code{COUNT}.} \item{dimensions}{A character vector of dimensions for the cube. Can be either of type \code{FIELD} or type \code{VALUESET}. Those entries are referred to diff --git a/vignettes/sc_table_custom.Rmd b/vignettes/sc_table_custom.Rmd index e00b2681..18116d2b 100644 --- a/vignettes/sc_table_custom.Rmd +++ b/vignettes/sc_table_custom.Rmd @@ -116,6 +116,17 @@ x$tabulate() It is possible to use a mixture of valuesets and fields in the `dimensions` parameter. +## Using Counts + +Instead of Measures and Valuesets, it is also possible to provide counts +in the measure parameter of `sc_table_ciustom()` + +```{r} +schema_pop <- sc_schema_db("debevstand") +(count <- schema_pop$`Datensätze/Records`$`F-BEVSTAND`) +sc_table_custom("str:database:debevstand", count$id) +``` + ## Filtering Data Omitting certain classification elements from the query is possible with the `recodes` parameter of the `/table` endpoint. Currently, `sc_table_custom()` does not provide support for recodes. From 2a7a963efca4cb6eff625e508e803e29b4c980a0 Mon Sep 17 00:00:00 2001 From: Gregor de Cillia Date: Fri, 30 Sep 2022 11:07:19 +0200 Subject: [PATCH 11/84] document error: cell limit exceeded (400) this error was overlooked when the error handling vignette was first written fortunately, the API does a good job of explaining the error in the json body of the response so the error handlerst do not need an upgrade [ci skip] --- vignettes/sc_last_error.Rmd | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/vignettes/sc_last_error.Rmd b/vignettes/sc_last_error.Rmd index 3ed4d140..4cdf35de 100644 --- a/vignettes/sc_last_error.Rmd +++ b/vignettes/sc_last_error.Rmd @@ -170,6 +170,27 @@ If you encounter this error during the workflow described in the `r ticle("sc_ta This is because json request that are downloaded by the STATcube GUI should always contain valid URIs. However, if you either modify the downloaded json requests or use `sc_table_custom()`, the reason "invalid json body" is plausible. +### Cell Limit Exceeded + +This error occurs if more than 1 million cells are requested via a single +call to `sc_table()` or `sc_table_custom()`. +If you encounter this error, consider splitting up the request into multiple smaller requests. + +```{r, eval = FALSE} +sc_table_custom( + "str:database:debevstand", + "str:measure:debevstand:F-BEVSTAND:F-ISIS-1", + c("str:field:debevstand:F-BEVSTAND:C-A10-0", + "str:valueset:debevstand:F-BEVSTAND:C-GNU-2:C-GNU-2", + "str:valueset:debevstand:F-BEVSTAND:C-BESC51-0:C-BESC51-0", + "str:valueset:debevstand:F-BEVSTAND:C-BESC11-0:C-BESC11-0") +) +``` + +```{r, echo = FALSE, error = TRUE} +readRDS("sc_last_error/cell_limit.rds") %>% STATcubeR:::sc_check_response() +``` + ## Custom Error Handling If you want to use your own error-handling instead of the default `r STATcubeR` error handlers, you can get started with the following code sample from one of our `{shiny}` applications. From 876450d5d9c3f36c6d6594a139346ce7bb8ceb84 Mon Sep 17 00:00:00 2001 From: Gregor de Cillia Date: Fri, 30 Sep 2022 11:09:25 +0200 Subject: [PATCH 12/84] typo: sc_table_ciustom() -> sc_table_custom() [ci skip] --- vignettes/sc_table_custom.Rmd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vignettes/sc_table_custom.Rmd b/vignettes/sc_table_custom.Rmd index 18116d2b..ace1937e 100644 --- a/vignettes/sc_table_custom.Rmd +++ b/vignettes/sc_table_custom.Rmd @@ -119,12 +119,12 @@ It is possible to use a mixture of valuesets and fields in the `dimensions` para ## Using Counts Instead of Measures and Valuesets, it is also possible to provide counts -in the measure parameter of `sc_table_ciustom()` +in the measure parameter of `sc_table_custom()`. ```{r} schema_pop <- sc_schema_db("debevstand") (count <- schema_pop$`Datensätze/Records`$`F-BEVSTAND`) -sc_table_custom("str:database:debevstand", count$id) +sc_table_custom(schema_pop$id, count$id) ``` ## Filtering Data From c530abfcf64360c55f3c5680d21e50a7c81f9733 Mon Sep 17 00:00:00 2001 From: Gregor de Cillia Date: Fri, 30 Sep 2022 18:24:50 +0200 Subject: [PATCH 13/84] add gallery of german example datasets the sc_table article now showcases the print methods for all the example datasets in german [skip ci] --- vignettes/sc_table.Rmd | 47 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/vignettes/sc_table.Rmd b/vignettes/sc_table.Rmd index 7e4d9720..e8bde25e 100644 --- a/vignettes/sc_table.Rmd +++ b/vignettes/sc_table.Rmd @@ -131,6 +131,53 @@ sc_example("agriculture_prices.json") %>% sc_table() sc_example("economic_trend_monitor.json") %>% sc_table() ``` +## Choosing the Language {.tabsert .tabset-pills} + +The language which is used for labelling can be changed via the `language` +parameter of `sc_table()`. + +### Accomodation + +```{r} +sc_example("accomodation.json") %>% sc_table("de") +``` + +### STATatlas + +```{r} +sc_example("economic_atlas.json") %>% sc_table("de") +``` + +### Trade + +```{r} +sc_example("foreign_trade.json") %>% sc_table("de") +``` + +### GDP + +```{r} +sc_example("gross_regional_product.json") %>% sc_table("de") +``` + +### Working Hours + +```{r} +sc_example("labor_force_survey.json") %>% sc_table("de") +``` + +### Agriculture + +```{r} +sc_example("agriculture_prices.json") %>% sc_table("de") +``` + +### monitor.statistik.at + +```{r} +sc_example("economic_trend_monitor.json") %>% sc_table("de") +``` + ## Further reading * Functionalities of the returned object are explained in the `r ticle("sc_data")`. From 5a08065d8bd9475020e8233f7947f258991b1da6 Mon Sep 17 00:00:00 2001 From: Gregor de Cillia Date: Fri, 30 Sep 2022 18:28:16 +0200 Subject: [PATCH 14/84] add helper function for user agent [skip ci] --- R/utils.R | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/R/utils.R b/R/utils.R index 42e23a79..1fc25b10 100644 --- a/R/utils.R +++ b/R/utils.R @@ -14,15 +14,19 @@ sc_parse_time <- function(timestamp) { (as.numeric(timestamp) / 1000) %>% as.POSIXct(origin = "1970-01-01") } +sc_user_agent <- function(){ + paste0("STATcubeR/", sc_version(FALSE), + " (http://github.com/statistikat/STATcubeR)", + " httr/", utils::packageVersion("httr"), + " R/", R.version$major, ".", R.version$minor) +} + sc_headers <- function(language = NULL, key = NULL, server = "ext", ...) { if (is.null(key)) key <- sc_key(server) httr::add_headers( APIKey = key, `Accept-Language` = sc_language(language), ..., - `User-Agent` = paste0("STATcubeR/", sc_version(FALSE), - " (http://github.com/statistikat/STATcubeR)", - " httr/", utils::packageVersion("httr"), - " R/", R.version$major, ".", R.version$minor)) + `User-Agent` = sc_user_agent()) } sc_language <- function(language = NULL, options = c("en", "de")) { From 3eee18f65ede57fafc00a39443a883b57836785c Mon Sep 17 00:00:00 2001 From: Gregor de Cillia Date: Mon, 21 Nov 2022 16:15:49 +0100 Subject: [PATCH 15/84] OGD: import de_desc and en_desc add those entries to the metadata. NOTE: columns 5 and 7 are not used in data.csv according the OGD standard but some internale datasets provide these columns and therefore they are imported as the description of the measure/classification --- R/od_resource.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/od_resource.R b/R/od_resource.R index 71dd364f..255bee1e 100644 --- a/R/od_resource.R +++ b/R/od_resource.R @@ -171,9 +171,9 @@ od_resources_check <- function(json) { od_normalize_columns <- function(x, suffix) { if (!is.null(suffix)) { - col_indices <- c(1, 2, 2, switch(suffix, HEADER = 3, c(4, 3))) + col_indices <- c(1, 2, 2, switch(suffix, HEADER = 3, c(4, 3)), 5, 7) col_names <- c("code", "label", "label_de", "label_en", - switch(suffix, HEADER = NULL, "parent")) + switch(suffix, HEADER = NULL, "parent"), "de_desc", "en_desc") x <- x[, col_indices] %>% `names<-`(col_names) x$label <- NA_character_ x$label_en <- as.character(x$label_en) From ff13176a9c431b1b3dc075ab8f74afd790ef4155 Mon Sep 17 00:00:00 2001 From: Gregor de Cillia Date: Wed, 23 Nov 2022 11:14:31 +0100 Subject: [PATCH 16/84] v0.5.0.1, update NEWS add a patch release since the additional metadata are needed for a deployment NEWS for 0.5.0.1 and 0.5.1 will be merged when 0.5.1 is released --- DESCRIPTION | 2 +- NEWS.md | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 6dd0e647..00bdbc8e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: STATcubeR Title: R interface for the STATcube REST API and Open Government Data -Version: 0.5.0 +Version: 0.5.0.1 Authors@R: c( person("Gregor", "de Cillia", , "Gregor.deCillia@statistik.gv.at", role = c("aut", "cre")), person("Bernhard", "Meindl", , "Bernhard.Meindl@statistik.gv.at", role = "ctb"), diff --git a/NEWS.md b/NEWS.md index 39ebd897..73217558 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,7 +1,11 @@ # STATcubeR 0.5.1 * Update print methods with the `{tibble}` package (#32) + +# STATcubeR 0.5.0.1 + * Add global option `STATcubeR.language` to override the default language +* `od_table()`: Add descriptions to `x$header` and `x$field(i)` # STATcubeR 0.5.0 From e2b7c74aedaad4a85880b04a0ae992ad66dcafec Mon Sep 17 00:00:00 2001 From: Gregor de Cillia Date: Thu, 24 Nov 2022 17:07:13 +0100 Subject: [PATCH 17/84] update STATcube links * since json-downloads requir a login, link to the login page * link to the documentation page instead of the manual --- vignettes/sc_table.Rmd | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/vignettes/sc_table.Rmd b/vignettes/sc_table.Rmd index e8bde25e..4805429a 100644 --- a/vignettes/sc_table.Rmd +++ b/vignettes/sc_table.Rmd @@ -29,7 +29,7 @@ It is assumed that you already provided your API key as described in the `r ticl Use the graphical user interface of STATcube to create a table. Visit [STATcube] and select a database. This will open the table view where you can -create a table. See the [STATcube manual] for details. +create a table. See the [STATcube documentation] for details. ## Download an API request @@ -189,5 +189,5 @@ sc_example("economic_trend_monitor.json") %>% sc_table("de") [`/table` endpoint]: https://docs.wingarc.com.au/superstar/9.12/open-data-api/open-data-api-reference/table-endpoint [download options]: https://docs.wingarc.com.au/superstar/9.12/superweb2/user-guide/download-tables -[STATcube]: https://statcube.at/statcube/home -[STATcube manual]: http://www.statistik.at/wcm/idc/idcplg?IdcService=GET_PDF_FILE&dDocName=105692 +[STATcube]: https://www.statistik.at/datenbanken/statcube-statistische-datenbank/login +[STATcube documentation]: https://www.statistik.at/datenbanken/statcube-statistische-datenbank/dokumente-downloads From e93560b9fbe2c31c950608b267ee289113b42f0e Mon Sep 17 00:00:00 2001 From: Gregor de Cillia Date: Thu, 24 Nov 2022 19:47:40 +0100 Subject: [PATCH 18/84] no @internal in sc_table_custom() - remove @keywords internal - add documentation for missing params [skip ci] --- R/table_custom.R | 2 +- man/sc_table_custom.Rd | 12 +++++++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/R/table_custom.R b/R/table_custom.R index 2deedfdd..758dffb3 100644 --- a/R/table_custom.R +++ b/R/table_custom.R @@ -12,7 +12,7 @@ #' @param dimensions A character vector of dimensions for the cube. Can be #' either of type `FIELD` or type `VALUESET`. Those entries are referred to #' as `fields` in the parsed API response -#' @keywords internal +#' @inheritParams sc_table #' @examples #' sc_table_custom("str:database:detouextregsai") #' diff --git a/man/sc_table_custom.Rd b/man/sc_table_custom.Rd index 68f4ca29..9123ce48 100644 --- a/man/sc_table_custom.Rd +++ b/man/sc_table_custom.Rd @@ -22,6 +22,17 @@ of type \code{MEASURE}, \code{STAT_FUNCTION} or \code{COUNT}.} \item{dimensions}{A character vector of dimensions for the cube. Can be either of type \code{FIELD} or type \code{VALUESET}. Those entries are referred to as \code{fields} in the parsed API response} + +\item{language}{The language to be used for labeling. \code{"en"} (the default) +will use english. \code{"de"} uses german. +The third option \code{"both"} will import both languages by sending two requests +to the \verb{/table} endpoint.} + +\item{add_totals}{Should totals be added for each classification field in +the json request?} + +\item{key}{(\code{string}) An API key. To display your key, call +\code{\link[=sc_browse_preferences]{sc_browse_preferences()}}.} } \description{ Define requests against the /table endpoint by providing @@ -50,4 +61,3 @@ sc_table_custom( ) ) } -\keyword{internal} From 341254d4faf37ad91e47f9c901a715151940aa58 Mon Sep 17 00:00:00 2001 From: Gregor de Cillia Date: Fri, 25 Nov 2022 15:33:00 +0100 Subject: [PATCH 19/84] allow recodes in sc_table_custom() first attempt to resolve #33. Recodes can now be defined with an additional parameter. However, type-checking is very minimal. TODO: - better error handling when the request is constructed. This way users get quick and useful error messages - at least for semantic errors such as invalid usage of parameters - with this implementation, users will have to make sure that the parameters "recodes" and "dimensions" are consistent. Maybe simplify the usage - The naming sc_recode is almost conflicting with the class sc_recoder. Possibly rename this function - extend the custom tables article to showcase some usecases for recodes and add a short discussion about usage limits - maybe add sc_filter which only allows filter-type recodes and performs stricter type-checks? --- NAMESPACE | 1 + R/table_custom.R | 89 +++++++++++++++++++++++++++++++++++++++--- man/sc_table_custom.Rd | 62 +++++++++++++++++++++++++---- 3 files changed, 140 insertions(+), 12 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index bcfdc083..d35b2d69 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -57,6 +57,7 @@ export(sc_last_error_parsed) export(sc_rate_limit_schema) export(sc_rate_limit_table) export(sc_rate_limits) +export(sc_recode) export(sc_schema) export(sc_schema_catalogue) export(sc_schema_db) diff --git a/R/table_custom.R b/R/table_custom.R index 758dffb3..6fceb56e 100644 --- a/R/table_custom.R +++ b/R/table_custom.R @@ -6,13 +6,24 @@ #' See the [Custom tables article](https://statistikat.github.io/STATcubeR/articles/sc_table_custom.html) #' for more details. #' -#' @param db The uid of a database +#' @param db The uid of a database. Must be of type `DATASET` #' @param measures A character vector of uids for measures. Each entry must be #' of type `MEASURE`, `STAT_FUNCTION` or `COUNT`. #' @param dimensions A character vector of dimensions for the cube. Can be #' either of type `FIELD` or type `VALUESET`. Those entries are referred to #' as `fields` in the parsed API response +#' @param add_totals Should totals be added for each classification field in +#' the json request? Ignored if `recodes` is used. +#' @param recodes One or more recodes that were generated via [sc_recode()]. +#' If more than one recode is supplied, recodes should be concatinated with +#' [c()]. +#' @param language The language to be used for labeling. "en" +#' (the default) will use english. "de" uses german. #' @inheritParams sc_table +#' @section Schema objects in parameters: +#' it is possible to pass `sc_schema` objects (usually generated by +#' [sc_schema_db()]) instead of ids in [sc_table_custom()] and [sc_recode()]. +#' If provided, the schema objects will be converted into ids via `$id`. #' @examples #' sc_table_custom("str:database:detouextregsai") #' @@ -32,12 +43,80 @@ #' "str:valueset:detouextregsai:F-DATA1:C-C93-2:C-C93SUM-0" #' ) #' ) +#' +#' schema <- sc_schema_db("detouextregsai") +#' region <- schema$`Other Classifications`$`Tourism commune [ABO]`$ +#' `Regionale Gliederung (Ebene +1)` +#' month <- schema$`Mandatory fields`$`Season/Tourism Month` +#' +#' x <- sc_table_custom( +#' schema, +#' schema$Facts$Arrivals, +#' list(month, region), +#' recodes = c( +#' sc_recode(region, total = FALSE, map = list( +#' region$Achensee, +#' list(region$Arlberg, region$`Ausseerland-Salzkammergut`) +#' )), +#' sc_recode(month, total = FALSE) +#' ) +#' ) +#' x$tabulate() #' @export -sc_table_custom <- function(db, measures = c(), dimensions = c(), language = c("en", "de"), - add_totals = TRUE, key = NULL) { - json_list <- list(database = db, measures = as.list(measures), - dimensions = lapply(dimensions, list)) +sc_table_custom <- function(db, measures = c(), dimensions = c(), + language = c("en", "de"), + add_totals = TRUE, key = NULL, recodes = NULL) { + json_list <- list(database = as_id(db), measures = as.list(as_id(measures, TRUE)), + dimensions = lapply(as_id(dimensions, TRUE), list)) + if (!is.null(recodes)) { + json_list$recodes <- recodes + add_totals <- FALSE + } + #return(jsonlite::toJSON(json_list, auto_unbox = TRUE, pretty = TRUE)) json <- jsonlite::toJSON(json_list, auto_unbox = TRUE, pretty = TRUE) + #return(json) response <- sc_table_json_post(json, language, add_totals, key) sc_table_class$new(response, toString(json)) } + +#' @describeIn sc_table_custom creates a recode object which can be used +#' for the `recode` parameter of [sc_table_custom()] +#' @param field An uid of a classification field to be recoded. The provided +#' uid should also be passed in the `dimensions` parameter of +#' [sc_table_custom()]. +#' @param map A list of ids for values (type `VALUE`) This can also be a nested +#' list if items should be grouped. See examples +#' @param total Add totals to the field? If `map` is provided, the totals +#' will correspond to the filtered data. +#' @export +sc_recode <- function(field, map = NULL, total = FALSE) { + if (is.null(map)) + return(stats::setNames(list(list(total = total)), as_id(field))) + if (inherits(map, "sc_schema")) + map <- list(map) + stats::setNames( + list(list( + map = lapply(map, function(value) { + I(as_id(value, TRUE)) + }), + total = total + )), + as_id(field) + ) +} + +as_id <- function(x, multiple = FALSE) { + if (length(x) == 0) + return(c()) + if (inherits(x, "sc_schema")) + return(x$id) + if (is.character(x) && length(x) == 1) + return(x) + if (!multiple) + stop("invalid id") + if (is.character(x)) + return(x) + if (is.list(x)) + return(sapply(x, as_id)) + stop("invalid ids") +} diff --git a/man/sc_table_custom.Rd b/man/sc_table_custom.Rd index 9123ce48..e1ccc936 100644 --- a/man/sc_table_custom.Rd +++ b/man/sc_table_custom.Rd @@ -2,6 +2,7 @@ % Please edit documentation in R/table_custom.R \name{sc_table_custom} \alias{sc_table_custom} +\alias{sc_recode} \title{Create custom tables} \usage{ sc_table_custom( @@ -10,11 +11,14 @@ sc_table_custom( dimensions = c(), language = c("en", "de"), add_totals = TRUE, - key = NULL + key = NULL, + recodes = NULL ) + +sc_recode(field, map = NULL, total = FALSE) } \arguments{ -\item{db}{The uid of a database} +\item{db}{The uid of a database. Must be of type \code{DATASET}} \item{measures}{A character vector of uids for measures. Each entry must be of type \code{MEASURE}, \code{STAT_FUNCTION} or \code{COUNT}.} @@ -23,16 +27,28 @@ of type \code{MEASURE}, \code{STAT_FUNCTION} or \code{COUNT}.} either of type \code{FIELD} or type \code{VALUESET}. Those entries are referred to as \code{fields} in the parsed API response} -\item{language}{The language to be used for labeling. \code{"en"} (the default) -will use english. \code{"de"} uses german. -The third option \code{"both"} will import both languages by sending two requests -to the \verb{/table} endpoint.} +\item{language}{The language to be used for labeling. "en" +(the default) will use english. "de" uses german.} \item{add_totals}{Should totals be added for each classification field in -the json request?} +the json request? Ignored if \code{recodes} is used.} \item{key}{(\code{string}) An API key. To display your key, call \code{\link[=sc_browse_preferences]{sc_browse_preferences()}}.} + +\item{recodes}{One or more recodes that were generated via \code{\link[=sc_recode]{sc_recode()}}. +If more than one recode is supplied, recodes should be concatinated with +\code{\link[=c]{c()}}.} + +\item{field}{An uid of a classification field to be recoded. The provided +uid should also be passed in the \code{dimensions} parameter of +\code{\link[=sc_table_custom]{sc_table_custom()}}.} + +\item{map}{A list of ids for values (type \code{VALUE}) This can also be a nested +list if items should be grouped. See examples} + +\item{total}{Add totals to the field? If \code{map} is provided, the totals +will correspond to the filtered data.} } \description{ Define requests against the /table endpoint by providing @@ -41,6 +57,19 @@ The URIs can be obtained using \code{\link[=sc_schema_db]{sc_schema_db()}}. See the \href{https://statistikat.github.io/STATcubeR/articles/sc_table_custom.html}{Custom tables article} for more details. } +\section{Functions}{ +\itemize{ +\item \code{sc_recode()}: creates a recode object which can be used +for the \code{recode} parameter of \code{\link[=sc_table_custom]{sc_table_custom()}} + +}} +\section{Schema objects in parameters}{ + +it is possible to pass \code{sc_schema} objects (usually generated by +\code{\link[=sc_schema_db]{sc_schema_db()}}) instead of ids in \code{\link[=sc_table_custom]{sc_table_custom()}} and \code{\link[=sc_recode]{sc_recode()}}. +If provided, the schema objects will be converted into ids via \verb{$id}. +} + \examples{ sc_table_custom("str:database:detouextregsai") @@ -60,4 +89,23 @@ sc_table_custom( "str:valueset:detouextregsai:F-DATA1:C-C93-2:C-C93SUM-0" ) ) + +schema <- sc_schema_db("detouextregsai") +region <- schema$`Other Classifications`$`Tourism commune [ABO]`$ + `Regionale Gliederung (Ebene +1)` +month <- schema$`Mandatory fields`$`Season/Tourism Month` + +x <- sc_table_custom( + schema, + schema$Facts$Arrivals, + list(month, region), + recodes = c( + sc_recode(region, total = FALSE, map = list( + region$Achensee, + list(region$Arlberg, region$`Ausseerland-Salzkammergut`) + )), + sc_recode(month, total = FALSE) + ) +) +x$tabulate() } From 0dac8e8f21fc291a9d79e34195d303e94e41f2a3 Mon Sep 17 00:00:00 2001 From: Gregor de Cillia Date: Fri, 9 Dec 2022 12:59:04 +0100 Subject: [PATCH 20/84] extend custom tables article with recodes showcase the usage of sc_recode in the web documentation. --- vignettes/sc_table_custom.Rmd | 84 ++++++++++++++++------------------- 1 file changed, 38 insertions(+), 46 deletions(-) diff --git a/vignettes/sc_table_custom.Rmd b/vignettes/sc_table_custom.Rmd index ace1937e..a2ee6bbf 100644 --- a/vignettes/sc_table_custom.Rmd +++ b/vignettes/sc_table_custom.Rmd @@ -129,55 +129,47 @@ sc_table_custom(schema_pop$id, count$id) ## Filtering Data -Omitting certain classification elements from the query is possible with the `recodes` parameter of the `/table` endpoint. Currently, `sc_table_custom()` does not provide support for recodes. -Please issue a [feature request] if you see this as a useful extension of `r STATcubeR`. - -
-Example - -For example, the last call to `sc_table_custom()` will send the following json -to the server. - -```json -{ - "database": "str:database:detouextregsai", - "measures": [ "str:measure:detouextregsai:F-DATA1:F-ANK", - "str:measure:detouextregsai:F-DATA1:F-UEB" ], - "dimensions": [ - [ "str:valueset:detouextregsai:F-DATA1:C-C93-2:C-C93-2" ] ] -} -``` - -For comparison, this is how the same request was specified in `r STATcubeR` +Data can be filtered on the server side by using the `recodes` parameter +of `sc_table_custom()`. +Here is an example where the accomodation statistics uses a filter for +`Country of origin`. -```{r, eval = FALSE} -sc_table_custom( - "str:database:detouextregsai", - c("str:measure:detouextregsai:F-DATA1:F-ANK", - "str:measure:detouextregsai:F-DATA1:F-UEB"), - "str:valueset:detouextregsai:F-DATA1:C-C93-2:C-C93-2" +```{r} +x <- sc_table_custom( + db = "str:database:detouextregsai", + measures = measures$id[1:2], + dimensions = list(schema$`Mandatory fields`$`Season/Tourism Month`, + valuesets$`Country of origin`), + recodes = c(sc_recode( + valuesets$`Country of origin`, + list(valuesets$`Country of origin`$`Italy <29>`, + valuesets$`Country of origin`$`Germany <12>`) + )) ) +x$tabulate() ``` -It is now possible to add recodes in order to only show results for Vienna. -For this, the regional classification (`C-C93`) is limited to the -code for vienna, which is `C-C93-2:01`. - -```json -{ - "database": "str:database:detouextregsai", - "measures": [ "str:measure:detouextregsai:F-DATA1:F-ANK", - "str:measure:detouextregsai:F-DATA1:F-UEB" ], - "dimensions": [ - [ "str:valueset:detouextregsai:F-DATA1:C-C93-2:C-C93-2" ] ], - "recodes": [ - "str:valueset:detouextregsai:F-DATA1:C-C93-2:C-C93-2": { - "map": [[ "str:value:detouextregsai:F-DATA1:C-C93-2:C-C93-2:01" ]] - } - ] -} -``` +Other options from the [recodes specification] are also available via `sc_recode()`. +It is possible to group items and specify recodes for several classifications. -
+```{r} +origin <- valuesets$`Country of origin` +month <- schema$`Mandatory fields`$`Season/Tourism Month`$`Season/Tourism Month` +x <- sc_table_custom( + db = "str:database:detouextregsai", + measures = measures$id[1:2], + dimensions = list(month, origin), + recodes = c( + sc_recode(origin, list( + list(origin$`Germany <12>`, origin$`Netherlands <25>`), + list(origin$`Italy <29>`, origin$`France (incl.Monaco) <14>`) + )), + sc_recode(month, list( + month$Nov.99, month$Feb.00, month$Apr.09, month$`Jan. 22` + )) + ) +) +x$tabulate() +``` -[feature request]: https://github.com/statistikat/STATcubeR/issues +[recodes specification]: https://docs.wingarc.com.au/superstar/9.12/open-data-api/open-data-api-reference/table-endpoint From 8d59925033ea77efd78bacce7d190f5057224fbd Mon Sep 17 00:00:00 2001 From: Gregor de Cillia Date: Fri, 9 Dec 2022 14:15:27 +0100 Subject: [PATCH 21/84] add typechecks to sc_table_custom() there are now several checks in place that throw warnings if inputs in sc_table_custom() or sc_recode() are of the wrong schema-type or if other inconsistencies are suspected. See the section called "error handling" in ?sc_table_custom for more details some of those warnings might be replaced with errors in the future part of #33 --- R/table_custom.R | 44 +++++++++++++++++++++++++++++++++++++----- man/sc_table_custom.Rd | 20 +++++++++++++++++++ 2 files changed, 59 insertions(+), 5 deletions(-) diff --git a/R/table_custom.R b/R/table_custom.R index 6fceb56e..2d8cc704 100644 --- a/R/table_custom.R +++ b/R/table_custom.R @@ -24,6 +24,21 @@ #' it is possible to pass `sc_schema` objects (usually generated by #' [sc_schema_db()]) instead of ids in [sc_table_custom()] and [sc_recode()]. #' If provided, the schema objects will be converted into ids via `$id`. +#' @section Error handling: +#' Unfortunately, the API gives fairly vague error messages in case a +#' custom table request is ill defined. For this reason, [sc_table_custom()] +#' applies some simple heuristics and throws warnings if inconsistencies +#' in the provided parameters are recognized. The following conditions are +#' currently checked +#' * the parameter `db` is of type `DATABASE` +#' * all entries in `measures` are of type `MEASURE`, `COUNT` or +#' `STATFN` +#' * all entries in `dimensions` are of type `VALUESET` or `FIELD` +#' * all entries in `field` are of type `VALUESET` or `FIELD` +#' * all entries in `map` are of type `VALUE` +#' * all fields in `recodes` are also present in `dimensions` +#' * the first two arguments of `sc_recode()` are consistent, i.e. +#' if the provided `VALUE`s belong to the `VALUESET/FIELD` #' @examples #' sc_table_custom("str:database:detouextregsai") #' @@ -66,15 +81,25 @@ sc_table_custom <- function(db, measures = c(), dimensions = c(), language = c("en", "de"), add_totals = TRUE, key = NULL, recodes = NULL) { - json_list <- list(database = as_id(db), measures = as.list(as_id(measures, TRUE)), - dimensions = lapply(as_id(dimensions, TRUE), list)) + db <- as_id(db) + measures <- as_id(measures, TRUE) + dimensions <- as_id(dimensions, TRUE) + json_list <- list(database = db, measures = as.list(measures), + dimensions = lapply(dimensions, list)) if (!is.null(recodes)) { json_list$recodes <- recodes add_totals <- FALSE } - #return(jsonlite::toJSON(json_list, auto_unbox = TRUE, pretty = TRUE)) json <- jsonlite::toJSON(json_list, auto_unbox = TRUE, pretty = TRUE) - #return(json) + if (!all(names(recodes) %in% dimensions)) + warning("`recodes` and `dimensions` might be inconsistent") + if (!all(grepl("^str:valueset", dimensions) | grepl("^str:field", dimensions))) + warning("parameter `dimensions` is not of type `FIELD` or `VALUESET`") + if (!all(grepl("^str:measure", measures) | grepl("^str:statfn", measures) | + grepl("^str:count", measures))) + warning("parameter `measures` is not of type `MEASURE`, `STATFN` or `COUNT`") + if (!grepl("^str:database", db)) + warning("parameter `db` is not of type `DATABASE`") response <- sc_table_json_post(json, language, add_totals, key) sc_table_class$new(response, toString(json)) } @@ -94,7 +119,7 @@ sc_recode <- function(field, map = NULL, total = FALSE) { return(stats::setNames(list(list(total = total)), as_id(field))) if (inherits(map, "sc_schema")) map <- list(map) - stats::setNames( + recode <- stats::setNames( list(list( map = lapply(map, function(value) { I(as_id(value, TRUE)) @@ -103,6 +128,15 @@ sc_recode <- function(field, map = NULL, total = FALSE) { )), as_id(field) ) + code_parent <- gsub("^.*:", "", names(recode)) + codes_children <- unlist(recode[[1]]$map) + if (!all(grepl(code_parent, codes_children))) + warning("parameters `field` and `map` might be inconsistent") + if (!all(grepl("^str:value:", codes_children))) + warning("some entries in `map` are not of type VALUE") + if (!grepl("^str:valueset", names(recode)) && !grepl("^str:field", names(recode))) + warning("parameter `field` is not of type `FIELD` or `VALUESET`") + recode } as_id <- function(x, multiple = FALSE) { diff --git a/man/sc_table_custom.Rd b/man/sc_table_custom.Rd index e1ccc936..940e2745 100644 --- a/man/sc_table_custom.Rd +++ b/man/sc_table_custom.Rd @@ -70,6 +70,26 @@ it is possible to pass \code{sc_schema} objects (usually generated by If provided, the schema objects will be converted into ids via \verb{$id}. } +\section{Error handling}{ + +Unfortunately, the API gives fairly vague error messages in case a +custom table request is ill defined. For this reason, \code{\link[=sc_table_custom]{sc_table_custom()}} +applies some simple heuristics and throws warnings if inconsistencies +in the provided parameters are recognized. The following conditions are +currently checked +\itemize{ +\item the parameter \code{db} is of type \code{DATABASE} +\item all entries in \code{measures} are of type \code{MEASURE}, \code{COUNT} or +\code{STATFN} +\item all entries in \code{dimensions} are of type \code{VALUESET} or \code{FIELD} +\item all entries in \code{field} are of type \code{VALUESET} or \code{FIELD} +\item all entries in \code{map} are of type \code{VALUE} +\item all fields in \code{recodes} are also present in \code{dimensions} +\item the first two arguments of \code{sc_recode()} are consistent, i.e. +if the provided \code{VALUE}s belong to the \code{VALUESET/FIELD} +} +} + \examples{ sc_table_custom("str:database:detouextregsai") From f157a407042197847478efeb9206994603b7e280 Mon Sep 17 00:00:00 2001 From: Gregor de Cillia Date: Fri, 9 Dec 2022 14:42:33 +0100 Subject: [PATCH 22/84] require pillar 1.5.0 add a minimum requirement to pillar for the version from 2021-02-22 to make sure the S3 generics format_tbl_footer() is available --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 00bdbc8e..8972bca5 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -19,7 +19,7 @@ Imports: httr, jsonlite, magrittr, - pillar, + pillar (>= 1.5.0), vctrs Suggests: data.tree, From a0fbe4c421c76a3c31a61e980e9bee6f9189fac8 Mon Sep 17 00:00:00 2001 From: Gregor de Cillia Date: Fri, 9 Dec 2022 14:54:15 +0100 Subject: [PATCH 23/84] import tibble generics via @import don't use the .onLoad hook with base::registerS3method but use the import via NAMESPACE (roxygen) instead [skip ci] --- NAMESPACE | 5 +++++ R/print.R | 6 ++++++ R/zzz.R | 8 -------- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index d35b2d69..90cf2a03 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -16,6 +16,9 @@ S3method(print,sc_schema) S3method(print,sc_table) S3method(print,sc_tibble_meta) S3method(print,sc_url) +S3method(tbl_format_footer,sc_meta) +S3method(tbl_sum,sc_meta) +S3method(tbl_sum,sc_tibble) export("%>%") export(od_cache_clear) export(od_cache_dir) @@ -71,3 +74,5 @@ importFrom(magrittr,"%<>%") importFrom(magrittr,"%>%") importFrom(magrittr,"%T>%") importFrom(pillar,pillar_shaft) +importFrom(pillar,tbl_format_footer) +importFrom(pillar,tbl_sum) diff --git a/R/print.R b/R/print.R index 03c7c3d9..8098103b 100644 --- a/R/print.R +++ b/R/print.R @@ -8,6 +8,8 @@ sc_tibble_meta <- function(x, names_keep = c()) { x } +#' @importFrom pillar tbl_sum +#' @export tbl_sum.sc_meta <- function(x, ...) { paste0("STATcubeR metadata: ", format(nrow(x), big.mark = ","), " x ", ncol(x) + length(attr(x, "names_skip"))) @@ -15,6 +17,8 @@ tbl_sum.sc_meta <- function(x, ...) { style_subtle <- cli::make_ansi_style('#999999') +#' @importFrom pillar tbl_format_footer +#' @export tbl_format_footer.sc_meta <- function(x, setup, ...) { names_skip <- attr(x, "names_skip") c(NextMethod(), if (length(names_skip)) style_subtle( @@ -39,6 +43,8 @@ sc_tibble <- function(x) { x } +#' @importFrom pillar tbl_sum +#' @export tbl_sum.sc_tibble <- function(x, ...) { paste0("A STATcubeR tibble: ", format(nrow(x), big.mark = ","), " x ", ncol(x)) } diff --git a/R/zzz.R b/R/zzz.R index fe5a0499..80d5e635 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -1,12 +1,4 @@ .onLoad <- function(...) { - if (requireNamespace("pillar", quietly = TRUE)) { - register_s3 <- function(method, class, fun, pkg = "pillar") - registerS3method(method, class, fun, asNamespace(pkg)) - register_s3("tbl_format_footer", "sc_meta", tbl_format_footer.sc_meta) - register_s3("tbl_sum", "sc_meta", tbl_sum.sc_meta) - register_s3("tbl_sum", "sc_tibble", tbl_sum.sc_tibble) - } - if (in_pkgdown()) cli_theme_pkgdown() } From dc009c983520598ff480b7a93f27b415e549de74 Mon Sep 17 00:00:00 2001 From: Gregor de Cillia Date: Fri, 9 Dec 2022 15:24:59 +0100 Subject: [PATCH 24/84] prep NEWS and README for upcoming release [skip ci] --- NEWS.md | 6 ++---- README.md | 2 +- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/NEWS.md b/NEWS.md index 73217558..96be491e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,9 +1,7 @@ -# STATcubeR 0.5.1 +# STATcubeR 0.6.0 * Update print methods with the `{tibble}` package (#32) - -# STATcubeR 0.5.0.1 - +* Add filters and other recodes to `sc_table_custom()` (#33) * Add global option `STATcubeR.language` to override the default language * `od_table()`: Add descriptions to `x$header` and `x$field(i)` diff --git a/README.md b/README.md index e884bd04..0c95e8c4 100644 --- a/README.md +++ b/README.md @@ -32,7 +32,7 @@ you can also download the package as a tar archive from https://github.com/stati The package can then be installed by providing a path to the downloaded archive file. ```r -install.packages('STATcubeR-0.4.3.tar.gz', repos = NULL) +install.packages('STATcubeR-0.6.0.tar.gz', repos = NULL) ``` From 6b63a60f33cd0d5e42056243ae0d2c8861ce18f0 Mon Sep 17 00:00:00 2001 From: Gregor de Cillia Date: Fri, 16 Dec 2022 09:41:34 +0100 Subject: [PATCH 25/84] allow json strings in sc_table() reimplements #36 with a slightly different approach in regards to naming --- NEWS.md | 1 + R/table.R | 36 +++++++++++++++++++++++++++--------- man/sc_table.Rd | 11 +++++++---- 3 files changed, 35 insertions(+), 13 deletions(-) diff --git a/NEWS.md b/NEWS.md index 96be491e..2b5ab577 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,6 +4,7 @@ * Add filters and other recodes to `sc_table_custom()` (#33) * Add global option `STATcubeR.language` to override the default language * `od_table()`: Add descriptions to `x$header` and `x$field(i)` +* Allow json strings in `sc_table()` (@matmo, #36) # STATcubeR 0.5.0 diff --git a/R/table.R b/R/table.R index 809977af..3f51a54c 100644 --- a/R/table.R +++ b/R/table.R @@ -92,8 +92,10 @@ sc_table_class <- R6::R6Class( #' provided by the API. #' @param annotations Include separate annotation columns in the returned #' table. This parameter is currently broken and needs to be re-implemented - tabulate = function(..., round = TRUE, annotations = FALSE) { - sc_table_tabulate(self, ..., round = round, annotations = annotations) + #' @param recode_zeros interpret zero values as missings? + tabulate = function(..., round = FALSE, annotations = FALSE, recode_zeros = FALSE) { + sc_table_tabulate(self, ..., round = round, annotations = annotations, + recode_zeros = recode_zeros) }, #' @description open the dataset in a browser browse = function() { @@ -168,8 +170,9 @@ sc_table_class <- R6::R6Class( #' * [sc_table_saved()] uses a table uri of a saved table. #' #' Those three functions all return an object of class `"sc_table"`. -#' @param json_file path to a json file, which was downloaded via the STATcube -#' GUI ("Open Data API Abfrage") +#' @param json Path to a json file, which was downloaded via the STATcube +#' GUI ("Open Data API Request"). Alternatively, a json string which +#' passes [jsonlite::validate()]. #' @param add_totals Should totals be added for each classification field in #' the json request? #' @return An object of class `sc_table` which contains the return @@ -181,9 +184,10 @@ sc_table_class <- R6::R6Class( #' will use english. `"de"` uses german. #' The third option `"both"` will import both languages by sending two requests #' to the `/table` endpoint. +#' @param json_file Depricated. Use `json` instead #' @family functions for /table #' @examplesIf sc_key_exists() -#' my_table <- sc_table(json_file = sc_example("population_timeseries.json")) +#' my_table <- sc_table(json = sc_example("population_timeseries.json")) #' #' # print #' my_table @@ -206,14 +210,15 @@ sc_table_class <- R6::R6Class( #' my_response <- sc_table_saved(table_uri) #' as.data.frame(my_response) #' @export -sc_table <- function(json_file, language = NULL, add_totals = TRUE, - key = NULL) { +sc_table <- function(json, language = NULL, add_totals = TRUE, key = NULL, + json_file = NA) { + json <- normalize_json(json, json_file) language <- sc_language(language, c("en", "de", "both")) both <- language == "both" if (both) language <- "de" - res <- sc_table_json_post(readLines(json_file, warn = FALSE), language, add_totals, key) %>% - sc_table_class$new(file = json_file, add_totals = add_totals) + res <- sc_table_json_post(json$string, language, add_totals, key) %>% + sc_table_class$new(json$string, json$file, add_totals) if (both) res$add_language("en", key) res @@ -239,6 +244,19 @@ print.sc_table <- function(x, ...) { cat(format(x, ...), sep = "\n") } +normalize_json <- function(json, json_file) { + if (!is.na(json_file)) { + json <- json_file + warning("parameter `json_file` was renamed to `json`") + } + file <- NULL + if (length(json) == 1 && !jsonlite::validate(json)) { + file <- json + json <- readLines(file) + } + list(file = file, string = json) +} + format.sc_table <- function(x, ...) { c( cli::style_bold(strwrap(x$meta$source$label)), diff --git a/man/sc_table.Rd b/man/sc_table.Rd index dd937647..2c0f9b8b 100644 --- a/man/sc_table.Rd +++ b/man/sc_table.Rd @@ -8,7 +8,7 @@ \alias{sc_table_saved} \title{Create a request against the /table endpoint} \usage{ -sc_table(json_file, language = NULL, add_totals = TRUE, key = NULL) +sc_table(json, language = NULL, add_totals = TRUE, key = NULL, json_file = NA) sc_examples_list() @@ -19,8 +19,9 @@ sc_table_saved_list(key = NULL, server = "ext") sc_table_saved(table_uri, language = NULL, key = NULL, server = "ext") } \arguments{ -\item{json_file}{path to a json file, which was downloaded via the STATcube -GUI ("Open Data API Abfrage")} +\item{json}{path to a json file, which was downloaded via the STATcube +GUI ("Open Data API Request"). Alternatively, a json string which +passes \code{\link[jsonlite:validate]{jsonlite::validate()}}.} \item{language}{The language to be used for labeling. \code{"en"} (the default) will use english. \code{"de"} uses german. @@ -33,6 +34,8 @@ the json request?} \item{key}{(\code{string}) An API key. To display your key, call \code{\link[=sc_browse_preferences]{sc_browse_preferences()}}.} +\item{json_file}{Depricated. Use \code{json} instead} + \item{filename}{The name of an example json file.} \item{server}{A STATcube API server. Defaults to the external Server via @@ -61,7 +64,7 @@ Those three functions all return an object of class \code{"sc_table"}. } \examples{ \dontshow{if (sc_key_exists()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} -my_table <- sc_table(json_file = sc_example("population_timeseries.json")) +my_table <- sc_table(json = sc_example("population_timeseries.json")) # print my_table From d7e0833361b2d122537b55e95d611c2bad73510e Mon Sep 17 00:00:00 2001 From: Gregor de Cillia Date: Fri, 16 Dec 2022 11:19:34 +0100 Subject: [PATCH 26/84] improve print method for OGD resouces links to cache files are now clickable and last_modified and cached can will be abbreviated if there is not enough horizontal space --- NAMESPACE | 3 +++ R/od_resource.R | 3 +++ R/od_table.R | 6 +++++- R/print.R | 50 +++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 61 insertions(+), 1 deletion(-) diff --git a/NAMESPACE b/NAMESPACE index 90cf2a03..9aea99e8 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,7 +4,10 @@ S3method(as.character,od_json) S3method(as.character,sc_json) S3method(as.character,sc_schema_uri) S3method(as.data.frame,sc_data) +S3method(format,pillar_shaft_ogd_file) S3method(format,sc_schema_uri) +S3method(pillar_shaft,ogd_file) +S3method(pillar_shaft,sc_dttm) S3method(pillar_shaft,sc_schema_type) S3method(pillar_shaft,sc_schema_uri) S3method(print,od_cache_file) diff --git a/R/od_resource.R b/R/od_resource.R index 255bee1e..ccbc2223 100644 --- a/R/od_resource.R +++ b/R/od_resource.R @@ -224,5 +224,8 @@ od_resource_all <- function(id, json = od_json(id), server = "ext") { check_header(out$data[[2]]) out$data[[2]] %<>% od_normalize_columns("HEADER") out$data[seq(3, nrow(out))] %<>% lapply(od_normalize_columns, "FIELD") + class(out$name) <- c("ogd_file", "character") + class(out$last_modified) <- c("sc_dttm", class(out$last_modified)) + class(out$cached) <- c("sc_dttm", class(out$cached)) out %>% `class<-`(c("tbl", "data.frame")) } diff --git a/R/od_table.R b/R/od_table.R index ee0c0cb4..39059dbd 100644 --- a/R/od_table.R +++ b/R/od_table.R @@ -108,7 +108,11 @@ od_table_class <- R6::R6Class( #' @field resources #' lists all files downloaded from the server to contruct this table resources = function() { - private$cache$resources %>% `class<-`(c("tbl", "data.frame")) + resources <- private$cache$resources + class(resources$name) <- c("ogd_file", "character") + class(resources$last_modified) <- c("sc_dttm", class(resources$last_modified)) + class(resources$cached) <- c("sc_dttm", class(resources$cached)) + resources %>% `class<-`(c("tbl", "data.frame")) }, #' @field od_server #' The server used for initialization (see to `?od_table`) diff --git a/R/print.R b/R/print.R index 8098103b..409966fb 100644 --- a/R/print.R +++ b/R/print.R @@ -48,3 +48,53 @@ sc_tibble <- function(x) { tbl_sum.sc_tibble <- function(x, ...) { paste0("A STATcubeR tibble: ", format(nrow(x), big.mark = ","), " x ", ncol(x)) } + +#' @importFrom pillar pillar_shaft +#' @export +pillar_shaft.sc_dttm <- function(x, ...) { + ymd <- format(x, "%Y-%m-%d") + hms <- cli::col_silver(format(x, "%H:%M:%S")) + short <- ymd + ind <- as.numeric(Sys.time()) - as.numeric(x) < 60*24 + short[ind] <- hms[ind] + pillar::new_pillar_shaft_simple( + paste(ymd, hms), + width = 19, + min_width = 10, + short_formatted = short, + type_sum = "dttm" + ) +} + +#' @importFrom pillar pillar_shaft +#' @export +pillar_shaft.ogd_file <- function(x, ...) { + pillar::new_pillar_shaft( + list(x = x), + width = pillar::get_max_extent(x), + min_width = 20, + class = "pillar_shaft_ogd_file", + type_sum = "chr" + ) +} + +#' @export +format.pillar_shaft_ogd_file <- function(x, width, ...) { + files <- x$x + if (in_pkgdown()) { + id <- substr(files[1], 1, nchar(files[1]) - 5) + files[1:2] <- c("meta.json", "data.csv") + files <- gsub(paste0(id, "_"), "", files, fixed = TRUE) + } + too_long <- nchar(files) > width + files[too_long] <- paste0(substring(files[too_long], 1, width - 2), + cli::symbol$ellipsis) + if (in_pkgdown()) { + files <- cli::style_hyperlink( + files, paste0("https://data.statistik.gv.at/data/", x$x)) + } else { + files <- cli::style_hyperlink(files, paste0("file://", path.expand( + od_cache_dir()), x$x)) %>% as.character() + } + pillar::new_ornament(files, align = "left") +} From 38db4057d673ae0e48b808ec097aace80e131e43 Mon Sep 17 00:00:00 2001 From: Gregor de Cillia Date: Fri, 16 Dec 2022 11:35:59 +0100 Subject: [PATCH 27/84] cistomize print() for sc_schema_flatten() the resouce uris are now displayed similar to sc_schema() --- NAMESPACE | 1 + R/schema.R | 13 +++++++++++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 9aea99e8..24eba8d7 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -16,6 +16,7 @@ S3method(print,od_revisions) S3method(print,od_table) S3method(print,sc_rate_limit_table) S3method(print,sc_schema) +S3method(print,sc_schema_flatten) S3method(print,sc_table) S3method(print,sc_tibble_meta) S3method(print,sc_url) diff --git a/R/schema.R b/R/schema.R index d4f48660..e861b886 100644 --- a/R/schema.R +++ b/R/schema.R @@ -132,11 +132,20 @@ sc_schema_flatten <- function(x, type) { stopifnot(!is.null(response)) response <- httr::content(response) flattened <- sc_schema_flatten_impl(response, type) - flattened <- as.data.frame(flattened, stringsAsFactors = FALSE) - class(flattened) <- c("tbl", "data.frame") + flattened <- vctrs::new_data_frame(flattened, + class = c("sc_schema_flatten", "tbl", "tbl_df")) flattened } +#' @export +print.sc_schema_flatten <- function(x, ...) { + y <- x + y$id <- new_schema_uri(x$id, x$id) + class(y) <- setdiff(class(x), "sc_schema_flatten") + print(y, ...) + invisible(x) +} + sc_schema_flatten_impl <- function(resp, type) { if (resp$type == type) return(list(id = resp$id, label = resp$label)) From 5aa847af7f4d3501c8490d7230f6e96de385f611 Mon Sep 17 00:00:00 2001 From: Gregor de Cillia Date: Fri, 16 Dec 2022 11:36:50 +0100 Subject: [PATCH 28/84] devtools::document() re-sync the roxygen-generated files --- man/sc_table.Rd | 2 +- man/sc_table_class.Rd | 9 ++++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/man/sc_table.Rd b/man/sc_table.Rd index 2c0f9b8b..baff1283 100644 --- a/man/sc_table.Rd +++ b/man/sc_table.Rd @@ -19,7 +19,7 @@ sc_table_saved_list(key = NULL, server = "ext") sc_table_saved(table_uri, language = NULL, key = NULL, server = "ext") } \arguments{ -\item{json}{path to a json file, which was downloaded via the STATcube +\item{json}{Path to a json file, which was downloaded via the STATcube GUI ("Open Data API Request"). Alternatively, a json string which passes \code{\link[jsonlite:validate]{jsonlite::validate()}}.} diff --git a/man/sc_table_class.Rd b/man/sc_table_class.Rd index 415a5d3a..16e44fb9 100644 --- a/man/sc_table_class.Rd +++ b/man/sc_table_class.Rd @@ -97,7 +97,12 @@ added to a timeseries An extension of \code{\link[=sc_tabulate]{sc_tabulate()}} with additional parameters. \subsection{Usage}{ -\if{html}{\out{
}}\preformatted{sc_table_class$tabulate(..., round = TRUE, annotations = FALSE)}\if{html}{\out{
}} +\if{html}{\out{
}}\preformatted{sc_table_class$tabulate( + ..., + round = FALSE, + annotations = FALSE, + recode_zeros = FALSE +)}\if{html}{\out{
}} } \subsection{Arguments}{ @@ -110,6 +115,8 @@ provided by the API.} \item{\code{annotations}}{Include separate annotation columns in the returned table. This parameter is currently broken and needs to be re-implemented} + +\item{\code{recode_zeros}}{interpret zero values as missings?} } \if{html}{\out{}} } From 3fb82be07c26e4b933f531217a66a63d1c6cfca1 Mon Sep 17 00:00:00 2001 From: Gregor de Cillia Date: Sat, 17 Dec 2022 17:28:19 +0100 Subject: [PATCH 29/84] sc_table_custom(dry_run) add a new parameter `dry_run` to sc_table_custom() which allows to see what request is generated without actually sending it to the API with this option, all type-checks are still applied --- R/table_custom.R | 10 ++++++++-- man/sc_table_custom.Rd | 7 ++++++- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/R/table_custom.R b/R/table_custom.R index 2d8cc704..b6985434 100644 --- a/R/table_custom.R +++ b/R/table_custom.R @@ -19,6 +19,9 @@ #' [c()]. #' @param language The language to be used for labeling. "en" #' (the default) will use english. "de" uses german. +#' @param dry_run If `TRUE`, no request is sent to the API. Instead, type +#' checks are performed and the json request is returned as a string. +#' Defaults to `FALSE`. #' @inheritParams sc_table #' @section Schema objects in parameters: #' it is possible to pass `sc_schema` objects (usually generated by @@ -80,12 +83,13 @@ #' @export sc_table_custom <- function(db, measures = c(), dimensions = c(), language = c("en", "de"), - add_totals = TRUE, key = NULL, recodes = NULL) { + add_totals = TRUE, key = NULL, recodes = NULL, + dry_run = FALSE) { db <- as_id(db) measures <- as_id(measures, TRUE) dimensions <- as_id(dimensions, TRUE) json_list <- list(database = db, measures = as.list(measures), - dimensions = lapply(dimensions, list)) + dimensions = lapply(dimensions, I)) if (!is.null(recodes)) { json_list$recodes <- recodes add_totals <- FALSE @@ -100,6 +104,8 @@ sc_table_custom <- function(db, measures = c(), dimensions = c(), warning("parameter `measures` is not of type `MEASURE`, `STATFN` or `COUNT`") if (!grepl("^str:database", db)) warning("parameter `db` is not of type `DATABASE`") + if (dry_run) + return(json) response <- sc_table_json_post(json, language, add_totals, key) sc_table_class$new(response, toString(json)) } diff --git a/man/sc_table_custom.Rd b/man/sc_table_custom.Rd index 940e2745..f0dbc4bf 100644 --- a/man/sc_table_custom.Rd +++ b/man/sc_table_custom.Rd @@ -12,7 +12,8 @@ sc_table_custom( language = c("en", "de"), add_totals = TRUE, key = NULL, - recodes = NULL + recodes = NULL, + dry_run = FALSE ) sc_recode(field, map = NULL, total = FALSE) @@ -40,6 +41,10 @@ the json request? Ignored if \code{recodes} is used.} If more than one recode is supplied, recodes should be concatinated with \code{\link[=c]{c()}}.} +\item{dry_run}{If \code{TRUE}, no request is sent to the API. Instead, type +checks are performed and the json request is returned as a string. +Defaults to \code{FALSE}.} + \item{field}{An uid of a classification field to be recoded. The provided uid should also be passed in the \code{dimensions} parameter of \code{\link[=sc_table_custom]{sc_table_custom()}}.} From 7ce363f69372cc427147dd8aac0e19ae83a1529a Mon Sep 17 00:00:00 2001 From: Gregor de Cillia Date: Sat, 17 Dec 2022 17:38:20 +0100 Subject: [PATCH 30/84] update custom tables article the new version now contains a section about typechecks heading levels are restructured to include

and

the intro paragraph correctly lists COUNT as a possible schema type for measures after most of the sc_table_custom() calls, a
tag is added which can be expanded to show the generated json request the recodes section now contains an introduction paragraph that outlines why filtering data on the server side might be desireable add

further reading

and link to the schema article as well as the article about the base class showcase more recoding features (total and "recoding accross hierarchies") --- vignettes/sc_table_custom.Rmd | 258 ++++++++++++++++++++++++++++------ 1 file changed, 213 insertions(+), 45 deletions(-) diff --git a/vignettes/sc_table_custom.Rmd b/vignettes/sc_table_custom.Rmd index a2ee6bbf..e1190946 100644 --- a/vignettes/sc_table_custom.Rmd +++ b/vignettes/sc_table_custom.Rmd @@ -13,21 +13,44 @@ source("R/setup.R")$value ```{r, include = FALSE} if (!sc_key_exists()) knitr::opts_chunk$set(eval = FALSE) +show_json <- function(x) { + paste( + "
", + "Show json request", + "", + "```r", + "x$json", + "```", + "", + x$json$.__enclos_env__$private$json_content %>% + paste("```json\n", . , "\n```") %>% knitr::asis_output(), + "", + "
", + sep = "\n" + ) %>% knitr::asis_output() +} ``` The function `sc_table_custom()` allows you to define requests against the `/table` endpoint programatically. This can be useful to automate the generation of `/table` request rather than relying on the GUI to do so. -The function accepts the three arguments. +The function accepts the four arguments. -* A database id -* ids of measures to be imported (type `MEASURE` or `STAT_FUNCTION`) -* ids of fields to be imported (type `FIELD` or `VALUESET`) +- A **database** id +- ids of **measures** to be imported (type `MEASURE`, `STAT_FUNCTION` or `COUNT`) +- ids of **fields** to be imported (type `FIELD` or `VALUESET`) +- a list of **recodes** that can be used customize fields -## Starting Simple +## Bulding a Custom Table Step by Step {#step-by-step} -First, we want to just send a database id to `sc_table_custom()`. -We will use a [database about accomodation](`r sc_browse_database("detouextregsai")`) througout this article. -This returns a table with one single row. +The first part of this Article will showcase how custom tables can be +created with a [database about tourism](`r sc_browse_database("detouextregsai")`). +This database will also be used in most other examples of this article. + +### Starting Simple {#database} + +First, we want to just send the database id to `sc_table_custom()`. +This will request only the mandirory fields and default measures for that database. +In case of the tourism database, a table with one single row is returned. ```{r} database <- "str:database:detouextregsai" @@ -35,54 +58,77 @@ x <- sc_table_custom(database) x$tabulate() ``` -we see that `r format(x$tabulate()[[2]], big.mark = " ")` nights were spent in austrian tourism establishments in the month of `r x$tabulate()[[1]]`. +```{r, echo=FALSE} +show_json(x) +``` + +We see that `r format(x$tabulate()[[2]], big.mark = " ")` nights were spent in austrian tourism establishments in the month of `r x$tabulate()[[1]]`. -## Adding Countries +### Adding Countries {#field} Now we want to add a classification to the table. This can be done by getting the database schema and showing all classification fields. ```{r} -schema <- sc_schema_db(database) -(fields <- sc_schema_flatten(schema, "FIELD")) +tourism <- sc_schema_db(database) +(fields <- sc_schema_flatten(tourism, "FIELD")) ``` If we want to add "Country of origin" we need to include the fouth entry of the `id` column in our request. ```{r} -x <- sc_table_custom(database, dimensions = fields$id[4]) +x <- sc_table_custom(tourism, dimensions = fields$id[4]) x$tabulate() ``` -## Adding Tourism Communes +```{r, echo=FALSE} +show_json(x) +``` + +Alternatively, we could also pass the schema object for "country of origin". + +```{r} +origin <- tourism$`Other Classifications`$`Country of origin` +x <- sc_table_custom(tourism, dimensions = origin) +``` + +### Adding Tourism Communes {#fields} The `dimensions` parameter in `sc_schema_custom()` accepts vectors of field ids. Therefore, we can add the communes easily. ```{r} -x <- sc_table_custom(database, dimensions = fields$id[c(2, 4)]) +x <- sc_table_custom(tourism, dimensions = fields$id[c(2, 4)]) x$tabulate() ``` -## Add Another Measure +```{r, echo=FALSE} +show_json(x) +``` + +### Add Another Measure {#measure} Currently, the table only returns the default measure for the database which is the number of nights spent. We can add a second measure by again using the database schema and passing a measure id ```{r} -(measures <- sc_schema_flatten(schema, "MEASURE")) +(measures <- sc_schema_flatten(tourism, "MEASURE")) ``` We can add both measures to the request by using `measures$id`. Just like the `dimensions` parameter, the `measures` parameters accepts vectors of resource ids. ```{r} -x <- sc_table_custom(database, measures = measures$id, +x <- sc_table_custom(tourism, measures = measures$id, dimensions = fields$id[c(2, 4)]) x$tabulate() ``` -## Using Valuesets +```{r, echo=FALSE} +show_json(x) +``` + +### Changing the hierarchy level {#hierarchy} We can see in [the GUI](`r sc_browse_database("detouextregsai")`) that "Country of origin" is a hierarchical classification. If we look at the table above, only the top level of the hierarchy (Austria, Germany, other) is used. @@ -95,69 +141,89 @@ knitr::include_graphics("img/hierarchical_classification.png") The different valuesets for "country of origin" can be compared by browsing the database schema. ```{r} -(valuesets <- schema$`Other Classifications`$`Country of origin`) +(valuesets <- tourism$`Other Classifications`$`Country of origin`) ``` We can see that the two levels of the hierarchy are represented by the two valuesets. The valueset "Herkunftsland" uses 3 classification elements and represents the top level of the hierarchy (Austria, Germany, Other). The valueset "Country of origin" uses 87 (10+8+69) classification elements and is the bottom level of the hierarchy. -For classification with more levels of hierarchies, more valuesets will be present. +For classifications with more levels of hierarchies, more valuesets will be present. We will now use the id for the first valueset in the `dimensions` parmaeter of `sc_table_custom`. ```{r} x <- sc_table_custom( - db = "str:database:detouextregsai", - measures = measures$id[1:2], - dimensions = valuesets$`Country of origin`$id + db = tourism, + measures = measures$id, + dimensions = valuesets$`Country of origin` ) x$tabulate() ``` +```{r, echo=FALSE} +show_json(x) +``` + It is possible to use a mixture of valuesets and fields in the `dimensions` parameter. -## Using Counts +## Using Counts {#counts} Instead of Measures and Valuesets, it is also possible to provide counts -in the measure parameter of `sc_table_custom()`. +in the `measure` parameter of `sc_table_custom()`. ```{r} -schema_pop <- sc_schema_db("debevstand") -(count <- schema_pop$`Datensätze/Records`$`F-BEVSTAND`) -sc_table_custom(schema_pop$id, count$id) +population <- sc_schema_db("debevstand") +(count <- population$`Datensätze/Records`$`F-BEVSTAND`) +sc_table_custom(population, count) ``` -## Filtering Data +## Recodes {#recodes} Data can be filtered on the server side by using the `recodes` parameter of `sc_table_custom()`. -Here is an example where the accomodation statistics uses a filter for -`Country of origin`. +This might be more complicated than filtering the data in R but offers some +important advantages. + +- **performance** Traffic between the client and server is reduced which might + lead to consierably faster API responses. +- **cell limits** Apart from rate limits (see `?sc_rate_limits`), STATcube also + limits the amount of cells that can be fetched per user. + Filtering data can be useful to preserve this quota. + +### Filtering Data {#filter} + +As an example for fiitering data, we can request a table from the tourism +database and only select some countries for `Country of origin`. ```{r} +origin <- tourism$`Other Classifications`$`Country of origin`$`Country of origin` +month <- tourism$`Mandatory fields`$`Season/Tourism Month`$`Season/Tourism Month` x <- sc_table_custom( - db = "str:database:detouextregsai", - measures = measures$id[1:2], - dimensions = list(schema$`Mandatory fields`$`Season/Tourism Month`, - valuesets$`Country of origin`), - recodes = c(sc_recode( - valuesets$`Country of origin`, - list(valuesets$`Country of origin`$`Italy <29>`, - valuesets$`Country of origin`$`Germany <12>`) - )) + db = tourism, + measures = measures$id, + dimensions = list(month, origin), + recodes = sc_recode(origin, list(origin$`Italy <29>`, origin$`Germany <12>`)) ) x$tabulate() ``` +```{r, echo=FALSE} +show_json(x) +``` + +This table only contains two countries rather than +`r length(valuesets[["Country of origin"]])-4` so the amount of cells in the +table is also 40 times less compared to a table that would omit this filter. + +### Grouping items {#group} + Other options from the [recodes specification] are also available via `sc_recode()`. It is possible to group items and specify recodes for several classifications. ```{r} -origin <- valuesets$`Country of origin` -month <- schema$`Mandatory fields`$`Season/Tourism Month`$`Season/Tourism Month` x <- sc_table_custom( - db = "str:database:detouextregsai", - measures = measures$id[1:2], + db = tourism, + measures = measures$id, dimensions = list(month, origin), recodes = c( sc_recode(origin, list( @@ -172,4 +238,106 @@ x <- sc_table_custom( x$tabulate() ``` +```{r, echo=FALSE} +show_json(x) +``` + +This table contains data for two country-groups and two months. +In this case, the cell values for Gemany and the Netherlands are just added +to calculate the etries for Arrivals and Nights spent. +However, in other cases STATcubeR might decide it is more appropriate +to use weighted means or other more complicated aggregation methods. + +### Adding Totals {#totals} + +The `total` parameter in `sc_recode()` can be used to request totals for +classifications. As an example, let's look at the tourism acivity in the +capital cities of austria + +```{r} +destination <- tourism$`Other Classifications`$`Tourism commune [ABO]`$ + `Regionale Gliederung (Ebene +1)` +x <- sc_table_custom( + tourism, + measures = measures$id, + dimensions = list(month, destination), + recodes = c( + sc_recode(destination, total = TRUE, list( + destination$Wien, destination$`Stadt Salzburg`, destination$Linz)), + sc_recode(month, total = FALSE, list(month$Nov.99, month$Apr.09)) + ) +) +as.data.frame(x) +``` + +```{r, echo=FALSE} +show_json(x) +``` + +We see that there are two rows in the table where Tourism commune is set +to "Total". The corresponding values represent the sum of all Arrivals +or Nights spent in either of these three cities durng that month. + +### Recoding across hierarchies {#recode-hierarchy} + +To use a recode that includes several hierarchy levels, the corresponding +`FIELD` should be used as the first parameter of `sc_recode()`. +For example, a recode with countries and federal states from the "Country of origin" +classification can be defined as follows. + + +```{r} +origin1 <- tourism$`Other Classifications`$`Country of origin` +origin2 <- origin1$`Country of origin` +origin3 <- origin1$`Herkunftsland (Ebene +1)` +x <- sc_table_custom( + tourism, measures$id, origin1, + recodes = sc_recode(origin1, list( + origin2$`Vienna <01>`, origin3$Germany, + list(origin2$`Bavaria (beg.05/03) <80>`, origin3$`other countries`)) + ) +) +x$tabulate() +``` + +```{r, echo=FALSE} +show_json(x) +``` + +## Typechecks + +Since custom tables can become quite complicated, `sc_table_custom()` performs +type-checks before sending the request to the API. +If inconsistencies are detected, warnings will be generated. +See `?sc_table_custom` for a comprehensive list of the performed checks. + +```{r} +sc_table_custom(tourism, measures = tourism, dry_run = TRUE) +``` + +
+Advanced example + +```{r} +sc_table_custom("A", measures = "B", dimensions = "C", + recodes = sc_recode("D", "E"), dry_run = TRUE) +``` + +
+ +If `dry_run` is set to `FALSE` (the default), STATcubeR will send the request +to the API even if inconsistencies are detected. +This will likely lead to an error of the form "expected json but got html". + +If you get spurious warnings or have suggestions on how these typechecks might +be improved, please issue a feature request to the STATcubeR bugtracker. + +## Further Reading + +* If you've come this far, you are probably already familiar with `sc_schema()`. + But in case you are not, the `r ticle("sc_schema")` contains more information + on how to get metadata from the API. +* The `r ticle("sc_data")` showcases different ways to extract data and metadata + from the return value of `sc_table_custom()`. + [recodes specification]: https://docs.wingarc.com.au/superstar/9.12/open-data-api/open-data-api-reference/table-endpoint From 4da0e915913d5a12f0a17d00a8e1d53868c081f8 Mon Sep 17 00:00:00 2001 From: Gregor de Cillia Date: Sat, 17 Dec 2022 17:39:31 +0100 Subject: [PATCH 31/84] fix usuage example the parameter `json_file` was recently renamed to `json`. Reflect this in the sc_table article --- vignettes/sc_table.Rmd | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vignettes/sc_table.Rmd b/vignettes/sc_table.Rmd index 4805429a..e8f4955b 100644 --- a/vignettes/sc_table.Rmd +++ b/vignettes/sc_table.Rmd @@ -48,7 +48,8 @@ This means that the current user is not permitted to use the API. Provide the path to the downloaded json file as a string in `sc_table()`. ``` r -my_table <- sc_table(json_file = "path/to/api_request.json") +library(STATcubeR) +my_table <- sc_table(json = "path/to/api_request.json") ``` This will send the json-request to the [`/table` endpoint] of the API and return an object of class `sc_table`. From 8568b1e498140b0e50f3e0f2030416809a73107a Mon Sep 17 00:00:00 2001 From: Gregor de Cillia Date: Sat, 17 Dec 2022 17:41:26 +0100 Subject: [PATCH 32/84] add "further reading" too sc_schema link to the custom tables article and the saved tables article which are both closely related to the schema endpoint --- vignettes/sc_schema.Rmd | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/vignettes/sc_schema.Rmd b/vignettes/sc_schema.Rmd index 01db5ed6..4f5b4cda 100644 --- a/vignettes/sc_schema.Rmd +++ b/vignettes/sc_schema.Rmd @@ -230,3 +230,11 @@ The following example shows all available measures from the [economic trend moni sc_schema_db("dekonjunkturmonitor") %>% sc_schema_flatten("MEASURE") ``` + +## Further Reading + +* Schemas can be used to construct table requests as described in + the `r ticle("sc_table_custom")` +* See the `r ticle("sc_table_saved")` to get access to the data for table + nodes in the schema. + . From 079159d507267b21264cc4f6c682c8476d0e926b Mon Sep 17 00:00:00 2001 From: Gregor de Cillia Date: Sun, 18 Dec 2022 14:02:39 +0100 Subject: [PATCH 33/84] + crossreferences last_error, table_custom link between those two articles in regards to the cell_count error --- vignettes/sc_last_error.Rmd | 5 +++-- vignettes/sc_table_custom.Rmd | 5 ++++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/vignettes/sc_last_error.Rmd b/vignettes/sc_last_error.Rmd index 4cdf35de..f0811135 100644 --- a/vignettes/sc_last_error.Rmd +++ b/vignettes/sc_last_error.Rmd @@ -170,11 +170,12 @@ If you encounter this error during the workflow described in the `r ticle("sc_ta This is because json request that are downloaded by the STATcube GUI should always contain valid URIs. However, if you either modify the downloaded json requests or use `sc_table_custom()`, the reason "invalid json body" is plausible. -### Cell Limit Exceeded +### Cell Limit Exceeded {#CELL_COUNT} This error occurs if more than 1 million cells are requested via a single call to `sc_table()` or `sc_table_custom()`. -If you encounter this error, consider splitting up the request into multiple smaller requests. +If you encounter this error, consider splitting up the request into multiple smaller requests or defining a filter in the gui or via a +[custom table filter](sc_table_custom.html#filter). ```{r, eval = FALSE} sc_table_custom( diff --git a/vignettes/sc_table_custom.Rmd b/vignettes/sc_table_custom.Rmd index e1190946..d00b3621 100644 --- a/vignettes/sc_table_custom.Rmd +++ b/vignettes/sc_table_custom.Rmd @@ -186,9 +186,12 @@ important advantages. - **performance** Traffic between the client and server is reduced which might lead to consierably faster API responses. -- **cell limits** Apart from rate limits (see `?sc_rate_limits`), STATcube also +- **cell limits (user)** Apart from rate limits (see `?sc_rate_limits`), + STATcube also limits the amount of cells that can be fetched per user. Filtering data can be useful to preserve this quota. +- **cell limits (equest)** If a single request would contain more than 1 million + cells, a [cell count error](sc_last_error.html#CELL_COUNT) is thrown. ### Filtering Data {#filter} From 694bc10377e714ecf91ce5dabd218494ecf140b4 Mon Sep 17 00:00:00 2001 From: Gregor de Cillia Date: Sun, 18 Dec 2022 14:06:06 +0100 Subject: [PATCH 34/84] sc_table_custom.Rmd: show json for all requests for consistency, also add show_json for the example request that uses COUNT-URIs --- vignettes/sc_table_custom.Rmd | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/vignettes/sc_table_custom.Rmd b/vignettes/sc_table_custom.Rmd index d00b3621..1a886a08 100644 --- a/vignettes/sc_table_custom.Rmd +++ b/vignettes/sc_table_custom.Rmd @@ -174,7 +174,12 @@ in the `measure` parameter of `sc_table_custom()`. ```{r} population <- sc_schema_db("debevstand") (count <- population$`Datensätze/Records`$`F-BEVSTAND`) -sc_table_custom(population, count) +x <- sc_table_custom(population, count) +x$tabulate() +``` + +```{r, echo=FALSE} +show_json(x) ``` ## Recodes {#recodes} From e0a1993072a09b4ec81441fed454b3bf19d1b11e Mon Sep 17 00:00:00 2001 From: Gregor de Cillia Date: Sun, 18 Dec 2022 14:07:31 +0100 Subject: [PATCH 35/84] add more links to sc_table_custom.Rmd link to the issue page and to the invalid json error message --- vignettes/sc_table_custom.Rmd | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/vignettes/sc_table_custom.Rmd b/vignettes/sc_table_custom.Rmd index 1a886a08..a77ff2ea 100644 --- a/vignettes/sc_table_custom.Rmd +++ b/vignettes/sc_table_custom.Rmd @@ -335,10 +335,10 @@ sc_table_custom("A", measures = "B", dimensions = "C", If `dry_run` is set to `FALSE` (the default), STATcubeR will send the request to the API even if inconsistencies are detected. -This will likely lead to an error of the form "expected json but got html". +This will likely lead to an error of the form ["expected json but got html"]. If you get spurious warnings or have suggestions on how these typechecks might -be improved, please issue a feature request to the STATcubeR bugtracker. +be improved, please issue a feature request to the [STATcubeR bugtracker]. ## Further Reading @@ -349,3 +349,5 @@ be improved, please issue a feature request to the STATcubeR bugtracker. from the return value of `sc_table_custom()`. [recodes specification]: https://docs.wingarc.com.au/superstar/9.12/open-data-api/open-data-api-reference/table-endpoint +[STATcubeR bugtracker]: https://github.com/statistikat/STATcubeR/issues +["expected json but got html"]: sc_last_error.html#invalid-json From 00312de21656b4381bf86a593ba978a66ca1bde9 Mon Sep 17 00:00:00 2001 From: Gregor de Cillia Date: Sun, 18 Dec 2022 14:09:07 +0100 Subject: [PATCH 36/84] update

ids in sc_last_error.Rmd in case the error response from the API containes an "errorType", use this error type as a css id in the error handling article --- vignettes/sc_last_error.Rmd | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/vignettes/sc_last_error.Rmd b/vignettes/sc_last_error.Rmd index f0811135..1153108d 100644 --- a/vignettes/sc_last_error.Rmd +++ b/vignettes/sc_last_error.Rmd @@ -53,7 +53,7 @@ See `?httr::content` to and `?httr::headers` to get started. This section showcases the most common types of errors that you might encounter when interacting with the API. Please feel free to open a new issue on the [STATcubeR issue tracker] if you get an error which is not listed here. -### Invalid API Key +### Invalid API Key {#AUTHENTICATION} If an invalid API key is used for a request, a 401 status is returned. @@ -110,7 +110,7 @@ If you encounter this error, please check if the rate limits are in fact a plaui Unfortunately, the response for exceeded rate limits is very generic and can not be differentiated from the response for invalid json-bodies (see below). This is why the error message lists two possible reasons. -### Schema +### Schema {#SCHEMA_COMPONENT_NOT_FOUND} Invalid URIs used with `sc_schema` will be displayed with a special error type `SCHEMA_COMPONENT_NOT_FOUND`. @@ -122,7 +122,7 @@ sc_schema("invalid_uri") readRDS("sc_last_error/schema.rds") %>% STATcubeR:::sc_check_response() ``` -### Saved Tables +### Saved Tables {#TXD_NOT_FOUND} As mentioned in the `r ticle("sc_table_saved")`, the function `sc_table_saved()` can only access default tables and tables that are saved under the current user. From adc3a83ba54725c8e9b3e27c5671e913d802957a Mon Sep 17 00:00:00 2001 From: Gregor de Cillia Date: Sun, 18 Dec 2022 14:14:22 +0100 Subject: [PATCH 37/84] mention #35 in NEWS strictly speaking, this change is only available in master and not in this branch this will however be appropriate as soon as this branch is merged --- NEWS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/NEWS.md b/NEWS.md index 2b5ab577..9e192130 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,6 +4,7 @@ * Add filters and other recodes to `sc_table_custom()` (#33) * Add global option `STATcubeR.language` to override the default language * `od_table()`: Add descriptions to `x$header` and `x$field(i)` +* Depend on cli >= 3.4.1 (@matmo, #35) * Allow json strings in `sc_table()` (@matmo, #36) # STATcubeR 0.5.0 From 8701f0f9dc391ce0fb9104035b11230b2a44fc7e Mon Sep 17 00:00:00 2001 From: Gregor de Cillia Date: Fri, 30 Dec 2022 13:40:20 +0100 Subject: [PATCH 38/84] add support for iso dates time variables which use YYYYMMDD are now parsed into R datetime classes during import if classifications with 8 characters and only numbers in the codes are detected such datasets do not exist on the external OGD portal or STATcube yet but there are some internal datasets which are going to utilize these parsers --- R/table_field.R | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/R/table_field.R b/R/table_field.R index 28f2a528..f0a72faa 100644 --- a/R/table_field.R +++ b/R/table_field.R @@ -22,19 +22,22 @@ sc_field_type <- function(field) { codes_numeric <- all(!is.na(suppressWarnings(as.numeric(varcodes)))) if (!codes_numeric) return("Category") - if (!all(diff(nchar(varcodes)) == 0) || !(nchar(varcodes)[1] %in% 4:6)) + if (!all(diff(nchar(varcodes)) == 0) || !(nchar(varcodes)[1] %in% c(4:6, 8))) return("Category") year <- as.numeric(substr(varcodes, 1, 4)) if (!all(year %in% 1900:2150)) return("Category") time_type <- switch( as.character(nchar(varcodes[1])), - `4` = "year", `5` = "quarter", `6` = "month" + `4` = "year", `5` = "quarter", `6` = "month", `8` = "date" ) if ((time_type == "quarter") && all(substr(varcodes, 5, 5) %in% 5:6)) time_type <- "half-year" if ((time_type == "month") && any(as.numeric(substr(varcodes, 5, 6)) > 12)) time_type <- "week" + if (time_type == "date" && (any(as.numeric(substr(varcodes, 5, 6)) > 12) || + any(as.numeric(substr(varcodes, 7, 8)) > 31))) + return("Category") paste0("Time (", time_type, ")") } @@ -69,6 +72,12 @@ sc_field_parse_week <- function(year, week) { first_day + 7 * (as.numeric(week) - 1) } +sc_field_parse_iso_date <- function(year, remainder) { + month <- substr(remainder, 1, 2) + day <- substr(remainder, 3, 4) + as.Date(paste(year, month, day, sep = "-")) +} + sc_field_parse_time <- function(field) { if (is.character(field)) varcodes <- sapply(field, function(x) utils::tail(strsplit(x, "-")[[1]], 1)) @@ -77,12 +86,16 @@ sc_field_parse_time <- function(field) { varcodes[varcodes == "SC_TOTAL"] <- NA year <- substr(varcodes, 1, 4) remainder <- substr(varcodes, 5, 8) + ind <- is.na(varcodes) + nc <- nchar(remainder[!ind][1]) + stopifnot(all(nchar(remainder[!ind]) == nc)) + if (nc == 4) + return(sc_field_parse_iso_date(year, remainder)) if (any(as.numeric(remainder) > 12, na.rm = TRUE)) return(sc_field_parse_week(year, remainder)) month <- sc_field_parse_time_month(remainder) parsed <- as.Date(rep(NA, length(varcodes))) - ind <- is.na(varcodes) parsed[!ind] <- sc_as_time(year, month, ind) parsed } From 89755b4938448ee1dd5a54fc4970539f61ec0613 Mon Sep 17 00:00:00 2001 From: Gregor de Cillia Date: Mon, 9 Jan 2023 12:25:44 +0100 Subject: [PATCH 39/84] v0.5.1 --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 8972bca5..b4f2d3be 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: STATcubeR Title: R interface for the STATcube REST API and Open Government Data -Version: 0.5.0.1 +Version: 0.5.1 Authors@R: c( person("Gregor", "de Cillia", , "Gregor.deCillia@statistik.gv.at", role = c("aut", "cre")), person("Bernhard", "Meindl", , "Bernhard.Meindl@statistik.gv.at", role = "ctb"), From 34327120f4302cc51be362f7c3e72e6eea6eacd9 Mon Sep 17 00:00:00 2001 From: Gregor de Cillia Date: Mon, 30 Jan 2023 12:26:33 +0100 Subject: [PATCH 40/84] CI/CD: deploy dev branch --- config/jenkins.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/config/jenkins.yaml b/config/jenkins.yaml index 89e10158..9d4b9fc0 100644 --- a/config/jenkins.yaml +++ b/config/jenkins.yaml @@ -16,3 +16,4 @@ pkg_options: features: build_branches: - statbucket + - tibble_pkg From ea6bf24a69c6c6abf3083dda67dc441f89d5ac1c Mon Sep 17 00:00:00 2001 From: Gregor de Cillia Date: Mon, 30 Jan 2023 12:31:08 +0100 Subject: [PATCH 41/84] upodate jenkins config --- config/jenkins.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/config/jenkins.yaml b/config/jenkins.yaml index 9d4b9fc0..9ed6156d 100644 --- a/config/jenkins.yaml +++ b/config/jenkins.yaml @@ -15,5 +15,4 @@ pkg_options: features: build_branches: - - statbucket - tibble_pkg From c49a6498f5d94aee2573862f616f4c0218dfe347 Mon Sep 17 00:00:00 2001 From: Gregor de Cillia Date: Mon, 20 Feb 2023 15:59:07 +0100 Subject: [PATCH 42/84] devtools::spell_check() apply spellchecks with the {spelling} package and define a wordlist so that the current codebase is typo-free TODO: there are now discrepancies between the wording and the codebase - sc_schema_catalogue() uses the english spelling catalogue instead of catalog. The same is true for od_catalogue() - inst/json_examples/accomodation.json should be accommodation (double m) Think about whether it makes sense to rename these things. --- DESCRIPTION | 6 ++- NEWS.md | 4 +- R/browse.R | 2 +- R/cache.R | 8 ++-- R/error.R | 6 +-- R/od_cache.R | 2 +- R/od_list.R | 4 +- R/od_revisions.R | 4 +- R/od_table.R | 6 +-- R/other_endpoints.R | 4 +- R/recoder.R | 6 +-- R/sc_data.R | 6 +-- R/schema.R | 8 ++-- R/schema_db.R | 2 +- R/table.R | 12 +++--- R/table_custom.R | 4 +- R/tabulate.R | 6 +-- README.md | 6 +-- inst/WORDLIST | 72 +++++++++++++++++++++++++++++++++++ man/od_cache.Rd | 2 +- man/od_catalogue.Rd | 2 +- man/od_list.Rd | 2 +- man/od_revisions.Rd | 4 +- man/od_table.Rd | 2 +- man/od_table_class.Rd | 4 +- man/other_endpoints.Rd | 4 +- man/sc_browse.Rd | 2 +- man/sc_cache.Rd | 8 ++-- man/sc_data.Rd | 6 +-- man/sc_last_error.Rd | 6 +-- man/sc_recoder.Rd | 6 +-- man/sc_schema.Rd | 10 ++--- man/sc_table.Rd | 4 +- man/sc_table_class.Rd | 8 ++-- man/sc_table_custom.Rd | 4 +- man/sc_tabulate.Rd | 6 +-- tests/spelling.R | 3 ++ vignettes/od_list.Rmd | 10 ++--- vignettes/od_resources.Rmd | 8 ++-- vignettes/od_table.Rmd | 30 +++++++-------- vignettes/sc_cache.Rmd | 10 ++--- vignettes/sc_data.Rmd | 8 ++-- vignettes/sc_info.Rmd | 2 +- vignettes/sc_key.Rmd | 2 +- vignettes/sc_last_error.Rmd | 10 ++--- vignettes/sc_schema.Rmd | 26 ++++++------- vignettes/sc_table.Rmd | 18 ++++----- vignettes/sc_table_custom.Rmd | 52 ++++++++++++------------- vignettes/sc_tabulate.Rmd | 18 ++++----- 49 files changed, 261 insertions(+), 184 deletions(-) create mode 100644 inst/WORDLIST create mode 100644 tests/spelling.R diff --git a/DESCRIPTION b/DESCRIPTION index 8972bca5..5d1aa4b6 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -21,11 +21,13 @@ Imports: magrittr, pillar (>= 1.5.0), vctrs -Suggests: +Suggests: + spelling, data.tree, rappdirs, xml2 Encoding: UTF-8 LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.2.1 +RoxygenNote: 7.2.3 +Language: en-US diff --git a/NEWS.md b/NEWS.md index 9e192130..b85e82ba 100644 --- a/NEWS.md +++ b/NEWS.md @@ -47,7 +47,7 @@ * Allow recodes of `sc_data` objects (#17) * Better parsing of time variables (#15, #16) * Use bootstrap 5 and `{pkgdown}` 2.0.0 for the website -* Allow export and import of open data using tar archves (#20) +* Allow export and import of open data using tar archives (#20) # STATcubeR 0.2.4 @@ -94,7 +94,7 @@ This version finalizes #11 https://data.statistik.gv.at/ * new class `od_table` to get OGD data -* methods to tabulate reponses +* methods to tabulate responses * caching * four new pkgdown articles for `od_table()`, `od_list()`, `od_resource()` and `sc_data` diff --git a/R/browse.R b/R/browse.R index b31a541d..55184f72 100644 --- a/R/browse.R +++ b/R/browse.R @@ -14,7 +14,7 @@ sc_browse <- function(server = "ext") { sc_url(sc_url_gui(server), "home") } -#' @describeIn sc_browse opens the preference menu with the api key +#' @describeIn sc_browse opens the preference menu with the API key #' @examples #' sc_browse_preferences() #' @export diff --git a/R/cache.R b/R/cache.R index 9d471d52..0f9860e9 100644 --- a/R/cache.R +++ b/R/cache.R @@ -10,7 +10,7 @@ #' Caching can be set up using environment variables. To set up a persistent cache #' for both Open Data and the REST API, the following lines in `.Renviron` can #' be used. -#' The paths in this example are only applicalble for UNIX-based operating systems. +#' The paths in this example are only applicable for UNIX-based operating systems. #' #' ```sh #' STATCUBE_KEY_EXT = YOUR_API_KEY_GOES_HERE @@ -23,7 +23,7 @@ #' Caching is not implemented for the #' endpoints [sc_info()] and [sc_rate_limit_table()]. #' @rdname sc_cache -#' @param verbose print instuctions on how to set up caching persistently +#' @param verbose print instructions on how to set up caching persistently #' via environment variables? #' @name sc_cache NULL @@ -49,14 +49,14 @@ sc_cache_disable <- function() { Sys.unsetenv("STATCUBE_CACHE") } -#' @describeIn sc_cache informs wether the cache is currently enabled +#' @describeIn sc_cache informs whether the cache is currently enabled #' @export sc_cache_enabled <- function() { Sys.getenv("STATCUBE_CACHE") != "" } #' @export -#' @param dir a chace directory +#' @param dir a cache directory #' @describeIn sc_cache get/set the directory used for caching sc_cache_dir <- function(dir = NULL) { if (is.null(dir)) diff --git a/R/error.R b/R/error.R index cf04b9e1..47595ec7 100644 --- a/R/error.R +++ b/R/error.R @@ -1,15 +1,15 @@ #' Error handling for the STATcube REST API #' #' @description -#' In case API requests are unsuccessfull, `STATcubeR` will throw errors +#' In case API requests are unsuccessful, `STATcubeR` will throw errors #' to summarize the httr error type and its meaning. -#' Requests are considered unsuccessfull if one of the following applies +#' Requests are considered unsuccessful if one of the following applies #' * The response returns `TRUE` for `httr::http_error()`. #' * The response is not of type `"application/json"` #' #' In some cases it is useful to get direct access to a faulty response object. #' For that purpose, it is possible to use [sc_last_error()] which will provide -#' the httr response object for the last unsuccessfull request. +#' the httr response object for the last unsuccessful request. #' @return The return value from `httr::GET()` or `httr::POST()`. #' @examplesIf sc_key_exists() #' try(sc_table_saved("invalid_id")) diff --git a/R/od_cache.R b/R/od_cache.R index 3d7a7e51..11e56a14 100644 --- a/R/od_cache.R +++ b/R/od_cache.R @@ -15,7 +15,7 @@ #' od_downloads() #' @details #' [od_cache_summary()] provides an overview of all contents of the cache through -#' a data.frame. It hasone row for each dataset and the following columns. +#' a data.frame. It has one row for each dataset and the following columns. #' All file sizes are given in bytes #' - **`id`** the dataset id #' - **`updated`** the last modified time for `${id}.json` diff --git a/R/od_list.R b/R/od_list.R index 2eff9fd8..f1966056 100644 --- a/R/od_list.R +++ b/R/od_list.R @@ -3,7 +3,7 @@ #' [od_list()] returns a `data.frame ` containing all datasets published at #' [data.statistik.gv.at](https://data.statistik.gv.at) #' -#' @param unique some datasets are pulbished under multiple groups. +#' @param unique some datasets are published under multiple groups. #' They will only be listed once with the first group they appear in unless #' this parameter is set to `FALSE`. #' @param server the open data server to use. Either `ext` for the external @@ -95,7 +95,7 @@ od_list <- function(unique = TRUE, server = c("ext", "red")) { #' |json |`list`| Full json metadata #' #' The type `datetime` refers to the `POSIXct` format as returned by [Sys.time()]. -#' The last column `"json"` containes the full json metadata as returned by +#' The last column `"json"` contains the full json metadata as returned by #' [od_json()]. #' #' @inheritParams od_table diff --git a/R/od_revisions.R b/R/od_revisions.R index e3a7122f..e97c0c0d 100644 --- a/R/od_revisions.R +++ b/R/od_revisions.R @@ -3,7 +3,7 @@ #' Use the `/revision` endpoint of the OGD server to get a list #' of all datasets that have changed since a certain timestamp. #' @param since (optional) A timestamp. If supplied, only datasets updated -#' later will be returned. Otherwise, all datasets are retured. +#' later will be returned. Otherwise, all datasets are returned. #' Can be in either one of the following formats #' * a native R time type that is compatible with `strftime()` #' such as the return values of `Sys.Date()`, `Sys.time()` and `file.mtime()`. @@ -11,7 +11,7 @@ #' * a string of the form `YYYY-MM-DDThh:mm:ss` to specify a day and a time. #' @param exclude_ext If `TRUE` (default) exclude all results that have #' `OGDEXT_` as a prefix -#' @return a character verctor with dataset ids +#' @return a character vector with dataset ids #' @inheritParams od_list #' @examples #' # get all datasets (including OGDEXT_*) diff --git a/R/od_table.R b/R/od_table.R index 39059dbd..9d646902 100644 --- a/R/od_table.R +++ b/R/od_table.R @@ -14,7 +14,7 @@ #' `$field(code) `| `https://data.statistik.gv.at/data/${id}_${code}.csv` #' `$json `| `https://data.statistik.gv.at/ogd/json?dataset=${id}` #' -#' @param id the id of the data-set that should be accessed +#' @param id the id of the dataset that should be accessed #' @param language language to be used for labeling. `"en"` or `"de"` #' @param server the OGD-server to be used. `"ext"` (the default) for the #' external server or `prod` for the production server @@ -67,7 +67,7 @@ od_table_class <- R6::R6Class( public = list( #' @description This class is not exported. Use [od_table()] to #' initialize objects of class `od_table`. - #' @param id the id of the data-set that should be accessed + #' @param id the id of the dataset that should be accessed #' @param language language to be used for labeling. `"en"` or `"de"` #' @param server the OGD-Server server to be used initialize = function(id, language = NULL, server = "ext") { @@ -106,7 +106,7 @@ od_table_class <- R6::R6Class( private$cache$header %>% sc_tibble_meta(c("label_de", "label_en")) }, #' @field resources - #' lists all files downloaded from the server to contruct this table + #' lists all files downloaded from the server to construct this table resources = function() { resources <- private$cache$resources class(resources$name) <- c("ogd_file", "character") diff --git a/R/other_endpoints.R b/R/other_endpoints.R index b8b33872..35672bc6 100644 --- a/R/other_endpoints.R +++ b/R/other_endpoints.R @@ -34,8 +34,8 @@ sc_info <- function(language = c("en", "de"), key = NULL, server = "ext") { #' * `remaining` how much requests can be sent to the `/table` #' endpoint until the rate limit is reached. #' * `limit` the number of requests allowed per hour. -#' * `reset` a tiestamp when the rate limit will be reset. -#' Ususally, this should be less than one hour `after the current time. +#' * `reset` a timestamp when the rate limit will be reset. +#' Usually, this should be less than one hour `after the current time. #' @export sc_rate_limit_table <- function(language = c("en", "de"), key = NULL, server = "ext") { response <- httr::GET( diff --git a/R/recoder.R b/R/recoder.R index 2c45cc9d..e5f58f9c 100644 --- a/R/recoder.R +++ b/R/recoder.R @@ -1,7 +1,7 @@ #' @title Recode sc_table objects #' @description #' A collection of methods that can be used to modify an object of class -#' sc_table by reference. Typical usage is to acces the `recode` binding +#' sc_table by reference. Typical usage is to access the `recode` binding #' of an `sc_table` object and then use method chaining to perform recode #' operations. #' @@ -80,7 +80,7 @@ sc_recoder <- R6::R6Class( private$x$p_fields[[i]][j, private$l(language)] <- new invisible(self) }, - #' @description Cheange the total code for a field + #' @description Change the total code for a field #' @param field a field code #' @param new a level code for the field or `NA`. Will be used as the #' new total code. In case of `NA`, the total code will be unset. @@ -91,7 +91,7 @@ sc_recoder <- R6::R6Class( invisible(self) }, #' @description set the visibility of a level. Invisible levels are - #' ommited in the output of `$tabulate()` but don't affect aggregation + #' omitted in the output of `$tabulate()` but don't affect aggregation #' @param field a field code #' @param level a level code for the field #' @param new visibility. `TRUE` or `FALSE` diff --git a/R/sc_data.R b/R/sc_data.R index 7b4565ef..0845e78c 100644 --- a/R/sc_data.R +++ b/R/sc_data.R @@ -2,7 +2,7 @@ #' #' @description #' This class represents a common interface for datasets returned from the -#' STATcube REST API and OGD datasets. `sc_data` obects are usually created with +#' STATcube REST API and OGD datasets. `sc_data` objects are usually created with #' [od_table()] or [sc_table()]. #' @examples #' ## create a new sc_data object via od_table() @@ -40,9 +40,9 @@ sc_data <- R6::R6Class( private$recoder <- sc_recoder$new(private) }, #' @description get information about a specific field. The format of - #' the reurn value is similar to `$meta`. A `data.frame` that includes + #' the return value is similar to `$meta`. A `data.frame` that includes #' codes and labels for each level of the field. - #' @param i specifier for the field. Integer or character. If an interger + #' @param i specifier for the field. Integer or character. If an integer #' is provided, it should match the row number in `$meta$fields`. If #' a character is provided, the field is matched using [pmatch()] on #' all available codes and labels. diff --git a/R/schema.R b/R/schema.R index e861b886..7ab893ac 100644 --- a/R/schema.R +++ b/R/schema.R @@ -4,9 +4,9 @@ #' This endpoint can be used to get all available databases and tables #' as well as metadata about specific databases. #' -#' The main function `sc_schema()` can be used with any resouce id. +#' The main function `sc_schema()` can be used with any resource id. #' [sc_schema_catalogue()] and [sc_schema_db()] are very simple -#' wrapper functions around [`sc_schema()`] and are comparabable to the +#' wrapper functions around [`sc_schema()`] and are comparable to the #' [catalogue explorer](`r sc_browse_catalogue()`) or the #' [table view](`r sc_browse_database('deake005', open = TRUE)`) of the STATcube GUI. #' @@ -56,7 +56,7 @@ print_schema_with_tree <- function(x, ...) { #' @rdname sc_schema #' @param x an object of class `sc_schema()` i.e. the return value of #' [sc_schema()], [sc_schema_db()] or [sc_schema_catalogue()]. -#' @param tree wether to use the [`data.tree`](https://rdrr.io/cran/data.tree/man/data.tree.html) package for printing. +#' @param tree whether to use the [`data.tree`](https://rdrr.io/cran/data.tree/man/data.tree.html) package for printing. #' @param limit,... passed to [data.tree::print.Node()] if `tree` is set #' to `TRUE`. Ignored otherwise. #' @section Printing with data.tree: @@ -159,7 +159,7 @@ sc_schema_flatten_impl <- function(resp, type) { } #' @describeIn sc_schema is similar to the -#' [catalogue explorer](`r sc_browse_catalogue()`) of the STATcube GUI and reurns +#' [catalogue explorer](`r sc_browse_catalogue()`) of the STATcube GUI and returns #' a tree-type object containing all databases and tables. #' @export sc_schema_catalogue <- function(depth = "folder", ...) { diff --git a/R/schema_db.R b/R/schema_db.R index 0eeb0a2a..223b5988 100644 --- a/R/schema_db.R +++ b/R/schema_db.R @@ -30,7 +30,7 @@ #' print(tree = TRUE) #' @describeIn sc_schema is similar to the #' [table view](`r sc_browse_database('deake005', open = TRUE)`) -#' of ths STATcube GUI and gives information about all measures and +#' of the STATcube GUI and gives information about all measures and #' classification fields for a specific database #' @export sc_schema_db <- function(id, depth = "valueset", language = c("en", "de"), diff --git a/R/table.R b/R/table.R index 3f51a54c..e82e79f9 100644 --- a/R/table.R +++ b/R/table.R @@ -26,7 +26,7 @@ sc_table_class <- R6::R6Class( cloneable = FALSE, inherit = sc_data, public = list( - #' @description Ususally, objects of class `sc_table` are generated with + #' @description Usually, objects of class `sc_table` are generated with #' one of the factory methods [sc_table()], [sc_table_saved()] or #' [sc_table_custom()]. If this constructor is invoked directly, #' either omit the parameters `json` and `file` or make sure that they @@ -36,7 +36,7 @@ sc_table_class <- R6::R6Class( #' @param json the json file used in the request as a string. #' @param file the file path to the json file #' @param add_totals was the json request modified by adding totals via - #' the add_toals parameter in one of the factory functions (`sc_table()`, + #' the add_totals parameter in one of the factory functions (`sc_table()`, #' `sc_table_custom()`). Necessary, in order to also request totals via #' the `$add_language()` method. initialize = function(response, json = NULL, file = NULL, add_totals = FALSE) { @@ -88,7 +88,7 @@ sc_table_class <- R6::R6Class( #' @description An extension of [sc_tabulate()] with additional #' parameters. #' @param ... Parameters which are passed down to [sc_tabulate()] - #' @param round apply rounding to each measure accoring to the precision + #' @param round apply rounding to each measure according to the precision #' provided by the API. #' @param annotations Include separate annotation columns in the returned #' table. This parameter is currently broken and needs to be re-implemented @@ -131,7 +131,7 @@ sc_table_class <- R6::R6Class( #' the raw response content raw = function() httr::content(self$response), #' @field annotation_legend - #' list of all annotations occuring in the data as a `data.frame` with + #' list of all annotations occurring in the data as a `data.frame` with #' two columns for the annotation keys and annotation labels. annotation_legend = function() { am <- self$raw$annotationMap @@ -181,10 +181,10 @@ sc_table_class <- R6::R6Class( #' [sc_table_class] for the class documentation. #' @inheritParams sc_key #' @param language The language to be used for labeling. `"en"` (the default) -#' will use english. `"de"` uses german. +#' will use english. `"de"` uses German. #' The third option `"both"` will import both languages by sending two requests #' to the `/table` endpoint. -#' @param json_file Depricated. Use `json` instead +#' @param json_file Deprecated. Use `json` instead #' @family functions for /table #' @examplesIf sc_key_exists() #' my_table <- sc_table(json = sc_example("population_timeseries.json")) diff --git a/R/table_custom.R b/R/table_custom.R index b6985434..8a1db6c4 100644 --- a/R/table_custom.R +++ b/R/table_custom.R @@ -15,10 +15,10 @@ #' @param add_totals Should totals be added for each classification field in #' the json request? Ignored if `recodes` is used. #' @param recodes One or more recodes that were generated via [sc_recode()]. -#' If more than one recode is supplied, recodes should be concatinated with +#' If more than one recode is supplied, recodes should be concatenated with #' [c()]. #' @param language The language to be used for labeling. "en" -#' (the default) will use english. "de" uses german. +#' (the default) will use English. "de" uses German. #' @param dry_run If `TRUE`, no request is sent to the API. Instead, type #' checks are performed and the json request is returned as a string. #' Defaults to `FALSE`. diff --git a/R/tabulate.R b/R/tabulate.R index 1a8abbdb..5a2858d8 100644 --- a/R/tabulate.R +++ b/R/tabulate.R @@ -21,14 +21,14 @@ #' @param .list allows to define the arguments for `...` as a character vector. #' @param raw If FALSE (the default), apply labeling to the dataset. #' Otherwise, return codes. -#' @param language The language to be used for labelling. By default, the +#' @param language The language to be used for labeling. By default, the #' dataset language (`table$language`) is used. #' @param sort If `TRUE`, the resulting data will be sorted by all provided #' field values #' @details #' Aggregation is done as follows #' -#' * First, all columns that priovide a total code via `table$total_codes()` +#' * First, all columns that provide a total code via `table$total_codes()` #' will be used to filter for `column == total_code` or `column != total_code` #' * Then, the remaining data is aggregated using [rowsum()] #' @@ -37,7 +37,7 @@ #' #' For objects of class `sc_table` two additional operations are performed. #' * zeros are recoded to `NA`s -#' * rounding is done according to the precision of each measure. Ronding +#' * rounding is done according to the precision of each measure. Rounding #' happens after the recoding to `NA` values #' @seealso sc_table_class #' @examples diff --git a/README.md b/README.md index 0c95e8c4..ab7a085b 100644 --- a/README.md +++ b/README.md @@ -39,7 +39,7 @@ install.packages('STATcubeR-0.6.0.tar.gz', repos = NULL) ## Open Data To import datasets from https://data.statistik.gv.at, pass the dataset -id to the `od_table()` function. For example, OGD data about the [austrian population in 2020](https://data.statistik.gv.at/web/meta.jsp?dataset=OGD_bevstandjbab2002_BevStand_2020) +id to the `od_table()` function. For example, OGD data about the [Austrian population in 2020](https://data.statistik.gv.at/web/meta.jsp?dataset=OGD_bevstandjbab2002_BevStand_2020) can be accessed as follows. ```r @@ -67,8 +67,8 @@ an overview of the 315 datasets that are compatible with `od_table()`. ## STATcube API In order to use the REST API, it is required to set up an API key. As mentioned in the -[api key article](https://statistikat.github.io/STATcubeR/articles/sc_key.html), -this requres a STATcube subscription. +[API key article](https://statistikat.github.io/STATcubeR/articles/sc_key.html), +this requires a STATcube subscription. There are four main functions that interact with the API diff --git a/inst/WORDLIST b/inst/WORDLIST new file mode 100644 index 00000000..33ac485a --- /dev/null +++ b/inst/WORDLIST @@ -0,0 +1,72 @@ +Bundesländer +CLI +CMD +Catalogue +EOL +GRP +Herkunftsland +Infos +JSON +Lifecycle +MDN +OGD +Opendata +Recode +Recodes +Recoding +Rmd +SES +STATatlas +STATcube +Schemas +Typechecks +URI +URIs +catalogue +cli +cloneable +csv +datasources +de +df +english +fileserver +github +gui +gv +http +https +httr +infopage +infos +json +jsons +microdata +od +opendata +pkgdown +programmatically +quartiles +readme +recode +recoded +recoder +recodes +recoding +recurse +sc +schemas +statistik +tabset +tibbles +timeseries +tooltip +tooltips +uid +uids +unparsed +uri +webscraping +wingarc +yaml +dataset diff --git a/man/od_cache.Rd b/man/od_cache.Rd index 5711c5b6..57f2c72e 100644 --- a/man/od_cache.Rd +++ b/man/od_cache.Rd @@ -19,7 +19,7 @@ Functions to inspect the contents of the current cache. } \details{ \code{\link[=od_cache_summary]{od_cache_summary()}} provides an overview of all contents of the cache through -a data.frame. It hasone row for each dataset and the following columns. +a data.frame. It has one row for each dataset and the following columns. All file sizes are given in bytes \itemize{ \item \strong{\code{id}} the dataset id diff --git a/man/od_catalogue.Rd b/man/od_catalogue.Rd index a74b8b4c..17d1dd93 100644 --- a/man/od_catalogue.Rd +++ b/man/od_catalogue.Rd @@ -39,7 +39,7 @@ Currently, the following columns are provided.\tabular{lll}{ The type \code{datetime} refers to the \code{POSIXct} format as returned by \code{\link[=Sys.time]{Sys.time()}}. -The last column \code{"json"} containes the full json metadata as returned by +The last column \code{"json"} contains the full json metadata as returned by \code{\link[=od_json]{od_json()}}. } \examples{ diff --git a/man/od_list.Rd b/man/od_list.Rd index afd336b1..f037d752 100644 --- a/man/od_list.Rd +++ b/man/od_list.Rd @@ -7,7 +7,7 @@ od_list(unique = TRUE, server = c("ext", "red")) } \arguments{ -\item{unique}{some datasets are pulbished under multiple groups. +\item{unique}{some datasets are published under multiple groups. They will only be listed once with the first group they appear in unless this parameter is set to \code{FALSE}.} diff --git a/man/od_revisions.Rd b/man/od_revisions.Rd index 3a1df1ce..f7ae5957 100644 --- a/man/od_revisions.Rd +++ b/man/od_revisions.Rd @@ -8,7 +8,7 @@ od_revisions(since = NULL, exclude_ext = TRUE, server = "ext") } \arguments{ \item{since}{(optional) A timestamp. If supplied, only datasets updated -later will be returned. Otherwise, all datasets are retured. +later will be returned. Otherwise, all datasets are returned. Can be in either one of the following formats \itemize{ \item a native R time type that is compatible with \code{strftime()} @@ -25,7 +25,7 @@ server (the default) or \code{red} for the editing server. The editing server is only accessible for employees of Statistics Austria} } \value{ -a character verctor with dataset ids +a character vector with dataset ids } \description{ Use the \verb{/revision} endpoint of the OGD server to get a list diff --git a/man/od_table.Rd b/man/od_table.Rd index aefcbfad..2a458cd2 100644 --- a/man/od_table.Rd +++ b/man/od_table.Rd @@ -7,7 +7,7 @@ od_table(id, language = NULL, server = "ext") } \arguments{ -\item{id}{the id of the data-set that should be accessed} +\item{id}{the id of the dataset that should be accessed} \item{language}{language to be used for labeling. \code{"en"} or \code{"de"}} diff --git a/man/od_table_class.Rd b/man/od_table_class.Rd index 9e602e03..e33bbdb8 100644 --- a/man/od_table_class.Rd +++ b/man/od_table_class.Rd @@ -19,7 +19,7 @@ R6 Class open data datasets. Similar contents can be found in \verb{$meta}.} -\item{\code{resources}}{lists all files downloaded from the server to contruct this table} +\item{\code{resources}}{lists all files downloaded from the server to construct this table} \item{\code{od_server}}{The server used for initialization (see to \code{?od_table})} } @@ -54,7 +54,7 @@ initialize objects of class \code{od_table}. \subsection{Arguments}{ \if{html}{\out{
}} \describe{ -\item{\code{id}}{the id of the data-set that should be accessed} +\item{\code{id}}{the id of the dataset that should be accessed} \item{\code{language}}{language to be used for labeling. \code{"en"} or \code{"de"}} diff --git a/man/other_endpoints.Rd b/man/other_endpoints.Rd index 5e9fc11a..e7e5bbc6 100644 --- a/man/other_endpoints.Rd +++ b/man/other_endpoints.Rd @@ -43,8 +43,8 @@ for calls against the \verb{/table} endpoint. \item \code{remaining} how much requests can be sent to the \verb{/table} endpoint until the rate limit is reached. \item \code{limit} the number of requests allowed per hour. -\item \code{reset} a tiestamp when the rate limit will be reset. -Ususally, this should be less than one hour `after the current time. +\item \code{reset} a timestamp when the rate limit will be reset. +Usually, this should be less than one hour `after the current time. } \item \code{sc_rate_limits()}: gets rate limits from response headers diff --git a/man/sc_browse.Rd b/man/sc_browse.Rd index c9d8603d..4d8b74e6 100644 --- a/man/sc_browse.Rd +++ b/man/sc_browse.Rd @@ -40,7 +40,7 @@ A collection of links, to browse important STATcube pages. \itemize{ \item \code{sc_browse()}: opens the home menu of STATcube -\item \code{sc_browse_preferences()}: opens the preference menu with the api key +\item \code{sc_browse_preferences()}: opens the preference menu with the API key \item \code{sc_browse_table()}: shows the info page for a table diff --git a/man/sc_cache.Rd b/man/sc_cache.Rd index 9b566c9a..d879d37f 100644 --- a/man/sc_cache.Rd +++ b/man/sc_cache.Rd @@ -23,10 +23,10 @@ sc_cache_files(x) sc_cache_clear() } \arguments{ -\item{verbose}{print instuctions on how to set up caching persistently +\item{verbose}{print instructions on how to set up caching persistently via environment variables?} -\item{dir}{a chace directory} +\item{dir}{a cache directory} \item{x}{an object of class \code{sc_table} or \code{sc_schema}} } @@ -42,7 +42,7 @@ old cache entries. Caching can be set up using environment variables. To set up a persistent cache for both Open Data and the REST API, the following lines in \code{.Renviron} can be used. -The paths in this example are only applicalble for UNIX-based operating systems. +The paths in this example are only applicable for UNIX-based operating systems. \if{html}{\out{
}}\preformatted{STATCUBE_KEY_EXT = YOUR_API_KEY_GOES_HERE STATCUBE_CACHE = TRUE @@ -62,7 +62,7 @@ endpoints \code{\link[=sc_info]{sc_info()}} and \code{\link[=sc_rate_limit_table \item \code{sc_cache_disable()}: disables caching for the current R session sc_cache_disable() -\item \code{sc_cache_enabled()}: informs wether the cache is currently enabled +\item \code{sc_cache_enabled()}: informs whether the cache is currently enabled \item \code{sc_cache_dir()}: get/set the directory used for caching diff --git a/man/sc_data.Rd b/man/sc_data.Rd index 2d7a3cf9..a83663ba 100644 --- a/man/sc_data.Rd +++ b/man/sc_data.Rd @@ -5,7 +5,7 @@ \title{Common interface for STATcubeR datasets} \description{ This class represents a common interface for datasets returned from the -STATcube REST API and OGD datasets. \code{sc_data} obects are usually created with +STATcube REST API and OGD datasets. \code{sc_data} objects are usually created with \code{\link[=od_table]{od_table()}} or \code{\link[=sc_table]{sc_table()}}. } \examples{ @@ -105,7 +105,7 @@ Do not use directly but initialize objects with \code{\link[=sc_table]{sc_table( \if{latex}{\out{\hypertarget{method-sc_data-field}{}}} \subsection{Method \code{field()}}{ get information about a specific field. The format of -the reurn value is similar to \verb{$meta}. A \code{data.frame} that includes +the return value is similar to \verb{$meta}. A \code{data.frame} that includes codes and labels for each level of the field. \subsection{Usage}{ \if{html}{\out{
}}\preformatted{sc_data$field(i = 1)}\if{html}{\out{
}} @@ -114,7 +114,7 @@ codes and labels for each level of the field. \subsection{Arguments}{ \if{html}{\out{
}} \describe{ -\item{\code{i}}{specifier for the field. Integer or character. If an interger +\item{\code{i}}{specifier for the field. Integer or character. If an integer is provided, it should match the row number in \verb{$meta$fields}. If a character is provided, the field is matched using \code{\link[=pmatch]{pmatch()}} on all available codes and labels.} diff --git a/man/sc_last_error.Rd b/man/sc_last_error.Rd index f518d7a4..de058bf0 100644 --- a/man/sc_last_error.Rd +++ b/man/sc_last_error.Rd @@ -13,9 +13,9 @@ sc_last_error_parsed() The return value from \code{httr::GET()} or \code{httr::POST()}. } \description{ -In case API requests are unsuccessfull, \code{STATcubeR} will throw errors +In case API requests are unsuccessful, \code{STATcubeR} will throw errors to summarize the httr error type and its meaning. -Requests are considered unsuccessfull if one of the following applies +Requests are considered unsuccessful if one of the following applies \itemize{ \item The response returns \code{TRUE} for \code{httr::http_error()}. \item The response is not of type \code{"application/json"} @@ -23,7 +23,7 @@ Requests are considered unsuccessfull if one of the following applies In some cases it is useful to get direct access to a faulty response object. For that purpose, it is possible to use \code{\link[=sc_last_error]{sc_last_error()}} which will provide -the httr response object for the last unsuccessfull request. +the httr response object for the last unsuccessful request. } \section{Functions}{ \itemize{ diff --git a/man/sc_recoder.Rd b/man/sc_recoder.Rd index a886c54d..63d024f7 100644 --- a/man/sc_recoder.Rd +++ b/man/sc_recoder.Rd @@ -5,7 +5,7 @@ \title{Recode sc_table objects} \description{ A collection of methods that can be used to modify an object of class -sc_table by reference. Typical usage is to acces the \code{recode} binding +sc_table by reference. Typical usage is to access the \code{recode} binding of an \code{sc_table} object and then use method chaining to perform recode operations. @@ -142,7 +142,7 @@ Change the labels of a level \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-sc_recoder-total_codes}{}}} \subsection{Method \code{total_codes()}}{ -Cheange the total code for a field +Change the total code for a field \subsection{Usage}{ \if{html}{\out{
}}\preformatted{sc_recoder$total_codes(field, new)}\if{html}{\out{
}} } @@ -163,7 +163,7 @@ new total code. In case of \code{NA}, the total code will be unset.} \if{latex}{\out{\hypertarget{method-sc_recoder-visible}{}}} \subsection{Method \code{visible()}}{ set the visibility of a level. Invisible levels are -ommited in the output of \verb{$tabulate()} but don't affect aggregation +omitted in the output of \verb{$tabulate()} but don't affect aggregation \subsection{Usage}{ \if{html}{\out{
}}\preformatted{sc_recoder$visible(field, level, new)}\if{html}{\out{
}} } diff --git a/man/sc_schema.Rd b/man/sc_schema.Rd index 921e5c4e..b19b1126 100644 --- a/man/sc_schema.Rd +++ b/man/sc_schema.Rd @@ -40,7 +40,7 @@ the production server. External users should always use the default option \code \item{x}{an object of class \code{sc_schema()} i.e. the return value of \code{\link[=sc_schema]{sc_schema()}}, \code{\link[=sc_schema_db]{sc_schema_db()}} or \code{\link[=sc_schema_catalogue]{sc_schema_catalogue()}}.} -\item{tree}{wether to use the \href{https://rdrr.io/cran/data.tree/man/data.tree.html}{\code{data.tree}} package for printing.} +\item{tree}{whether to use the \href{https://rdrr.io/cran/data.tree/man/data.tree.html}{\code{data.tree}} package for printing.} \item{limit, ...}{passed to \code{\link[data.tree:print.Node]{data.tree::print.Node()}} if \code{tree} is set to \code{TRUE}. Ignored otherwise.} @@ -53,9 +53,9 @@ Invoke the \href{https://docs.wingarc.com.au/superstar/9.12/open-data-api/open-d This endpoint can be used to get all available databases and tables as well as metadata about specific databases. -The main function \code{sc_schema()} can be used with any resouce id. +The main function \code{sc_schema()} can be used with any resource id. \code{\link[=sc_schema_catalogue]{sc_schema_catalogue()}} and \code{\link[=sc_schema_db]{sc_schema_db()}} are very simple -wrapper functions around \code{\link[=sc_schema]{sc_schema()}} and are comparabable to the +wrapper functions around \code{\link[=sc_schema]{sc_schema()}} and are comparable to the \href{https://portal.statistik.at/statistik.at/ext/statcube/jsf/dataCatalogueExplorer.xhtml}{catalogue explorer} or the \href{https://portal.statistik.at/statistik.at/ext/statcube/opendatabase?id=deake005}{table view} of the STATcube GUI. @@ -67,12 +67,12 @@ are wrapped into a class called \code{sc_schema} to simplify the usage in R. \item \code{sc_schema_flatten()}: turns a \code{sc_schema} object into a \code{data.frame} \item \code{sc_schema_catalogue()}: is similar to the -\href{https://portal.statistik.at/statistik.at/ext/statcube/jsf/dataCatalogueExplorer.xhtml}{catalogue explorer} of the STATcube GUI and reurns +\href{https://portal.statistik.at/statistik.at/ext/statcube/jsf/dataCatalogueExplorer.xhtml}{catalogue explorer} of the STATcube GUI and returns a tree-type object containing all databases and tables. \item \code{sc_schema_db()}: is similar to the \href{https://portal.statistik.at/statistik.at/ext/statcube/opendatabase?id=deake005}{table view} -of ths STATcube GUI and gives information about all measures and +of the STATcube GUI and gives information about all measures and classification fields for a specific database }} diff --git a/man/sc_table.Rd b/man/sc_table.Rd index baff1283..fc205904 100644 --- a/man/sc_table.Rd +++ b/man/sc_table.Rd @@ -24,7 +24,7 @@ GUI ("Open Data API Request"). Alternatively, a json string which passes \code{\link[jsonlite:validate]{jsonlite::validate()}}.} \item{language}{The language to be used for labeling. \code{"en"} (the default) -will use english. \code{"de"} uses german. +will use english. \code{"de"} uses German. The third option \code{"both"} will import both languages by sending two requests to the \verb{/table} endpoint.} @@ -34,7 +34,7 @@ the json request?} \item{key}{(\code{string}) An API key. To display your key, call \code{\link[=sc_browse_preferences]{sc_browse_preferences()}}.} -\item{json_file}{Depricated. Use \code{json} instead} +\item{json_file}{Deprecated. Use \code{json} instead} \item{filename}{The name of an example json file.} diff --git a/man/sc_table_class.Rd b/man/sc_table_class.Rd index 16e44fb9..d0c55907 100644 --- a/man/sc_table_class.Rd +++ b/man/sc_table_class.Rd @@ -18,7 +18,7 @@ STATcube REST API. \item{\code{raw}}{the raw response content} -\item{\code{annotation_legend}}{list of all annotations occuring in the data as a \code{data.frame} with +\item{\code{annotation_legend}}{list of all annotations occurring in the data as a \code{data.frame} with two columns for the annotation keys and annotation labels.} \item{\code{rate_limit}}{how much requests were left after the POST request for this table was sent? @@ -50,7 +50,7 @@ Uses the same format as \code{\link[=sc_rate_limit_table]{sc_rate_limit_table()} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-sc_table-new}{}}} \subsection{Method \code{new()}}{ -Ususally, objects of class \code{sc_table} are generated with +Usually, objects of class \code{sc_table} are generated with one of the factory methods \code{\link[=sc_table]{sc_table()}}, \code{\link[=sc_table_saved]{sc_table_saved()}} or \code{\link[=sc_table_custom]{sc_table_custom()}}. If this constructor is invoked directly, either omit the parameters \code{json} and \code{file} or make sure that they @@ -70,7 +70,7 @@ endpoint.} \item{\code{file}}{the file path to the json file} \item{\code{add_totals}}{was the json request modified by adding totals via -the add_toals parameter in one of the factory functions (\code{sc_table()}, +the add_totals parameter in one of the factory functions (\code{sc_table()}, \code{sc_table_custom()}). Necessary, in order to also request totals via the \verb{$add_language()} method.} } @@ -110,7 +110,7 @@ parameters. \describe{ \item{\code{...}}{Parameters which are passed down to \code{\link[=sc_tabulate]{sc_tabulate()}}} -\item{\code{round}}{apply rounding to each measure accoring to the precision +\item{\code{round}}{apply rounding to each measure according to the precision provided by the API.} \item{\code{annotations}}{Include separate annotation columns in the returned diff --git a/man/sc_table_custom.Rd b/man/sc_table_custom.Rd index f0dbc4bf..8dcaf9f6 100644 --- a/man/sc_table_custom.Rd +++ b/man/sc_table_custom.Rd @@ -29,7 +29,7 @@ either of type \code{FIELD} or type \code{VALUESET}. Those entries are referred as \code{fields} in the parsed API response} \item{language}{The language to be used for labeling. "en" -(the default) will use english. "de" uses german.} +(the default) will use English. "de" uses German.} \item{add_totals}{Should totals be added for each classification field in the json request? Ignored if \code{recodes} is used.} @@ -38,7 +38,7 @@ the json request? Ignored if \code{recodes} is used.} \code{\link[=sc_browse_preferences]{sc_browse_preferences()}}.} \item{recodes}{One or more recodes that were generated via \code{\link[=sc_recode]{sc_recode()}}. -If more than one recode is supplied, recodes should be concatinated with +If more than one recode is supplied, recodes should be concatenated with \code{\link[=c]{c()}}.} \item{dry_run}{If \code{TRUE}, no request is sent to the API. Instead, type diff --git a/man/sc_tabulate.Rd b/man/sc_tabulate.Rd index 3c93406f..2b2ec328 100644 --- a/man/sc_tabulate.Rd +++ b/man/sc_tabulate.Rd @@ -30,7 +30,7 @@ Ignored if \code{raw} is set to \code{TRUE}.} \item{recode_zeros}{turn zero values into \code{NA}s} -\item{language}{The language to be used for labelling. By default, the +\item{language}{The language to be used for labeling. By default, the dataset language (\code{table$language}) is used.} \item{sort}{If \code{TRUE}, the resulting data will be sorted by all provided @@ -52,7 +52,7 @@ is true for fields. \details{ Aggregation is done as follows \itemize{ -\item First, all columns that priovide a total code via \code{table$total_codes()} +\item First, all columns that provide a total code via \code{table$total_codes()} will be used to filter for \code{column == total_code} or \code{column != total_code} \item Then, the remaining data is aggregated using \code{\link[=rowsum]{rowsum()}} } @@ -63,7 +63,7 @@ See Examples For objects of class \code{sc_table} two additional operations are performed. \itemize{ \item zeros are recoded to \code{NA}s -\item rounding is done according to the precision of each measure. Ronding +\item rounding is done according to the precision of each measure. Rounding happens after the recoding to \code{NA} values } } diff --git a/tests/spelling.R b/tests/spelling.R new file mode 100644 index 00000000..6713838f --- /dev/null +++ b/tests/spelling.R @@ -0,0 +1,3 @@ +if(requireNamespace('spelling', quietly = TRUE)) + spelling::spell_check_test(vignettes = TRUE, error = FALSE, + skip_on_cran = TRUE) diff --git a/vignettes/od_list.Rmd b/vignettes/od_list.Rmd index 0faedab2..58c9d342 100644 --- a/vignettes/od_list.Rmd +++ b/vignettes/od_list.Rmd @@ -20,7 +20,7 @@ At the time of writing this article, there are 315 datasets that are assumed to ## Interactive overview -Since some of the metadata contained in the OGD JSON files is only available in german, the following overview uses german labels. +Since some of the metadata contained in the OGD JSON files is only available in German, the following overview uses German labels. Click on the individual table cells to get more information. ```{r, echo = FALSE} @@ -118,8 +118,8 @@ od_index %>% ## CLI usage -To get a simplified viersion of this summary, use the `od_list()` function. -It uses webscraping techniques to get dataset ids and german labels based on the contents of https://data.statistik.gv.at/web/catalog.jsp. +To get a simplified version of this summary, use the `od_list()` function. +It uses webscraping techniques to get dataset ids and German labels based on the contents of https://data.statistik.gv.at/web/catalog.jsp. ```{r, eval = FALSE} all_datasets <- od_list() @@ -141,7 +141,7 @@ json <- od_json(id) json ``` -This output is generated from `r style_resource(id, ext = "json")` and shows a summary of the available metatata. Other parts of the metadata can be extracted with `$` using the keys from the json specification. +This output is generated from `r style_resource(id, ext = "json")` and shows a summary of the available metadata. Other parts of the metadata can be extracted with `$` using the keys from the json specification. ```{r} json$extras$update_frequency @@ -155,7 +155,7 @@ json$extras$update_frequency x <- od_table("OGD_bevstandjbab2002_BevStand_2020") ``` -The `r tippy_dataset(x, "population dataset")` measures the austrian population +The `r tippy_dataset(x, "population dataset")` measures the Austrian population for `r nrow(x$field("Commune"))` different regions. ```{r} diff --git a/vignettes/od_resources.Rmd b/vignettes/od_resources.Rmd index 842a4f5b..48ef4b99 100644 --- a/vignettes/od_resources.Rmd +++ b/vignettes/od_resources.Rmd @@ -27,7 +27,7 @@ By default, `r STATcubeR` caches all accessed resources from `r ogd_portal` in t od_cache_dir() ``` -Let's examine for example what happens when the data from the structure of earnings suvey (SES) is requested. +Let's examine for example what happens when the data from the structure of earnings survey (SES) is requested. ```{r} earnings <- od_table("OGD_veste309_Veste309_1") @@ -42,12 +42,12 @@ earnings$resources ``` `last_modified` tells us when the resource was changed on the fileserver. -If a resource does not exist in the cache or if the last modified entry in the json is newer than the cached file, it will be dowloaded from the server. +If a resource does not exist in the cache or if the last modified entry in the json is newer than the cached file, it will be downloaded from the server. Otherwise, the cached version is reused. ## Access and Updates -Cached files can be acessed with `od_cache_file()`. +Cached files can be accessed with `od_cache_file()`. If the specified file exists in the cache, a path to the file will be returned. Otherwise, the file is downloaded to the cache and then the path is returned. The files use the same naming conventions as the open data fileserver. @@ -68,7 +68,7 @@ od_resource("OGD_veste309_Veste309_1", "C-A11-0") ``` The parser behaves differently for header files, data files and fields. -Json files can be acessed with `od_json()`. +Json files can be accessed with `od_json()`. ```{r} json <- od_json("OGD_veste309_Veste309_1") diff --git a/vignettes/od_table.Rmd b/vignettes/od_table.Rmd index c9e08978..e7249ac9 100644 --- a/vignettes/od_table.Rmd +++ b/vignettes/od_table.Rmd @@ -39,7 +39,7 @@ To import a dataset, provide the dataset id as an argument. table <- od_table("OGD_krebs_ext_KREBS_1") ``` -This returns an object of class [`od_table`], which bundles all the data from the OGD portal that correspons to this dataset. +This returns an object of class [`od_table`], which bundles all the data from the OGD portal that corresponds to this dataset. Printing the object will show a summary of the contents. ```{r} @@ -50,20 +50,20 @@ The dataset contains the number of cancer patients by several classification fie - **tumor type** differentiates **<95>** types of cancers - The **reporting period** spans **<37>** years (1983 to 2019). -- The **regional** variable contains the **<9>** NUTS-2 regions of austria. +- The **regional** variable contains the **<9>** NUTS-2 regions of Austria. - The **demographic** variable "Sex" is reported with **<2>** levels ## Convert to a data frame The method `$tabulate()` can be used to turn the object into a `data.frame` in long format, -which contains labled data. +which contains labeled data. ```{r} table$tabulate() ``` The dataset contains `r nrow(table$data)` rows. -If every combination of tumor type, year, region and sex would contain a seperate row the number of rows would be the following. +If every combination of tumor type, year, region and sex would contain a separate row the number of rows would be the following. \[ 95\times37\times9\times2 = 63270 @@ -98,13 +98,13 @@ options(tibble.print_min = 5) ``` The method `table$field()` can be used to get information about specific classification fields. -Thise contain data from `{dataset_id}_{field_code}.csv`. -Unlike the metadata in `sc_table`, the `od_table` class always contains german and english labels. +These contain data from `{dataset_id}_{field_code}.csv`. +Unlike the metadata in `sc_table`, the `od_table` class always contains German and English labels. Both can be used to label the dataset. #### Tumor type -The following call gives access to the german and english labels for the 95 differen tumor types in the `"cancer type"` classification. +The following call gives access to the German and English labels for the 95 different tumor types in the `"cancer type"` classification. Click `"Year"` above to see information about the years. ```{r} @@ -127,7 +127,7 @@ table$field("C-BERJ-0") #### Province -The regional classification contains 9 elements which correspond to the NUTS2 regions ("Bundesländer") of austria. +The regional classification contains 9 elements which correspond to the NUTS2 regions ("Bundesländer") of Austria. ```{r} table$field("C-BUNDESLAND-0") @@ -137,7 +137,7 @@ table$field("C-BUNDESLAND-0") #### Sex -Sex is coded as a ditochome variable with the classification elements `"male"` and `"female"`. +Sex is coded as a dichotomous variable with the classification elements `"male"` and `"female"`. ```{r} table$field("C-KRE_GESCHLECHT-0") @@ -147,7 +147,7 @@ table$field("C-KRE_GESCHLECHT-0") ### json Metadata {.tabset .tabset-pills} -The json metadatafile `r style_resource("OGD_krebs_ext_KREBS_1", ext = "json")` is available via the `$json` binding. +The json metadata file `r style_resource("OGD_krebs_ext_KREBS_1", ext = "json")` is available via the `$json` binding. #### Cancer @@ -193,8 +193,8 @@ table$data levels(table$data$`C-BUNDESLAND-0`) == table$field("C-BUNDESLAND-0")$code ``` -As mentioned above, a labelled version of the data can be obtained via `table$tabulate()`. -The labelling is done by taking the raw dataset and then joining the labes from `$header` and `$field()`. +As mentioned above, a labeled version of the data can be obtained via `table$tabulate()`. +The labeling is done by taking the raw dataset and then joining the labels from `$header` and `$field()`. ```{r} table$tabulate() @@ -205,9 +205,9 @@ You can read more about `$tabulate()` in the `r ticle('sc_tabulate')`. ## A Trip to Germany {#sauerkraut} -It is possible to switch the language used for labelling the dataset using the `$language` field. +It is possible to switch the language used for labeling the dataset using the `$language` field. This field can be used to get and set the language. -Allowed options are `"en"` for english and `"de"` for german. +Allowed options are `"en"` for English and `"de"` for German. ```{r, collapse=TRUE} table$language @@ -216,7 +216,7 @@ table$language ``` This option affects the `print()` method as well as the output of `$tabulate()`. -If no english labels are available, the german labels are used as a fallback mechanism. +If no English labels are available, the German labels are used as a fallback mechanism. ```{r} table diff --git a/vignettes/sc_cache.Rmd b/vignettes/sc_cache.Rmd index a81948fb..5c7cc1eb 100644 --- a/vignettes/sc_cache.Rmd +++ b/vignettes/sc_cache.Rmd @@ -40,11 +40,11 @@ Caching will affect all calls to `sc_table()` and `sc_schema()` as well as their `sc_table_saved()`, `sc_table_custom()`, `sc_schema_db()`, `sc_schema_catalogue()`. If the same resource is requested several times, the last valid API response is reused. -Invalid resposes (such as 404 responses) will not be added to the cache. +Invalid responses (such as 404 responses) will not be added to the cache. Cache files always contain unparsed API responses as returned by `httr::GET()` or `httr::POST()`. Responses are stored in an `rds` format. -If caching is enabled, the corresponding cache files to an object of class `sc_schema` or `sc_table` can be retieved using `sc_cache_files()`. +If caching is enabled, the corresponding cache files to an object of class `sc_schema` or `sc_table` can be retrieved using `sc_cache_files()`. ```{r} sc_example("accomodation") %>% sc_table(language = "both") %>% sc_cache_files() @@ -52,7 +52,7 @@ sc_schema_catalogue() %>% sc_cache_files() ``` Note that the first call to `sc_cache_files()` returned two paths. -Since the table was requested in two languages, two api responses are necessary to construct the table object. +Since the table was requested in two languages, two API responses are necessary to construct the table object. The content of the cache files can be parsed using `readRDS()` and `httr::content()`. This gives direct access to the API response in a `list()` format. @@ -76,8 +76,8 @@ sc_cache_clear() ## Should I use caching? If you are using `r STATcubeR` interactively, the answer is probably no. -However, when building applications that rely on STATcube data caching can be a usefiul way to decrease the traffic with the STATcube server. -Another usecase for caching is if you are writing `{rmarkdown}` documents that rely on STATcube data. +However, when building applications that rely on STATcube data caching can be a useful way to decrease the traffic with the STATcube server. +Another use case for caching is if you are writing `{rmarkdown}` documents that rely on STATcube data. Caching makes those documents both reproducible and quicker to render. Please note that there is currently no reliable way to invalidate the cache. diff --git a/vignettes/sc_data.Rmd b/vignettes/sc_data.Rmd index 322807e9..1a1c6049 100644 --- a/vignettes/sc_data.Rmd +++ b/vignettes/sc_data.Rmd @@ -15,7 +15,7 @@ options(tibble.print_min = 5) ``` The class [sc_data] defines a common interface for open data datasets and responses from the `/table` endpoint of the STATcube REST API. -It defines methods that are applicable to both datasources like aquiring metadata, labeling the data and aggregating results. +It defines methods that are applicable to both datasources like acquiring metadata, labeling the data and aggregating results. ## Constructing sc_data objects @@ -29,8 +29,8 @@ Therefore, objects of the class should be created with one of the following func * `sc_table_saved()` and `sc_table_custom()` also use the `/table` endpoint. However, the request is specified via ids rather than a json file. -To illustrate, we will use one of the OGD datasets to showcase the functionalities of this class. -Notice however, that objects created with `sc_table()` can be used interchangibly. +To illustrate, we will use one of the OGD datasets to showcase the functionality of this class. +Notice however, that objects created with `sc_table()` can be used interchangeably. ```{r} x <- od_table("OGD_krebs_ext_KREBS_1") @@ -117,7 +117,7 @@ x$field("Sex") ## Tabulation The method `$tabulate()` can be used to turn `sc_table` objects into tidy data.frames. -See the `r ticle("sc_tabulate")` for more defails. +See the `r ticle("sc_tabulate")` for more defaults. ```{r} x$tabulate() diff --git a/vignettes/sc_info.Rmd b/vignettes/sc_info.Rmd index be2d6b74..e8069472 100644 --- a/vignettes/sc_info.Rmd +++ b/vignettes/sc_info.Rmd @@ -98,7 +98,7 @@ request to `sc_table()` is sent several times, this will not count towards the rate-limit (100 requests per hour).~~ Server-Side caching of [`/table`] responses is currently disabled due to security reasons. -Therefore, all requests against the [`/table`] endpoint count towards the ratelimit. +Therefore, all requests against the [`/table`] endpoint count towards the rate-limit. [`/info`]: https://docs.wingarc.com.au/superstar/9.12/open-data-api/open-data-api-reference/info-endpoint [`/rate_limit`]: https://docs.wingarc.com.au/superstar/9.12/open-data-api/open-data-api-reference/rate-limit diff --git a/vignettes/sc_key.Rmd b/vignettes/sc_key.Rmd index 6098103e..7ee2978e 100644 --- a/vignettes/sc_key.Rmd +++ b/vignettes/sc_key.Rmd @@ -49,7 +49,7 @@ You can set the key persistently by modifying `.Renviron`. This can be done easily with `usethis::edit_r_environ()`. By default, `sc_key_set()` will perform a test request to verify that the key is valid -and throw an error if the test request is unsuccessfull. +and throw an error if the test request is unsuccessful. ```r sc_key_set('wrong key') diff --git a/vignettes/sc_last_error.Rmd b/vignettes/sc_last_error.Rmd index 1153108d..2ea267d0 100644 --- a/vignettes/sc_last_error.Rmd +++ b/vignettes/sc_last_error.Rmd @@ -15,7 +15,7 @@ This article explains how `r STATcubeR` deals with situations where the communic ## Throw all the Errors All http errors codes from the REST API will be turned into R errors. -More precisely, the following conditions are checked to determine wether a request to the STATcube API was successful +More precisely, the following conditions are checked to determine whether a request to the STATcube API was successful * The response returns `FALSE` for `httr::http_error()` which means the response status is less than 400. See the [MDN reference] for more information about http status codes. @@ -70,7 +70,7 @@ In practice, you should set up your key according to the `r ticle('sc_key')`. ### API Not Accessible -This occurs if `r STATcubeR` tries to send requests to a server which is not accessible for the current environment. This will result in a timout error. +This occurs if `r STATcubeR` tries to send requests to a server which is not accessible for the current environment. This will result in a timeout error. ``` Error in curl::curl_fetch_memory(url, handle = handle) : @@ -92,7 +92,7 @@ Reasons this error might occur 4. (Statistics Austria Employees only) You downloaded a json request from one of our internal STATcube servers and try to use this request with `sc_table()` from outside. -Note to future-self: It might be a good idea to set up some environment variables on Statistic Austrias internal R servers to avoid (3) and (4). +Note to future-self: It might be a good idea to set up some environment variables on Statistic Austria's internal R servers to avoid (3) and (4). ### Rate Limit Exceeded @@ -106,7 +106,7 @@ sc_table_saved("defaulttable_deake005") readRDS("sc_last_error/rate_limit.rds") %>% STATcubeR:::sc_check_response() ``` -If you encounter this error, please check if the rate limits are in fact a plauible reason by using `sc_rate_limit_table()`. +If you encounter this error, please check if the rate limits are in fact a plausible reason by using `sc_rate_limit_table()`. Unfortunately, the response for exceeded rate limits is very generic and can not be differentiated from the response for invalid json-bodies (see below). This is why the error message lists two possible reasons. @@ -209,7 +209,7 @@ shiny::observeEvent(input$button_load_data, { ``` `try()` will turn errors into "error-objects" of class `"try-error"`. -A conditional is then used to perform different actions for successfull and unsccessfull requests. +A conditional is then used to perform different actions for successful and unsuccessful requests. If an error occurs, the error details are fetched via `sc_last_error_parsed()` and then sent to an error handler. Otherwise, the return value from `sc_table_saved()` is processed by the success handler. diff --git a/vignettes/sc_schema.Rmd b/vignettes/sc_schema.Rmd index 4f5b4cda..71cd0265 100644 --- a/vignettes/sc_schema.Rmd +++ b/vignettes/sc_schema.Rmd @@ -24,16 +24,16 @@ endpoint. ## Browsing the Catalogue -The first function shows the catalogue, which lists all available +The first function shows the catalog, which lists all available databases in a tree form. The tree structure is determined by the API and -closely resembles the "Catalogue" view in the GUI. +closely resembles the "Catalog" view in the GUI. ```{r} my_catalogue <- sc_schema_catalogue() my_catalogue ``` -We see that the catalog has 8 child nodes: Four childs of type `FOLDER` and four childs of type `TABLE`. +We see that the catalog has 8 child nodes: Four children of type `FOLDER` and four children of type `TABLE`. The table nodes correspond to the saved tables as described in the `r ticle("sc_table_saved")`. The folders include all folders from the root level in the [catalogue explorer](`r sc_browse_catalogue()`): "Statistics", "Publication and Services" as well as "Examples". @@ -53,13 +53,13 @@ options(tibble.print_max = 5) my_catalogue$Statistics ``` -The child node `Statistics` is also of class `sc_schema` and shows all entries of the subfolder. +The child node `Statistics` is also of class `sc_schema` and shows all entries of the sub-folder. ```{r,fig.align='center', out.width='50%', echo=FALSE} knitr::include_graphics("img/catalogue3.png") ``` -This syntax can be used to navigate through folders and subfolders. +This syntax can be used to navigate through folders and sub-folders. ```{r} my_catalogue$Statistics$`Foreign Trade` @@ -77,7 +77,7 @@ my_catalogue$Statistics$`Foreign Trade`$Außenhandelsindizes ## Databases and Tables -Inside the catalogue, the leafs^[In the context of tree-like data structures, leafs are used to describe nodes of a tree which have no child nodes] of the tree are mostly of type `DATABASE` and `TABLE`. +Inside the catalog, the leafs^[In the context of tree-like data structures, leafs are used to describe nodes of a tree which have no child nodes] of the tree are mostly of type `DATABASE` and `TABLE`. ```{r} my_catalogue$Statistics$`Foreign Trade`$`Regional data by federal provinces` @@ -138,7 +138,7 @@ The leafs of database schemas are mostly of type `VALUE` and `MEASURE`. ## Data Structure of sc_schema Objects -As shown above, `sc_schema` objets have a tree like structure. +As shown above, `sc_schema` objects have a tree like structure. Each `sc_schema` object has `id`, `label`, `location` and `type` as the last four entries ```{r} @@ -149,11 +149,11 @@ str(tail(my_catalogue$Statistics, 4)) Schema objects can have an arbitrary amount of children. Children are always of type `sc_schema`. `x$type` contains the type of the schema object. -A complete list of schema types is avilable in the [API reference](https://docs.wingarc.com.au/superstar/9.12/open-data-api/open-data-api-reference/schema-endpoint). +A complete list of schema types is available in the [API reference](https://docs.wingarc.com.au/superstar/9.12/open-data-api/open-data-api-reference/schema-endpoint). ## Other Resources -Information about resources other than databases and the catalogue can +Information about resources other than databases and the catalog can be obtained by passing the resource id to `sc_schema()`. ```{r, collapse=TRUE} @@ -184,7 +184,7 @@ group_info <- my_db_info$`Demographic Characteristics`$id %>% ## Printing with data.tree If the `{data.tree}` package is installed, it can be used for an alternative -print mehtod. +print method. ```{r} print(group_info, tree = TRUE) @@ -199,7 +199,7 @@ options(STATcubeR.print_tree = TRUE) ## Flatten a Schema The function `sc_schema_flatten()` can be used to turn responses from the `/schema` endpoint into `data.frame`s. -The following call extracts all databasess from the catalogue and displays their ids and labels. +The following call extracts all databases from the catalog and displays their ids and labels. ```{r} sc_schema_catalogue() %>% @@ -208,9 +208,9 @@ sc_schema_catalogue() %>% The string `"DATABASE"` in the previous example acts as a filter to make sure only nodes with the schema type `DATABASE` are included in the table. -If `"DATABASE"` is relaced with `"TABLE"`, all tables will be displayed. This includes +If `"DATABASE"` is replaced with `"TABLE"`, all tables will be displayed. This includes -* All the defaulttables on STATcube. +* All the default-tables on STATcube. Most databases have an associated default table. * All saved tables for the current user as described in the `r ticle("sc_table_saved")`. * Other saved tables. diff --git a/vignettes/sc_table.Rmd b/vignettes/sc_table.Rmd index e8f4955b..0c4ba006 100644 --- a/vignettes/sc_table.Rmd +++ b/vignettes/sc_table.Rmd @@ -84,13 +84,13 @@ If you prefer to use codes rather than labels, use `my_table$data` instead. my_table$data ``` -## Example datasets {.tabsert .tabset-pills} +## Example datasets {.tabset .tabset-pills} -This article used a dataset about the austrian populatio n via `sc_example()`. +This article used a dataset about the Austrian population via `sc_example()`. `r STATcubeR` contains more example jsons to get started. The datasets can be listed with `sc_examples_list()`. -### Accomodation +### Accommodation ```{r} sc_example("accomodation.json") %>% sc_table() @@ -132,12 +132,12 @@ sc_example("agriculture_prices.json") %>% sc_table() sc_example("economic_trend_monitor.json") %>% sc_table() ``` -## Choosing the Language {.tabsert .tabset-pills} +## Choosing the Language {.tabset .tabset-pills} -The language which is used for labelling can be changed via the `language` +The language which is used for labeling can be changed via the `language` parameter of `sc_table()`. -### Accomodation +### Accommodation ```{r} sc_example("accomodation.json") %>% sc_table("de") @@ -181,12 +181,12 @@ sc_example("economic_trend_monitor.json") %>% sc_table("de") ## Further reading -* Functionalities of the returned object are explained in the `r ticle("sc_data")`. -* `sc_tabulate()` provides a more flixble way of turning STATcube tables into +* The functionality of the returned object are explained in the `r ticle("sc_data")`. +* `sc_tabulate()` provides a more flexible way of turning STATcube tables into `data.frame`s. See the `r ticle("sc_tabulate")` for more details. * The `r ticle("sc_table_saved")` shows an alternative way of importing tables. * If you are interested in other API endpoints, see the `r ticle("sc_schema")` - ot the `r ticle("sc_info")` + or the `r ticle("sc_info")` [`/table` endpoint]: https://docs.wingarc.com.au/superstar/9.12/open-data-api/open-data-api-reference/table-endpoint [download options]: https://docs.wingarc.com.au/superstar/9.12/superweb2/user-guide/download-tables diff --git a/vignettes/sc_table_custom.Rmd b/vignettes/sc_table_custom.Rmd index a77ff2ea..fec167f7 100644 --- a/vignettes/sc_table_custom.Rmd +++ b/vignettes/sc_table_custom.Rmd @@ -1,7 +1,7 @@ --- title: "Define Custom Tables" description: > - Define custom requests against the `/table` endpoint programatically + Define custom requests against the `/table` endpoint programmatically by providing character vectors with ids of databases, measures and classification fields. link_text: "custom tables article" --- @@ -31,7 +31,7 @@ show_json <- function(x) { } ``` -The function `sc_table_custom()` allows you to define requests against the `/table` endpoint programatically. +The function `sc_table_custom()` allows you to define requests against the `/table` endpoint programmatically. This can be useful to automate the generation of `/table` request rather than relying on the GUI to do so. The function accepts the four arguments. @@ -40,7 +40,7 @@ The function accepts the four arguments. - ids of **fields** to be imported (type `FIELD` or `VALUESET`) - a list of **recodes** that can be used customize fields -## Bulding a Custom Table Step by Step {#step-by-step} +## Building a Custom Table Step by Step {#step-by-step} The first part of this Article will showcase how custom tables can be created with a [database about tourism](`r sc_browse_database("detouextregsai")`). @@ -49,7 +49,7 @@ This database will also be used in most other examples of this article. ### Starting Simple {#database} First, we want to just send the database id to `sc_table_custom()`. -This will request only the mandirory fields and default measures for that database. +This will request only the mandatory fields and default measures for that database. In case of the tourism database, a table with one single row is returned. ```{r} @@ -62,7 +62,7 @@ x$tabulate() show_json(x) ``` -We see that `r format(x$tabulate()[[2]], big.mark = " ")` nights were spent in austrian tourism establishments in the month of `r x$tabulate()[[1]]`. +We see that `r format(x$tabulate()[[2]], big.mark = " ")` nights were spent in Austrian tourism establishments in the month of `r x$tabulate()[[1]]`. ### Adding Countries {#field} @@ -74,7 +74,7 @@ tourism <- sc_schema_db(database) (fields <- sc_schema_flatten(tourism, "FIELD")) ``` -If we want to add "Country of origin" we need to include the fouth entry of the `id` column in our request. +If we want to add "Country of origin" we need to include the fourth entry of the `id` column in our request. ```{r} x <- sc_table_custom(tourism, dimensions = fields$id[4]) @@ -132,24 +132,24 @@ show_json(x) We can see in [the GUI](`r sc_browse_database("detouextregsai")`) that "Country of origin" is a hierarchical classification. If we look at the table above, only the top level of the hierarchy (Austria, Germany, other) is used. -This can be changed by providing the the valueset that corresponds to the more granular classification of "country of origin" +This can be changed by providing the the value-set that corresponds to the more granular classification of "country of origin" ```{r,fig.align='center', out.width='50%', echo=FALSE} knitr::include_graphics("img/hierarchical_classification.png") ``` -The different valuesets for "country of origin" can be compared by browsing the database schema. +The different value-sets for "country of origin" can be compared by browsing the database schema. ```{r} (valuesets <- tourism$`Other Classifications`$`Country of origin`) ``` -We can see that the two levels of the hierarchy are represented by the two valuesets. -The valueset "Herkunftsland" uses 3 classification elements and represents the top level of the hierarchy (Austria, Germany, Other). -The valueset "Country of origin" uses 87 (10+8+69) classification elements and is the bottom level of the hierarchy. -For classifications with more levels of hierarchies, more valuesets will be present. +We can see that the two levels of the hierarchy are represented by the two value-sets. +The value-set "Herkunftsland" uses 3 classification elements and represents the top level of the hierarchy (Austria, Germany, Other). +The value-set "Country of origin" uses 87 (10+8+69) classification elements and is the bottom level of the hierarchy. +For classifications with more levels of hierarchies, more value-sets will be present. -We will now use the id for the first valueset in the `dimensions` parmaeter of `sc_table_custom`. +We will now use the id for the first value-set in the `dimensions` parameter of `sc_table_custom`. ```{r} x <- sc_table_custom( @@ -164,11 +164,11 @@ x$tabulate() show_json(x) ``` -It is possible to use a mixture of valuesets and fields in the `dimensions` parameter. +It is possible to use a mixture of value-sets and fields in the `dimensions` parameter. ## Using Counts {#counts} -Instead of Measures and Valuesets, it is also possible to provide counts +Instead of Measures and Value-sets, it is also possible to provide counts in the `measure` parameter of `sc_table_custom()`. ```{r} @@ -190,17 +190,17 @@ This might be more complicated than filtering the data in R but offers some important advantages. - **performance** Traffic between the client and server is reduced which might - lead to consierably faster API responses. + lead to considerably faster API responses. - **cell limits (user)** Apart from rate limits (see `?sc_rate_limits`), STATcube also limits the amount of cells that can be fetched per user. Filtering data can be useful to preserve this quota. -- **cell limits (equest)** If a single request would contain more than 1 million +- **cell limits (request)** If a single request would contain more than 1 million cells, a [cell count error](sc_last_error.html#CELL_COUNT) is thrown. ### Filtering Data {#filter} -As an example for fiitering data, we can request a table from the tourism +As an example for filtering data, we can request a table from the tourism database and only select some countries for `Country of origin`. ```{r} @@ -251,16 +251,16 @@ show_json(x) ``` This table contains data for two country-groups and two months. -In this case, the cell values for Gemany and the Netherlands are just added -to calculate the etries for Arrivals and Nights spent. -However, in other cases STATcubeR might decide it is more appropriate +In this case, the cell values for Germany and the Netherlands are just added +to calculate the entries for Arrivals and Nights spent. +However, in other cases STATcube might decide it is more appropriate to use weighted means or other more complicated aggregation methods. ### Adding Totals {#totals} The `total` parameter in `sc_recode()` can be used to request totals for -classifications. As an example, let's look at the tourism acivity in the -capital cities of austria +classifications. As an example, let's look at the tourism activity in the +capital cities of Austria ```{r} destination <- tourism$`Other Classifications`$`Tourism commune [ABO]`$ @@ -284,7 +284,7 @@ show_json(x) We see that there are two rows in the table where Tourism commune is set to "Total". The corresponding values represent the sum of all Arrivals -or Nights spent in either of these three cities durng that month. +or Nights spent in either of these three cities during that month. ### Recoding across hierarchies {#recode-hierarchy} @@ -337,8 +337,8 @@ If `dry_run` is set to `FALSE` (the default), STATcubeR will send the request to the API even if inconsistencies are detected. This will likely lead to an error of the form ["expected json but got html"]. -If you get spurious warnings or have suggestions on how these typechecks might -be improved, please issue a feature request to the [STATcubeR bugtracker]. +If you get spurious warnings or have suggestions on how these type-checks might +be improved, please issue a feature request to the [STATcubeR bug tracker]. ## Further Reading diff --git a/vignettes/sc_tabulate.Rmd b/vignettes/sc_tabulate.Rmd index f9f57bfd..f85e05c9 100644 --- a/vignettes/sc_tabulate.Rmd +++ b/vignettes/sc_tabulate.Rmd @@ -166,11 +166,11 @@ x <- sc_table(sc_example("accomodation")) x$meta$fields ``` -### Including totals in the oputput +### Including totals in the output It is not necessary that all fields have totals. For example, suppose we want to include the totals for `Sex` in the output table. -We can just remove the toal code before running `sc_tabulate()`. +We can just remove the total code before running `sc_tabulate()`. The special symbol `NA` can be used to unset a total code. ```{r} @@ -188,21 +188,21 @@ earnings$language <- "de" earnings$tabulate("Geschlecht") ``` -To skip labelling altogether and use variable codes in the output, use `raw=TRUE`. +To skip labeling altogether and use variable codes in the output, use `raw=TRUE`. ```{r} earnings$tabulate("Geschlecht", raw = TRUE) ``` Switching languages is always available for `od_table()` objects. -For `sc_table()`, it depends on whcih languages were requested. +For `sc_table()`, it depends on which languages were requested. ```{r} -# default: get labels in german and english +# default: get labels in German and English x <- sc_table(sc_example("accomodation")) -# only get english labels +# only get English labels x <- sc_table(sc_example("accomodation"), lang = "en") -# only get german labels +# only get German labels x <- sc_table(sc_example("accomodation"), lang = "de") ``` @@ -220,10 +220,10 @@ In the above example, `"2. Quartil"` was matched to `"2. Quartil (Median)"`. ## Programmatic usage -Notice that we used the german label for the column `"Sex"` in the last calls +Notice that we used the German label for the column `"Sex"` in the last calls to `tabulate()`. This is necessary because only the "active" labels are available to define the tabulation. If you want to use `r STATcubeR` -programatically, always use codes to define the tabulation and also use the +programmatically, always use codes to define the tabulation and also use the `.list` parameter if you want to pass several codes. ```{r, echo = FALSE} From 8b32c88d2c39c698822cca1da9da168b35e96049 Mon Sep 17 00:00:00 2001 From: Gregor de Cillia Date: Thu, 23 Feb 2023 13:32:37 +0100 Subject: [PATCH 43/84] don't use data.frame() add a new helper function data_frame() which generates data frame objects and automatically takes care of some common points - to avoid problems with stringsAsFactors, use the vctrs constructor - add the "tbl" class to enable printing the data frames similar to tibbles this makes it possible to skip ssetting strngsAsFactors repedetly and also avoids some places where `class<-` was used previously --- R/od_cache.R | 13 +++++++------ R/od_list.R | 15 ++++++--------- R/od_resource.R | 7 +++---- R/od_table.R | 2 +- R/od_utils.R | 11 +++++------ R/other_endpoints.R | 2 +- R/print.R | 2 +- R/table.R | 7 +++---- R/table_as_data_frame.R | 2 +- R/table_meta.R | 20 ++++++++------------ R/utils.R | 4 ++++ 11 files changed, 40 insertions(+), 45 deletions(-) diff --git a/R/od_cache.R b/R/od_cache.R index 11e56a14..57a19a30 100644 --- a/R/od_cache.R +++ b/R/od_cache.R @@ -39,7 +39,7 @@ od_cache_summary <- function(server = "ext") { field <- substr(files[is_field], 1 + pos_underscore[is_field], nchar(files[is_field]) - 4) id <- substr(files[is_field], 1, pos_underscore[is_field] - 1) sizes_fields <- file.size(file.path(od_cache_dir(), files[is_field])) %>% split(id) %>% sapply(sum) - fields <- data.frame(id, field, stringsAsFactors = FALSE) + fields <- list(id = id, field = field) files <- files[!is_field] pos_underscore <- as.integer(gregexpr("_HEADER", files)) @@ -48,17 +48,18 @@ od_cache_summary <- function(server = "ext") { files <- files[!is_header] id_data <- substr(files, 1, nchar(files) - 4) all_ids <- unique(c(id_data, id_header, fields$id)) - data.frame( - id = all_ids, + res <- data_frame( + id = all_ids %>% `class<-`(c("ogd_id", "character")), updated = file.mtime(paste0(cache_dir, all_ids, ".json")), json = file.size(paste0(cache_dir, all_ids, ".json")), data = file.size(paste0(cache_dir, all_ids, ".csv")), header = file.size(paste0(cache_dir, all_ids, "_HEADER.csv")), fields = sizes_fields[match(unique(fields$id), all_ids)], n_fields = match(fields$id, all_ids) %>% factor(seq_along(all_ids)) %>% - table() %>% as.integer(), - row.names = NULL, stringsAsFactors = FALSE - ) %>% `class<-`(c("tbl", "data.frame")) + table() %>% as.integer() + ) + class(res$updated) <- c("sc_dttm", class(res$updated)) + res } diff --git a/R/od_list.R b/R/od_list.R index f1966056..98c984cb 100644 --- a/R/od_list.R +++ b/R/od_list.R @@ -43,11 +43,10 @@ od_list <- function(unique = TRUE, server = c("ext", "red")) { xml2::xml_find_all(".//a") # ids - df <- data.frame( - category = "NA", + df <- data_frame( + category = rep("NA", length(el)), id = el %>% xml2::xml_attr("aria-label"), - label = el %>% xml2::xml_text(), - stringsAsFactors = FALSE + label = el %>% xml2::xml_text() ) ignored_labels <- c("[Alle \u00f6ffnen]", "[Alle schlie\u00dfen]", @@ -67,7 +66,7 @@ od_list <- function(unique = TRUE, server = c("ext", "red")) { df <- df[!(df$id %in% od_resource_blacklist), ] rownames(df) <- NULL attr(df, "od") <- r$times[["total"]] - df %>% `class<-`(c("tbl", "data.frame")) + df } #' Get a catalogue for OGD datasets @@ -130,7 +129,7 @@ as_df_jsons <- function(jsons) { } descs <- sapply(jsons, function(x) x$extras$attribute_description) %>% paste0(";", .) - out <- data.frame( + out <- data_frame( title = sapply(jsons, function(x) x$title), measures = gregexpr(";F-", descs) %>% sapply(length), fields = gregexpr(";C-", descs) %>% sapply(length), @@ -145,12 +144,10 @@ as_df_jsons <- function(jsons) { update_frequency = sapply(jsons, function(x) x$extras$update_frequency), tags = I(lapply(jsons, function(x) unlist(x$tags))), categorization = sapply(jsons, function(x) unlist(x$extras$categorization[1])), - json = I(jsons), - stringsAsFactors = FALSE + json = I(jsons) ) out$modified <- parse_time(out$modified) out$created <- parse_time(out$created) - class(out) <- c("tbl", class(out)) out } diff --git a/R/od_resource.R b/R/od_resource.R index ccbc2223..55d82907 100644 --- a/R/od_resource.R +++ b/R/od_resource.R @@ -141,15 +141,14 @@ od_resource_parse_all <- function(resources, server = "ext") { }) od <- lapply(parsed, attr, "od") - data.frame( + data_frame( name = sapply(resources, function(x) x$name), last_modified = lapply(od, function(x) x$last_modified) %>% do.call(c, .), cached = lapply(od, function(x) x$cached) %>% do.call(c, .), size = sapply(od, function(x) x$size), download = vapply(od, function(x) x$download, 1.0), parsed = sapply(od, function(x) x$parsed), - data = I(parsed %>% lapply(`attr<-`, "od", NULL)), - stringsAsFactors = FALSE + data = I(parsed %>% lapply(`attr<-`, "od", NULL)) ) } @@ -227,5 +226,5 @@ od_resource_all <- function(id, json = od_json(id), server = "ext") { class(out$name) <- c("ogd_file", "character") class(out$last_modified) <- c("sc_dttm", class(out$last_modified)) class(out$cached) <- c("sc_dttm", class(out$cached)) - out %>% `class<-`(c("tbl", "data.frame")) + out } diff --git a/R/od_table.R b/R/od_table.R index 9d646902..c94cdf82 100644 --- a/R/od_table.R +++ b/R/od_table.R @@ -112,7 +112,7 @@ od_table_class <- R6::R6Class( class(resources$name) <- c("ogd_file", "character") class(resources$last_modified) <- c("sc_dttm", class(resources$last_modified)) class(resources$cached) <- c("sc_dttm", class(resources$cached)) - resources %>% `class<-`(c("tbl", "data.frame")) + resources }, #' @field od_server #' The server used for initialization (see to `?od_table`) diff --git a/R/od_utils.R b/R/od_utils.R index 0093e4db..04a34934 100644 --- a/R/od_utils.R +++ b/R/od_utils.R @@ -29,7 +29,7 @@ od_attr <- function(json) { code <- c(code, substr(desc, index_code[i] + 1, next_col - 1)) label <- c(label, substr(desc, next_col + 1, index_end[i])) } - data.frame(label = label, code = code, stringsAsFactors = FALSE) + data_frame(label = label, code = code) } od_create_data <- function(id, json = od_json(id), lang = NULL, @@ -39,9 +39,8 @@ od_create_data <- function(id, json = od_json(id), lang = NULL, dat <- resources$data[[1]] header <- resources$data[[2]] meta <- list( - source = data.frame(code = id, label = NA, label_de = json$title, - label_en = json$extras$en_title_and_desc, - stringsAsFactors = FALSE), + source = data_frame(code = id, label = NA, label_de = json$title, + label_en = json$extras$en_title_and_desc), measures = header[substr(header$code, 1, 1) == "F", ], fields = header[substr(header$code, 1, 1) == "C", ] ) @@ -81,10 +80,10 @@ od_create_data <- function(id, json = od_json(id), lang = NULL, resources$name <- paste0(resources$name, ".csv") od <- attr(json, "od") - resources <- rbind(data.frame( + resources <- rbind(data_frame( name = paste0(id, ".json"), last_modified = json$extras$metadata_modified %>% as.POSIXct(format = "%Y-%m-%dT%H:%M:%OS"), cached = od$cached, - size = od$size, download = od$download, parsed = NA, stringsAsFactors = FALSE), resources[1:6] + size = od$size, download = od$download, parsed = NA), resources[1:6] ) list(data = dat, meta = meta, fields = fields, resources = resources, diff --git a/R/other_endpoints.R b/R/other_endpoints.R index 35672bc6..6cd40fe6 100644 --- a/R/other_endpoints.R +++ b/R/other_endpoints.R @@ -25,7 +25,7 @@ sc_info <- function(language = c("en", "de"), key = NULL, server = "ext") { info_content <- httr::content(response) info_content$languages %>% lapply(function(x) - data.frame(locale = x$locale, displayName = x$displayName)) %>% + data_frame(locale = x$locale, displayName = x$displayName)) %>% do.call(rbind, .) } diff --git a/R/print.R b/R/print.R index 409966fb..bbb648fb 100644 --- a/R/print.R +++ b/R/print.R @@ -39,7 +39,7 @@ print.sc_tibble_meta <- function(x, ...) { } sc_tibble <- function(x) { - class(x) <- c("sc_tibble", "tbl", class(x)) + class(x) <- unique(c("sc_tibble", "tbl", class(x))) x } diff --git a/R/table.R b/R/table.R index e82e79f9..8a51453a 100644 --- a/R/table.R +++ b/R/table.R @@ -135,18 +135,17 @@ sc_table_class <- R6::R6Class( #' two columns for the annotation keys and annotation labels. annotation_legend = function() { am <- self$raw$annotationMap - data.frame(annotation = names(am), label = unlist(am), row.names = NULL) + data_frame(annotation = names(am), label = unlist(am)) }, #' @field rate_limit #' how much requests were left after the POST request for this table was sent? #' Uses the same format as [sc_rate_limit_table()]. rate_limit = function() { headers <- self$response$headers - res <- data.frame( + res <- list( remaining = headers$`x-ratelimit-remaining-table`, limit = headers$`x-ratelimit-table`, - reset = headers$`x-ratelimit-reset-table`, - stringsAsFactors = FALSE + reset = headers$`x-ratelimit-reset-table` ) class(res) <- "sc_rate_limit_table" res diff --git a/R/table_as_data_frame.R b/R/table_as_data_frame.R index dde9d200..82765381 100644 --- a/R/table_as_data_frame.R +++ b/R/table_as_data_frame.R @@ -28,7 +28,7 @@ sc_model_matrix <- function(dims) { rep(times = times, each = each) times <- times * dims[i] } - as.data.frame(out) + vctrs::new_data_frame(out) } sc_table_create_data <- function(content) { diff --git a/R/table_meta.R b/R/table_meta.R index 8235578d..d9f17224 100644 --- a/R/table_meta.R +++ b/R/table_meta.R @@ -36,42 +36,38 @@ summarize_annotations <- function(content, i) { sc_meta <- function(content) { measure_info <- lapply(seq_along(content$measures), function(i) { measure <- content$measures[[i]] - data.frame( + data_frame( label = measure$label, code = get_var_code(measure$measure), fun = measure$`function`, precision = content$cubes[[i]]$precision, annotations = summarize_annotations(content, i), - NAs = sum(unlist(content$cubes[[i]]$values) == 0), - stringsAsFactors = FALSE + NAs = sum(unlist(content$cubes[[i]]$values) == 0) ) }) %>% do.call(rbind, .) field_info <- lapply(content$fields, function(field) { has_total <- field$items[[length(field$items)]]$type == "Total" - data.frame( + data_frame( label = field$label, code = get_var_code(field$uri), nitems = length(field$items), type = sc_field_type(field), - total_code = ifelse(has_total, "SC_TOTAL", NA_character_), - stringsAsFactors = FALSE + total_code = ifelse(has_total, "SC_TOTAL", NA_character_) ) }) %>% do.call(rbind, .) - db_info <- data.frame( + db_info <- data_frame( label = content$database$label, - code = content$database$id, - stringsAsFactors = FALSE + code = content$database$id ) list(source = db_info, measures = measure_info, fields = field_info) } sc_meta_field <- function(field) { res <- lapply(field$items, function(item) { - data.frame( + data_frame( label = paste(item$labels, collapse = ";"), - code = get_item_code(item), - stringsAsFactors = FALSE + code = get_item_code(item) ) }) %>% do.call(rbind, .) res$parsed <- sc_field_parse(field) diff --git a/R/utils.R b/R/utils.R index 1fc25b10..d0d2a380 100644 --- a/R/utils.R +++ b/R/utils.R @@ -34,3 +34,7 @@ sc_language <- function(language = NULL, options = c("en", "de")) { language <- getOption("STATcubeR.language") match.arg(language, options) } + +data_frame <- function(...) { + vctrs::new_data_frame(list(...), class = "tbl") +} From 93e74a11bc05793b46400dacec569185919e6849 Mon Sep 17 00:00:00 2001 From: Gregor de Cillia Date: Mon, 27 Feb 2023 05:59:20 +0100 Subject: [PATCH 44/84] forward server argument to sc_key() --- R/key.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/key.R b/R/key.R index 4535cdf2..119fca8a 100644 --- a/R/key.R +++ b/R/key.R @@ -48,7 +48,7 @@ sc_key_set <- function(key, server = "ext", test = TRUE) { #' an error is thrown. #' @export sc_key_get <- function(server = "ext") { - if (!sc_key_exists()) + if (!sc_key_exists(server)) stop("No STATcube key available. Set key with sc_key_set()") invisible(Sys.getenv(sc_key_env_var(server))) } From d2d9992dc62b64e875b990fe1c7f1856a9a47a1f Mon Sep 17 00:00:00 2001 From: Gregor de Cillia Date: Mon, 27 Feb 2023 06:00:41 +0100 Subject: [PATCH 45/84] fix printing of NAs in timestamps --- R/print.R | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/R/print.R b/R/print.R index bbb648fb..6ce0b955 100644 --- a/R/print.R +++ b/R/print.R @@ -55,10 +55,13 @@ pillar_shaft.sc_dttm <- function(x, ...) { ymd <- format(x, "%Y-%m-%d") hms <- cli::col_silver(format(x, "%H:%M:%S")) short <- ymd - ind <- as.numeric(Sys.time()) - as.numeric(x) < 60*24 + ind <- !is.na(x) & as.numeric(Sys.time()) - as.numeric(x) < 60*24 short[ind] <- hms[ind] + long <- paste(ymd, hms) + long[is.na(x)] <- NA + short[is.na(x)] <- NA pillar::new_pillar_shaft_simple( - paste(ymd, hms), + long, width = 19, min_width = 10, short_formatted = short, From bed0b8e6cfbabc45b0cbff4554a89da631fe95e7 Mon Sep 17 00:00:00 2001 From: Gregor de Cillia Date: Mon, 27 Feb 2023 06:02:09 +0100 Subject: [PATCH 46/84] skip example if no key available use another examplesIf clause to make sure devtoos::check() can be run without an API key --- R/table_custom.R | 2 +- man/sc_table_custom.Rd | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/R/table_custom.R b/R/table_custom.R index 8a1db6c4..3d0e2aa3 100644 --- a/R/table_custom.R +++ b/R/table_custom.R @@ -42,7 +42,7 @@ #' * all fields in `recodes` are also present in `dimensions` #' * the first two arguments of `sc_recode()` are consistent, i.e. #' if the provided `VALUE`s belong to the `VALUESET/FIELD` -#' @examples +#' @examplesIf sc_key_exists() #' sc_table_custom("str:database:detouextregsai") #' #' sc_table_custom( diff --git a/man/sc_table_custom.Rd b/man/sc_table_custom.Rd index 8dcaf9f6..15035197 100644 --- a/man/sc_table_custom.Rd +++ b/man/sc_table_custom.Rd @@ -96,6 +96,7 @@ if the provided \code{VALUE}s belong to the \code{VALUESET/FIELD} } \examples{ +\dontshow{if (sc_key_exists()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} sc_table_custom("str:database:detouextregsai") sc_table_custom( @@ -133,4 +134,5 @@ x <- sc_table_custom( ) ) x$tabulate() +\dontshow{\}) # examplesIf} } From 908ce3bdad6c49b4b842f1e1efdc356468394e4e Mon Sep 17 00:00:00 2001 From: Gregor de Cillia Date: Mon, 27 Feb 2023 06:04:08 +0100 Subject: [PATCH 47/84] drop names in sc_recode if map is passed as a named vector, this is never supported. therefore, drop the names. This can be useful for subsetting schema objects via single square brackets --- R/table_custom.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/R/table_custom.R b/R/table_custom.R index 3d0e2aa3..e5ae249b 100644 --- a/R/table_custom.R +++ b/R/table_custom.R @@ -125,6 +125,8 @@ sc_recode <- function(field, map = NULL, total = FALSE) { return(stats::setNames(list(list(total = total)), as_id(field))) if (inherits(map, "sc_schema")) map <- list(map) + else + map <- stats::setNames(map, NULL) recode <- stats::setNames( list(list( map = lapply(map, function(value) { From a6292965346889aa1b31be8746af06e3f26a915c Mon Sep 17 00:00:00 2001 From: Gregor de Cillia Date: Mon, 27 Feb 2023 06:07:42 +0100 Subject: [PATCH 48/84] don't drop unused levels in ogd fields dropping those levels causes hierachy information to become unavailable. some new client code now makes use of the hierarchies. The new behavior will generate the fiels as factor columns with unused factor levels. --- R/od_utils.R | 6 ------ 1 file changed, 6 deletions(-) diff --git a/R/od_utils.R b/R/od_utils.R index 04a34934..b5bd1009 100644 --- a/R/od_utils.R +++ b/R/od_utils.R @@ -48,12 +48,6 @@ od_create_data <- function(id, json = od_json(id), lang = NULL, fields <- lapply(seq_along(meta$fields$code), function(i) { code <- meta$fields$code[i] fld <- resources$data[[2 + i]] - udc <- unique(dat[[code]]) - stopifnot(all(udc %in% fld$code)) - if (verbose && length(udc) != nrow(fld)) - message("dropping unused levels in ", shQuote(code), ": ", - paste(shQuote(setdiff(fld$code, udc)), collapse = ", ")) - fld <- fld[fld$code %in% udc, ] fld$label_en[is.na(fld$label_en)] <- fld$label_de[is.na(fld$label_en)] fld }) From d3fbd984585985067451f44b7f858735244dfff8 Mon Sep 17 00:00:00 2001 From: Gregor de Cillia Date: Mon, 27 Feb 2023 06:19:36 +0100 Subject: [PATCH 49/84] add print method fror ogd_id --- R/print.R | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/R/print.R b/R/print.R index 6ce0b955..fc3c1b78 100644 --- a/R/print.R +++ b/R/print.R @@ -101,3 +101,20 @@ format.pillar_shaft_ogd_file <- function(x, width, ...) { } pillar::new_ornament(files, align = "left") } + +pillar_shaft.ogd_id <- function(x, ...) { + pillar::new_pillar_shaft(list(x = x), width = pillar::get_max_extent(x), + min_width = 20, class = "pillar_shaft_ogd_id", + type_sum = "chr") +} + +format.pillar_shaft_ogd_id <- function(x, width, ...) { + id <- x$x + too_long <- nchar(id) > width + id[too_long] <- paste0(substring(id[too_long], 1, width - 2), + cli::symbol$ellipsis) + id <- cli::style_hyperlink(id, paste0( + "https://data.statistik.gv.at/web/meta.jsp?dataset=", x$x)) + pillar::new_ornament(id, align = "left") +} + From b2fbe085a72eabcfeadeb20b08c9e2abad29e8f7 Mon Sep 17 00:00:00 2001 From: Gregor de Cillia Date: Mon, 27 Feb 2023 06:20:13 +0100 Subject: [PATCH 50/84] use "ogd_id" class in od_list() --- R/od_list.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/R/od_list.R b/R/od_list.R index 98c984cb..bb6c1842 100644 --- a/R/od_list.R +++ b/R/od_list.R @@ -66,6 +66,8 @@ od_list <- function(unique = TRUE, server = c("ext", "red")) { df <- df[!(df$id %in% od_resource_blacklist), ] rownames(df) <- NULL attr(df, "od") <- r$times[["total"]] + class(df$id) <- c("ogd_id", "character") + class(df) <- c("tbl_df", class(df)) df } From 47ddfccea5d87364d29d09b80770df7ad2eebad1 Mon Sep 17 00:00:00 2001 From: Gregor de Cillia Date: Mon, 27 Feb 2023 06:23:21 +0100 Subject: [PATCH 51/84] show progress in od_catalogue() if json files are downloaded, use cli::progres_along() to show how many json files are still remaining --- R/od_list.R | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/R/od_list.R b/R/od_list.R index bb6c1842..8956893e 100644 --- a/R/od_list.R +++ b/R/od_list.R @@ -121,7 +121,15 @@ od_catalogue <- function(server = "ext", local = TRUE) { ids <- od_revisions(server = server) } timestamp <- switch(as.character(local), "TRUE" = NULL, "FALSE" = Sys.time()) - jsons <- lapply(ids, od_json, timestamp, server) + jsons <- lapply( + cli::cli_progress_along( + ids, type = "tasks", "downloading json metadata files"), + function(i) { + od_json(ids[i], timestamp, server) + } + ) + if (!local) + cli::cli_text("\rDownloaded {.field {length(ids)}} metadata files with {.fn od_json}") as_df_jsons(jsons) } From dcdd0c0b5a2755889ded4a75b777993a7edb62a0 Mon Sep 17 00:00:00 2001 From: Gregor de Cillia Date: Mon, 27 Feb 2023 06:33:55 +0100 Subject: [PATCH 52/84] use ogd_id class in catalogue --- R/od_list.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/od_list.R b/R/od_list.R index 8956893e..937ed554 100644 --- a/R/od_list.R +++ b/R/od_list.R @@ -158,6 +158,7 @@ as_df_jsons <- function(jsons) { ) out$modified <- parse_time(out$modified) out$created <- parse_time(out$created) + class(out$id) <- c("ogd_id", "character") out } From cc90ae3e4f495af9ed2986d2ba46cf21f2e31824 Mon Sep 17 00:00:00 2001 From: Gregor de Cillia Date: Mon, 27 Feb 2023 06:39:43 +0100 Subject: [PATCH 53/84] drop unused parameter the (internal) function for parsing open data datasets used to have a parameter to check if levels were dropped. This is no longer necessary since a629296 [ci skip] --- R/od_utils.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/od_utils.R b/R/od_utils.R index b5bd1009..7e432f5b 100644 --- a/R/od_utils.R +++ b/R/od_utils.R @@ -33,7 +33,7 @@ od_attr <- function(json) { } od_create_data <- function(id, json = od_json(id), lang = NULL, - server = "ext", verbose = FALSE) { + server = "ext") { lang <- sc_language(lang) resources <- od_resource_all(id, json, server) dat <- resources$data[[1]] From b09b4cdde9ef0316baae20c3d6ebbf0ea2ca52f8 Mon Sep 17 00:00:00 2001 From: Gregor de Cillia Date: Mon, 27 Feb 2023 06:55:27 +0100 Subject: [PATCH 54/84] simplify pkgdown setup don't use registerS3Method for derived classes of data.frame since it does not seem to make any difference --- vignettes/R/df_print.R | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/vignettes/R/df_print.R b/vignettes/R/df_print.R index 89b18ebe..466bccd3 100644 --- a/vignettes/R/df_print.R +++ b/vignettes/R/df_print.R @@ -16,16 +16,6 @@ registerS3method( envir = asNamespace("knitr") ) -registerS3method( - "knit_print", "sc_meta", knit_print.data.frame, - envir = asNamespace("knitr") -) - -registerS3method( - "knit_print", "sc_schema", knit_print.data.frame, - envir = asNamespace("knitr") -) - options(crayon.enabled = TRUE) options(pillar.min_chars = 30) options(pillar.bold = TRUE) From e23601cb7f4ff24a8a68063bbf5d17f9ccf6a505 Mon Sep 17 00:00:00 2001 From: Gregor de Cillia Date: Mon, 27 Feb 2023 07:01:52 +0100 Subject: [PATCH 55/84] add custom color palette for @examples the current palette of colors for sc_schema is only suited for dark editors. For now, add a light theme for the pkgdown reference pages TODO: once the annotation printing is implemented, think about adding a way to customize the color scheme in all STATcubeR. This could also take care of cli_theme_pkgdown() [ci skip] --- vignettes/R/setup.R | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/vignettes/R/setup.R b/vignettes/R/setup.R index ca14df9e..036c78e9 100644 --- a/vignettes/R/setup.R +++ b/vignettes/R/setup.R @@ -1,4 +1,12 @@ pkgload::load_all(export_all = FALSE, reset = FALSE, helpers = FALSE, quiet = TRUE) source("R/df_print.R") knitr::opts_chunk$set(comment = "#>") +options( + STATcubeR.schema_colors = list( + "FOLDER" = "#4400cc", "DATABASE" = "#186868", "TABLE" = "#624918", + "GROUP" = "#4400cc", "FIELD" = "cyan", "VALUESET" = "cadetblue", + "VALUE" = "#4400cc", "MEASURE" = "#624918", "STAT_FUNCTION" = "cadetblue", + "COUNT" = "#624918" + ) +) source("R/add_tooltip.R")$value From e95f5e4f5611b9305783f01509eb4fd01c8b77d6 Mon Sep 17 00:00:00 2001 From: Gregor de Cillia Date: Mon, 27 Feb 2023 07:12:24 +0100 Subject: [PATCH 56/84] use target=_blank for statcube links make sure the links to statcube in the schema vignette open in a new tab TODO: do this globally in R/df_print.R an also use target=_blank for OGD links [ci skip] --- vignettes/sc_schema.Rmd | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/vignettes/sc_schema.Rmd b/vignettes/sc_schema.Rmd index 71cd0265..2220b895 100644 --- a/vignettes/sc_schema.Rmd +++ b/vignettes/sc_schema.Rmd @@ -237,4 +237,7 @@ sc_schema_db("dekonjunkturmonitor") %>% the `r ticle("sc_table_custom")` * See the `r ticle("sc_table_saved")` to get access to the data for table nodes in the schema. - . + +```{js, echo=FALSE} +$('[href^="https://portal.statistik.at/statistik.at/ext/statcube"]').attr("target", "_blank"); +``` From 737c1b477c563594964060c55c0892a2315d2546 Mon Sep 17 00:00:00 2001 From: Gregor de Cillia Date: Mon, 27 Feb 2023 12:27:19 +0100 Subject: [PATCH 57/84] build external links with devtools::document() assume that the environment where document() runs can be characterized by the NOT_CRAN flag being present (not necessarily FALSE or TRUE). This is to avoid having links to the editing server in the manpages and on pkgdown this will only matter if the docs are built inside the STAT firewall. If at some point, the docs are built via gh-actions, this can be reverted --- R/browse.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/browse.R b/R/browse.R index 55184f72..9347ccb4 100644 --- a/R/browse.R +++ b/R/browse.R @@ -75,7 +75,7 @@ in_stat <- function() { } sc_url_gui <- function(server = "ext") { - if (server == "ext" && !in_stat()) + if (server == "ext" && (!in_stat() || Sys.getenv("NOT_CRAN") != "")) return("https://portal.statistik.at/statistik.at/ext/statcube/") if (server == "test") return("http://sdbtest:8081/statistik.at/wdev/statcube/") From ae804040a635356d5c53bff8962d84c5ed93908d Mon Sep 17 00:00:00 2001 From: Gregor de Cillia Date: Tue, 28 Feb 2023 15:44:33 +0100 Subject: [PATCH 58/84] allow node-types in sc_schema_flatten() the following expression will now show all folders from the catalogue sc_schema_catalogue() %>% sc_schema_flatten("FOLDER") previously, this would only have returned one entry containig the root folder because the recursion was stopped as soon as the appropriate schema type was detected this change also affects the schema types GROUP, MEASURE, FIELD and VALUESET in sc_schema_db() which can also have child nodes [ci skip] --- R/schema.R | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/R/schema.R b/R/schema.R index 7ab893ac..74f3a97e 100644 --- a/R/schema.R +++ b/R/schema.R @@ -147,15 +147,18 @@ print.sc_schema_flatten <- function(x, ...) { } sc_schema_flatten_impl <- function(resp, type) { - if (resp$type == type) - return(list(id = resp$id, label = resp$label)) - if (is.null(resp$children)) - return(NULL) - ret <- lapply(resp$children, sc_schema_flatten_impl, type) - list( - id = lapply(ret, function(x) { x$id }) %>% unlist(), - label = lapply(ret, function(x) { x$label }) %>% unlist() - ) + id <- character() + label <- character() + if (!is.null(resp$children)) { + ret <- lapply(resp$children, sc_schema_flatten_impl, type) + id <- lapply(ret, function(x) x$id) %>% unlist() + label <- lapply(ret, function(x) x$label) %>% unlist() + } + if (resp$type == type) { + id <- c(resp$id, id) + label <- c(resp$label, label) + } + list(id = id, label = label) } #' @describeIn sc_schema is similar to the From 1567f61f9646b0904e96891807dbc9db8c6df844 Mon Sep 17 00:00:00 2001 From: Gregor de Cillia Date: Tue, 28 Feb 2023 17:42:23 +0100 Subject: [PATCH 59/84] check arg 'type' in sc_schema_flatten() check the argument against the list of available schema types. the argument is now also coerced via toupper() because the spelling in schema uris uses lowercase --- R/schema.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/schema.R b/R/schema.R index 74f3a97e..aac8801c 100644 --- a/R/schema.R +++ b/R/schema.R @@ -128,6 +128,7 @@ sc_as_nested_list <- function(x) { #' @export sc_schema_flatten <- function(x, type) { stopifnot(inherits(x, "sc_schema")) + type <- match.arg(toupper(type), names(sc_schema_colors())) response <- attr(x, "response") stopifnot(!is.null(response)) response <- httr::content(response) From f7bdba49056ef91acb442128fade1d9c294bc6da Mon Sep 17 00:00:00 2001 From: Gregor de Cillia Date: Tue, 28 Feb 2023 17:44:42 +0100 Subject: [PATCH 60/84] + sentence on empty folders in sc_schema() [ci skip] --- vignettes/sc_schema.Rmd | 1 + 1 file changed, 1 insertion(+) diff --git a/vignettes/sc_schema.Rmd b/vignettes/sc_schema.Rmd index 2220b895..637cd4f3 100644 --- a/vignettes/sc_schema.Rmd +++ b/vignettes/sc_schema.Rmd @@ -70,6 +70,7 @@ knitr::include_graphics("img/catalogue4.png") ``` In some cases, the API shows more folders than the GUI in which case the folders from the API will be empty. +Seeing an empty folder usually means that your STATcube user is not permitted to view the contents of the folder. ```{r} my_catalogue$Statistics$`Foreign Trade`$Außenhandelsindizes From 72091bc6198471c45029a52fe411ee6e71155704 Mon Sep 17 00:00:00 2001 From: Gregor de Cillia Date: Wed, 1 Mar 2023 07:05:57 +0100 Subject: [PATCH 61/84] update sc_example("foregirn_trade.json") the nace classification in this database was updadet. Reflect this in the example request [ci skip] --- inst/json_examples/foreign_trade.json | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/inst/json_examples/foreign_trade.json b/inst/json_examples/foreign_trade.json index dafd5f99..65a9d692 100644 --- a/inst/json_examples/foreign_trade.json +++ b/inst/json_examples/foreign_trade.json @@ -14,17 +14,17 @@ [ "str:value:denatec06:F-DATA:C-NATEC_CPA-0:C-NATEC_CPA-0:NATEC_CPA-CPA_E" ] ] }, - "str:field:denatec06:F-DATA:C-NACEK2_3-0" : { + "str:field:denatec06:F-DATA:C-NACEK2_4-0" : { "map" : [ - [ "str:value:denatec06:F-DATA:C-NACEK2_3-0:C-NACEK2_2-0:NACE-BTE" ], - [ "str:value:denatec06:F-DATA:C-NACEK2_3-0:C-NACEK2_2-0:NACE-G" ], - [ "str:value:denatec06:F-DATA:C-NACEK2_3-0:C-NACEK2_2-0:NACE-AFHTU" ] + [ "str:value:denatec06:F-DATA:C-NACEK2_4-0:C-NACEK2_4-0:NACE-BTE" ], + [ "str:value:denatec06:F-DATA:C-NACEK2_4-0:C-NACEK2_4-0:NACE-G" ], + [ "str:value:denatec06:F-DATA:C-NACEK2_4-0:C-NACEK2_4-0:NACE-AFHTU" ] ] } }, "dimensions" : [ [ "str:field:denatec06:F-DATA:C-NATEC_CPA-0" ], [ "str:field:denatec06:F-DATA:C-A10-0" ], - [ "str:field:denatec06:F-DATA:C-NACEK2_3-0" ] + [ "str:field:denatec06:F-DATA:C-NACEK2_4-0" ] ] } From a6cef705c4195999e04ae73f9c73fd064878c9ee Mon Sep 17 00:00:00 2001 From: Gregor de Cillia Date: Wed, 1 Mar 2023 17:17:59 +0100 Subject: [PATCH 62/84] use output, not message in print method cli_text uses the message channel to generate the visible console outputs this is not what to exprect from a print method wich should always feed into stdout cli_text() is also used in other places of STATcubeR but always wrapped into cli_fmt() which means that output channels do not matter in those circumstances because the outputs are captured to be formatted elsewhere --- R/od_revisions.R | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/R/od_revisions.R b/R/od_revisions.R index e97c0c0d..02d954e0 100644 --- a/R/od_revisions.R +++ b/R/od_revisions.R @@ -41,15 +41,15 @@ print.od_revisions <- function(x, ...) { since <- attr(x, "since") response <- attr(x, "response") if (!is.null(since)) - cli::cli_text("{.strong {length(x)}} changes between + cli::format_inline("{.strong {length(x)}} changes between {.timestamp {attr(x, 'since')}} and - {.timestamp {response$date}}") + {.timestamp {response$date}}") %>% cat() else - cli::cli_text("{.strong {length(x)}} datasets are available - ({.timestamp {response$date}})") + cli::format_inline("{.strong {length(x)}} datasets are available ", + "({.timestamp {response$date}})\n") %>% cat() if (length(x) > 0) { y <- cli::cli_vec(x, list("vec-trunc" = 3)) - cli::cli_text("{.strong ids}: {.emph {y}}") + cli::format_inline("{.strong ids}: {.emph {y}}") %>% cat() } invisible(x) } From 99659dc1d7da6281d3a797c311c660b7294958a8 Mon Sep 17 00:00:00 2001 From: Gregor de Cillia Date: Wed, 1 Mar 2023 17:20:27 +0100 Subject: [PATCH 63/84] add url for bug reports include another link to github into the DESCRIPTION metadata. this is common practice in most packages on CRAN [ci skip] --- DESCRIPTION | 1 + 1 file changed, 1 insertion(+) diff --git a/DESCRIPTION b/DESCRIPTION index 14d18c0b..0e259f19 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -14,6 +14,7 @@ Description: Import data from the STATcube REST API or from the open data License: GPL (>= 2) URL: https://statistikat.github.io/STATcubeR, https://github.com/statistikat/STATcubeR +BugReports: https://github.com/statistikat/STATcubeR/issues Imports: cli (>= 3.4.1), httr, From 2a6a9e1f7632ff5dffc81e90feb0a357a0bb4651 Mon Sep 17 00:00:00 2001 From: Gregor de Cillia Date: Thu, 9 Mar 2023 11:36:22 +0100 Subject: [PATCH 64/84] fix: sc_table_saved_list() and no tables if there are no saved tables, the previous version generated an error of the form "expected character but got list" now, a data.frame with zero rows is returned instead TODO: it is probably a good idea to replace sapply() by vapply() everywhere in STATcubeR. Most static code alanyzers recommend this. [ci skip] --- R/table_saved.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/table_saved.R b/R/table_saved.R index 832bcf62..9475c81f 100644 --- a/R/table_saved.R +++ b/R/table_saved.R @@ -12,8 +12,8 @@ sc_table_saved_list <- function(key = NULL, server = "ext") { vctrs::new_data_frame(list( label = sapply(saved_tables, function(x) x$label), id = new_schema_uri( - sapply(saved_tables, function(x) x$id), - sapply(saved_tables, function(x) x$id) + vapply(saved_tables, function(x) x$id, ""), + vapply(saved_tables, function(x) x$id, "") ) ), class = c("tbl", "tbl_df")) } From 9ef5bbc7144500aeb59d5ced50d4b96276612a98 Mon Sep 17 00:00:00 2001 From: Gregor de Cillia Date: Fri, 10 Mar 2023 10:49:25 +0100 Subject: [PATCH 65/84] allow STAT_ prefix in od_table() there is a new namespace of datasets coming up which will use the STAT_ prefix instead og OGD_ for the primary id of the dataset. Relax the input checks to allow OGD_ datasets to be fetched. For external users, this will only become relevant in a few months. --- R/od_resource.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/R/od_resource.R b/R/od_resource.R index 55d82907..70da1ff8 100644 --- a/R/od_resource.R +++ b/R/od_resource.R @@ -20,8 +20,9 @@ od_resource_blacklist <- c( ) od_resource_check_id <- function(id) { - if (substr(id, 1, 4) != "OGD_") - stop("Dataset ids must begin with \"OGD_\": ", shQuote(id), call. = FALSE) + if (!grepl("^OGD_", id) && !grepl("^STAT_", id)) + stop("Dataset ids must begin with \"OGD_\" or \"STAT_\": ", + shQuote(id), call. = FALSE) if (id %in% od_resource_blacklist) stop("Dataset ", shQuote(id), " was blacklisted in STATcubeR ", "because of inconsistent formats", call. = FALSE) From b777af9a845484669521695d7935544426a0054c Mon Sep 17 00:00:00 2001 From: Gregor de Cillia Date: Fri, 10 Mar 2023 11:06:46 +0100 Subject: [PATCH 66/84] od_table_local(): support $PublDateTime$ some internal datasets now use $PublDateTime$ as a placeholder for the deployment timestamp. Make sure that those datasets can be used with STATcubeR The way this is implemented now, reading and resaving a dataset is not a no-op because the interpolated value will be written in place of the placholder. There might come a point where it makes sense to implement this differently in order to preserve the placeholder [ci skip] --- R/od_table_save.R | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/R/od_table_save.R b/R/od_table_save.R index 704ba3c3..22dc9d52 100644 --- a/R/od_table_save.R +++ b/R/od_table_save.R @@ -83,9 +83,16 @@ od_table_local <- function(file) { od_table_local_paths <- function() { extracted <- dir() stopifnot(length(extracted) == 1) - json <- jsonlite::read_json(file.path(extracted, "meta.json")) + json_file <- file.path(extracted, "meta.json") + json <- jsonlite::read_json(json_file) id <- json$resources[[1]]$name stopifnot(is.character(id), length(id) == 1) + if (json$extras$metadata_modified == "$PublDateTime$") { + readLines(json_file) %>% + gsub("\\$PublDateTime\\$", json$extras$begin_datetime, .) %>% + writeLines(json_file) + json <- jsonlite::read_json(json_file) + } timestamps <- sapply(json$resources, function(x) x$last_modified) %>% as.POSIXct(format = "%Y-%m-%dT%H:%M:%OS") stopifnot(all(timestamps <= Sys.time())) @@ -93,7 +100,7 @@ od_table_local_paths <- function() { classifications = dir(file.path(extracted, "classifications"), full.names = TRUE), data = file.path(extracted, "data.csv"), header = file.path(extracted, "header.csv"), - meta = file.path(extracted, "meta.json"), + meta = json_file, id = id ) stopifnot(all(file.exists(c(paths$data, paths$header, paths$meta)))) From 08a5102b33780b8cfb126512414aab291ca571a6 Mon Sep 17 00:00:00 2001 From: Gregor de Cillia Date: Sat, 11 Mar 2023 14:26:45 +0100 Subject: [PATCH 67/84] try out new logo [ci skip] --- README.md | 2 +- man/figures/logo2.svg | 44 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 1 deletion(-) create mode 100644 man/figures/logo2.svg diff --git a/README.md b/README.md index ab7a085b..b57e4d00 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ -# STATcubeR +# STATcubeR [![R-CMD-check](https://github.com/statistikat/STATcubeR/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/statistikat/STATcubeR/actions/workflows/R-CMD-check.yaml) diff --git a/man/figures/logo2.svg b/man/figures/logo2.svg new file mode 100644 index 00000000..e4522516 --- /dev/null +++ b/man/figures/logo2.svg @@ -0,0 +1,44 @@ + From f4e18ac794f1f9d706aeeaabb3fece8875ee3ef2 Mon Sep 17 00:00:00 2001 From: Gregor de Cillia Date: Wed, 29 Mar 2023 14:21:57 +0200 Subject: [PATCH 68/84] add support for sdmx parsing this is the first step to resolving #27 by adding a function that creates sc_table() like objects based on sdmx archives The sdmx format contains all metadata that is necessary for STATcubeR to reuse the existing $tabulate() workflow and this first version already provides support for various features via the base class (sc_data) - $tabulate() to aggregate data - $total_codes() to set/unset total codes - $recoder to recode datasets (change labels) change codes, toggle visibility of elements, reorder elements, etc. - importing german and english labels simultaniously (both languages are included in a zip download) and allowing to swhitch between them using $language<-(). New features - sdmx arcives provide a $parent column in the $fields() table which are used to represent hierarchical classifications. Previously, this was only possible with od_table() There are still some improvements. See the issue #27 for more details - properly parse time variables - currently they are treated as generic categories. - parse element annotations (detailed descriptions for classification elements) and add them to $field()$de_desc just like with OGD dataset - parse value annotations (see #39) - provide a print/fromat method - add a reasonable logic for total codes that takes the parent codes into account - fill meta$measures$fun and $meta$measures$precision based on the sdmx metadata - modify very long codes which use the @-symbol (probably for escapes) - extend documentation - possibly check SuperCROSS compability --- NAMESPACE | 1 + R/sdmx_table.R | 133 ++++++++++++++++++++++++++++++++++++++++++++++ man/sdmx_table.Rd | 20 +++++++ 3 files changed, 154 insertions(+) create mode 100644 R/sdmx_table.R create mode 100644 man/sdmx_table.Rd diff --git a/NAMESPACE b/NAMESPACE index 24eba8d7..1f60a13d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -74,6 +74,7 @@ export(sc_table_custom) export(sc_table_saved) export(sc_table_saved_list) export(sc_tabulate) +export(sdmx_table) importFrom(magrittr,"%<>%") importFrom(magrittr,"%>%") importFrom(magrittr,"%T>%") diff --git a/R/sdmx_table.R b/R/sdmx_table.R new file mode 100644 index 00000000..86e27668 --- /dev/null +++ b/R/sdmx_table.R @@ -0,0 +1,133 @@ +#' Import data from SDMX +#' +#' Function that reads STATcube data from an sdmx archive - a zip file +#' consisting of `structure.xml` with metadata and `dataset.xml` for the +#' values. +#' +#' @param file a "sdmx archive" file that was downloaded from STATcube. +#' @return An object of class `sc_data` +#' @keywords experimental +#' @export +sdmx_table <- function(file) { + sdmx_read_zip(file) %>% + sdmx_as_data_object() +} + +sdmx_read <- function(folder = ".") { + list( + meta = folder %>% sprintf('%s/structure.xml', .) %>% xml2::read_xml() %>% + xml2::xml_ns_strip(), + data = folder %>% sprintf('%s/dataset.xml', .) %>% xml2::read_xml() %>% + xml2::xml_ns_strip() + ) +} + +sdmx_read_zip <- function(zip_file) { + exdir <- tempfile() + dir.create(exdir) + on.exit(unlink(exdir, recursive = TRUE)) + unzip(zipfile = zip_file, exdir = exdir) + sdmx_read(exdir) +} + +sdmx_as_raw_df <- function(x) { + obs <- x$data %>% xml2::xml_find_all(".//ObsValue") %>% + xml2::xml_attr("value") %>% as.numeric() + val <- x$data %>% xml2::xml_find_all(".//SeriesKey//Value") %>% + xml2::xml_attr("value") + # assume that entries of SeriesKey always use the same order + val_lab <- x$data %>% xml2::xml_find_first(".//SeriesKey") %>% + xml2::xml_find_all(".//Value") %>% xml2::xml_attr("concept") + val_split <- split(val, rep( + seq_len(length(val)/length(obs)), length(obs) + )) + names(val_split) <- val_lab + n <- which(val_lab == "MEASURES_DIMENSION") + obs_split <- split(obs, val_split[[n]]) + res <- c( + lapply(val_split[-n], function(x) { + u <- head(x, length(obs_split[[1]])) + factor(u, unique(u)) + }), + obs_split + ) %>% vctrs::new_data_frame() + names(res) <- gsub("F-DATA_", "", names(res)) + res +} + +sdmx_fields <- function(x) { + fields <- x$meta %>% + xml2::xml_find_all(".//CodeList[(@id and starts-with(@id, 'C-'))]") + lapply(fields, function(field) { + names <- xml2::xml_find_all(field, ".//Name") %>% + xml2::xml_text() + codes <- xml2::xml_find_all(field, "Code") + values <- xml2::xml_attr(codes, "value") + desc <- codes %>% xml2::xml_find_all("Description") %>% + xml2::xml_text() + ind_de <- seq(1, length(desc), 2) + ind_en <- seq(2, length(desc), 2) + parent <- xml2::xml_attr(codes, "parentCode") + list( + id = field %>% xml2::xml_attr("id"), + label_en = names[2], + label_de = names[1], + elements = vctrs::new_data_frame(list( + label = desc[ind_en], + code = values, + parsed = desc[ind_en], + label_de = desc[ind_de], + label_en = desc[ind_en], + parent = factor(parent, levels = values), + de_desc = rep(NA_character_, length(values)), + en_desc = rep(NA_character_, length(values)) + )) + ) + }) +} + +sdmx_meta <- function(x) { + code_measure <- x$meta %>% xml2::xml_find_all( + ".//*[(@id = 'CL_MEASURES_DIMENSION')]/Code") %>% xml2::xml_attr("value") + label_measure <- x$meta %>% xml2::xml_find_all( + ".//*[(@id = 'CL_MEASURES_DIMENSION')]/Code/Description") %>% + xml2::xml_text() + code_db <- xml2::xml_find_all(x$meta, ".//ConceptScheme") %>% + xml2::xml_attr("id") + label_dataset <- x$meta %>% xml2::xml_find_all(".//ConceptScheme/Name") %>% + xml2::xml_text() + ind_de <- seq(1, length(label_measure), 2) + ind_en <- seq(2, length(label_measure), 2) + list( + source = data_frame(label = label_dataset[2], code = code_db, + lang = "en", label_de = label_dataset[1], + label_en = label_dataset[2]), + measures = data_frame( + label = label_measure[ind_en], + code = code_measure, + label_de = label_measure[ind_de], + label_en = label_measure[ind_en], + NAs = rep(0, length(code_measure)) + ) + ) +} + +sdmx_as_data_object <- function(x) { + df <- sdmx_as_raw_df(x) + fields <- sdmx_fields(x) + meta <- sdmx_meta(x) + meta$fields <- data_frame( + code = vapply(fields, function(x) x$id, ""), + label = vapply(fields, function(x) x$label_en, "") , + label_de = vapply(fields, function(x) x$label_de, ""), + label_en = vapply(fields, function(x) x$label_en, ""), + total_code = rep(NA, length(fields)), + nitems = sapply(fields, function(x) {nrow(x$elements)}), + type = rep("Category", length(fields)) + ) + fields2 <- lapply(fields, function(x) x$elements) + names(df) <- c(meta$fields$code, meta$measures$code) + y <- sc_data$new(df, meta, fields2) + y$language <- "en" + y +} diff --git a/man/sdmx_table.Rd b/man/sdmx_table.Rd new file mode 100644 index 00000000..45e8090a --- /dev/null +++ b/man/sdmx_table.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sdmx_table.R +\name{sdmx_table} +\alias{sdmx_table} +\title{Import data from SDMX} +\usage{ +sdmx_table(file) +} +\arguments{ +\item{file}{a "sdmx archive" file that was downloaded from STATcube.} +} +\value{ +An object of class \code{sc_data} +} +\description{ +Function that reads STATcube data from an sdmx archive - a zip file +consisting of \code{structure.xml} with metadata and \code{dataset.xml} for the +values. +} +\keyword{experimental} From 4640def740fc7ebb909ef3e6cb830f769654e917 Mon Sep 17 00:00:00 2001 From: Gregor de Cillia Date: Wed, 5 Apr 2023 08:20:14 +0200 Subject: [PATCH 69/84] sdmx: import x$field()$en_desc import annotations from the sdmx metadata and make them available as an additional column in field() --- R/sdmx_table.R | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/R/sdmx_table.R b/R/sdmx_table.R index 86e27668..ae2ea5f1 100644 --- a/R/sdmx_table.R +++ b/R/sdmx_table.R @@ -65,6 +65,13 @@ sdmx_fields <- function(x) { values <- xml2::xml_attr(codes, "value") desc <- codes %>% xml2::xml_find_all("Description") %>% xml2::xml_text() + ann_list <- codes %>% + xml2::xml_find_all(".//common:AnnotationText", flatten = FALSE) %>% + lapply(xml2::xml_text) + has_ann <- vapply(ann_list, length, 0) > 0 + ann_de <- ann_en <- rep(NA_character_, length(ann_list)) + ann_de[has_ann] <- vapply(ann_list[has_ann], function(x) {x[1]}, "") + ann_en[has_ann] <- vapply(ann_list[has_ann], function(x) {x[2]}, "") ind_de <- seq(1, length(desc), 2) ind_en <- seq(2, length(desc), 2) parent <- xml2::xml_attr(codes, "parentCode") @@ -79,8 +86,8 @@ sdmx_fields <- function(x) { label_de = desc[ind_de], label_en = desc[ind_en], parent = factor(parent, levels = values), - de_desc = rep(NA_character_, length(values)), - en_desc = rep(NA_character_, length(values)) + de_desc = ann_de, + en_desc = ann_en )) ) }) From 99ece7fff57b2cddb4996931ccabf41a8e458b61 Mon Sep 17 00:00:00 2001 From: Gregor de Cillia Date: Wed, 5 Apr 2023 08:28:29 +0200 Subject: [PATCH 70/84] sdmx parse "prepared" timestamp --- R/sdmx_table.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/R/sdmx_table.R b/R/sdmx_table.R index ae2ea5f1..d3a81d1c 100644 --- a/R/sdmx_table.R +++ b/R/sdmx_table.R @@ -105,10 +105,12 @@ sdmx_meta <- function(x) { xml2::xml_text() ind_de <- seq(1, length(label_measure), 2) ind_en <- seq(2, length(label_measure), 2) + prepared <- x$meta %>% xml2::xml_find_all(".//message:Prepared") %>% + xml2::xml_text() %>% as.POSIXct(format = "%FT%T") list( source = data_frame(label = label_dataset[2], code = code_db, lang = "en", label_de = label_dataset[1], - label_en = label_dataset[2]), + label_en = label_dataset[2], prepared = prepared), measures = data_frame( label = label_measure[ind_en], code = code_measure, From c283ff89e944a1f4b3c059969812e3c549d555bb Mon Sep 17 00:00:00 2001 From: Gregor de Cillia Date: Wed, 5 Apr 2023 08:29:18 +0200 Subject: [PATCH 71/84] add R6 class for sdmx_table() --- R/sdmx_table.R | 75 +++++++++++++++++++++++++++++++++++++------------- 1 file changed, 56 insertions(+), 19 deletions(-) diff --git a/R/sdmx_table.R b/R/sdmx_table.R index d3a81d1c..d4686531 100644 --- a/R/sdmx_table.R +++ b/R/sdmx_table.R @@ -9,8 +9,7 @@ #' @keywords experimental #' @export sdmx_table <- function(file) { - sdmx_read_zip(file) %>% - sdmx_as_data_object() + sdmx_table_class$new(file) } sdmx_read <- function(folder = ".") { @@ -121,22 +120,60 @@ sdmx_meta <- function(x) { ) } -sdmx_as_data_object <- function(x) { - df <- sdmx_as_raw_df(x) - fields <- sdmx_fields(x) - meta <- sdmx_meta(x) - meta$fields <- data_frame( - code = vapply(fields, function(x) x$id, ""), - label = vapply(fields, function(x) x$label_en, "") , - label_de = vapply(fields, function(x) x$label_de, ""), - label_en = vapply(fields, function(x) x$label_en, ""), - total_code = rep(NA, length(fields)), - nitems = sapply(fields, function(x) {nrow(x$elements)}), - type = rep("Category", length(fields)) +format.sdmx_table <- function(x, ...) { + c( + cli::style_bold(strwrap(x$meta$source$label)), + "", + cli_dl2(list( + Database = cli::style_hyperlink( + x$meta$source$code, sprintf( + "https://statcube.at/statistik.at/ext/statcube/openinfopage?id=%s", + x$meta$source$code) + ), + Measures = x$meta$measures$label, + Fields = x$meta$fields$label + )), + "", + cli_dl2(list( + Downloaded = cli_class(x$meta$source$prepared, "timestamp"), + STATcubeR = cli_class(x$meta$source$scr_version, "version") + )) ) - fields2 <- lapply(fields, function(x) x$elements) - names(df) <- c(meta$fields$code, meta$measures$code) - y <- sc_data$new(df, meta, fields2) - y$language <- "en" - y } + +sdmx_table_class <- R6::R6Class( + classname = "sdmx_table", class = TRUE, + inherit = sc_data, + list( + initialize = function(file) { + x <- sdmx_read_zip(file) + df <- sdmx_as_raw_df(x) + fields <- sdmx_fields(x) + meta <- sdmx_meta(x) + meta$fields <- data_frame( + code = vapply(fields, function(x) x$id, ""), + label = vapply(fields, function(x) x$label_en, "") , + label_de = vapply(fields, function(x) x$label_de, ""), + label_en = vapply(fields, function(x) x$label_en, ""), + total_code = rep(NA, length(fields)), + nitems = sapply(fields, function(x) {nrow(x$elements)}), + type = rep("Category", length(fields)) + ) + fields2 <- lapply(fields, function(x) x$elements) + names(df) <- c(meta$fields$code, meta$measures$code) + super$initialize(df, meta, fields2) + self$language <- "en" + private$p_xml <- x + } + ), + list(p_xml = NULL), + list( + xml = function() { private$p_xml }, + description = function() { + self$xml$meta %>% xml2::xml_find_first( + sprintf(".//ConceptScheme/Description[(@xml:lang='%s')]", self$language) + ) %>% xml2::xml_text() + } + ) +) + From 807288534a7a0d3278b78fb2a3308e9af84a0c40 Mon Sep 17 00:00:00 2001 From: Gregor de Cillia Date: Wed, 5 Apr 2023 08:29:40 +0200 Subject: [PATCH 72/84] require vctrs vrsion 0.5.2 or higher --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 0e259f19..842fc6fa 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -21,7 +21,7 @@ Imports: jsonlite, magrittr, pillar (>= 1.5.0), - vctrs + vctrs (>= 0.5.2) Suggests: spelling, data.tree, From 3adf5206fc1668278f4ccfd26ddbdf939dcb1926 Mon Sep 17 00:00:00 2001 From: Gregor de Cillia Date: Wed, 5 Apr 2023 18:30:25 +0200 Subject: [PATCH 73/84] safeguard against infinite recursion --- R/table_as_data_frame.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/R/table_as_data_frame.R b/R/table_as_data_frame.R index 82765381..31a9414d 100644 --- a/R/table_as_data_frame.R +++ b/R/table_as_data_frame.R @@ -1,4 +1,6 @@ unlist_n <- function(x, times) { + if (times <= 0) + return(x) x <- unlist(x, recursive = FALSE) if (times == 1) return(x) From 777f2ddc4c7011b2047ba5fa31052a5c3b848af1 Mon Sep 17 00:00:00 2001 From: Gregor de Cillia Date: Wed, 5 Apr 2023 18:31:24 +0200 Subject: [PATCH 74/84] export print method for sdmx_table --- NAMESPACE | 1 + R/sdmx_table.R | 1 + 2 files changed, 2 insertions(+) diff --git a/NAMESPACE b/NAMESPACE index 1f60a13d..0b497d75 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -6,6 +6,7 @@ S3method(as.character,sc_schema_uri) S3method(as.data.frame,sc_data) S3method(format,pillar_shaft_ogd_file) S3method(format,sc_schema_uri) +S3method(format,sdmx_table) S3method(pillar_shaft,ogd_file) S3method(pillar_shaft,sc_dttm) S3method(pillar_shaft,sc_schema_type) diff --git a/R/sdmx_table.R b/R/sdmx_table.R index d4686531..848f83fa 100644 --- a/R/sdmx_table.R +++ b/R/sdmx_table.R @@ -120,6 +120,7 @@ sdmx_meta <- function(x) { ) } +#' @export format.sdmx_table <- function(x, ...) { c( cli::style_bold(strwrap(x$meta$source$label)), From 335db5795fe13e3d05d977768564c2da7bf65578 Mon Sep 17 00:00:00 2001 From: Gregor de Cillia Date: Thu, 6 Apr 2023 11:02:46 +0200 Subject: [PATCH 75/84] gh-actions: bump ubuntu versions ubuntu 18.04 is no loger supported on gh-actions since 2023-04-01 bump up all the version numbers by two years to check 22.04 and 20.04 instead of 20.04 and 18.04 https://github.com/actions/runner-images/issues/6002 --- .github/workflows/R-CMD-check.yaml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index 3aee3815..55b3ab21 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -29,12 +29,12 @@ jobs: - {os: windows-latest, r: '3.6'} # Use older ubuntu to maximise backward compatibility - - {os: ubuntu-20.04, r: 'devel', http-user-agent: 'release'} - - {os: ubuntu-20.04, r: 'release'} - - {os: ubuntu-18.04, r: 'oldrel-1'} - - {os: ubuntu-20.04, r: 'oldrel-2'} - - {os: ubuntu-20.04, r: 'oldrel-3'} - - {os: ubuntu-20.04, r: 'oldrel-4'} + - {os: ubuntu-22.04, r: 'devel', http-user-agent: 'release'} + - {os: ubuntu-22.04, r: 'release'} + - {os: ubuntu-20.04, r: 'oldrel-1'} + - {os: ubuntu-22.04, r: 'oldrel-2'} + - {os: ubuntu-22.04, r: 'oldrel-3'} + - {os: ubuntu-22.04, r: 'oldrel-4'} env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} From 8e67fc89dddeaa51b4408bd675351e36f0168e70 Mon Sep 17 00:00:00 2001 From: Gregor de Cillia Date: Thu, 6 Apr 2023 11:03:08 +0200 Subject: [PATCH 76/84] whitelist SDMX in spellchecks --- inst/WORDLIST | 2 ++ 1 file changed, 2 insertions(+) diff --git a/inst/WORDLIST b/inst/WORDLIST index 33ac485a..f8d3d71d 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -70,3 +70,5 @@ webscraping wingarc yaml dataset +SDMX +sdmx From 5342a12e31b41fe91ef6e8a7f6890b7077baa856 Mon Sep 17 00:00:00 2001 From: Gregor de Cillia Date: Wed, 12 Apr 2023 10:54:55 +0200 Subject: [PATCH 77/84] sdmx: fix long-to-wide logic in cases where several measures and several fields are involved, the previous logic produced incorrect tabulations of the data --- R/sdmx_table.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/sdmx_table.R b/R/sdmx_table.R index 848f83fa..7152a7df 100644 --- a/R/sdmx_table.R +++ b/R/sdmx_table.R @@ -43,9 +43,10 @@ sdmx_as_raw_df <- function(x) { names(val_split) <- val_lab n <- which(val_lab == "MEASURES_DIMENSION") obs_split <- split(obs, val_split[[n]]) + ind <- val_split[[n]] == val_split[[n]][1] res <- c( lapply(val_split[-n], function(x) { - u <- head(x, length(obs_split[[1]])) + u <- x[ind] factor(u, unique(u)) }), obs_split From 2555c2005b70a12bc4823712b4f04eadccc02d3d Mon Sep 17 00:00:00 2001 From: Gregor de Cillia Date: Wed, 12 Apr 2023 11:38:19 +0200 Subject: [PATCH 78/84] sdmx: add demo dataset and @examples --- R/sdmx_table.R | 11 +++++++++++ inst/sdmx/README.md | 6 ++++++ inst/sdmx/dedemo.zip | Bin 0 -> 3739 bytes man/sdmx_table.Rd | 12 ++++++++++++ 4 files changed, 29 insertions(+) create mode 100644 inst/sdmx/README.md create mode 100644 inst/sdmx/dedemo.zip diff --git a/R/sdmx_table.R b/R/sdmx_table.R index 7152a7df..b3294fc2 100644 --- a/R/sdmx_table.R +++ b/R/sdmx_table.R @@ -7,6 +7,17 @@ #' @param file a "sdmx archive" file that was downloaded from STATcube. #' @return An object of class `sc_data` #' @keywords experimental +#' @examples +#' x <- "sdmx/dedemo.zip" %>% system.file(package = "STATcubeR") %>% sdmx_table() +#' # print and tabulate +#' x +#' x$tabulate() +#' # explore hierarchies +#' nuts2 <- x$field("C-B00-0") +#' data.frame(label = nuts2$label, +#' parent = nuts2$label[match(nuts2$parent, nuts2$code)]) +#' # extract more data from the raw xml +#' x$xml$meta %>% xml2::xml_find_first(".//Name") #' @export sdmx_table <- function(file) { sdmx_table_class$new(file) diff --git a/inst/sdmx/README.md b/inst/sdmx/README.md new file mode 100644 index 00000000..42b271aa --- /dev/null +++ b/inst/sdmx/README.md @@ -0,0 +1,6 @@ +# sdmx demo datasets + +sdmx demo data intended to be used for documentation and automated tests. +Currently, one dataset is available which was generated from +[Communes (Demo)](https://statcube.at/statistik.at/ext/statcube/openinfopage?tableId=defaulttable_dedemo) +database. diff --git a/inst/sdmx/dedemo.zip b/inst/sdmx/dedemo.zip new file mode 100644 index 0000000000000000000000000000000000000000..0ae4c0b44f89f5a9cd955999c60a5167e85384bc GIT binary patch literal 3739 zcmZ{nXEYp4`^NR?ov6`87bSXGy|+lLx-6FHSyoxyMkInq?}U)(z1JYxVwLEEs6ljA zm*t;)dY|`y&U?*=nK}2FYi7Ru&V3o{;o{L^5fTz&MViN&Vf{;Ff7U)QZ(j$PuQx;_ zz{9=Ka?g9Qh3;;pxVuCm+tD>9#a^1Zva->A2=ufr*Yf$KRDB_~j`r8Ahh$hc7j7#b zDJGONl{D_J4K8#7$pO2%CJj=zy=~#!QAaE}B%6h7Y+u8d#5mwAtX*X<4uFvwpndeq z;NPyRIY(jlyMtRM$WNafW_JV=0|cmIv8=e6f89VK1=9^+i_6Q)nb6x)wi)wS;U32v z!gw8$bdjL6GhU%jlWpY`9)gfc2Ay{8giNv0*rVv#Mur&!9$0W;ilXx%9Zr(Q6=A02oY{TtDxuNXeV6wV>rOun=1AbhJo^ zqn>y@B=hI?BE%G$W*pZhg&LXLoa=X-GfVC7=nt73j9y7g#$2(1^>gQ?oyEd}%>sl^f)m zu53_D#9~Rb5^{Rux_YJ)!ggl38r#YL%A2SPBH5^;7A)*{=EB+SDty%rz)T>PW`)S? zh_n7GkIcg0>^*ek{`#2;TW^O~%u4FniZ7S6aj037v1k|mXs6TftZ)zKa%mQ8 z-`q7w@eJD%O!q^r-py||r+>71GMUt^69d#m7*i)~H**X5h-`R?OW>I6(q7IzaaiV$#wtq@C158D|A-!shZN@X(Dn?J zTT6a20ptw2c=92+h42dUBIh5Z;$fHgzsXm2_ZO81Vh+z|^#19+s2u6=$UADbGIR?# z9h2|r`ZDD`_5kQi9*V8Ko*4Sf-w1VUh1u@!(aFt?Eh#=;Mn1_443n2-$(=SN->3PV zElg`4Bpi&i#ePeLlf5qHYfI_Xjp7n`L==>v+KWnE*lnV{8FcUX(sr_r)6xQ!aiPkM zY~Q!8k*DX$_Oqe%u8fJv9@7|!FlWAy%aiM@)ACZFcPAy{i5^IbXny~7oN_NQ^{ji+ z&WD8_0nyeiOxF8C9-S(oKbmfL_95#;H*C^Wc4{XR+`tWb*xmoJ7tQY{zv;_t9`n7Y zUWPRJ-4NaI==}4Q1QMxPMSMxO47z$*bkoUl9@}rWI~Z{XO-~Dn2riCJ+qXypVkP!Q z{Zi=rcx^3xhDk<4G%?fnmzd{$l=` znd-U^kB_DO`I0Jlfp^q94S=7eaPtXm_ zR9nlZV%oQq#wCO`Tmn>?+|zP60Nke)^XbH4WgpC#H}wu`T-nUPsNj3_aFZb;ok&Ta z51ok@Vo^uhzrTR*^(j$S^>%f_K9=O<7Gl|6{}YT-AgB+F&D`Jp(L>$HS9SO`CXHgj zM4S~n#CWxc7~Ekk+20X5OThp$p5QJ(GcSaY55%OmROqI12!{xPlQDgmH*o$9P98}& z;&o&Ul+So!PDo0TiL%7Oe;=wS!Y>Am_J`ob1F#q$vllY>RY@4Nwd*C2tjMfp=t@V5 z;OmQrSU#{Y5zi>d-`9r~nh2>TPn9q~*7ecc#|~Uic|tdVyTra0NCNhvV`CKew_hOqm6Tba@!{pNaOL54MTSoG!MlC#dZ9Z9>ys^z%6 zF$drg51aD!4}xaw9Z4$hqB6{8_o;U;f5=*t?vv@FAYXYEe^y(-Ad>(UK^+_N=;uR8 zQaP;CXP>MRu7@7>2Vi9u`37QqA{gJCT{~@_#kCd%BN8*)upLSDIXuE;zLH$C2EGg% z2udGbLQGG_HV>JU1XE1R5rA{AP?7e=;2M_KGnb_8pFMiXk5`QHS$+i!vP-c>_3@`h zI^l*gx`&Sa;igu8~NkqFcJA+N5x<{fI$4Qw>5%3=%I?9eGc(pYRXLu26l zv45bVn}x5s_Ba}0@s2y`I)RVF4Rzb_4BOeM@%Kh9;f7+H9mO3EEM5cj>%DkHX!(=b zgszPmqHqYpmy{Wrn({kx~yH)-eo08DBF;#0id`zQYW{X5A=8=3ms1-D@Eh3RMb5%bq)zREr;)_7)dy@&ZhZgvArO7ocA)H0R1Bj#CE$= z^s>DfBJj!?tVQ5A)yu=OB8V(5MY=FIM+>kBgN95JYo_HS`ByZPzPE;#xGrQjV+PHQ z-$)<`cA%giV*U(bV^wro(e=U^KAvsitPJADh+iOf~*;WqmNVk*w%;SgN7}lKau&Tws z`0XYkGGzwgMYj~F7tk<$O{><;^US~E3()R&m^~0mLX8j?X@15-O}S<|UMmF+O{bQ7 z;&;k`uK1RNRQUi~PSlUIhX$#Xy2FjQ!vWCP=>83<#v1}zt>RXTyCPzr!++9fx0i)O zOmWkyW6}1U?FK(gDjNgJDi*Fm+C+Lp-Ilix(zfogjrAT7cAf|ts1spf-Ed-I8UN4r zi{wxHeIT&E&DTNBvUi~r?V#jI=y?4`|)i%b_E?Kq!=2 z5XL5ON&w~2DTsGPo|`8(Q{Bj3GKX=4*K;%AWd`tI^39q6`phAIc&k(pDIa(fI9RYwN+GQqEpxzt&9_RO-5mYc&QV6&eUn)~5YUQ<^s zWkm6|Yhi5DXG(^=G3{p)O-q5|FsY)F?%}r1&b<{&a=z0rt8z3|^UVLomv?#>;}j)2%8Vlou~)G8rbikQb7Ej_i0d+!6pn<%X)4F8e(s(6*& ztgI@aSpN|ph!HL@3&SW3mG@bbWC1ln@I=It z4LvM1!$IX2mYi|hNO;w?*)6t!Sm&Rs4kGNmr{xYW=SO%cHmgz;BAEwmo`y=C3W$9} zY^5GXxbBQfzt` zAO3T54bAz#AML8=%q*nL3IByq)VPP5p`Y_8!2SsO(J|1fDcNWO=H@~vOZ z7G0D5B~szZTVo46EUaSFzeK{up~d=7B>Xq6{zk(8)_(=W|9|w~wtsQ\% system.file(package = "STATcubeR") \%>\% sdmx_table() +# print and tabulate +x +x$tabulate() +# explore hierarchies +nuts2 <- x$field("C-B00-0") +data.frame(label = nuts2$label, + parent = nuts2$label[match(nuts2$parent, nuts2$code)]) +# extract more data from the raw xml +x$xml$meta \%>\% xml2::xml_find_first(".//Name") +} \keyword{experimental} From 53fec1eaa5df6058cdb7ece1951412e054e1dff0 Mon Sep 17 00:00:00 2001 From: Gregor de Cillia Date: Wed, 10 May 2023 06:47:19 +0200 Subject: [PATCH 79/84] + print.sdmx_description() add a print mehod for descriptions of sdmx files which are accessible like so x <- sdmx_table(...) x$description --- NAMESPACE | 1 + R/sdmx_table.R | 10 +++++++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/NAMESPACE b/NAMESPACE index 0b497d75..8336ba67 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -21,6 +21,7 @@ S3method(print,sc_schema_flatten) S3method(print,sc_table) S3method(print,sc_tibble_meta) S3method(print,sc_url) +S3method(print,sdmx_description) S3method(tbl_format_footer,sc_meta) S3method(tbl_sum,sc_meta) S3method(tbl_sum,sc_tibble) diff --git a/R/sdmx_table.R b/R/sdmx_table.R index b3294fc2..b79520af 100644 --- a/R/sdmx_table.R +++ b/R/sdmx_table.R @@ -185,8 +185,16 @@ sdmx_table_class <- R6::R6Class( description = function() { self$xml$meta %>% xml2::xml_find_first( sprintf(".//ConceptScheme/Description[(@xml:lang='%s')]", self$language) - ) %>% xml2::xml_text() + ) %>% xml2::xml_text() %>% `class<-`("sdmx_description") } ) ) +#' @export +print.sdmx_description <- function(x, ...) { + desc <- strsplit(x, "\r\n")[[1]] + desc <- ifelse(grepl("^.)", desc), + cli::style_bold(substring(desc, 4)), + desc) + cat(desc, sep = "\n") +} From 215b05ac08f0f6a406b6edeec07ea5d64d2283db Mon Sep 17 00:00:00 2001 From: Gregor de Cillia Date: Wed, 10 May 2023 06:52:40 +0200 Subject: [PATCH 80/84] sdmx_table: simplify codes for some reason, sdmx archives use escapes in the database ids such that some characters are substututed like this \x5f -> 5f@ undo this in the parser for the underscore character, so the link in the print method correctly references a STATcube table also, shorten the codes used in $field()$code to omit everything before the underscore TODO: check if shortening field codes like this might lead to duplicate codes --- R/sdmx_table.R | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/R/sdmx_table.R b/R/sdmx_table.R index b79520af..63d7762e 100644 --- a/R/sdmx_table.R +++ b/R/sdmx_table.R @@ -44,7 +44,7 @@ sdmx_as_raw_df <- function(x) { obs <- x$data %>% xml2::xml_find_all(".//ObsValue") %>% xml2::xml_attr("value") %>% as.numeric() val <- x$data %>% xml2::xml_find_all(".//SeriesKey//Value") %>% - xml2::xml_attr("value") + xml2::xml_attr("value") %>% sdmx_codes() # assume that entries of SeriesKey always use the same order val_lab <- x$data %>% xml2::xml_find_first(".//SeriesKey") %>% xml2::xml_find_all(".//Value") %>% xml2::xml_attr("concept") @@ -92,11 +92,11 @@ sdmx_fields <- function(x) { label_de = names[1], elements = vctrs::new_data_frame(list( label = desc[ind_en], - code = values, + code = sdmx_codes(values), parsed = desc[ind_en], label_de = desc[ind_de], label_en = desc[ind_en], - parent = factor(parent, levels = values), + parent = factor(parent, levels = values, labels = sdmx_codes(values)), de_desc = ann_de, en_desc = ann_en )) @@ -111,7 +111,7 @@ sdmx_meta <- function(x) { ".//*[(@id = 'CL_MEASURES_DIMENSION')]/Code/Description") %>% xml2::xml_text() code_db <- xml2::xml_find_all(x$meta, ".//ConceptScheme") %>% - xml2::xml_attr("id") + xml2::xml_attr("id") %>% gsub("@5f@", "\x5f", .) label_dataset <- x$meta %>% xml2::xml_find_all(".//ConceptScheme/Name") %>% xml2::xml_text() ind_de <- seq(1, length(label_measure), 2) @@ -154,6 +154,14 @@ format.sdmx_table <- function(x, ...) { ) } +sdmx_codes <- function(codes) { + simplified <- gsub("^.*_", "", codes) + if (anyDuplicated(simplified)) + codes + else + simplified +} + sdmx_table_class <- R6::R6Class( classname = "sdmx_table", class = TRUE, inherit = sc_data, From 0eaca1b9c6f2facacd759173d5dd6370fdac7642 Mon Sep 17 00:00:00 2001 From: Gregor de Cillia Date: Wed, 10 May 2023 13:05:36 +0200 Subject: [PATCH 81/84] v0.5.2 --- DESCRIPTION | 2 +- NEWS.md | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 842fc6fa..86259bef 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: STATcubeR Title: R interface for the STATcube REST API and Open Government Data -Version: 0.5.1 +Version: 0.5.2 Authors@R: c( person("Gregor", "de Cillia", , "Gregor.deCillia@statistik.gv.at", role = c("aut", "cre")), person("Bernhard", "Meindl", , "Bernhard.Meindl@statistik.gv.at", role = "ctb"), diff --git a/NEWS.md b/NEWS.md index b85e82ba..13c5213f 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,11 +1,15 @@ -# STATcubeR 0.6.0 +# Upcoming (0.6.0) * Update print methods with the `{tibble}` package (#32) + +# STATcubeR 0.5.2 + * Add filters and other recodes to `sc_table_custom()` (#33) * Add global option `STATcubeR.language` to override the default language * `od_table()`: Add descriptions to `x$header` and `x$field(i)` * Depend on cli >= 3.4.1 (@matmo, #35) * Allow json strings in `sc_table()` (@matmo, #36) +* add `sdmx_table()` to import sdmx archives (.zip) # STATcubeR 0.5.0 From ca0a399ce741ef5285d6a5033d51f58aaf18ae10 Mon Sep 17 00:00:00 2001 From: Gregor de Cillia Date: Thu, 11 May 2023 16:24:25 +0200 Subject: [PATCH 82/84] sdmx: fix code simplification avoid inconsistencies between x$code and x$field(). Before this fix simplification was only applied in x$field() because of the anyDulicated() check in sdmx_codes() related: 215b05a [skip ci] --- R/sdmx_table.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/sdmx_table.R b/R/sdmx_table.R index 63d7762e..cabb219d 100644 --- a/R/sdmx_table.R +++ b/R/sdmx_table.R @@ -44,7 +44,7 @@ sdmx_as_raw_df <- function(x) { obs <- x$data %>% xml2::xml_find_all(".//ObsValue") %>% xml2::xml_attr("value") %>% as.numeric() val <- x$data %>% xml2::xml_find_all(".//SeriesKey//Value") %>% - xml2::xml_attr("value") %>% sdmx_codes() + xml2::xml_attr("value") # assume that entries of SeriesKey always use the same order val_lab <- x$data %>% xml2::xml_find_first(".//SeriesKey") %>% xml2::xml_find_all(".//Value") %>% xml2::xml_attr("concept") @@ -58,7 +58,7 @@ sdmx_as_raw_df <- function(x) { res <- c( lapply(val_split[-n], function(x) { u <- x[ind] - factor(u, unique(u)) + factor(u, unique(u), sdmx_codes(unique(u))) }), obs_split ) %>% vctrs::new_data_frame() From 5e5aed6dc6a9cb2a316f8107105be2e07899b26f Mon Sep 17 00:00:00 2001 From: Gregor de Cillia Date: Wed, 17 May 2023 16:08:01 +0200 Subject: [PATCH 83/84] sdmx: unescape codes resolve escapes as in @f5@ -> \uf5 for all codes in numeric columns currently, there are only certain symbols whitelisted which will be resolved like this. possible improvement: escape all character sequences of this form by using a regex [skip ci] --- R/sdmx_table.R | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/R/sdmx_table.R b/R/sdmx_table.R index cabb219d..bd476a99 100644 --- a/R/sdmx_table.R +++ b/R/sdmx_table.R @@ -62,7 +62,7 @@ sdmx_as_raw_df <- function(x) { }), obs_split ) %>% vctrs::new_data_frame() - names(res) <- gsub("F-DATA_", "", names(res)) + names(res) <- gsub("F-DATA_", "", names(res)) %>% sdmx_unescape_codes() res } @@ -111,7 +111,7 @@ sdmx_meta <- function(x) { ".//*[(@id = 'CL_MEASURES_DIMENSION')]/Code/Description") %>% xml2::xml_text() code_db <- xml2::xml_find_all(x$meta, ".//ConceptScheme") %>% - xml2::xml_attr("id") %>% gsub("@5f@", "\x5f", .) + xml2::xml_attr("id") %>% sdmx_unescape_codes() label_dataset <- x$meta %>% xml2::xml_find_all(".//ConceptScheme/Name") %>% xml2::xml_text() ind_de <- seq(1, length(label_measure), 2) @@ -124,7 +124,7 @@ sdmx_meta <- function(x) { label_en = label_dataset[2], prepared = prepared), measures = data_frame( label = label_measure[ind_en], - code = code_measure, + code = sdmx_unescape_codes(code_measure), label_de = label_measure[ind_de], label_en = label_measure[ind_en], NAs = rep(0, length(code_measure)) @@ -162,6 +162,16 @@ sdmx_codes <- function(codes) { simplified } +sdmx_esc <- function(codes, char) { + int <- utf8ToInt(char) + gsub(sprintf("@%x@", int), char, codes, fixed = TRUE) +} + +sdmx_unescape_codes <- function(codes) { + codes %>% sdmx_esc("\u5f") %>% sdmx_esc("\u7c") %>% sdmx_esc("\u2b") %>% + sdmx_esc("\u2e") %>% sdmx_esc("\u23") %>% sdmx_esc("\u40") +} + sdmx_table_class <- R6::R6Class( classname = "sdmx_table", class = TRUE, inherit = sc_data, From 38d10f3fd9392167789b2074e80aaedc2a6fed8d Mon Sep 17 00:00:00 2001 From: Gregor de Cillia Date: Mon, 22 May 2023 15:04:06 +0200 Subject: [PATCH 84/84] sd_table(): don't warn for missing \n suppress warnings if there is no newline character at the end of a json request file because that is the way the server formats those files in the download options STATcubeR started doing this with 6b63a60 [ci skip] --- R/table.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/table.R b/R/table.R index 8a51453a..f1e23f8f 100644 --- a/R/table.R +++ b/R/table.R @@ -251,7 +251,7 @@ normalize_json <- function(json, json_file) { file <- NULL if (length(json) == 1 && !jsonlite::validate(json)) { file <- json - json <- readLines(file) + json <- readLines(file, warn = FALSE) } list(file = file, string = json) }