An R package for interacting with REDCap, inspired by the redcapAPI package by Benjamin Nutter.
- Outputs tibble-style data frames
- New features for
fetch_records()
- by default, ensure record ID field always returned
- by default, omit rows where all form-specific fields are empty
- optional arguments for resolving Double Data Entry to single entry per record
- simplified column classes
- REDCap categorical variables have class “character” (can optionally use “factor”)
- REDCap date variables have class “Date” (“POSIXct” used only for datetime variables)
- New function
fetch_database()
vectorizes fetch_records()
over forms. Accepts additional functions as arguments, e.g. for deriving new variables or customizing form names
- New function
generate_queries()
creates data validation queries based on branching logic specified in the project codebook. These can be run using the query_vec()
function in queryr.
Note: this package has only been tested with REDCap versions 10.0 to 12.0.
Install from GitHub with:
# install.packages("remotes")
remotes::install_github("epicentre-msf/redcap")
library(redcap)
### open an API connection (the project used here exists only for testing)
conn <- rconn(
  url = Sys.getenv("REDCAP_API_URL"),
  token = Sys.getenv("REDCAP_PKG")
)

### query the REDCap version number
redcap_version(conn)
#> [1] "12.0.29"

### metadata tables
df_dictionary <- meta_dictionary(conn)
df_fields <- meta_fields(conn)
df_forms <- meta_forms(conn)
df_arms <- meta_arms(conn)
df_events <- meta_events(conn)
df_mapping <- meta_mapping(conn)
df_repeating <- meta_repeating(conn)

### records, generally requested one form at a time
df_records <- fetch_records(conn, forms = "eligibility")

### records from every form, returned as a list with one data frame per form
db_records <- fetch_database(conn)

### information about the project and its users
df_info <- project_info(conn)
df_users <- project_users(conn)
df_dags <- project_dags(conn)
df_users_dags <- project_users_dags(conn)

### project backup and logging
backup_xml <- project_xml(conn)
backup_logs <- project_logging(conn)

### build queries from the branching logic in the project codebook
df_queries <- generate_queries(conn)

# evaluate the generated queries with the queryr package
queries_out <- queryr::query_vec(
  x = db_records,                  # REDCap database as list of data frames
  cond = df_queries$query,         # query expressions
  name = df_queries$query_id,      # name/identifier for each query
  element = df_queries$form_name,  # primary form for each query expression
  cols_base = record_id,           # columns to always include in return
  join_type = "left",              # join type if query refers to multiple forms
  join_by = "record_id"            # join key if query refers to multiple forms
)
- Customize form-specific names returned by
fetch_database()
### by default, the elements of the list returned by fetch_database() are
# named after the REDCap forms
db_records <- fetch_database(conn)
names(db_records)
#> [1] "enrolment" "eligibility" "followup"
### to customize we need a function that takes a vector of form names and
# returns a vector of custom names; form names without a custom code are
# returned unchanged
recode_forms <- function(x) {
  # lookup table: REDCap form name -> custom element name
  form_codes <- c(enrolment = "ENR", eligibility = "ELG", followup = "FUP")
  # position of each input in the lookup table (NA when there is no entry)
  idx <- match(x, names(form_codes))
  matched <- !is.na(idx)
  # start from the input so that unmatched names (and NA) pass through as-is
  out <- as.character(x)
  out[matched] <- unname(form_codes[idx[matched]])
  out
}
### pass the recoding function via the names_fn argument; the returned
# elements then carry the custom names
db_records <- fetch_database(conn, names_fn = recode_forms)
names(db_records)
#> [1] "ENR" "ELG" "FUP"
- Pass additional functions to
fetch_database()
to act on each form
library(dplyr, warn.conflicts = FALSE)
library(rlang)
### for each form, find the column giving the date and time of form completion
# (ends with "_form_dt"), and use it to derive a new date column form_date,
# placed directly after record_id
add_form_date <- function(df) {
  # find name of datetime col for given form
  col_dt <- grep("_form_dt$", names(df), value = TRUE)
  # fail early with a clear message unless exactly one candidate column exists
  if (length(col_dt) != 1L) {
    stop(
      "expected exactly one column ending in \"_form_dt\", found ",
      length(col_dt),
      call. = FALSE
    )
  }
  # derive col form_date; .data[[ ]] looks the column up by its string name
  dplyr::mutate(
    df,
    form_date = lubridate::as_date(.data[[col_dt]]),
    .after = "record_id"
  )
}
### hand add_form_date to fetch_database() through the fns argument
db_records <- fetch_database(conn, fns = list(add_form_date))
# print first few cols to show new form_date column
db_records$enrolment[, 1:5]
#> # A tibble: 3 × 5
#> record_id form_date redcap_event_name redcap_repeat_instrument redcap_repeat_instance
#> <chr> <date> <chr> <chr> <int>
#> 1 0001 2020-12-01 Enrollment <NA> NA
#> 2 0002 2020-11-25 Enrollment <NA> NA
#> 3 0003 2020-12-11 Enrollment <NA> NA