Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move utility functions to fuimus package #84

Open
19 of 34 tasks
andrewallenbruce opened this issue Apr 4, 2024 · 1 comment
Open
19 of 34 tasks

Move utility functions to fuimus package #84

andrewallenbruce opened this issue Apr 4, 2024 · 1 comment
Assignees

Comments

@andrewallenbruce
Copy link
Owner

andrewallenbruce commented Apr 4, 2024

Once complete, add the following to the DESCRIPTION file:

Remotes: 
    github::andrewallenbruce/fuimus

provider utils.R

  • %nn%
  • %nin%
  • format_zipcode
  • clean_credentials (rename remove_periods)
  • clean_dollars (rename remove_commas)
  • na_blank
  • yn_logical
  • tf_2_yn
  • abb2full
  • display_long
  • df2chr
  • tidyup
  • combine
  • narm
  • github_raw (duplicate of gh_raw)
  • format_cli
  • add_counties
utils.R

#' Infix operator for `if (!is.null(x)) y else x` statements
#' @param x,y description
#' @return description
#' @examples
#' ccn <- 123456
#' ccn <- ccn %nn% as.character(ccn)
#' ccn
#' @autoglobal
#' @noRd
`%nn%` <- function(x, y) if (!is.null(x)) y else x #nocov

#' Infix operator for `not in` statements
#' @return description
#' @autoglobal
#' @noRd
`%nin%` <- function(x, table) match(x, table, nomatch = 0L) == 0L #nocov

#' Format US ZIP codes
#' @param zip Nine-digit US ZIP code
#' @return ZIP code, hyphenated for ZIP+4 or 5-digit ZIP.
#' @examples
#' format_zipcode(123456789)
#' format_zipcode(12345)
#' @autoglobal
#' @noRd
format_zipcode <- function(zip) {

  zip <- as.character(zip)

  if (stringr::str_detect(zip, "^[[:digit:]]{9}$") == TRUE) {
    zip <- paste0(stringr::str_sub(zip, 1, 5), "-",
                  stringr::str_sub(zip, 6, 9))
    return(zip)
    } else {
      return(zip)
  }
}

#' Remove periods from credentials
#' @param x Character vector
#' @return Character vector with periods removed
#' @autoglobal
#' @noRd
clean_credentials <- function(x) {
  gsub("\\.", "", x)
}

#' Remove commas from dollar amounts
#' @param x Character vector
#' @return Character vector with commas removed
#' @autoglobal
#' @noRd
clean_dollars <- function(x) {
  gsub(",", "", x)
}

#' Convert empty char values to NA
#' @param x vector
#' @autoglobal
#' @noRd
na_blank <- function(x) {

  x <- dplyr::na_if(x, "")
  x <- dplyr::na_if(x, " ")
  x <- dplyr::na_if(x, "*")
  x <- dplyr::na_if(x, "--")
  x <- dplyr::na_if(x, "N/A")
  return(x)
}

#' Convert Y/N char values to logical
#' @param x vector
#' @autoglobal
#' @noRd
yn_logical <- function(x) {

  dplyr::case_match(
    x,
    c("Y", "YES", "Yes", "yes", "y", "True") ~ TRUE,
    c("N", "NO", "No", "no", "n", "False") ~ FALSE,
    .default = NA
  )
}

#' Convert TRUE/FALSE values to Y/N
#' @param x vector
#' @autoglobal
#' @noRd
tf_2_yn <- function(x) {

  dplyr::case_match(
    x,
    TRUE ~ "Y",
    FALSE ~ "N",
    .default = NULL
  )
}

#' @param abb state abbreviation
#' @return state full name
#' @autoglobal
#' @noRd
abb2full <- function(abb,
                     arg = rlang::caller_arg(abb),
                     call = rlang::caller_env()) {

  results <- dplyr::tibble(x = c(state.abb[1:8],
                                 'DC',
                                 state.abb[9:50],
                                 'AS', 'GU', 'MP', 'PR', 'VI', 'UK'),
                           y = c(state.name[1:8],
                                 'District of Columbia',
                                 state.name[9:50],
                                 'American Samoa',
                                 'Guam',
                                 'Northern Mariana Islands',
                                 'Puerto Rico',
                                 'Virgin Islands',
                                 'Unknown')) |>
    dplyr::filter(x == abb) |>
    dplyr::pull(y)

  if (vctrs::vec_is_empty(results)) {
    cli::cli_abort(c("{.val {abb}} is not a valid state abbreviation."), # nolint
                   call = call)
  }
  return(results)
}

#' Pivot data frame to long format for easy printing
#' @param df data frame
#' @param cols columns to pivot long, default is [dplyr::everything()]
#' @autoglobal
#' @export
#' @keywords internal
display_long <- function(df, cols = dplyr::everything()) {

  df |> dplyr::mutate(dplyr::across(dplyr::everything(), as.character)) |>
        tidyr::pivot_longer({{ cols }})
}

#' Convert data.frame cols to character
#' @param df data frame
#' @autoglobal
#' @export
#' @keywords internal
df2chr <- function(df) {
  df |>
    dplyr::mutate(
      dplyr::across(
        dplyr::where(is.numeric), as.character))
}

#' Tidy a Data Frame
#' @param df data frame
#' @param dtype `mdy` or `ymd`
#' @param dt convert to date, default is 'date'
#' @param yn convert to logical
#' @param int convert to integer
#' @param dbl convert to double
#' @param chr convert to character
#' @param up convert to upper case
#' @param cred remove periods
#' @param zip normalize zip code
#' @param lgl convert to logical
#' @param cma remove commas
#' @returns tidy data frame
#' @autoglobal
#' @export
#' @keywords internal
tidyup <- function(df,
                   dtype = NULL,
                   dt = "date",
                   yn = NULL,
                   int = NULL,
                   dbl = NULL,
                   chr = NULL,
                   up = NULL,
                   cred = NULL,
                   zip = NULL,
                   lgl = NULL,
                   cma = NULL) {

  x <- janitor::clean_names(df) |>
    dplyr::tibble() |>
    dplyr::mutate(dplyr::across(dplyr::everything(), stringr::str_squish),
                  dplyr::across(dplyr::where(is.character), na_blank))

  if (!is.null(dtype)) {
    if (dtype == 'mdy') x <- dplyr::mutate(x, dplyr::across(dplyr::contains(dt), ~ lubridate::mdy(.x, quiet = TRUE)))
    if (dtype == 'ymd') x <- dplyr::mutate(x, dplyr::across(dplyr::contains(dt), ~ lubridate::ymd(.x, quiet = TRUE)))
  }

  if (!is.null(cma))  x <- dplyr::mutate(x, dplyr::across(dplyr::contains(cma),  clean_dollars))
  if (!is.null(yn))   x <- dplyr::mutate(x, dplyr::across(dplyr::contains(yn),   yn_logical))
  if (!is.null(int))  x <- dplyr::mutate(x, dplyr::across(dplyr::contains(int),  as.integer))
  if (!is.null(dbl))  x <- dplyr::mutate(x, dplyr::across(dplyr::contains(dbl),  as.double))
  if (!is.null(chr))  x <- dplyr::mutate(x, dplyr::across(dplyr::contains(chr),  as.character))
  if (!is.null(up))   x <- dplyr::mutate(x, dplyr::across(dplyr::contains(up),   toupper))
  if (!is.null(cred)) x <- dplyr::mutate(x, dplyr::across(dplyr::contains(cred), clean_credentials))
  if (!is.null(zip))  x <- dplyr::mutate(x, dplyr::across(dplyr::contains(zip),  zipcodeR::normalize_zip))
  if (!is.null(lgl))  x <- dplyr::mutate(x, dplyr::across(dplyr::contains(lgl),  as.logical))

  return(x)
}

#' @param df data frame
#' @param nm new col name, unquoted
#' @param cols columns to combine
#' @param sep separator
#' @autoglobal
#' @noRd
combine <- function(df, nm, cols, sep = " ") {

  return(tidyr::unite(df, col = {{ nm }},
                      dplyr::any_of(cols),
                      remove = TRUE,
                      na.rm = TRUE,
                      sep = sep))
}

#' Remove empty rows and columns
#' @param df data frame
#' @autoglobal
#' @noRd
narm <- function(df) {
  janitor::remove_empty(df, which = c("rows", "cols"))
}

#' Return GitHub raw url
#' @param x url
#' @returns raw url
#' @examplesIf interactive()
#' github_raw("andrewallenbruce/provider/")
#' @autoglobal
#' @noRd
github_raw <- function(x) {
  paste0("https://raw.githubusercontent.com/", x)
}

#' Format empty search results
#' @param df data frame of parameter arguments
#' @autoglobal
#' @noRd
format_cli <- function(df) {

  x <- purrr::map2(df$x,
                   df$y,
                   stringr::str_c,
                   sep = " = ",
                   collapse = "")

  cli::cli_alert_danger("No results for {.val {x}}",
                        wrap = TRUE)

}

provider utils-gt.R

  • gt_entype_badge
  • gt_check_xmark
  • gt_qmark
  • gt_datadict
  • gt_prov
utils-gt.R

#' @name gt_entype_badge
#' @title entity type badge
#' @param x column
#' @autoglobal
#' @export
#' @keywords internal
# nocov start
gt_entype_badge <- function(x) {

  add_color <- if (x == "Ind") {

    "background: hsl(116, 60%, 90%); color: hsl(116, 30%, 25%);"

  } else if (x == "Org") {

    "background: hsl(350, 70%, 90%); color: hsl(350, 45%, 30%);"

  } else if (x != "Ind" | x != "Org") {
    x
  }

  div_out <- htmltools::div(
    style = paste("display: inline-block; padding: 2px 12px; border-radius: 15px; font-weight: 600; font-size: 16px;",
                  add_color),
    x)

  as.character(div_out) |> gt::html()
}

#' @name gt_check_xmark
#' @title check or x mark icon
#' @param gt_tbl gt_tbl object
#' @param cols columns in data frame
#' @autoglobal
#' @export
#' @keywords internal
gt_check_xmark <- function(gt_tbl, cols) {

  gt_tbl |>
    gt::text_case_when(
      x == TRUE ~ gt::html(
        fontawesome::fa("check",
                        prefer_type = "solid",
                        fill = "black")),
      x == FALSE ~ gt::html(
        fontawesome::fa("xmark",
                        prefer_type = "solid",
                        fill = "red")),
      .default = NA,
      .locations = gt::cells_body(
        columns = {{ cols }}))
}

#' @name gt_qmark
#' @title check, x, question mark
#' @param gt_tbl gt_tbl object
#' @param cols columns in data frame
#' @autoglobal
#' @export
#' @keywords internal
gt_qmark <- function(gt_tbl, cols) {

  gt_tbl |>
    gt::text_case_when(
      x == "Y" ~ gt::html(
        fontawesome::fa("circle-check",
                        prefer_type = "solid",
                        fill = "black",
                        height = "1.75em",
                        width = "1.75em")),
      x == "N" ~ gt::html(
        fontawesome::fa("circle-xmark",
                        prefer_type = "solid",
                        fill = "red",
                        height = "1.75em",
                        width = "1.75em")),
      x == "M" ~ gt::html(
        fontawesome::fa("circle-question",
                        prefer_type = "solid",
                        fill = "red",
                        height = "1.75em",
                        width = "1.75em")),
      .default = "",
      .locations = gt::cells_body(
        columns = {{ cols }}))
}

#' @name gt_datadict
#' @title data dictionary theme
#' @param df data frame
#' @autoglobal
#' @export
#' @keywords internal
gt_datadict <- function(df) {

  df |>
    gt::gt() |>
    gt::fmt_markdown(columns = Variable) |>
    gtExtras::gt_add_divider(
      columns = c("Variable"), # nolint
      style = "solid",
      color = "gray",
      weight = gt::px(2),
      include_labels = FALSE) |>
    gtExtras::gt_merge_stack(col1 = Description,
                             col2 = Definition,
                             small_cap = FALSE,
                             font_size = c("16px", "14px"),
                             font_weight = c("bold", "normal"),
                             palette = c("black", "darkgray")) |>
    gt::opt_stylize(style = 6,
                    color = "red",
                    add_row_striping = FALSE) |>
    gt::opt_table_lines(extent = "default") |>
    gt::opt_table_outline(style = "none") |>
    gt::opt_table_font(font = gt::google_font(name = "Karla")) |>
    gt::tab_options(table.width = gt::pct(100))

}

#' @name gt_prov
#' @title gt theme
#' @param df data frame
#' @param divider description
#' @param title description
#' @param subtitle description
#' @param source description
#' @param checkmark description
#' @param qmark description
#' @param dollars description
#' @param pct description
#' @param pctchg description
#' @autoglobal
#' @export
#' @keywords internal
gt_prov <- function(df,
                    divider   = NULL,
                    title     = NULL,
                    subtitle  = NULL,
                    source    = NULL,
                    checkmark = NULL,
                    qmark     = NULL,
                    dollars   = NULL,
                    pct       = NULL,
                    pctchg    = NULL,
                    clean     = TRUE) {

  results <- df |>
    gt::gt() |>
    gtExtras::gt_theme_538() |>
    gt::sub_missing(columns = dplyr::everything(), missing_text = "--") |>
    gt::tab_options(table.width = gt::pct(100),
                    column_labels.background.color = "white",
                    column_labels.font.weight = "bolder",
                    heading.background.color = "white",
                    column_labels.border.top.color = "white",
                    column_labels.border.bottom.color = "black",
                    table_body.border.bottom.color = "black")

  if (clean) {
    results <- results |>
      gt::cols_label_with(fn = ~ janitor::make_clean_names(., case = "title"))
  }

  if (!is.null(divider)) {
    results <- results |>
      gtExtras::gt_add_divider(
        columns = {{ divider }},
        style = "solid",
        color = "black",
        weight = gt::px(3),
        include_labels = FALSE)
  }

  if (!is.null(title)) {
    results <- results |> gt::tab_header(title = title)
  }

  if (!is.null(subtitle)) {
    results <- results |> gt::tab_header(title = title, subtitle = subtitle)
  }

  if (!is.null(source)) results <- gt::tab_source_note(results, source_note = source) # nolint

  if (!is.null(checkmark)) {
    results <- results |> gt_check_xmark(cols = checkmark)
  }

  if (!is.null(qmark)) {
    results <- results |> gt_qmark(cols = qmark)
  }

  if (!is.null(dollars)) {
    results <- results |>
      gt::fmt_currency(columns = {{ dollars }},
                       currency = "USD",
                       suffixing = TRUE,
                       sep_mark = ",",
                       incl_space = TRUE)
  }

  if (!is.null(pct)) {
    results <- results |> gt::fmt_percent(columns = {{ pct }}, decimals = 0)
  }

  if (!is.null(pctchg)) {
    results <- results |>
      gt::fmt_percent(columns = {{ pctchg }},
                      decimals = 1,
                      force_sign = TRUE)
  }
  return(results)
}
# nocov end

provider calculations.R

  • gen_data
  • change
  • years_df
  • duration_vec
  • make_interval
  • summary_stats
  • chg
  • pct
  • ror
  • geomean
  • change_year
  • years_vec
calculations.R

#' Utility Functions
#'
#' @description Common utility functions
#'
#' @examples
#' # Example data
#' ex <- gen_data(2020:2025)
#' head(ex)
#'
#' # Lagged absolute/percentage change, rate of return and cumulative sum
#' # `change()`
#' dplyr::filter(ex, group == "A") |>
#' change(pay)
#'
#' # `geomean()` # Geometric mean
#' ex |>
#' dplyr::filter(group == "A") |>
#' ror(pay) |>
#' dplyr::summarise(gmean = geomean(pay_ror))
#'
#' # When performing a `group_by()`, watch for
#' # the correct order of the variables
#' ex |>
#' dplyr::group_by(group) |>
#' change(pay)
#'
#' ex |>
#' dplyr::group_by(group) |>
#' change(pay) |>
#' dplyr::summarise(mean_pay = mean(pay, na.rm = TRUE),
#'                  csm_chg  = sum(pay_chg),
#'                  csm_pct  = sum(pay_pct),
#'                  mean_ror = mean(pay_ror, na.rm = TRUE),
#'                  geomean  = geomean(pay_ror))
#'
#' # Timespans
#' dt <- dplyr::tibble(date = lubridate::today() - 366)
#'
#' # `years_df()`
#' years_df(dt, date)
#'
#' # `duration_vec()`
#' dplyr::mutate(dt, dur = duration_vec(date))
#'
#' # `make_interval()`
#' dplyr::tibble(date = lubridate::today() - 1000) |>
#' make_interval(start = date, end = lubridate::today() - 500)
#'
#'
#' # `summary_stats()`
#' sm <- dplyr::tibble(provider = sample(c("A", "B", "C"), size = 200, replace = TRUE),
#'                     city = sample(c("ATL", "NYC"), size = 200, replace = TRUE),
#'                     charges = sample(1000:2000, size = 200),
#'                     payment = sample(1000:2000, size = 200))
#'
#' head(sm)
#'
#' summary_stats(sm,
#'               condition    = city == "ATL",
#'               group_vars   = provider,
#'               summary_vars = c(charges, payment),
#'               arr          = provider)
#'
#' @returns [tibble()] or vector
#' @name calculations
#' @keywords internal
NULL

#' @param df data frame
#' @param cols numeric columns to calculate absolute/relative change & rate of return
#' @param csm numeric cols to calculate cumulative sum for
#' @param digits Number of digits to round to, default is 3
#' @rdname calculations
#' @examplesIf interactive()
#' dplyr::filter(ex, group == "A") |>
#' change(pay)
#' @autoglobal
#' @export
#' @keywords internal
change <- function(df, cols, csm = NULL, digits = 5) {

  results <- dplyr::mutate(df,
    dplyr::across({{ cols }}, list(
      chg = \(x) chg(x),
      pct = \(x) pct(x)),
      .names = "{.col}_{.fn}")) |>
    dplyr::mutate(dplyr::across(dplyr::ends_with("_pct"), ~ .x + 1, .names = "{.col}_ror")) |>
    dplyr::mutate(dplyr::across(dplyr::where(is.double), ~janitor::round_half_up(., digits = digits))) |>
    dplyr::relocate(dplyr::ends_with("_chg"), dplyr::ends_with("_pct"), dplyr::ends_with("_pct_ror"), .after = {{ cols }})

  names(results) <- gsub("_pct_ror", "_ror", names(results))

  if (!is.null(csm)) {
    results <- dplyr::mutate(results,
               dplyr::across(dplyr::ends_with({{ csm }}), list(cusum = \(x) cumsum(x)), .names = "{.col}_{.fn}"))
  }
  return(results)
}

#' Lagged absolute change
#' @param x numeric vector
#' @param n values to offset
#' @param fill_na fill value for any NAs; default is 0
#' @rdname calculations
#' @examplesIf interactive()
#' dplyr::filter(ex, group == "A") |>
#' dplyr::mutate(change = chg(pay))
#' @autoglobal
#' @export
#' @keywords internal
chg <- function(x, n = 1L, fill_na = 0L) {
  lg  <- dplyr::lag(x, n = n)
  res <- (x - lg)
  if (!is.na(fill_na)) res[is.na(res)] <- fill_na
  return(res)
}

#' Lagged percentage change
#' @param x numeric vector
#' @param n values to offset
#' @param fill_na fill value for any NAs; default is 0
#' @rdname calculations
#' @examplesIf interactive()
#' dplyr::filter(ex, group == "A") |>
#' dplyr::mutate(pct_change = pct(pay))
#' @autoglobal
#' @export
#' @keywords internal
pct <- function(x, n = 1L, fill_na = 0L) {
  lg <- dplyr::lag(x, n = n)
  res <- (x - lg) / lg
  if (!is.na(fill_na)) res[is.na(res)] <- fill_na
  return(res)
}

#' Lagged rate of return
#' @param df data frame
#' @param col numeric column
#' @param n values to offset
#' @rdname calculations
#' @examplesIf interactive()
#' dplyr::filter(ex, group == "A") |>
#' ror(pay)
#'
#' ex |>
#' dplyr::group_by(group) |>
#' ror(pay)
#' @autoglobal
#' @export
#' @keywords internal
ror <- function(df, col, n = 1L) {
  dplyr::mutate(df,
                copy = dplyr::if_else({{ col }} == 0, 1, {{ col }}),
                lg = dplyr::lag(copy, n = n),
                "{{ col }}_ror" := copy / lg,
                copy = NULL,
                lg = NULL, .after = {{ col }})
}

#' Calculate geometric mean (average rate of return)
#' For use in conjunction with [ror()]
#' @param x numeric vector
#' @rdname calculations
#' @examplesIf interactive()
#' dplyr::filter(ex, group == "A") |>
#' ror(pay) |>
#' dplyr::summarise(gmean = geomean(pay_ror))
#'
#' ex |>
#' dplyr::group_by(group) |>
#' ror(pay) |>
#' dplyr::summarise(gmean = geomean(pay_ror))
#' @autoglobal
#' @export
#' @keywords internal
geomean <- function(x) exp(mean(log(x), na.rm = TRUE))

#' Calculate lagged values by column
#' @param df data frame
#' @param col column of numeric values to calculate lag
#' @param by column to calculate lag by
#' @param digits Number of digits to round to
#' @rdname calculations
#' @examplesIf interactive()
#' dplyr::filter(ex, group == "A") |>
#' change_year(pay, year)
#' @autoglobal
#' @noRd
change_year <- function(df, col, by = year, digits = 3) {

  newcol <- rlang::englue("{{col}}_chg")
  newcol <- rlang::sym(newcol)

  df |>
    dplyr::mutate(
      "{{ col }}_chg" := {{ col }} - dplyr::lag({{ col }},
                                                order_by = {{ by }}),
      "{{ col }}_pct" := !!newcol / dplyr::lag({{ col }}, order_by = {{ by }}),
      .after = {{ col }}) |>
    dplyr::mutate(dplyr::across(
      dplyr::where(is.double), ~janitor::round_half_up(., digits = digits)))
}

#' Calculate number of years since today's date
#' @param df data frame
#' @param date_col date column
#' @rdname calculations
#' @autoglobal
#' @export
#' @keywords internal
years_df <- function(df, date_col) {

  df |>
    dplyr::mutate(
      years_passed = round(
        as.double(
          difftime(
            lubridate::today(),
            {{ date_col }},
            units = "weeks",
            tz = "UTC")) / 52.17857, 2),
      .after = {{ date_col }})
}

#' Calculate number of years since today's date
#' @param date_col date column
#' @rdname calculations
#' @examplesIf interactive()
#' dt <- dplyr::tibble(date = lubridate::today() - 366)
#' dplyr::mutate(dt, years = years_vec(date))
#' @autoglobal
#' @export
#' @keywords internal
years_vec <- function(date_col) {
  round(
    as.double(
      difftime(
        lubridate::today(),
        date_col,
        units = "weeks",
        tz = "UTC")) / 52.17857, 2)
}

#' Calculate duration since today's date
#' @param date_col date column
#' @rdname calculations
#' @examplesIf interactive()
#' dplyr::tibble(date = lubridate::today() - 366,
#'               date2 = date - 789) |>
#' dplyr::mutate(dur = duration_vec(date),
#'               dur2 = duration_vec(date2))
#' @autoglobal
#' @export
#' @keywords internal
duration_vec <- function(date_col) {
  date <- difftime(date_col, lubridate::today(), units = "auto", tz = "UTC")
  date <- lubridate::as.duration(date)
  return(date)
}

#' Create interval and period columns from a start and end date
#' @param df data frame
#' @param start start date column
#' @param end end date column
#' @rdname calculations
#' @examplesIf interactive()
#' dt <- dplyr::tibble(date = lubridate::today() - 366)
#' make_interval(dt, start = date)
#' @autoglobal
#' @export
#' @keywords internal
# nocov start
make_interval <- function(df, start, end = lubridate::today()) {
  dplyr::mutate(df,
                interval = lubridate::interval(
                  lubridate::ymd({{ start }}), lubridate::ymd({{ end }})),
                period = lubridate::as.period(interval),
                timelength_days = lubridate::time_length(interval, unit = "days"))
}
# nocov end
#' Summary stats
#' @param df data frame
#' @param condition filter condition, i.e. `patient == "new"`
#' @param group_vars variables to group by, i.e. `c(specialty, state, hcpcs, cost)`
#' @param summary_vars variables to summarise, i.e. `c(min, max, mode, range)`
#' @param arr column to arrange data by, i.e. `cost`
#' @param digits Number of digits to round to, default is 3
#' @rdname calculations
#' @autoglobal
#' @export
#' @keywords internal
summary_stats <- function(df,
                          condition = NULL,
                          group_vars = NULL,
                          summary_vars = NULL,
                          arr = NULL,
                          digits = 3) {

  results <- df |>
    dplyr::filter({{ condition }}) |>
    dplyr::summarise(
      dplyr::across({{ summary_vars }},
                    list(median = \(x) stats::median(x, na.rm = TRUE),
                         mean = \(x) mean(x, na.rm = TRUE),
                         sd = \(x) stats::sd(x, na.rm = TRUE)),
                    .names = "{.col}_{.fn}"),
      n = dplyr::n(),
      .by = ({{ group_vars }})) |>
    dplyr::arrange(dplyr::desc({{ arr }})) |>
    dplyr::mutate(dplyr::across(
      dplyr::where(is.double), ~janitor::round_half_up(., digits = digits)))

  return(results)
}


#' Generate tibble of data for testing
#' @param years sequence of years, e.g. `2010:2020`
#' @rdname calculations
#' @returns tibble
#' @autoglobal
#' @export
#' @keywords internal
# nocov start
gen_data <- function(years) {
  lng <- length(years) * 2
  vctrs::vec_rbind(
    dplyr::tibble(year = {{ years }}, group = "A"),
    dplyr::tibble(year = {{ years }}, group = "B")) |>
    dplyr::mutate(pay = sample(1000:2000, lng))
}
# nocov end

@andrewallenbruce andrewallenbruce self-assigned this Apr 4, 2024
@andrewallenbruce
Copy link
Owner Author

Add Helper function to transpose yearly data

library(provider)
library(dplyr)
library(tidyr)

performance <- utilization_(npi = 1043477615, type = "provider") |> 
  unnest(performance) |> 
  mutate(year = as.integer(year)) |> 
  select(year, tot_hcpcs:.pymt_per_srvc)

#> # A tibble: 8 × 12
#>    year tot_hcpcs tot_benes tot_srvcs tot_charges tot_allowed tot_payment
#>   <int>     <int>     <int>     <int>       <dbl>       <dbl>       <dbl>
#> 1  2014        45       598       823      319401      42429.      33775.
#> 2  2015        54      1042      1449      551630      82729.      64720.
#> 3  2016        62       619      1000      653517     111283.      87144.
#> 4  2017        65       606       972      460677      88160.      68173.
#> 5  2018        54       505      1034      504640     102857.      80079.
#> 6  2019        55       532      1252      617797     134101.     104987.
#> 7  2020        57       650      1260      482488     106512.      81868.
#> 8  2021        58       748      1369      444671      97159.      75295.
#> # ℹ 5 more variables: tot_std_pymt <dbl>, .copay_deduct <dbl>,
#> #   .srvcs_per_bene <dbl>, .pymt_per_bene <dbl>, .pymt_per_srvc <dbl>

performance |> 
  pivot_longer(!year,
               names_to = "measure", 
               values_to = "value") |> 
  pivot_wider(names_from = year, 
              values_from = value)

#> # A tibble: 11 × 9
#>    measure            `2014`    `2015` `2016` `2017` `2018` `2019` `2020` `2021`
#>    <chr>               <dbl>     <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
#>  1 tot_hcpcs           45        54    6.2 e1 6.5 e1 5.4 e1 5.5 e1 5.7 e1 5.8 e1
#>  2 tot_benes          598      1042    6.19e2 6.06e2 5.05e2 5.32e2 6.5 e2 7.48e2
#>  3 tot_srvcs          823      1449    1   e3 9.72e2 1.03e3 1.25e3 1.26e3 1.37e3
#>  4 tot_charges     319401    551630    6.54e5 4.61e5 5.05e5 6.18e5 4.82e5 4.45e5
#>  5 tot_allowed      42429.    82729.   1.11e5 8.82e4 1.03e5 1.34e5 1.07e5 9.72e4
#>  6 tot_payment      33775.    64720.   8.71e4 6.82e4 8.01e4 1.05e5 8.19e4 7.53e4
#>  7 tot_std_pymt     34339.    66427.   8.93e4 6.95e4 8.37e4 1.06e5 8.26e4 7.55e4
#>  8 .copay_deduct     8654.    18009.   2.41e4 2.00e4 2.28e4 2.91e4 2.46e4 2.19e4
#>  9 .srvcs_per_bene      1.38      1.39 1.62e0 1.60e0 2.05e0 2.35e0 1.94e0 1.83e0
#> 10 .pymt_per_bene      56.5      62.1  1.41e2 1.12e2 1.59e2 1.97e2 1.26e2 1.01e2
#> 11 .pymt_per_srvc      41.0      44.7  8.71e1 7.01e1 7.74e1 8.39e1 6.50e1 5.50e1

@andrewallenbruce andrewallenbruce changed the title Move utility functions to {himni} package Move utility functions to fuimus package Apr 10, 2024
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant