Skip to content

Commit

Permalink
parse from comment
Browse files Browse the repository at this point in the history
  • Loading branch information
tonyelhabr committed Jan 17, 2024
1 parent dbd52b2 commit 912875a
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 32 deletions.
31 changes: 20 additions & 11 deletions R/chromote-fbref.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,10 @@ WorldfootballRDynamicPage <- R6::R6Class("WorldfootballRDynamicPage", public = l
unlist(self$session$DOM$querySelectorAll(self$root_id, css)$nodeIds)
},

call_node_method = function(node_id) {
js_fun <- paste0("function() { return this.outerHTML}")
call_node_method = function(node_id, method, ...) {
js_fun <- paste0("function() { return this", method, "}")
obj_id <- self$object_id(node_id)
self$session$Runtime$callFunctionOn(js_fun, objectId = obj_id)
self$session$Runtime$callFunctionOn(js_fun, objectId = obj_id, ...)
},

object_id = function(node_id) {
Expand All @@ -41,14 +41,23 @@ WorldfootballRDynamicPage <- R6::R6Class("WorldfootballRDynamicPage", public = l
#' @importFrom purrr map_chr
#' @importFrom xml2 xml_children read_html
#' @noRd
worldfootballr_html_page <- function(x) {
stopifnot(identical(class(x), c("WorldfootballRDynamicPage", "R6")))
nodes <- x$find_nodes("table")

elements <- purrr::map_chr(nodes, function(node_id) {
json <- x$call_node_method(node_id)
json$result$value
})
worldfootballr_html_player_table <- function(session) {
  # Extract the commented-out player table from a rendered page and parse it
  # as an HTML document.
  #
  # session: a `WorldfootballRDynamicPage` object; its `find_nodes()` and
  #   `call_node_method()` methods are used to query the page.
  #
  # Returns an `xml2` document wrapping the text of the located node, or
  # `NULL` (with a warning) when the anchor elements or the table text
  # cannot be found.
  stopifnot(inherits(session, "WorldfootballRDynamicPage"))

  # NOTE(review): both anchor selectors are specific to the shooting stats
  # page -- confirm whether other stat types need different anchors.
  ## find element "above" commented out table
  node_id_above <- session$find_nodes("#stats_shooting_sh")
  ## find element "below" commented out table
  node_id_below <- session$find_nodes("#stats_shooting_control")

  # Without exactly one match per anchor the midpoint below is meaningless
  # (zero-length or recycled ids), so bail out the same way as for a missing
  # table instead of sending a bad node id to the browser.
  if (length(node_id_above) != 1L || length(node_id_below) != 1L) {
    warning(sprintf(
      "Did not find the expected anchor elements around the player table. Found %s above and %s below.",
      length(node_id_above),
      length(node_id_below)
    ))
    return(NULL)
  }

  ## find commented out element in-between
  # Presumably the comment node's id falls midway between the two anchors'
  # ids -- this is a heuristic carried over unchanged.
  node_id <- round((node_id_above + node_id_below) / 2)

  response <- session$call_node_method(node_id, ".textContent")
  elements <- response[["result"]][["value"]]
  n_elements <- length(elements)
  if (n_elements != 1) {
    warning(sprintf("Did not find the expected number of tables on the page (1). Found %s.", n_elements))
    return(NULL)
  }

  html <- paste0("<html>", paste0(elements, collapse = "\n"), "</html>")
  xml2::read_html(html)
}
39 changes: 18 additions & 21 deletions R/fb_league_stats.R
Original file line number Diff line number Diff line change
Expand Up @@ -37,33 +37,30 @@
.frequency_id = "fb_league_stats-player"
)
session <- worldfootballr_chromote_session(url)
page <- worldfootballr_html_page(session)
player_table <- worldfootballr_html_player_table(session)
session$session$close(wait_ = FALSE)
elements <- xml2::xml_children(xml2::xml_children(page))
tables <- rvest::html_table(elements)

n_tables <- length(tables)
if (n_tables != 3) {
warning(sprintf("Did not find the expected number of tables on the page (3). Found %s.", n_tables))
if (is.null(player_table)) {
return(tibble::tibble())
}
renamed_table <- .rename_fb_cols(tables[[3]])
renamed_table <- renamed_table[renamed_table$Rk != "Rk", ]
renamed_table <- .add_player_href(
renamed_table,
parent_element = elements[[3]],

player_table_elements <- xml2::xml_children(xml2::xml_children(player_table))
parsed_player_table <- rvest::html_table(player_table_elements)
renamed_player_table <- worldfootballR:::.rename_fb_cols(parsed_player_table[[1]])
renamed_table <- renamed_player_table[renamed_player_table$Rk != "Rk", ]
renamed_player_table <- worldfootballR:::.add_player_href(
renamed_player_table,
parent_element = player_table_elements,
player_xpath = ".//tbody/tr/td[@data-stat='player']/a"
)
}

suppressMessages(
readr::type_convert(
clean_table,
guess_integer = TRUE,
na = "",
trim_ws = TRUE
suppressMessages(
readr::type_convert(
renamed_player_table,
guess_integer = TRUE,
na = "",
trim_ws = TRUE
)
)
)
}
}


Expand Down

0 comments on commit 912875a

Please sign in to comment.