handle for non-existent data

JaseZiv · Jan 14, 2024 · f8e4160 · f8e4160
1 parent 22069a7
commit f8e4160
Showing 1 changed file with 38 additions and 37 deletions.
diff --git a/R/tm_player_absence.R b/R/tm_player_absence.R
@@ -2,55 +2,60 @@
 #' @importFrom xml2 read_html
 #' @importFrom purrr pluck
 #' @importFrom janitor clean_names
-#' @importFrom dplyr mutate across
+#' @importFrom dplyr mutate across mutate_all
 .tm_each_absence_page <- function(absence_page_url) {
 
  absence_pg <- xml2::read_html(absence_page_url)
 
  # get the main table, knowing that come columns won't be returned
  main_df <- absence_pg |> rvest::html_elements("#yw1 .items") |> rvest::html_table() |> data.frame()
 
- # create an object of each table row and the col headings
- tab_rows <- absence_pg |> rvest::html_elements("#yw1 .items tbody tr")
- tab_head <- absence_pg |> rvest::html_elements("#yw1 .items tr th") |> rvest::html_text()
+ if(nrow(main_df) > 0) {
 
- # index of columns we need to get extra html elements for
- competition_idx <- grep("competition", tolower(tab_head))
- club_missed_idx <- grep("games missed", tolower(tab_head))
+  # create an object of each table row and the col headings
+  tab_rows <- absence_pg |> rvest::html_elements("#yw1 .items tbody tr")
+  tab_head <- absence_pg |> rvest::html_elements("#yw1 .items tr th") |> rvest::html_text()
 
+ # index of columns we need to get extra html elements for
+ competition_idx <- grep("competition", tolower(tab_head))
+ club_missed_idx <- grep("games missed", tolower(tab_head))
 
- # parse competiton name
- comp_name <- c()
- for(i in 1:length(tab_rows)) {
- each <- tab_rows[i] |>
- rvest::html_elements("td") |>
- purrr::pluck(competition_idx) |>
- rvest::html_elements("img") |> rvest::html_attr("title") |>
- .replace_empty_na()
 
- comp_name <- c(comp_name, each)
- }
+ # parse competition name
+ comp_name <- c()
+ for(i in 1:length(tab_rows)) {
+ each <- tab_rows[i] |>
+ rvest::html_elements("td") |>
+ purrr::pluck(competition_idx) |>
+ rvest::html_elements("img") |> rvest::html_attr("title") |>
+ .replace_empty_na()
+
+ comp_name <- c(comp_name, each)
+ }
 
 
- # parse team name
- club_name <- c()
- for(i in 1:length(tab_rows)) {
- each <- tab_rows[i] |>
- rvest::html_elements("td") |>
- purrr::pluck(club_missed_idx) |>
- rvest::html_elements("a") |> rvest::html_attr("title") |>
- .replace_empty_na()
+ # parse team name
+ club_name <- c()
+ for(i in 1:length(tab_rows)) {
+ each <- tab_rows[i] |>
+ rvest::html_elements("td") |>
+ purrr::pluck(club_missed_idx) |>
+ rvest::html_elements("a") |> rvest::html_attr("title") |>
+ .replace_empty_na()
+
+ club_name <- c(club_name, each)
+ }
 
- club_name <- c(club_name, each)
- }
 
+ main_df$Competition <- comp_name
+ main_df$club_missed <- club_name
 
- main_df$Competition <- comp_name
- main_df$club_missed <- club_name
+ main_df <- main_df |>
+ dplyr::mutate_all(as.character) |>
+ dplyr::mutate(dplyr::across(c(from, until), .tm_fix_dates)) |>
+ janitor::clean_names()
 
- main_df <- main_df |>
- dplyr::mutate(dplyr::across(c(from, until), .tm_fix_dates)) |>
- janitor::clean_names()
+ }
 
  return(main_df)
 
@@ -85,15 +90,11 @@ tm_get_player_absence <- function(player_urls) {
 
  .tm_each_players_absence <- function(player_url) {
 
- # pb$tick()
+ pb$tick()
 
  main_url <- "https://www.transfermarkt.com"
 
- # player_url <- "https://www.transfermarkt.com/cristian-romero/profil/spieler/355915"
- #
  # # change the url to point to the absences url
- # "https://www.transfermarkt.com/cristian-romero/ausfaelle/spieler/355915"
-
  player_url_changed <- gsub("profil", "ausfaelle", player_url)