Skip to content

Commit

Permalink
handle for non-existent data
Browse files Browse the repository at this point in the history
  • Loading branch information
JaseZiv committed Jan 14, 2024
1 parent 22069a7 commit f8e4160
Showing 1 changed file with 38 additions and 37 deletions.
75 changes: 38 additions & 37 deletions R/tm_player_absence.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,55 +2,60 @@
#' @importFrom xml2 read_html
#' @importFrom purrr pluck
#' @importFrom janitor clean_names
#' @importFrom dplyr mutate across
#' @importFrom dplyr mutate across mutate_all
.tm_each_absence_page <- function(absence_page_url) {

absence_pg <- xml2::read_html(absence_page_url)

# get the main table, knowing that some columns won't be returned
main_df <- absence_pg |> rvest::html_elements("#yw1 .items") |> rvest::html_table() |> data.frame()

# create an object of each table row and the col headings
tab_rows <- absence_pg |> rvest::html_elements("#yw1 .items tbody tr")
tab_head <- absence_pg |> rvest::html_elements("#yw1 .items tr th") |> rvest::html_text()
if(nrow(main_df) > 0) {

# index of columns we need to get extra html elements for
competition_idx <- grep("competition", tolower(tab_head))
club_missed_idx <- grep("games missed", tolower(tab_head))
# create an object of each table row and the col headings
tab_rows <- absence_pg |> rvest::html_elements("#yw1 .items tbody tr")
tab_head <- absence_pg |> rvest::html_elements("#yw1 .items tr th") |> rvest::html_text()

# index of columns we need to get extra html elements for
competition_idx <- grep("competition", tolower(tab_head))
club_missed_idx <- grep("games missed", tolower(tab_head))

# parse competition name
comp_name <- c()
for(i in 1:length(tab_rows)) {
each <- tab_rows[i] |>
rvest::html_elements("td") |>
purrr::pluck(competition_idx) |>
rvest::html_elements("img") |> rvest::html_attr("title") |>
.replace_empty_na()

comp_name <- c(comp_name, each)
}
# parse competition name
comp_name <- c()
for(i in 1:length(tab_rows)) {
each <- tab_rows[i] |>
rvest::html_elements("td") |>
purrr::pluck(competition_idx) |>
rvest::html_elements("img") |> rvest::html_attr("title") |>
.replace_empty_na()

comp_name <- c(comp_name, each)
}


# parse team name
club_name <- c()
for(i in 1:length(tab_rows)) {
each <- tab_rows[i] |>
rvest::html_elements("td") |>
purrr::pluck(club_missed_idx) |>
rvest::html_elements("a") |> rvest::html_attr("title") |>
.replace_empty_na()
# parse team name
club_name <- c()
for(i in 1:length(tab_rows)) {
each <- tab_rows[i] |>
rvest::html_elements("td") |>
purrr::pluck(club_missed_idx) |>
rvest::html_elements("a") |> rvest::html_attr("title") |>
.replace_empty_na()

club_name <- c(club_name, each)
}

club_name <- c(club_name, each)
}

main_df$Competition <- comp_name
main_df$club_missed <- club_name

main_df$Competition <- comp_name
main_df$club_missed <- club_name
main_df <- main_df |>
dplyr::mutate_all(as.character) |>
dplyr::mutate(dplyr::across(c(from, until), .tm_fix_dates)) |>
janitor::clean_names()

main_df <- main_df |>
dplyr::mutate(dplyr::across(c(from, until), .tm_fix_dates)) |>
janitor::clean_names()
}

return(main_df)

Expand Down Expand Up @@ -85,15 +90,11 @@ tm_get_player_absence <- function(player_urls) {

.tm_each_players_absence <- function(player_url) {

# pb$tick()
pb$tick()

main_url <- "https://www.transfermarkt.com"

# player_url <- "https://www.transfermarkt.com/cristian-romero/profil/spieler/355915"
#
# # change the url to point to the absences url
# "https://www.transfermarkt.com/cristian-romero/ausfaelle/spieler/355915"

player_url_changed <- gsub("profil", "ausfaelle", player_url)


Expand Down

0 comments on commit f8e4160

Please sign in to comment.