Skip to content

Commit

Permalink
fixing thingies in 1.1.1
Browse files Browse the repository at this point in the history
  • Loading branch information
BlasBenito committed May 13, 2021
1 parent bebd65a commit d9c58da
Show file tree
Hide file tree
Showing 35 changed files with 453 additions and 444 deletions.
16 changes: 10 additions & 6 deletions R/auto_cor.R
Original file line number Diff line number Diff line change
Expand Up @@ -139,15 +139,19 @@ auto_cor <- function(

#message
if(verbose == TRUE){
message(
paste0(
"[auto_cor()]: Removed variables: ",
if(length(removed.vars) != 0){
message(
paste0(
removed.vars,
collapse = ", "
"[auto_cor()]: Removed variables: ",
paste0(
removed.vars,
collapse = ", "
)
)
)
)
} else {
message("[auto_cor()]: Variables are not collinear.")
}
}

#return output
Expand Down
16 changes: 10 additions & 6 deletions R/auto_vif.R
Original file line number Diff line number Diff line change
Expand Up @@ -145,15 +145,19 @@ auto_vif <- function(
#message
if(verbose == TRUE){
removed.vars <- setdiff(colnames(x), output.list$selected.variables)
message(
paste0(
"[auto_vif()]: Removed variables: ",
if(length(removed.vars) != 0){
message(
paste0(
removed.vars,
collapse = ", "
"[auto_vif()]: Removed variables: ",
paste0(
removed.vars,
collapse = ", "
)
)
)
)
} else {
message("[auto_vif()]: Variables are not collinear.")
}
}

#adding class
Expand Down
7 changes: 7 additions & 0 deletions R/filter_spatial_predictors.R
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,13 @@ filter_spatial_predictors <- function(
cor.threshold = 0.50
){

#predictor.variable.names comes from auto_vif or auto_cor
if(!is.null(predictor.variable.names)){
if(inherits(predictor.variable.names, "variable_selection")){
predictor.variable.names <- predictor.variable.names$selected.variables
}
}

#filtering spatial predictors by pair-wise correlation
spatial.predictors.df <- auto_cor(
x = spatial.predictors.df,
Expand Down
7 changes: 7 additions & 0 deletions R/plot_training_df.R
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,13 @@ plot_training_df <- function(
stop("No variables to plot.")
}

#predictor.variable.names comes from auto_vif or auto_cor
if(!is.null(predictor.variable.names)){
if(inherits(predictor.variable.names, "variable_selection")){
predictor.variable.names <- predictor.variable.names$selected.variables
}
}

plot.list <- list()
for(variable in predictor.variable.names){
plot.list[[variable]] <- ggplot2::ggplot(
Expand Down
7 changes: 7 additions & 0 deletions R/plot_training_df_moran.R
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,13 @@ plot_training_df_moran <- function(
stop("No variables to plot.")
}

#predictor.variable.names comes from auto_vif or auto_cor
if(!is.null(predictor.variable.names)){
if(inherits(predictor.variable.names, "variable_selection")){
predictor.variable.names <- predictor.variable.names$selected.variables
}
}

if(is.null(distance.matrix)){
stop("distance.matrix is missing.")
}
Expand Down
7 changes: 7 additions & 0 deletions R/rank_spatial_predictors.R
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,13 @@ rank_spatial_predictors <- function(
cluster.port = "11000"
){

#predictor.variable.names comes from auto_vif or auto_cor
if(!is.null(predictor.variable.names)){
if(inherits(predictor.variable.names, "variable_selection")){
predictor.variable.names <- predictor.variable.names$selected.variables
}
}

#testing method argument
ranking.method <- match.arg(
arg = ranking.method,
Expand Down
19 changes: 5 additions & 14 deletions R/rf_interactions.R
Original file line number Diff line number Diff line change
Expand Up @@ -83,8 +83,10 @@ rf_interactions <- function(
y <- NULL

#predictor.variable.names comes from auto_vif or auto_cor
if(inherits(predictor.variable.names, "variable_selection")){
predictor.variable.names <- predictor.variable.names$selected.variables
if(!is.null(predictor.variable.names)){
if(inherits(predictor.variable.names, "variable_selection")){
predictor.variable.names <- predictor.variable.names$selected.variables
}
}

#fitting model
Expand Down Expand Up @@ -562,19 +564,8 @@ rf_interactions <- function(

}

#plot list of plots
if(length(plot.list) == 1){
plot.list.out <- plot.list[[1]]
}
if(length(plot.list) == 2){
plot.list.out <- patchwork::wrap_plots(plot.list)
}
if(length(plot.list) > 2){
plot.list.out <- patchwork::wrap_plots(plot.list)
}

if(verbose == TRUE){
print(plot.list.out)
print(patchwork::wrap_plots(plot.list))
}

#generating training df
Expand Down
8 changes: 8 additions & 0 deletions R/rf_repeat.R
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ rf_repeat <- function(

#getting arguments from model rather than ranger.arguments
if(!is.null(model)){

ranger.arguments <- model$ranger.arguments
data <- ranger.arguments$data
dependent.variable.name <- ranger.arguments$dependent.variable.name
Expand All @@ -137,6 +138,13 @@ rf_repeat <- function(

}

#predictor.variable.names comes from auto_vif or auto_cor
if(!is.null(predictor.variable.names)){
if(inherits(predictor.variable.names, "variable_selection")){
predictor.variable.names <- predictor.variable.names$selected.variables
}
}

if(is.null(ranger.arguments)){
ranger.arguments <- list()
}
Expand Down
7 changes: 7 additions & 0 deletions R/select_spatial_predictors_recursive.R
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,13 @@ select_spatial_predictors_recursive <- function(
cluster.port = "11000"
){

#predictor.variable.names comes from auto_vif or auto_cor
if(!is.null(predictor.variable.names)){
if(inherits(predictor.variable.names, "variable_selection")){
predictor.variable.names <- predictor.variable.names$selected.variables
}
}

#preparing fast ranger arguments
if(is.null(ranger.arguments)){
ranger.arguments <- list()
Expand Down
7 changes: 7 additions & 0 deletions R/select_spatial_predictors_sequential.R
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,13 @@ select_spatial_predictors_sequential <- function(
cluster.port = "11000"
){

#predictor.variable.names comes from auto_vif or auto_cor
if(!is.null(predictor.variable.names)){
if(inherits(predictor.variable.names, "variable_selection")){
predictor.variable.names <- predictor.variable.names$selected.variables
}
}

#getting spatial.predictors.rank
spatial.predictors.ranking <- spatial.predictors.ranking$ranking

Expand Down
23 changes: 11 additions & 12 deletions README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,14 @@ always_allow_html: yes

# Introduction

The package **spatialRF** facilitates fitting spatial regression models on regular or irregular data with Random Forest. It does so by generating *spatial predictors* that help the model "understand" the spatial structure of the training data with the end goal of minimizing the spatial autocorrelation of the model residuals and offering honest ariable importance scores.
The package **spatialRF** facilitates fitting spatial regression models on regular or irregular data with Random Forest. It does so by generating *spatial predictors* that help the model "understand" the spatial structure of the training data with the end goal of minimizing the spatial autocorrelation of the model residuals and offering honest variable importance scores.

Two main methods to generate *spatial predictors* from the distance matrix of the data points are implemented in the package:

- Moran's Eigenvector Maps [(Dray, Legendre, and Peres-Neto 2006)](https://www.sciencedirect.com/science/article/abs/pii/S0304380006000925).
- Distance matrix columns as explanatory variables [(Hengl et al. 2018)](https://peerj.com/articles/5518/).

The package is designed to minimize the amount of code required to fit a spatial model from a training dataset, the names of the response and the predictors, and a distance matrix, as the example below shows.
The package is designed to minimize the code required to fit a spatial model from a training dataset, the names of the response and the predictors, and a distance matrix, as shown below.

```{r, eval=FALSE}
spatial.model <- spatialRF::rf_spatial(
Expand Down Expand Up @@ -117,7 +117,7 @@ library(pdp)
The data required to fit random forest models with `spatialRF` must fulfill several conditions:

+ **The input format is data.frame**. At the moment, tibbles are not fully supported.
+ **The number of rows must be somewhere between 100 and ~5000**, at least if your target is fitting spatial models. This limitation comes from the fact that the distance matrix grows in size very fast with increasing number of training records, so for large datasets there might not be enough RAM memory in your machine.
+ **The number of rows must be somewhere between 100 and ~5000**, at least if your target is fitting spatial models. This limitation comes from the fact that the distance matrix grows very fast with an increasing number of training records, so for large datasets, there might not be enough RAM in your machine.
+ **The number of predictors should be larger than 3**. Fitting a Random Forest model is moot otherwise.
+ **Factors in the response or the predictors are not explicitly supported in the package**. They may work, or they may not, but in any case, I designed this package for quantitative data alone. However, binary responses with values 0 and 1 are partially supported.
+ **The data must be free of `NA`**. You can check whether there are `NA` records with `sum(apply(df, 2, is.na))`. If the result is larger than 0, execute `df <- na.omit(df)` to remove the rows with empty cells.
Expand All @@ -140,7 +140,7 @@ The package follows a convention throughout functions:
It is therefore convenient to define these arguments at the beginning of the workflow.

```{r}
#loading ddata
#loading training data and distance matrix from the package
data(plant_richness_df)
data(distance_matrix)
Expand All @@ -154,15 +154,13 @@ xy <- plant_richness_df[, c("x", "y")]
#distance matrix
distance.matrix <- distance_matrix
#distance thresholds
#distance thresholds (same units as distance_matrix)
distance.thresholds <- c(0, 1000, 2000, 4000, 8000)
#random seed for reproducibility
random.seed <- 100
random.seed <- 1
```



The response variable of `plant_richness_df` is "richness_species_vascular", which represents the total count of vascular plant species found in each ecoregion. The figure below shows the centroids of each ecoregion along with their associated value of the response variable.

```{r, echo=TRUE, message=FALSE, warning=FALSE, fig.width=6, fig.height=5.5}
Expand Down Expand Up @@ -258,7 +256,7 @@ predictor.variable.names <- spatialRF::auto_cor(
)
```

The output of `auto_cor()` or `auto_vif()` has the class "variable_selection", that can be used as input for the argument `predictor.variable.names` of any modeling function within the package.
The output of `auto_cor()` or `auto_vif()` has the class "variable_selection", which can be used as input in every function having the argument `predictor.variable.names`.

```{r}
names(predictor.variable.names)
Expand All @@ -281,14 +279,15 @@ The function [`rf_interactions()`](https://blasbenito.github.io/spatialRF/refere
interactions <- rf_interactions(
data = plant_richness_df,
dependent.variable.name = dependent.variable.name,
predictor.variable.names = predictor.variable.names$selected.variables,
predictor.variable.names = predictor.variable.names,
cor.threshold = 0.75,
verbose = FALSE
seed = random.seed,
verbose = TRUE
)
```

```{r, echo=TRUE, fig.width = 5, fig.height = 4}
patchwork::wrap_plots(interactions$plot)
interactions$plot
```
Here `rf_interactions()` suggests several candidate interactions ordered by their impact on the model. Interactions computed via multiplication are named `a..x..b`, while interactions computed via PCA are named `a..pca..b`. The function cannot say whether an interaction *makes sense*, and it is up to the user to choose wisely whether to select an interaction or not.

Expand Down
Loading

0 comments on commit d9c58da

Please sign in to comment.