From a626a9469d4590b5b664c9bf3c1a597f531dcbb7 Mon Sep 17 00:00:00 2001 From: Yuanchao-Xu/hyfo Date: Wed, 9 Sep 2015 21:11:57 +0200 Subject: [PATCH] Extract months and years from time series. In order to provide obs for the biascorrection --- .Rhistory | 68 +++++------ .Rproj.user/132DF987/pcs/source-pane.pper | 2 +- .Rproj.user/132DF987/pcs/workbench-pane.pper | 2 +- .Rproj.user/132DF987/sdb/per/t/51993C6D | 18 --- .Rproj.user/132DF987/sdb/per/t/579C533F | 18 --- .Rproj.user/132DF987/sdb/per/t/606BD0E | 17 --- .Rproj.user/132DF987/sdb/per/t/9038F880 | 17 --- .../132DF987/sdb/per/t/{E7F036D3 => A902E596} | 12 +- .Rproj.user/132DF987/sdb/per/t/A9B56DE9 | 6 +- .Rproj.user/132DF987/sdb/per/t/E6E4511D | 17 +++ .Rproj.user/132DF987/sdb/per/t/EAF9DB09 | 18 --- DESCRIPTION | 2 +- R/biasCorrect.R | 2 + R/extractPeriod.R | 114 ++++++++++++++---- R/getSpatialMap.R | 2 + man/biasCorrect.Rd | 2 + man/extractPeriod.Rd | 39 +++++- man/extractPeriod_dataframe.Rd | 22 ---- vignettes/hyfo.Rmd | 4 +- 19 files changed, 198 insertions(+), 184 deletions(-) delete mode 100644 .Rproj.user/132DF987/sdb/per/t/51993C6D delete mode 100644 .Rproj.user/132DF987/sdb/per/t/579C533F delete mode 100644 .Rproj.user/132DF987/sdb/per/t/606BD0E delete mode 100644 .Rproj.user/132DF987/sdb/per/t/9038F880 rename .Rproj.user/132DF987/sdb/per/t/{E7F036D3 => A902E596} (85%) create mode 100644 .Rproj.user/132DF987/sdb/per/t/E6E4511D delete mode 100644 .Rproj.user/132DF987/sdb/per/t/EAF9DB09 delete mode 100644 man/extractPeriod_dataframe.Rd diff --git a/.Rhistory b/.Rhistory index de58d86..f36142c 100644 --- a/.Rhistory +++ b/.Rhistory @@ -1,37 +1,3 @@ -# For every column, it's biascorrected respectively. -frc_data <- lapply(2:n, function(x) biasCorrect_core(frc[, x], hindcast[, x], obs[, 2], method = method, -scaleType = scaleType, preci = preci)) -frc_data <- do.call('cbind', frc_data) -} else stop('Wrong TS input, check your TS dimension.') -} else if (input == 'hyfo') { -print('Under development...') -} -names <- colnames(frc) -frc <- data.frame(frc[, 1], frc_data) -colnames(frc) <- names -return(frc) -} -# this is only used to calculate the value column, -biasCorrect_core <- function(frc, hindcast, obs, method = 'delta', scaleType = 'multi', preci = FALSE){ -# default is the simplest method in biascorrection, just do simple addition and subtraction. -if (method == 'delta') { -# comes from downscaleR biascorrection method -frcMean <- mean(obs[, 2], na.rm = TRUE) -hindcastMean <- mean(hindcast[, 2], na.rm = TRUE) -frc <- obs - hindcastMean + frcMean -} else if (method == 'scaling') { -obsMean <- mean(obs, na.rm = TRUE) -hindcastMean <- mean(hindcast, na.rm = TRUE) -if (scaleType == 'multi') { -frc <- frc / hindcastMean * obsMean -} else if (scaleType == 'add') { -frc <- frc - hindcastMean + obsMean -} -} else if (method == 'eqm') { -# To be added, right now too complicated and not so much use. 
-} -return(frc) -} aaa <- biasCorrect(frc, hindcast, obs) debug(biasCorrect) aaa <- biasCorrect(frc, hindcast, obs) @@ -510,3 +476,37 @@ a <- getFrcEnsem(nc) a1 <- getFrcEnsem(tgridData) a <- getFrcEnsem(nc, cell = c(6,2)) b <- getFrcEnsem(nc, coord = c(-1.4, 43.2)) +devtools::install_github('Yuanchao-Xu/hyfo') +devtools::install_github('Yuanchao-Xu/hyfo') +library(hyfo) +?biasCorrect +?layer +?structure +structure(1:6, dim = 2:3) +a <- structure(1:6, dim = 2:3) +attr(a) +attrs(a) +attributes(a) +a <- structure(1:6, dim = 2:3, dimnames(dalkf,dafda)) +a <- structure(1:6, dim = 2:3, dimnames(1,2)) +a <- structure(1:6, dim = 2:3, dimnames(1)) +a <- structure(1:6, dim = 2:3, dimnames = 1: 3) +a <- structure(1:6, dim = 2:3, dimnames = c('dafa', 'dafda')) +?UseMethod() +devtools::document() +devtools::check() +devtools::document() +devtools::check() +devtools::document() +devtools::check() +devtools::document() +devtools::check() +devtools::document() +devtools::build() +install.packages("~/hyfo_1.1.8.tar.gz", repos = NULL, type = "source") +devtools::document() +devtools::check() +devtools::build() +install.packages("~/hyfo_1.1.9.tar.gz", repos = NULL, type = "source") +devtools::document() +devtools::check() diff --git a/.Rproj.user/132DF987/pcs/source-pane.pper b/.Rproj.user/132DF987/pcs/source-pane.pper index 1743e40..3249574 100644 --- a/.Rproj.user/132DF987/pcs/source-pane.pper +++ b/.Rproj.user/132DF987/pcs/source-pane.pper @@ -1,3 +1,3 @@ { - "activeTab" : 0 + "activeTab" : 2 } \ No newline at end of file diff --git a/.Rproj.user/132DF987/pcs/workbench-pane.pper b/.Rproj.user/132DF987/pcs/workbench-pane.pper index 6785be6..7135db5 100644 --- a/.Rproj.user/132DF987/pcs/workbench-pane.pper +++ b/.Rproj.user/132DF987/pcs/workbench-pane.pper @@ -1,4 +1,4 @@ { "TabSet1" : 0, - "TabSet2" : 3 + "TabSet2" : 0 } \ No newline at end of file diff --git a/.Rproj.user/132DF987/sdb/per/t/51993C6D b/.Rproj.user/132DF987/sdb/per/t/51993C6D deleted file mode 100644 index b493ac7..0000000 --- a/.Rproj.user/132DF987/sdb/per/t/51993C6D +++ /dev/null @@ -1,18 +0,0 @@ -{ - "contents" : "#' Get ensemble forecast from historical data.\n#' \n#' getHisEnsem use historical data as the forecasting input time series.\n#' \n#' @param TS A time series dataframe, with first column Date, and second column value.\n#' @param example A vector containing two strings showing the start and end date, which represent the \n#' forecasting period. Check details for more information.\n#'\n#' the program will extract every possible period in TS you provided to generate the ensemble. Check details for \n#' more information.\n#' @param interval A number representing the interval of each ensemble member. NOTE: \"interval\" takes\n#' 365 as a year, and 30 as a month, regardless of leap year and months with 31 days. So if you want the interval \n#' to be 2 years, set \\code{interval = 730}, which equals 2 * 365 ; if two months, set \\code{interval = 60}; \n#' 2 days, \\code{interval = 2}, for other numbers that cannot be divided by 365 or 30 without remainder, it will treat the \n#' number as days.By defualt interval is set to be 365, a year.\n#' @param buffer A number showing how many days are used as buffer period for models. Check details for more\n#' information.\n#' \n#' @param plot A string showing whether the plot will be shown, e.g., 'norm' means normal plot (without any process), \n#' 'cum' means cummulative plot, default is 'norm'. 
For other words there will be no plot.\n#' @param output A string showing which type of output you want. Default is \"data\", if \"ggplot\", the \n#' data that can be directly plotted by ggplot2 will be returned, which is easier for you to make series\n#' plots afterwards. NOTE: If \\code{output = 'ggplot'}, the missing value in the data will\n#' be replaced by \\code{mv}, if assigned, default mv is 0.\n#' \n#' @param name If \\code{output = 'ggplot'}, name has to be assigned to your output, in order to differentiate\n#' different outputs in the later multiplot using \\code{getEnsem_comb}.\n#' \n#' @param mv A number showing representing the missing value. When calculating the cumulative value, \n#' missing value will be replaced by mv, default is 0.\n#' @param ... \\code{title, x, y} showing the title and x and y axis of the plot. e.g. \\code{title = 'aaa'}\n#' \n#' @details \n#' \n#' \\code{example} E.g., if you have a time series from 2000 to 2010. Assuming you are in 2003,\n#' you want to forecast the period from 2003-2-1 to 2003-4-1. Then for each year in your input\n#' time series, every year from 1st Feb to 1st Apr will be extracted to generate the ensemble\n#' forecasts. In this case your input example should be \\code{example = c('2003-2-1', '2003-4-1')}\n#' \n#' \\code{interval} doesn't care about leap year and the months with 31 days, it will take 365 as a year, and 30 as a month.\n#' e.g., if the interval is from 1999-2-1 to 1999-3-1, you should just set interval to 30, although the real interval is 28\n#' days.\n#' \n#' \\code{example} and \\code{interval} controls how the ensemble will be generated. e.g. if the time series is from \n#' 1990-1-1 to 2001-1-1.\n#' \n#' if \\code{example = c('1992-3-1', '1994-1-1')} and \\code{interval = 1095}, note, 1095 = 365 * 3, so the program treat\n#' this as 3 years.\n#' \n#' Then you are supposed to get the ensemble consisting of following part:\n#' \n#' 1. 1992-3-1 to 1994-1-1 first one is the example, and it's NOT start from 1990-3-1.\n#' 2. 1995-3-1 to 1997-1-1 second one starts from 1993, because \"interval\" is 3 years.\n#' 3. 1998-3-1 to 2000-1-1\n#' \n#' because the last one \"2000-3-1 to 2002-1-1\", 2002 exceeds the original TS range, so it will not be included.\n#' \n#' Sometimes, there are leap years and months with 31 days included in some ensemble part, in which case the length of the data will\n#' be different, e.g., 1999-1-1 to 1999-3-1 is 1 day less than 2000-1-1 to 2000-3-1. In this situation,\n#' the data will use example as a standard. If the example is 1999-1-1 to 1999-3-1, then the latter one\n#' will be changed to 2001-1-1 to 2000-2-29, which keeps the start Date and change the end Date.\n#' \n#' If the end date is so important that cannot be changed, try to solve this problem by resetting\n#' the example period, to make the event included in the example.\n#' \n#' Good set of example and interval can generate good ensemble.\n#' \n#' \\code{buffer}\n#' Sometimes the model needs to run for a few days to warm up, before the forecast. E.g., if a forecast starts at\n#' '1990-1-20', for some model like MIKE NAM model, the run needs to be started about 14 days. So the input timeseries\n#' should start from '1990-1-6'.\n#' \n#' Buffer is mainly used for the model hotstart. Sometimes the hot start file cannot contain all the parameters needed,\n#' only some important parameters. 
In this case, the model needs to run for some time, to make other parameters ready\n#' for the simulation.\n#' \n#' \n#' \\code{name}\n#' Assuming you have two ggplot outputs, you want to plot them together. In this situation, you\n#' need a name column to differentiate one ggplot output from the other. You can assigne this name\n#' by the argument directly, name has to be assigned if \\code{output = 'ggplot'} is selected,\n#' @return A ensemble time series using historical data as forecast.\n#' \n#' @examples\n#' \n#' data(testdl)\n#' \n#' a <- testdl[[1]]\n#' \n#' # Choose example from \"1994-2-4\" to \"1996-1-4\"\n#' b <- getHisEnsem(a, example = c('1994-2-4', '1996-1-4'))\n#' \n#' # Default interval is one year, can be set to other values, check help for information.\n#' \n#' # Take 7 months as interval\n#' b <- getHisEnsem(a, example = c('1994-2-4', '1996-1-4'), interval = 210, plot = 'cum') \n#' # Take 30 days as buffer\n#' b <- getHisEnsem(a, example = c('1994-2-4', '1996-1-4'), interval = 210, buffer = 30)\n#' @importFrom reshape2 melt \n#' @importFrom grDevices rainbow\n#' @import ggplot2\n#' @references \n#' Hadley Wickham (2007). Reshaping Data with the reshape Package. Journal of Statistical Software,\n#' 21(12), 1-20. URL http://www.jstatsoft.org/v21/i12/.\n#' \n#' H. Wickham. ggplot2: elegant graphics for data analysis. Springer New York, 2009.\n#' @export\n\ngetHisEnsem <- function (TS, example, interval = 365, buffer = 0, plot = 'norm', output = 'data', \n name = NULL, mv = 0, ...) {\n if (!grepl('-|/', TS[1, 1])) {\n stop('First column is not date or Wrong Date formate, check the format in ?as.Date{base} \n and use as.Date to convert.')\n } else if (!grepl('-|/', example[1]) | !grepl('-|/', example[1])) {\n stop('Wrong date format in the example, check the format in ?as.Date{base} \n and use as.Date to convert.')\n } else {\n \n \n \n TS[, 1] <- as.Date(TS[, 1])\n example <- as.Date(example ,tz = '')\n exL <- example[2] - example[1]\n # Test if example is in the range of the TS\n a <- which(TS[, 1] == example[1] | TS[, 1] == example[2])\n if (length(a) < 2) stop('Example is out of the time series, reset example.')\n \n \n \n if (interval %% 365 == 0) {\n d <- interval / 365\n \n # Get sequence of start and end date.\n \n startDate <- rev(seq(from = example[1], to = min(TS[, 1]), by = paste(-d, 'years')))\n endDate <- seq(from = example[2], to = max(TS[, 1]), by = paste(d, 'years'))\n\n n <- length(startDate) + length(endDate) - 1 # example is counted twice, should be subtracted. 
\n \n # Generate full start date series.\n startDate <- seq(min(startDate), length = n, by = paste(d, 'years'))\n endDate <- startDate + exL\n \n } else if (interval %% 30) {\n d <- interval / 30\n \n # Get sequence of start and end date.\n \n startDate <- rev(seq(from = example[1], to = min(TS[, 1]), by = paste(-d, 'months')))\n endDate <- seq(from = example[2], to = max(TS[, 1]), by = paste(d, 'months'))\n \n n <- length(startDate) + length(endDate) - 1\n \n startDate <- seq(min(startDate), length = n, by = paste(d, 'months'))\n endDate <- startDate + exL\n \n } else {\n d <- interval\n \n # Get sequence of start and end date.\n \n startDate <- rev(seq(from = example[1], to = min(TS[, 1]), by = paste(-d, 'days')))\n endDate <- seq(from = example[2], to = max(TS[, 1]), by = paste(d, 'days'))\n \n n <- length(startDate) + length(endDate) - 1\n \n startDate <- seq(min(startDate), length = n, by = paste(d, 'days'))\n endDate <- startDate + exL\n }\n \n data <- mapply(FUN = function(x, y) extractPeriod_dataframe(dataframe = TS, startDate = x, endDate = y),\n x = startDate, y = endDate)\n \n data <- lapply(1:n, function(x) data.frame(data[, x]))\n \n if (buffer > 0) {\n bufferStart <- example[1] - buffer\n bufferEnd <- example[1] - 1\n bufferTS <- extractPeriod_dataframe(TS, bufferStart, bufferEnd)\n \n data <- lapply(data, function(x) rbind(bufferTS, x))\n \n } else if (buffer < 0) {\n stop ('Buffer should be positive, or reset example.')\n }\n \n \n data_output <- list2Dataframe(data)\n colnames(data_output) <- c('Date', as.character(startDate))\n \n # Rearrange dataframe to make example the first column.\n ind <- match(c('Date', as.character(example[1])), colnames(data_output))\n # when use cbind, to ensure the output is also a dataframe, one inside cbind should be dataframe\n # Even output is alread a dataframe, but when ind is a single number, then output[ind] will\n # not be a dataframe, but an array.\n data_output <- cbind(data.frame(data_output[ind]), data_output[-ind])\n ex_date <- seq(from = example[1] - buffer, to = example[2], by = 1)\n data_output$Date <- ex_date\n colnames(data_output)[2] <- 'Observation'\n \n meanV <- apply(data_output[, 2:ncol(data_output)], MARGIN = 1, FUN = mean, na.rm = TRUE)\n \n data_output <- cbind(data.frame(Date = data_output[, 1]), Mean = meanV, \n data_output[, 2:ncol(data_output)])\n \n data_ggplot <- melt(data_output, id.var = 'Date')\n NAIndex <- is.na(data_ggplot$value)\n data_ggplot$nav <- rep(0, nrow(data_ggplot))\n data_ggplot$nav[NAIndex] <- 1\n \n if (plot == 'norm') {\n data_ggplot$value[NAIndex] <- mv\n \n } else if (plot == 'cum') {\n data_output[is.na(data_output)] <- mv\n cum <- cbind(data.frame(Date = data_output$Date), cumsum(data_output[2:ncol(data_output)]))\n \n data_ggplot <- melt(cum, id.var = 'Date')\n } else {\n stop('plot can only be \"norm\" or \"cum\", do not assign other words')\n }\n \n #generate different colors \n colors = c('brown1', 'dodgerblue3', rainbow(n = length(unique(data_ggplot$variable)) - 2,\n start = 0.1))\n \n theme_set(theme_bw())\n mainLayer <- with(data_ggplot, {\n ggplot(data = data_ggplot) +\n aes(x = Date, y = value, color = variable, group = variable) +\n geom_line(size = 0.5) +\n geom_line(data = data_ggplot[data_ggplot$variable == 'Observation', ], size = 1.6) +\n geom_line(data = data_ggplot[data_ggplot$variable == 'Mean', ], size = 1.6) +\n geom_point(data = data_ggplot[NAIndex, ], size = 3, shape = 4, color = 'black') +\n scale_color_manual(values = colors) +\n labs(empty = NULL, ...) 
+#in order to pass \"...\", arguments shouldn't be empty.\n theme(axis.text.x = element_text(size = rel(1.8)),\n axis.text.y = element_text(size = rel(1.8)),\n axis.title.x = element_text(size = rel(1.8)),\n axis.title.y = element_text(size = rel(1.8)))\n })\n print(mainLayer)\n \n if (output == 'ggplot') {\n if (is.null(name)) stop('\"name\" argument not found, \n If you choose \"ggplot\" as output, please assign a name.')\n data_ggplot$name <- rep(name, nrow(data_ggplot)) \n data_ggplot$nav <- rep(0, nrow(data_ggplot))\n data_ggplot$nav[NAIndex] <- 1\n\n return(data_ggplot)\n } else {\n return(data_output)\n }\n }\n}\n\n\n\n\n\n\n#' Extract time series from forecasting data.\n#' \n#' getFrcEnsem extract timeseries from forecasting data, if forecasting data has a member session\n#' an ensemble time sereis will be returned, if forecasting data doesn't have a member session, a singe time\n#' series will be returned.\n#' \n#' @param dataset A list containing different information, should be the result of reading netcdf file using\n#' \\code{library(ecomsUDG.Raccess)}.\n#' @param cell A vector containing the locaton of the cell, e.g. c(2, 3), default is \"mean\", representing\n#' the spatially averaged value. Check details for more information.\n#' @param plot A string showing whether the plot will be shown, e.g., 'norm' means normal plot (without any process), \n#' 'cum' means cummulative plot, default is 'norm'. For other words there will be no plot.\n#' @param output A string showing which type of output you want. Default is \"data\", if \"ggplot\", the \n#' data that can be directly plotted by ggplot2 will be returned, which is easier for you to make series\n#' plots afterwards. NOTE: If \\code{output = 'ggplot'}, the missing value in the data will\n#' be replaced by \\code{mv}, if assigned, default mv is 0.\n#' @param name If \\code{output = 'ggplot'}, name has to be assigned to your output, in order to differentiate\n#' different outputs in the later multiplot using \\code{getEnsem_comb}.\n#' @param mv A number showing representing the missing value. When calculating the cumulative value, \n#' missing value will be replaced by mv, default is 0.\n#' @param coord A coordinate of longitude and latitude. e.g. corrd = c(lon, lat). If coord is assigned,\n#' cell argument will no longer be used.\n#' @param ... \\code{title, x, y} showing the title and x and y axis of the plot. e.g. \\code{title = 'aaa'}\n#' \n#' @details \n#' \n#' \\code{cell} representing the location of the cell, NOTE: this location means the index of the cell,\n#' IT IS NOT THE LONGITUDE AND LATITUDE. e.g., \\code{cell = c(2, 3)}, the program will take the 2nd longitude\n#' and 3rd latitude, by the increasing order. Longitude comes first.\n#' \n#' \\code{name}\n#' Assuming you have two ggplot outputs, you want to plot them together. In this situation, you\n#' need a name column to differentiate one ggplot output from the other. 
You can assigne this name\n#' by the argument directly, If name is not assigned and \\code{output = 'ggplot'} is selected, then\n#' the system time will be selected as name column.\n#' \n#' @examples \n#' \n#' filePath <- system.file(\"extdata\", \"tnc.nc\", package = \"hyfo\")\n\n#' # Then if you don't know the variable name, you can use \\code{getNcdfVar} to get variable name\n#' varname <- getNcdfVar(filePath)\n#' nc <- loadNcdf(filePath, varname)\n#' a <- getFrcEnsem(nc)\n#' \n#' # If there is no member session in the dataset, a single time sereis will be extracted.\n#' a1 <- getFrcEnsem(tgridData)\n#' \n#' \n#' # The default output is spatially averaged, if there are more than one cells in the dataset, \n#' # the mean value of the cells will be calculated. While if you are interested in special cell, \n#' # you can assign the cell value. You can also directly use longitude and latitude to extract \n#' # time series.\n#' \n#' getSpatialMap(nc, 'mean')\n#' a <- getFrcEnsem(nc, cell = c(6,2))\n#' \n#' # From the map, cell = c(6, 2) means lon = -1.4, lat = 43.2, so you can use corrd to locate\n#' # your research area and extract time series.\n#' b <- getFrcEnsem(nc, coord = c(-1.4, 43.2))\n#' \n#' \n#' @return A ensemble time series extracted from forecating data.\n#' \n#' @import ggplot2\n#' @importFrom reshape2 melt\n#' @references \n#' H. Wickham. ggplot2: elegant graphics for data analysis. Springer New York, 2009.\n#' \n#' Hadley Wickham (2007). Reshaping Data with the reshape Package. Journal of Statistical Software,\n#' 21(12), 1-20. URL http://www.jstatsoft.org/v21/i12/.\n#' \n#' @export\ngetFrcEnsem <- function(dataset, cell = 'mean', plot = 'norm', output = 'data', name = NULL,\n mv = 0, coord = NULL, ...) {\n # cell should be a vector showing the location, or mean representing the loacation averaged.\n \n checkWord <- c('Data', 'xyCoords', 'Dates')\n if (any(is.na(match(checkWord, attributes(dataset)$names)))) {\n stop('Input dataset is incorrect, it should contain \"Data\", \"xyCoords\", and \"Dates\", \n check help for details.')\n }\n \n Date <- as.Date(dataset$Dates$start)\n data <- dataset$Data\n \n # Dimension needs to be arranged. Make sure first and second dimension is lat and lon.\n att <- attributes(data)$dimensions\n dimIndex <- seq(1, length(att))\n dimIndex1 <- match(c('lon', 'lat', 'time'), att)# match can apply to simple cases\n dimIndex2 <- dimIndex[-dimIndex1]# choose nomatch\n \n data <- aperm(data, c(dimIndex1, dimIndex2))\n attributes(data)$dimensions <- att[c(dimIndex1, dimIndex2)]\n \n if (!is.null(coord)) {\n if (length(coord) == 2) {\n # corrdinates\n lonC <- coord[1]\n latC <- coord[2]\n \n lon <- dataset$xyCoords$x\n lat <- dataset$xyCoords$y\n \n # Index\n lonI <- which(abs(lon - lonC) == min(abs(lon - lonC), na.rm = TRUE)) \n latI <- which(abs(lat - latC) == min(abs(lat - latC), na.rm = TRUE))\n \n cell <- c(max(lonI), max(latI))\n \n } else stop('Wrong coord input, should be c(lon, lat). 
Lon and lat should be within the dataset range.')\n } \n \n \n \n \n if (!any(attributes(data)$dimensions == 'member')){\n message('There is no member part in the dataset, there will be only one column of value\n returned.')\n \n if (length(cell) == 2) {\n data_ensem <- data[cell[1], cell[2], ]\n \n } else if (cell == 'mean') {\n data_ensem <- apply(data, MARGIN = 3, FUN = mean, na.rm = TRUE)\n # colnames <- 1:ncol(data_ensem)\n \n } else {\n stop('Wrong cell input, check help for information.')\n }\n \n } else {\n \n if (length(cell) == 2) {\n data_ensem <- data[cell[1], cell[2], , ]\n meanV <- apply(data_ensem, MARGIN = 1, FUN = mean, na.rm = TRUE)\n data_ensem <- data.frame('Mean' = meanV, data_ensem) \n \n } else if (cell == 'mean') {\n data_ensem <- apply(data, MARGIN = c(3, 4), FUN = mean, na.rm = TRUE)\n # colnames <- 1:ncol(data_ensem)\n meanV <- apply(data_ensem, MARGIN = 1, FUN = mean, na.rm = TRUE)\n data_ensem <- data.frame('Mean' = meanV, data_ensem)\n \n } else {\n stop('Wrong cell input, check help for information.')\n }\n }\n\n \n data_output <- data.frame(Date, data_ensem)\n data_ggplot <- melt(data_output, id.var = 'Date')\n NAIndex <- is.na(data_ggplot$value)\n \n \n if (plot == 'norm') {\n data_ggplot$value[NAIndex] <- mv\n } else if (plot == 'cum') {\n data_output[is.na(data_output)] <- mv\n cum <- cbind(data.frame(Date = data_output$Date), cumsum(data_output[2:ncol(data_output)]))\n \n data_ggplot <- melt(cum, id.var = 'Date')\n \n }\n \n colors = c('brown1', rainbow(n = length(unique(data_ggplot$variable)) - 1,\n start = 0.1))\n \n theme_set(theme_bw())\n mainLayer <- with(data_ggplot, {\n ggplot(data = data_ggplot) +\n aes(x = Date, y = value, color = variable) +\n geom_line(size = 0.5) +\n geom_line(data = data_ggplot[data_ggplot$variable == 'Mean', ], size = 1.6, color = 'red') +\n geom_point(data = data_ggplot[NAIndex, ], size = 2, shape = 4, color = 'black') +\n scale_color_manual(values = colors) +\n theme(axis.text.x = element_text(size = rel(1.8)),\n axis.text.y = element_text(size = rel(1.8)),\n axis.title.x = element_text(size = rel(1.8)),\n axis.title.y = element_text(size = rel(1.8))) +\n labs(empty = NULL, ...)#in order to pass \"...\", arguments shouldn't be empty.\n \n })\n print(mainLayer)\n \n if (output == 'ggplot') {\n if (is.null(name)) stop('\"name\" argument not found, \n If you choose \"ggplot\" as output, please assign a name.')\n \n data_ggplot$name <- rep(name, nrow(data_ggplot)) \n data_ggplot$nav <- rep(0, nrow(data_ggplot))\n data_ggplot$nav[NAIndex] <- 1\n return(data_ggplot)\n } else {\n return(data_output)\n }\n}\n\n\n\n#' Combine ensembles together\n#' @param ... different ensembles generated by \\code{getHisEnsem(, output = 'ggplot')} \n#' or \\code{getFrcEnsem(, output = 'ggplot')}, see details.\n#' @param nrow A number showing the number of rows.\n#' @param list If input is a list containing different ggplot data, use \\code{list = inputlist}.\n#' @param legend A boolean representing whether you want the legend. Sometimes when you combine\n#' plots, there will be a lot of legends, if you don't like it, you can turn it off by setting\n#' \\code{legend = FALSE}, default is TRUE.\n#' @param x A string of x axis name.\n#' @param y A string of y axis name.\n#' @param title A string of the title.\n#' @param output A boolean, if chosen TRUE, the output will be given.\n#' @return A combined ensemble plot.\n#' @export\n#' @import ggplot2\n#' @references \n#' H. Wickham. ggplot2: elegant graphics for data analysis. 
Springer New York, 2009.\n\ngetEnsem_comb <- function(..., list = NULL, nrow = 1, legend = TRUE, x = '', y = '', title = '', \n output = FALSE) {\n \n if (!is.null(list)) {\n data_ggplot <- do.call('rbind', list)\n } else {\n plots <- list(...)\n checkBind(plots, 'rbind')\n data_ggplot <- do.call('rbind', plots)\n } \n #data_ggplot$name <- factor(data_ggplot$name, levels = data_ggplot$name, ordered = TRUE)\n \n if (!class(data_ggplot) == 'data.frame') {\n warning('Your input is probably a list, but you forget to add \"list = \" before it.\n Try again, or check help for more information.')\n } else if (is.null(data_ggplot$name)) {\n stop('No \"Name\" column in the input data, check the arguments in getFreEnsem() or getHisEnsem(), if \n output = \"ggplot\" is assigned, more info please check ?getFreEnsem() or ?getHisEnsem().')\n }\n \n colors = c('brown1', 'dodgerblue3', rainbow(n = length(unique(data_ggplot$variable)) - 2,\n start = 0.1))\n \n theme_set(theme_bw())\n mainLayer <- with(data_ggplot, {\n ggplot(data = data_ggplot) +\n aes(x = Date, y = value, color = variable) +\n geom_line(size = 0.5) +\n geom_line(data = data_ggplot[data_ggplot$variable == 'Mean', ], size = 1.6) +\n geom_line(data = data_ggplot[data_ggplot$variable == 'Observation', ], size = 1.6) +\n geom_point(data = data_ggplot[data_ggplot$nav == 1, ], size = 2, shape = 4, color = 'black') +\n scale_color_manual(values = colors) +\n theme(axis.text.x = element_text(size = rel(1.8)),\n axis.text.y = element_text(size = rel(1.8)),\n axis.title.x = element_text(size = rel(1.8)),\n axis.title.y = element_text(size = rel(1.8))) +\n facet_wrap( ~ name, nrow = nrow) +\n labs(x = x, y = y, title = title)\n \n })\n if (legend == FALSE) {\n mainLayer <- mainLayer + \n theme(legend.position = 'none')\n# following ones are to add label, may be added in future.\n# geom_text(data = data_ggplot[data_ggplot$Date == '2003-12-10', ], aes(label = variable), hjust = 0.7, vjust = 1)\n# geom_text(data = data_ggplot[data_ggplot$variable == 'Mean', ], aes(label = variable), hjust = 0.7, vjust = 1)\n }\n \n \n print(mainLayer)\n \n if (output == TRUE) return(data_ggplot)\n \n}", - "created" : 1441271740923.000, - "dirty" : false, - "encoding" : "ASCII", - "folds" : "", - "hash" : "1081962038", - "id" : "51993C6D", - "lastKnownWriteTime" : 1441615395, - "path" : "~/hyfo/R/getEnsemble.R", - "project_path" : "R/getEnsemble.R", - "properties" : { - "tempName" : "Untitled1" - }, - "relative_order" : 5, - "source_on_save" : false, - "type" : "r_source" -} \ No newline at end of file diff --git a/.Rproj.user/132DF987/sdb/per/t/579C533F b/.Rproj.user/132DF987/sdb/per/t/579C533F deleted file mode 100644 index 5976ecf..0000000 --- a/.Rproj.user/132DF987/sdb/per/t/579C533F +++ /dev/null @@ -1,18 +0,0 @@ -{ - "contents" : "\n\n#' Biascorrect the input timeseries or hyfo dataset\n#' \n#' Biascorrect the input time series or dataset, the input time series or dataset should consist of observation, hindcast, and forecast.\n#' observation and hindcast should belong to the same period, in order to calibrate. Then the modified forecast\n#' will be returned. If the input is a time series, first column should be date column and rest columns should be \n#' the value column. 
If the input is a hyfo dataset, the dataset should be the result of \\code{loadNcdf}, or a list\n#' file with the same format.\n#' \n#' @param frc a hyfo grid data output or a dataframe (time series) consists of Date column and one or more value columns, \n#' representing the forecast to be calibrated.\n#' @param hindcast a hyfo grid data output or a dataframe(time series) consists of Date column and one or more value columns, \n#' representing the hindcast data. This data will be used in the calibration of the forecast, so it's better to have the same date period as\n#' observation data. Check details for more information.\n#' @param obs a hyfo grid data output or a dataframe (time series) consists of Date column and one or more value columns, \n#' representing the observation data.\n#' @param method bias correct method, including 'delta', 'scaling'...\n#' @param scaleType only when the method \"scaling\" is chosen, scaleType will be available. Two different types\n#' of scaling method, 'add' and 'mult', which means additive and multiplicative scaling method. More info check \n#' details.\n#' @param input If input is a time series, \\code{input = 'TS'} needs to be assigned, or hyfo will take it as \n#' an hyfo output grid file. Default is time series input, where in most of the cases we prefer. If your input\n#' is a hyfo output file, \\code{input = 'hyfo'}.\n#' @param preci If the precipitation is biascorrected, then you have to assign \\code{preci = TRUE}. Since for\n#' precipitation, some biascorrect methods may not apply to, or some methods are specially for precipitation. \n#' Default is FALSE.\n#' @details \n#' \n#' Since climate forecast is based on global condition, when downscaling to different regions, it may include\n#' some bias, biascorrection is used then to fix the bias.\n#' \n#' \\strong{Hindcast}\n#' \n#' In order to bias correct, we need to pick up some\n#' data from the forecast to train with the observation, which is called hindcast in this function. Hindcast\n#' should have \\strong{EVERY} attributes that forecast has.\n#' \n#' Hindcast is also called re-forecast, is the forecast of the past. E.g. you have a forecast from year 2000-2010, assuming now you are in 2005. So from 2000-2005, this period\n#' is the hindcast period, and 2005-2010, this period is the forecast period.\n#'\n#'\n#' \\strong{How it works}\n#' \n#' Forecast product has to be calibrated, usually the system is doing forecast in real time. So, e.g., if the \n#' forecast starts from year 2000, assuming you are in year 2003, then you will have 3 years' hindcast \n#' data (year 2000 - 2003), which can be used to calibrate. And your forecast period is (2003-2004)\n#' \n#' E.g. you have observation from 2001 - 2002, this is your input obs. Then you can take the same \n#' period (2001-2002) from the forecast, which is the hindcast period. For forecast, you can take any period.\n#' The program will evaluate the obs and hindcast, to get the modification of the forecast, and then add the \n#' modification to the forecast data.\n#' \n#' \\strong{method}\n#' \n#' Different methods used in the bias correction.\n#' \n#' \\strong{delta}\n#' \n#' This method consists on adding to the observations the mean change signal (delta method). \n#' This method is applicable to any kind of variable but it is preferable to avoid it for bounded variables\n#' (e.g. precipitation, wind speed, etc.) because values out of the variable range could be obtained \n#' (e.g. 
negative wind speeds...)\n#' \n#' \\strong{scaling}\n#' \n#' This method consists on scaling the simulation with the difference (additive) or quotient (multiplicative) \n#' between the observed and simulated means in the train period. The \\code{additive} or \\code{multiplicative}\n#' correction is defined by parameter \\code{scaling.type} (default is \\code{additive}).\n#' The additive version is preferably applicable to unbounded variables (e.g. temperature) \n#' and the multiplicative to variables with a lower bound (e.g. precipitation, because it also preserves the frequency). \n#' \n#' \n#' @examples \n#' \n#' # Use testdl as an example, we take frc, hindcast and obs fro testdl.\n#' data(testdl)\n#' \n#' # common period has to be extracted in order to better train the forecast.\n#' \n#' datalist <- extractPeriod(testdl, startDate = '1994-1-1', endDate = '1995-10-1')\n#' \n#' frc <- datalist[[1]]\n#' hindcast <- datalist[[2]]\n#' obs <- datalist[[3]]\n#' \n#' # default method is delta\n#' frc_new <- biasCorrect(frc, hindcast, obs)\n#' \n#' # If the variable is precipitation, it cannot be negative value, so use multi scale method\n#' frc_new <- biasCorrect(frc, hindcast, obs, method = 'scaling', scaleType = 'multi')\n#' \n#' @references \n#' Bias correction methods come from \\code{biasCorrection} from \\code{dowscaleR}\n#' \n#' Santander Meteorology Group (2015). downscaleR: Climate data manipulation and statistical downscaling. R\n#' package version 0.6-0. https://github.com/SantanderMetGroup/downscaleR/wiki\n#' @export\n\nbiasCorrect <- function(frc, hindcast, obs, method = 'delta', scaleType = 'multi', input = 'TS', preci = FALSE){\n \n if (input == 'TS') {\n # First check if the first column is Date\n if (!grepl('-|/', obs[1, 1]) | !grepl('-|/', hindcast[1, 1]) | !grepl('-|/', frc[1, 1])) {\n stop('First column is not date or Wrong Date formate, check the format in ?as.Date{base} \n and use as.Date to convert.If your input is a hyfo dataset, put input = \"hyfo\" as an\n argument, check help for more info.')\n } \n \n \n # change to date type is easier, but in case in future the flood part is added, Date type doesn't have\n # hour, min and sec, so, it's better to convert it into POSIxlt.\n \n # if condition only accepts one condition, for list comparison, there are a lot of conditions, better\n # further process it, like using any.\n if (any(as.POSIXlt(hindcast[, 1]) != as.POSIXlt(obs[, 1]))) {\n warning('time of obs and time of hindcast are not the same, which may cause inaccuracy in \n the calibration.')\n }\n \n if (ncol(frc) == 2) {\n frc_data <- biasCorrect_core(frc[, 2], hindcast[, 2], obs[, 2], method = method, \n scaleType = scaleType, preci = preci)\n } else if (ncol(frc) > 2) {\n # In this case more than one value columns exist in the dataset, both frc and hindcast.\n \n n <- ncol(frc)\n \n # For every column, it's biascorrected respectively.\n frc_data <- lapply(2:n, function(x) biasCorrect_core(frc[, x], hindcast[, x], obs[, 2], method = method,\n scaleType = scaleType, preci = preci))\n frc_data <- do.call('cbind', frc_data)\n \n } else stop('Wrong TS input, check your TS dimension.')\n \n \n } else if (input == 'hyfo') {\n print('Under development...')\n }\n\n names <- colnames(frc)\n frc <- data.frame(frc[, 1], frc_data)\n colnames(frc) <- names\n \n return(frc)\n}\n\n\n# this is only used to calculate the value column, \nbiasCorrect_core <- function(frc, hindcast, obs, method = 'delta', scaleType = 'multi', preci = FALSE){\n \n\n # default is the simplest method in 
biascorrection, just do simple addition and subtraction.\n if (method == 'delta') {\n # comes from downscaleR biascorrection method\n frcMean <- mean(obs, na.rm = TRUE)\n hindcastMean <- mean(hindcast, na.rm = TRUE)\n frc <- obs - hindcastMean + frcMean\n \n } else if (method == 'scaling') {\n obsMean <- mean(obs, na.rm = TRUE)\n hindcastMean <- mean(hindcast, na.rm = TRUE)\n \n if (scaleType == 'multi') {\n frc <- frc / hindcastMean * obsMean\n \n } else if (scaleType == 'add') {\n frc <- frc - hindcastMean + obsMean\n }\n \n \n } else if (method == 'eqm') {\n \n # To be added, right now too complicated and not so much use.\n \n }\n \n \n return(frc)\n}", - "created" : 1440499519868.000, - "dirty" : false, - "encoding" : "ASCII", - "folds" : "", - "hash" : "2108272061", - "id" : "579C533F", - "lastKnownWriteTime" : 1441616505, - "path" : "~/hyfo/R/biasCorrect.R", - "project_path" : "R/biasCorrect.R", - "properties" : { - "tempName" : "Untitled1" - }, - "relative_order" : 1, - "source_on_save" : false, - "type" : "r_source" -} \ No newline at end of file diff --git a/.Rproj.user/132DF987/sdb/per/t/606BD0E b/.Rproj.user/132DF987/sdb/per/t/606BD0E deleted file mode 100644 index e6832fe..0000000 --- a/.Rproj.user/132DF987/sdb/per/t/606BD0E +++ /dev/null @@ -1,17 +0,0 @@ -{ - "contents" : "% Generated by roxygen2 (4.1.0.9001): do not edit by hand\n% Please edit documentation in R/biasCorrect.R\n\\name{biasCorrect}\n\\alias{biasCorrect}\n\\title{Biascorrect the input timeseries or hyfo dataset}\n\\usage{\nbiasCorrect(frc, hindcast, obs, method = \"delta\", scaleType = \"multi\",\n input = \"TS\", preci = FALSE)\n}\n\\arguments{\n\\item{frc}{a hyfo grid data output or a dataframe (time series) consists of Date column and one or more value columns,\nrepresenting the forecast to be calibrated.}\n\n\\item{hindcast}{a hyfo grid data output or a dataframe(time series) consists of Date column and one or more value columns,\nrepresenting the hindcast data. This data will be used in the calibration of the forecast, so it's better to have the same date period as\nobservation data. Check details for more information.}\n\n\\item{obs}{a hyfo grid data output or a dataframe (time series) consists of Date column and one or more value columns,\nrepresenting the observation data.}\n\n\\item{method}{bias correct method, including 'delta', 'scaling'...}\n\n\\item{scaleType}{only when the method \"scaling\" is chosen, scaleType will be available. Two different types\nof scaling method, 'add' and 'mult', which means additive and multiplicative scaling method. More info check\ndetails.}\n\n\\item{input}{If input is a time series, \\code{input = 'TS'} needs to be assigned, or hyfo will take it as\nan hyfo output grid file. Default is time series input, where in most of the cases we prefer. If your input\nis a hyfo output file, \\code{input = 'hyfo'}.}\n\n\\item{preci}{If the precipitation is biascorrected, then you have to assign \\code{preci = TRUE}. Since for\nprecipitation, some biascorrect methods may not apply to, or some methods are specially for precipitation.\nDefault is FALSE.}\n}\n\\description{\nBiascorrect the input time series or dataset, the input time series or dataset should consist of observation, hindcast, and forecast.\nobservation and hindcast should belong to the same period, in order to calibrate. Then the modified forecast\nwill be returned. If the input is a time series, first column should be date column and rest columns should be\nthe value column. 
If the input is a hyfo dataset, the dataset should be the result of \\code{loadNcdf}, or a list\nfile with the same format.\n}\n\\details{\nSince climate forecast is based on global condition, when downscaling to different regions, it may include\nsome bias, biascorrection is used then to fix the bias.\n\n\\strong{Hindcast}\n\nIn order to bias correct, we need to pick up some\ndata from the forecast to train with the observation, which is called hindcast in this function. Hindcast\nshould have \\strong{EVERY} attributes that forecast has.\n\nHindcast is also called re-forecast, is the forecast of the past. E.g. you have a forecast from year 2000-2010, assuming now you are in 2005. So from 2000-2005, this period\nis the hindcast period, and 2005-2010, this period is the forecast period.\n\n\n\\strong{How it works}\n\nForecast product has to be calibrated, usually the system is doing forecast in real time. So, e.g., if the\nforecast starts from year 2000, assuming you are in year 2003, then you will have 3 years' hindcast\ndata (year 2000 - 2003), which can be used to calibrate. And your forecast period is (2003-2004)\n\nE.g. you have observation from 2001 - 2002, this is your input obs. Then you can take the same\nperiod (2001-2002) from the forecast, which is the hindcast period. For forecast, you can take any period.\nThe program will evaluate the obs and hindcast, to get the modification of the forecast, and then add the\nmodification to the forecast data.\n\n\\strong{method}\n\nDifferent methods used in the bias correction.\n\n\\strong{delta}\n\nThis method consists on adding to the observations the mean change signal (delta method).\nThis method is applicable to any kind of variable but it is preferable to avoid it for bounded variables\n (e.g. precipitation, wind speed, etc.) because values out of the variable range could be obtained\n (e.g. negative wind speeds...)\n\n \\strong{scaling}\n\nThis method consists on scaling the simulation with the difference (additive) or quotient (multiplicative)\nbetween the observed and simulated means in the train period. The \\code{additive} or \\code{multiplicative}\ncorrection is defined by parameter \\code{scaling.type} (default is \\code{additive}).\nThe additive version is preferably applicable to unbounded variables (e.g. temperature)\nand the multiplicative to variables with a lower bound (e.g. precipitation, because it also preserves the frequency).\n}\n\\examples{\n# Use testdl as an example, we take frc, hindcast and obs fro testdl.\ndata(testdl)\n\n# common period has to be extracted in order to better train the forecast.\n\ndatalist <- extractPeriod(testdl, startDate = '1994-1-1', endDate = '1995-10-1')\n\nfrc <- datalist[[1]]\nhindcast <- datalist[[2]]\nobs <- datalist[[3]]\n\n# default method is delta\nfrc_new <- biasCorrect(frc, hindcast, obs)\n\n# If the variable is precipitation, it cannot be negative value, so use multi scale method\nfrc_new <- biasCorrect(frc, hindcast, obs, method = 'scaling', scaleType = 'multi')\n}\n\\references{\nBias correction methods come from \\code{biasCorrection} from \\code{dowscaleR}\n\nSantander Meteorology Group (2015). downscaleR: Climate data manipulation and statistical downscaling. R\npackage version 0.6-0. 
https://github.com/SantanderMetGroup/downscaleR/wiki\n}\n\n", - "created" : 1441615447496.000, - "dirty" : false, - "encoding" : "ASCII", - "folds" : "", - "hash" : "1599471990", - "id" : "606BD0E", - "lastKnownWriteTime" : 1441616512, - "path" : "~/hyfo/man/biasCorrect.Rd", - "project_path" : "man/biasCorrect.Rd", - "properties" : { - }, - "relative_order" : 6, - "source_on_save" : false, - "type" : "r_doc" -} \ No newline at end of file diff --git a/.Rproj.user/132DF987/sdb/per/t/9038F880 b/.Rproj.user/132DF987/sdb/per/t/9038F880 deleted file mode 100644 index 50846d1..0000000 --- a/.Rproj.user/132DF987/sdb/per/t/9038F880 +++ /dev/null @@ -1,17 +0,0 @@ -{ - "contents" : "#' Get spatial map of the input dataset.\n#' \n#' @param dataset A list containing different information, should be the result of reading netcdf file using\n#' \\code{library(ecomsUDG.Raccess)}.\n#' @param method A string showing different calculating method for the map. More information please refer to\n#' details.\n#' @param member A number showing which member is selected to get, if the dataset has a \"member\" dimension. Default\n#' is NULL, if no member assigned, and there is a \"member\" in dimensions, the mean value of the members will be\n#' taken.\n#' @param ... Check \\code{?getSpatialMap_mat} for details, e.g., x, y, title, catchment, \n#' points, output,\n#' @return A matrix representing the raster map is returned, and the map is plotted.\n#' @details\n#' There are following methods to be selected, \n#' \"meanAnnual\": annual rainfall of each year is plotted. \n#' \"winter\", \"spring\", \"autumn\", \"summer\": MEAN seasonal rainfall of each year is plotted.\n#' Month(number 1 to 12): MEAN month rainfall of each year is plotted, e.g. MEAN march rainfall of each year.\n#' \"mean\", \"max\", \"min\": mean daily, maximum daily, minimum daily precipitation.\n#' @examples\n#' \n#' #gridData provided in the package is the result of \\code {loadGridData{ecomsUDG.Raccess}}\n#' data(tgridData)\n#' getSpatialMap(tgridData, method = 'meanAnnual')\n#' getSpatialMap(tgridData, method = 'winter')\n#' \n#' \n#' getSpatialMap(tgridData, method = 'winter', catchment = testCat)\n#' \n#' file <- system.file(\"extdata\", \"points.txt\", package = \"hyfo\")\n#' points <- read.table(file, header = TRUE, sep = ',' )\n#' getSpatialMap(tgridData, method = 'winter', catchment = testCat, points = points)\n#' \n#' @export\ngetSpatialMap <- function(dataset, method = NULL, member = 'mean', ...) {\n\n #check input dataset\n checkWord <- c('Data', 'xyCoords', 'Dates')\n if (any(is.na(match(checkWord, attributes(dataset)$names)))) {\n stop('Input dataset is incorrect, it should contain \"Data\", \"xyCoords\", and \"Dates\", \n check help for details.')\n }\n \n \n #range of the dataset just loaded \n lon <- dataset$xyCoords$x\n lat <- dataset$xyCoords$y\n startTime <- as.POSIXlt(dataset$Dates$start, tz = 'GMT')\n yearIndex <- startTime$year + 1900\n monthIndex <-startTime$mon + 1\n data <- dataset$Data\n \n # Dimension needs to be arranged. 
Make sure first and second dimension is lat and lon.\n att <- attributes(data)$dimensions\n dimIndex <- seq(1, length(att))\n dimIndex1 <- match(c('lon', 'lat', 'time'), att)# match can apply to simple cases\n dimIndex2 <- dimIndex[-dimIndex1]# choose nomatch\n \n \n data <- aperm(data, c(dimIndex1, dimIndex2))\n attributes(data)$dimensions <- att[c(dimIndex1, dimIndex2)]\n \n # Because in the following part, only 3 dimensions are allowed, so data has to be processed.\n if (member == 'mean' & any(attributes(data)$dimensions == 'member')) {\n dimIndex3 <- which(attributes(data)$dimensions != 'member')\n data <- apply(data, MARGIN = dimIndex3, FUN = mean, na.rm = TRUE)\n message('Mean value of the members are returned.')\n \n } else if (member != 'mean' & any(attributes(data)$dimensions == 'member')) {\n dimIndex3 <- which(attributes(data)$dimensions == 'member')\n data <- chooseDim(data, dimIndex3, member, drop = TRUE)\n \n } else if (member != 'mean' & !any(attributes(data)$dimensions == 'member')){\n stop('There is no member part in the dataset, but you choose one, check the input\n dataset or change your arguments.')\n }\n \n \n \n \n if (is.null(method)) {\n \n warning('You should shoose a method, unless input is a matrix directly to be plotted.')\n #in case the dataset is ready to plot and no need to calculate\n \n } else if (method == 'meanAnnual') { \n #mean value of the annual precipitation over the period of the data \n #time <- proc.time()\n if (length(unique(monthIndex)) < 12) {\n warning ('There are less than 12 months in a year, the results may be inaccurate.')\n }\n data_new <- apply(data, MARGIN = c(2, 1), FUN = getMeanPreci, yearIndex = yearIndex, method = 'annual')\n #newTime <- proc.time() - time\n title_d <- 'Mean Annual Precipitation (mm / year)'\n \n } else if (method == 'winter') {\n #mean value of the seasonal precipitation, in this case, winter \n #time <- proc.time()\n wm <- match(c(12, 1, 2), unique(monthIndex))\n if (length(which(!is.na(wm))) < 3) {\n stop ('Winter has less than 3 months, check data and try to calculate every month\n seperately or choose another season.')\n }\n data_new <- apply(data, MARGIN = c(2, 1), FUN = getMeanPreci, yearIndex = yearIndex, monthIndex = monthIndex, \n method = 'winter')\n #newTime <- proc.time() - time\n title_d <- 'Mean Winter Precipitation (mm / winter)'\n \n } else if (method == 'spring') {\n wm <- match(c(3, 4, 5), unique(monthIndex))\n if (length(which(!is.na(wm))) < 3) {\n stop ('Spring has less than 3 months, check data and try to calculate every month\n seperately or choose another season.')\n }\n \n data_new <- apply(data, MARGIN = c(2, 1), FUN = getMeanPreci, yearIndex = yearIndex, monthIndex = monthIndex, \n method = 'spring') \n title_d <- 'Mean Spring Precipitation (mm / spring)'\n \n } else if (method == 'summer') {\n wm <- match(c(6, 7, 8), unique(monthIndex))\n if (length(which(!is.na(wm))) < 3) {\n stop ('Summer has less than 3 months, check data and try to calculate every month\n seperately or choose another season.')\n }\n \n data_new <- apply(data, MARGIN = c(2, 1), FUN = getMeanPreci, yearIndex = yearIndex, monthIndex = monthIndex, \n method = 'summer') \n title_d <- 'Mean Summer Precipitation (mm / summer)'\n \n } else if (method == 'autumn') {\n \n wm <- match(c(9, 10, 11), unique(monthIndex))\n if (length(which(!is.na(wm))) < 3) {\n stop ('Autumn has less than 3 months, check data and try to calculate every month\n seperately or choose another season.')\n }\n \n data_new <- apply(data, MARGIN = c(2, 1), 
FUN = getMeanPreci, yearIndex = yearIndex, monthIndex = monthIndex, \n method = 'autumn') \n title_d <- 'Mean Autumn Precipitation (mm / autumn)'\n \n } else if (method == 'mean') {\n \n #sum value of the dataset, this procedure is to get the mean value\n data_new <- apply(data, MARGIN = c(2, 1), FUN = mean, na.rm = TRUE)\n title_d <- 'Mean Daily Precipitation (mm / day)'\n \n } else if (method == 'max') {\n \n data_new <- apply(data, MARGIN = c(2, 1), FUN = suppressWarnings(max), na.rm = TRUE)\n title_d <- 'Max Daily Precipitation (mm / day)'\n \n } else if (method == 'min') {\n \n data_new <- apply(data, MARGIN = c(2, 1), FUN = suppressWarnings(min), na.rm = TRUE)\n title_d <- 'Min Daily Precipitation (mm / day)'\n \n } else if (is.numeric(method)) {\n \n data_new <- apply(data, MARGIN = c(2, 1), FUN = getMeanPreci, yearIndex = yearIndex, monthIndex = monthIndex, \n method = method) \n title_d <- paste(month.abb[method], 'Precipitation (mm / month)', sep = ' ')\n \n } else {\n wrongMethod <- method\n stop(paste('no method called', wrongMethod))\n }\n # This is to give attributes to the matrix and better be melted in ggplot.\n colnames(data_new) <- round(lon, 2)\n rownames(data_new) <- round(lat, 2)\n \n # If ... also has a title argument, this will cause conflicts. so title has to be renamed as title_d\n # This has to be paid a lot of attention when use ... to pass arguments.\n output <- getSpatialMap_mat(matrix = data_new, title_d = title_d, ...)\n return(output)\n}\n\n\n\n\n\n#' rePlot raster matrix\n#' \n#' replot the matrix output from \\code{getSpatialMap}, when \\code{output = 'data'} or output is default\n#' value.\n#' \n#' @param matrix A matrix raster, should be the result of \\code{getSpatialMap()}, output should be default\n#' or 'data'\n#' @param title_d A string showing the title of the plot, defaut is NULL.\n#' @param catchment A catchment file geting from \\code{shp2cat()} in the package, if a catchment is available for background.\n#' @param points A dataframe, showing other information, e.g., location of the gauging stations. The \n#' the data.frame should be with columes \"name, lon, lat, z, value\".\n#' @param output A string showing the type of the output, if \\code{output = 'ggplot'}, the returned \n#' data can be used in ggplot and \\code{getSpatialMap_comb()}; if \\code{output = 'plot'}, the returned data is the plot containing all \n#' layers' information, and can be plot directly or used in grid.arrange; if not set, the raster matrix data\n#' will be returned.\n#' @param name If \\code{output = 'ggplot'}, name has to be assigned to your output, in order to differentiate\n#' different outputs in the later multiplot using \\code{getSpatialMap_comb}.\n#' @param info A boolean showing whether the information of the map, e.g., max, mean ..., default is FALSE.\n#' @param scale A string showing the plot scale, 'identity' or 'sqrt'.\n#' @param colors Most of time you don't have to set this, but if you are not satisfied with the \n#' default color, you can set your own palette here. e.g., \\code{colors = c('red', 'blue')}, then\n#' the value from lowest to highest, will have the color from red to blue. More info about colors,\n#' please check ?palette().\n#' @param ... \\code{title, x, y} showing the title and x and y axis of the plot. e.g. 
\\code{title = 'aaa'}\n#'default is about precipitation.\n#' @return A matrix representing the raster map is returned, and the map is plotted.\n#' @examples\n#' data(tgridData)# the result of \\code{loadNcdf}\n#' #the output type of has to be default or 'data'.\n#' a1 <- getSpatialMap(tgridData, method = 'mean')\n#' a2 <- getSpatialMap(tgridData, method = 'max')\n#' a3 <- getSpatialMap(tgridData, method = 'winter')\n#' a4 <- getSpatialMap(tgridData, method = 'summer')\n#' #For example, if we want to investigate the difference between mean value and max.\n#' \n#' a5 <- a2 - a1\n#' getSpatialMap_mat(a4)\n#' \n#' #Or to investigate the difference between winter value and summer value.\n#' a6 <- a3 - a4\n#' getSpatialMap_mat(a6)\n#' \n#' @export\n#' @import ggplot2 plyr maps maptools rgeos\n#' @importFrom stats median\n#' @importFrom reshape2 melt\n#' @references \n#' R Core Team (2015). R: A language and environment for statistical computing. R Foundation for\n#' Statistical Computing, Vienna, Austria. URL http://www.R-project.org/.\n#' \n#' Hadley Wickham (2007). Reshaping Data with the reshape Package. Journal of Statistical Software,\n#' 21(12), 1-20. URL http://www.jstatsoft.org/v21/i12/.\n#' \n#' Hadley Wickham (2011). The Split-Apply-Combine Strategy for Data Analysis. Journal of Statistical\n#' Software, 40(1), 1-29. URL http://www.jstatsoft.org/v40/i01/.\n#' \n#' Original S code by Richard A. Becker and Allan R. Wilks. R version by Ray Brownrigg. Enhancements\n#' by Thomas P Minka (2015). maps: Draw Geographical Maps. R package version\n#' 2.3-11. http://CRAN.R-project.org/package=maps\n#' \n#' Roger Bivand and Nicholas Lewin-Koh (2015). maptools: Tools for Reading and Handling Spatial\n#' Objects. R package version 0.8-36. http://CRAN.R-project.org/package=maptools\n#' \n#' Roger Bivand and Colin Rundel (2015). rgeos: Interface to Geometry Engine - Open Source (GEOS). R\n#' package version 0.3-11. http://CRAN.R-project.org/package=rgeos\n#' \n#' \n#' \ngetSpatialMap_mat <- function(matrix, title_d = NULL, catchment = NULL, points = NULL, output = 'data', \n name = NULL, info = FALSE, scale = 'identity', colors = NULL, ...) {\n #check input\n checkWord <- c('lon', 'lat', 'z', 'value')\n if (is.null(attributes(matrix)$dimnames)) {\n stop('Input matrix is incorrect, check help to know how to get the matrix.')\n } else if (!is.null(catchment) & class(catchment) != \"SpatialPolygonsDataFrame\") {\n stop('Catchment format is incorrect, check help to get more details. ')\n } else if (!is.null(points) & any(is.na(match(checkWord, attributes(points)$names)))) {\n stop('Points should be a dataframe with colnames \"lon, lat, z, value\".')\n }\n \n #ggplot\n #for the aes option in ggplot, it's independent from any other command through all ggplot, and aes() function\n #get data from the main dataset, in this case, data_ggplot. for other functions in ggplot, if it wants to use \n #data from the main dataset as parameters, it has to use aes() function. if not, it has to use data available \n #in the environment.\n #in other words, all the parameters in aes(), they have to come from the main dataset. 
Otherwise, just put them\n #outside aes() as normal parameters.\n \n if (info == TRUE) { \n plotMax <- round(max(matrix, na.rm = TRUE), 2)\n plotMin <- round(min(matrix, na.rm = TRUE), 2)\n plotMean <- round(mean(matrix, na.rm = TRUE), 2)\n plotMedian <- round(median(matrix, na.rm = TRUE), 2)\n word <- paste('\\n\\n', paste('Max', '=', plotMax), ',', paste('Min', '=', plotMin), ',',\n paste('Mean', '=', plotMean), ',', paste('Median', '=', plotMedian))\n } else {\n word <- NULL\n }\n \n x_word <- paste('Longitude', word)\n world_map <- map_data('world')\n \n # For some cases, matrix has to be reshaped, because it's too fat or too slim, to make\n # it shown on the map, the ratio is x : y is 4 : 3.\n matrix <- reshapeMatrix(matrix)\n \n \n # cannot remove NA, or the matrix shape will be changed.\n data_ggplot <- melt(matrix, na.rm = FALSE) \n \n colnames(data_ggplot) <- c('lat', 'lon', 'value')\n theme_set(theme_bw())\n \n if (is.null(colors)) colors <- c('yellow', 'orange', 'red')\n # if (is.null(colors)) colors <- rev(rainbow(n = 20, end = 0.7))\n \n mainLayer <- with(data_ggplot, {\n \n ggplot(data = data_ggplot) +\n geom_tile(aes(x = lon, y = lat, fill = value)) +\n #scale_fill_discrete()+\n scale_fill_gradientn(colours = colors, na.value = 'transparent') +#usually scale = 'sqrt'\n #guide = guide_colorbar, colorbar and legend are not the same.\n guides(fill = guide_colourbar(title='Rainfall (mm)', barheight = rel(9), trans = scale)) +#usually scale = 'sqrt'\n geom_map(data = world_map, map = world_map, aes(map_id = region), fill = 'transparent', \n color='black') +\n # guides(fill = guide_colorbar(title='Rainfall (mm)', barheight = 15))+\n xlab(x_word) +\n ylab('Latitude') +\n ggtitle(title_d) +\n labs(empty = NULL, ...) +#in order to pass \"...\", arguments shouldn't be empty.\n theme(plot.title = element_text(size = rel(1.8), face = 'bold'),\n axis.title.x = element_text(size = rel(1.7)),\n axis.title.y = element_text(size = rel(1.7)),\n axis.text.x = element_text(size = rel(1.9)),\n axis.text.y = element_text(size = rel(1.9)),\n legend.text = element_text(size = rel(1.3)),\n legend.title = element_text(size = rel(1.3)))\n# coord_fixed(ratio = 1, xlim = xlim, ylim = ylim)\n \n# geom_rect(xmin=min(lon)+0.72*(max(lon)-min(lon)),\n# xmax=min(lon)+0.99*(max(lon)-min(lon)),\n# ymin=min(lat)+0.02*(max(lat)-min(lat)),\n# ymax=min(lat)+0.28*(max(lat)-min(lat)),\n# fill='white',colour='black')+\n# annotate('text', x = min(lon), y = min(lat), label=word, hjust = 0, vjust = -1)\n \n })\n \n printLayer <- mainLayer\n \n #catchment conversion\n if (is.null(catchment) == FALSE) {\n a <- catchment\n a@data$id <- rownames(a@data)\n b <- fortify(a, region = 'id')\n c <- join(b, a@data, by = 'id')\n catchmentLayer <- with(c, {\n geom_polygon(data = c, aes(long, lat, group = group), color = 'black', \n fill = 'transparent')\n })\n \n \n printLayer <- printLayer + catchmentLayer\n }\n #plot points\n if (is.null(points) == FALSE) {\n pointLayer <- with(points, {\n geom_point(data = points, aes(x = lon, y = lat, size = value, colour = z),\n guide = guide_legend(barheight = rel(3)))\n \n \n })\n \n printLayer <- printLayer + pointLayer\n }\n \n print(printLayer)\n \n if (output == 'ggplot') {\n if (is.null(name)) stop('\"name\" argument not found, \n If you choose \"ggplot\" as output, please assign a name.')\n data_ggplot$Name <- rep(name, dim(data_ggplot)[1])\n return (data_ggplot)\n } else if (output == 'plot') {\n return(printLayer)\n } else {\n return(matrix)\n }\n}\n\n\n#' Combine maps together\n#' 
@param ... different maps generated by \\code{getSpatialMap(, output = 'ggplot')}, see details.\n#' @param nrow A number showing the number of rows.\n#' @param list If input is a list containing different ggplot data, use \\code{list = inputlist}.\n#' @param x A string of x axis name.\n#' @param y A string of y axis name.\n#' @param title A string of the title.\n#' @param output A boolean, if chosen TRUE, the output will be given.\n#' @return A combined map.\n#' @examples\n#' data(tgridData)# the result of \\code{loadGridData{ecomsUDG.Raccess}}\n#' #The output should be 'ggplot'\n#' a1 <- getSpatialMap(tgridData, method = 'summer', output = 'ggplot', name = 'a1')\n#' a2 <- getSpatialMap(tgridData, method = 'winter', output = 'ggplot', name = 'a2')\n#'# a3 <- getSpatialMap(tgridData, method = 'mean', output = 'ggplot', name = 'a3')\n#'# a4 <- getSpatialMap(tgridData, method = 'max', output = 'ggplot', name = 'a4')\n#' getSpatialMap_comb(a1, a2)\n#' getSpatialMap_comb(a1, a2, nrow = 2)\n#' \n#' @details\n#' For \\code{getSpatialMap_comb}, the maps to be compared should be with same size and resolution, \n#' in other words, they should be fully overlapped by each other.\n#' \n#' If they have different resolutions, use \\code{interpGridData{ecomsUDG.Raccess}} to interpolate.\n#' \n#' @export\n#' @import ggplot2 maps\n#' @references \n#' H. Wickham. ggplot2: elegant graphics for data analysis. Springer New York, 2009.\ngetSpatialMap_comb <- function(..., list = NULL, nrow = 1, x = '', y = '', title = '', \n output = FALSE) {\n \n \n if (!is.null(list)) {\n data_ggplot <- do.call('rbind', list)\n } else {\n maps <- list(...)\n checkBind(maps, 'rbind')\n data_ggplot <- do.call('rbind', maps)\n }\n \n if (!class(data_ggplot) == 'data.frame') {\n warning('Your input is probably a list, but you forget to add \"list = \" before it.\n Try again, or check help for more information.')\n } else if (is.null(data_ggplot$Name)) {\n stop('No \"Name\" column in the input data, check the arguments in getSpatialMap(), if \n output = \"ggplot\" is assigned, more info please check ?getSpatialMap().')\n }\n \n data_ggplot$Name <- factor(data_ggplot$Name, levels = data_ggplot$Name, ordered = TRUE)\n \n# lim <- getLim(data_ggplot$lon, data_ggplot$lat)\n# xlim <- lim[[1]] \n# ylim <- lim[[2]]\n \n world_map <- map_data('world')\n theme_set(theme_bw())\n mainLayer <- with(data_ggplot, {\n ggplot(data = data_ggplot) + \n geom_tile(aes(x = lon, y = lat, fill = value)) +\n #scale_fill_gradient(high = 'red', low = 'yellow')+\n scale_fill_gradientn(colours = c('yellow', 'orange', 'red'), na.value = 'transparent') +#usually scale = 'sqrt'\n geom_map(data = world_map, map = world_map, aes(map_id = region), fill = 'transparent', color = 'black') +\n# guides(fill = guide_colourbar(title='Rainfall (mm)', barheight = rel(9), trans = scale)) +#\n facet_wrap(~ Name, nrow = nrow) +\n theme(plot.title = element_text(size = rel(1.8), face = 'bold'),\n axis.title.x = element_text(size = rel(1.7)),\n axis.title.y = element_text(size = rel(1.7)),\n axis.text.x = element_text(size = rel(1.9)),\n axis.text.y = element_text(size = rel(1.9)),\n legend.text = element_text(size = rel(1.3)),\n legend.title = element_text(size = rel(1.3))) +\n # no solultion for some very fat or very slim, in facet ggplot2, so, it's not buitiful.\n #coord_equal() +\n labs(x = x, y = y, title = title)\n })\n \n \n suppressWarnings(print(mainLayer))\n \n if (output == TRUE) return(data_ggplot)\n}\n\n\n\n\nchooseDim <- function(array, dim, value, drop = FALSE) { 
\n # Create list representing arguments supplied to [\n # bquote() creates an object corresponding to a missing argument\n dimnames <- attributes(array)$dimensions\n \n indices <- rep(list(bquote()), length(dim(array)))\n indices[[dim]] <- value\n \n if (dim(array)[dim] < max(value)) {\n stop('Chosen member exceeds the member range of the dataset.')\n }\n \n # Generate the call to [\n call <- as.call(c(\n list(as.name(\"[\"), quote(array)),\n indices,\n list(drop = drop)))\n # Print it, just to make it easier to see what's going on\n # Print(call)\n\n # Finally, evaluate it\n output <- eval(call)\n \n if (length(dim(output)) == length(dimnames)) {\n attributes(output)$dimensions <- dimnames\n } else if (length(dim(output)) < length(dimnames)){\n \n # In this case, one dimension is dropped, if value is a number \n # and drop == T, this situation can appear. So the dropped dimemsion\n # should be the chosen dimension.\n i <- 1:length(dimnames)\n # get rid of the dropped dimensin\n i <- i[-dim]\n attributes(output)$dimensions <- dimnames[i]\n }\n \n return(output)\n}\n\n\nreshapeMatrix <- function(matrix) {\n # This is for the map plot to keep the ratio x : y == 4:3\n # mainly used in map plot in ggplot2.\n \n \n # So the input matrix should be reshaped, add in some NA values, \n # in order to be shown appropriately in ggplot.\n \n lon <- as.numeric(colnames(matrix))\n lat <- as.numeric(rownames(matrix))\n \n dx <- mean(diff(lon))\n dy <- mean(diff(lat))\n \n lx <- max(lon) - min(lon)\n ly <- max(lat) - min(lat)\n \n \n if (0.75 * lx < ly) {\n # In this case, x needs to be made longer\n \n xhalf <- 0.67 * ly\n xadd <- xhalf - lx / 2\n # calculate how many columns needs to be added.\n nxadd <- abs(round(xadd / dx))\n \n madd1 <- matrix(data = NA, nrow = length(lat), ncol = nxadd)\n madd2 <- madd1\n colnames(madd1) <- seq(to = min(lon) - dx, length = nxadd, by = dx)\n colnames(madd2) <- seq(from = max(lon) + dx, length = nxadd, by = dx)\n \n matrix_new <- cbind(madd1, matrix, madd2) \n \n \n } else if (0.75 * lx > ly) {\n \n yhalf <- 0.38 * lx\n yadd <- yhalf - ly / 2\n nyadd <- abs(round(yadd / dy))\n \n madd1 <- matrix(data = NA, nrow = nyadd, ncol = length(lon))\n madd2 <- madd1 \n \n rownames(madd1) <- seq(to = max(lat) + dy, length = nyadd, by = -dy)\n rownames(madd2) <- seq(from = min(lat) - dx, length = nyadd, by = -dy)\n \n matrix_new <- rbind(madd1, matrix, madd2)\n \n } else {\n matrix_new <- matrix\n }\n \n return(matrix_new)\n}\n", - "created" : 1441195980783.000, - "dirty" : false, - "encoding" : "ASCII", - "folds" : "", - "hash" : "3592855216", - "id" : "9038F880", - "lastKnownWriteTime" : 1441196695, - "path" : "~/hyfo/R/getSpatialMap.R", - "project_path" : "R/getSpatialMap.R", - "properties" : { - }, - "relative_order" : 3, - "source_on_save" : false, - "type" : "r_source" -} \ No newline at end of file diff --git a/.Rproj.user/132DF987/sdb/per/t/E7F036D3 b/.Rproj.user/132DF987/sdb/per/t/A902E596 similarity index 85% rename from .Rproj.user/132DF987/sdb/per/t/E7F036D3 rename to .Rproj.user/132DF987/sdb/per/t/A902E596 index 18a6381..a666b34 100644 --- a/.Rproj.user/132DF987/sdb/per/t/E7F036D3 +++ b/.Rproj.user/132DF987/sdb/per/t/A902E596 @@ -1,17 +1,17 @@ { - "contents" : "Package: hyfo\nType: Package\nTitle: Hydrology and Climate Forecasting R Package for Data Analysis and Visualization\nVersion: 1.1.7\nDate: 2015-07-02\nAuthors@R: person(\"Yuanchao\", \"Xu\", email = \"xuyuanchao37@gmail.com\",\n role = c(\"aut\", \"cre\"))\nDescription: This package can be used as a tool for 
hydrology and climate forecasting. \n There are several tools including data processing, data visualization and data analysis. \n For hydrological and hydraulic modellers, hyfo can be a good pre-processing and post-processing \n tool for you.\n hyfo has been tested stable on windows platform.\nLicense: GPL-2\nDepends: R (>= 3.1.0), stats (>= 3.1.3), utils(>= 3.1.3)\nImports: ggplot2 (>= 1.0.1),\n reshape2 (>= 1.4.1),\n zoo (>= 1.7-12),\n rgdal (>= 0.9-3),\n plyr (>= 1.8.3),\n moments (>= 0.14),\n lmom (>= 2.5),\n maps(>= 2.3-9),\n maptools (>= 0.8-36),\n rgeos (>= 0.3-8),\n ncdf (>= 1.6.8)\nSuggests: gridExtra,\n knitr\nVignetteBuilder: knitr\nLazyData: true\nURL: http://yuanchao-xu.github.io/hyfo/\nBugReports: https://github.com/Yuanchao-Xu/hyfo/issues\nrepository: github", - "created" : 1441616319813.000, + "contents" : "Package: hyfo\nType: Package\nTitle: Hydrology and Climate Forecasting R Package for Data Analysis and Visualization\nVersion: 1.2.0\nDate: 2015-07-02\nAuthors@R: person(\"Yuanchao\", \"Xu\", email = \"xuyuanchao37@gmail.com\",\n role = c(\"aut\", \"cre\"))\nDescription: This package can be used as a tool for hydrology and climate forecasting. \n There are several tools including data processing, data visualization and data analysis. \n For hydrological and hydraulic modellers, hyfo can be a good pre-processing and post-processing \n tool for you.\n hyfo has been tested stable on windows platform.\nLicense: GPL-2\nDepends: R (>= 3.1.0), stats (>= 3.1.3), utils(>= 3.1.3)\nImports: ggplot2 (>= 1.0.1),\n reshape2 (>= 1.4.1),\n zoo (>= 1.7-12),\n rgdal (>= 0.9-3),\n plyr (>= 1.8.3),\n moments (>= 0.14),\n lmom (>= 2.5),\n maps(>= 2.3-9),\n maptools (>= 0.8-36),\n rgeos (>= 0.3-8),\n ncdf (>= 1.6.8)\nSuggests: gridExtra,\n knitr\nVignetteBuilder: knitr\nLazyData: true\nURL: http://yuanchao-xu.github.io/hyfo/\nBugReports: https://github.com/Yuanchao-Xu/hyfo/issues\nrepository: github", + "created" : 1441721267677.000, "dirty" : false, "encoding" : "ASCII", "folds" : "", - "hash" : "712805123", - "id" : "E7F036D3", - "lastKnownWriteTime" : 1441616324, + "hash" : "3829689401", + "id" : "A902E596", + "lastKnownWriteTime" : 1441825605, "path" : "~/hyfo/DESCRIPTION", "project_path" : "DESCRIPTION", "properties" : { }, - "relative_order" : 7, + "relative_order" : 3, "source_on_save" : false, "type" : "dcf" } \ No newline at end of file diff --git a/.Rproj.user/132DF987/sdb/per/t/A9B56DE9 b/.Rproj.user/132DF987/sdb/per/t/A9B56DE9 index 0fc932a..6ec33d0 100644 --- a/.Rproj.user/132DF987/sdb/per/t/A9B56DE9 +++ b/.Rproj.user/132DF987/sdb/per/t/A9B56DE9 @@ -1,12 +1,12 @@ { - "contents" : "---\ntitle: '[hyfo Easy Start](http://yuanchao-xu.github.io/hyfo/)'\nauthor: \"Yuanchao Xu\"\ndate: '`r Sys.Date()`'\noutput:\n pdf_document:\n toc: yes\n toc_depth: 3\n html_vignette:\n toc: yes\n toc_depth: 3\n html_document:\n toc: yes\nvignette: |\n %\\VignetteIndexEntry{hyfo easy start} \n %\\VignetteEngine{knitr::rmarkdown} \n %\\VignetteEncoding{ASCII}\n---\n\n# Introduction\n\n**Official Website is [here](http://yuanchao-xu.github.io/hyfo/)**\n\nhyfo is designed for hydrology and climate forecasting anaylasis, containing a number of tools including data extration, data processing and data visulization.\n\n**If you feel hyfo is of a little help, please cite it as following:**\n\nXu, Yuanchao(2015). hyfo: Hydrology and Climate Forecasting R Package for Data Analysis and Visualization. 
Retrieved from http://yuanchao-xu.github.io/hyfo/\n\n#### TIPS\n* For the hydrology tools part, the minimum time unit is a day, i.e., it mainly focuses on water resource and some long term analysis. For flood analysis part, it will be added in future.\n\n\n* One important characteristic by which hyfo can be distinguished from others is its convenience in multiple plots and series plots. Most data visualization tool in hyfo provides the output that can be directly re-plot by `ggplot2`, if `output = 'ggplot'` is assigned in the argument of the function, which will be easier for the users to generated series/multiple plots afterwards. When `output = 'ggplot'` is selected, you also have to assigne a `name = 'yourname'` in the argument, for the convenience of generating multiplots in future. All the functions ending with `_comb` can generated series/multiple plots, details can be found in the user mannual. \n\n\n* For the forecasting tools part, `hyfo` mainly focuses on the post processing of the gridData derived from forecasts or other sources. The input is a list file, usually an NetCDF file. There are `getNcdfVar()`, `loadNcdf()` and `writeNcdf()` prepared in hyfo, for you to deal with NetCDF file. \n\n* If you don't like the tile, x axis, y axis of the plot, just set them as '', e.g. `title = ''`\n\n* For R beginners, R provides different functions to write to file. `write.table` is a popular choice, and after write the results to a file, you can directly copy paste to your model or to other uses.\n\n* The functions end with `_anarbe` are the functions designed specially for some case in Spain, those functions mostly are about data collection of the anarbe catchment, which will be introduced in the end of this mannual.\n\n##### Installation\n* You can go [here](http://yuanchao-xu.github.io/hyfo/) to download installation file, and use IDE like Rstudio to install from file, both `tar.gz` and `zip` formats are provided.\n\n* Also you can use the following code to install the latest version.\n```{r, eval=FALSE}\n install.packages('devtools')\n # Ignore the warning that Rtool is not installed, unless you want other function from devtools.\n # If you have \"devtools\" installed already, you just need to run the following code.\n devtools::install_github('Yuanchao-Xu/hyfo')\n```\n\n\n\n\n\n\n# 1. Hydrology\n\n##### Note\n\nIf you are an experienced R user, and know how to read data in R, deal with dataframe, generate date and list, please start from next charpter, \"1.2 Rainfall Analysis\"\n\n\n\n\n\n\n\n## 1.1 Start from Raw Data\n\n\n\n\n\n\n### 1.1.1 From File\n\n`hyfo` does provide a common tool for collecting data from different type of files, including \"txt\", \n\n\"csv\" and \"excel\", which has to be assigned to the argument `fileType`.\n\nNow let's use internal data as an example.\n\n```{r, fig.show = 'hold', fig.height = 4, fig.width = 5}\nlibrary(hyfo)#load the package.\n# get the folder containing different csv (or other type) files.\nfile <- system.file(\"extdata\", \"1999.csv\", package = \"hyfo\")\nfolder <- strsplit(file, '1999')[[1]][1]\n\n# Extract and combine content from different files and in each file, the extracted zone is \n# from row 10 to row 20, Column 1 to column2.\na <- collectData(folder, fileType = 'csv', range = c(10, 20, 1, 2))\nstr(a)\n```\n\n`a` cannot be directly inputed in `hyfo`, it still needs some process.\n```{r}\n# Check the date to see if it follows the format in ?as.Date(), if not, \n# use as.Date to convert. 
\na <- data.frame(a)\n#get date\ndate <- a[, 1]\n\n# The original format is d/m/year, convert to formal format.\ndate <- as.Date(date, format = '%d/%m/%Y')\na[, 1] <- date\n\n# Now a has become `a` time series dataframe, which is the atom element of the analysis. \n#`hyfo` deals with list containing different time series dataframe. In this example, \n#there is only one dataframe, and more examples please refer to the following chapter.\ndatalist <- list(a)\n\n# Use getAnnual as an example, here since `a` is not a complete time series, \n# the result is only base on the input.\n# getAnnual gives the annual precipitation of each year, \n# and will be introduced in the next chapter.\ngetAnnual(datalist)\n```\n\n\n\n\n\n\n\n\n\n### 1.1.2 Mannually\n\nFollowing example shows a simple way to generate dataframe with start date, end date, and the value. Here in the example, `sample()` is used to generate random values, while in real case it will be a vector containing time series values.\n```{r, fig.show = 'hold', fig.height = 4, fig.width = 7}\n# Generate timeseries datalist. Each data frame consists of a Date and a value.\nlibrary(hyfo)\nAAA <- data.frame(\n Date = seq(as.Date('1990-10-28'), as.Date('1997-4-1'), 1), # Date column\n AAA = sample(1:10, length(seq(as.Date('1990-10-28'), # value column\n as.Date('1997-4-1'), 1)), repl = TRUE))\n\nBBB <- data.frame(\n Date = seq(as.Date('1993-3-28'), as.Date('1999-1-1'),1), \n BBB = sample(1:10, length(seq(as.Date('1993-3-28'), \n as.Date('1999-1-1'),1)), repl = TRUE))\n\nCCC <- data.frame(\n Date = seq(as.Date('1988-2-2'), as.Date('1996-1-1'),1), \n CCC = sample(1:10, length(seq(as.Date('1988-2-2'), \n as.Date('1996-1-1'),1)), repl = TRUE)) \n\ndatalist <- list(AAA, BBB, CCC)# dput() and dget() can be used to save and load list file.\na <- getAnnual(datalist)\n```\n\n\n\n\n\n\n\n\n\n\n\n## 1.2 Raw Data Analysis\n\nAfter having the raw data, usually we need to have an overview of the rainfall in order to further process the data, `getAnnual` can provide the information based on annual rainfall for all the input time series.\n\nhyfo also provides time series plot `plotTS` and `plotTS_comb`, for you to plot single time series or multiple time series. And missing values will also be shown in the plot.\n\nAssuming we have three gauging stations named \"AAA\", \"BBB\", \"CCC\", the precipitation information can be get by the following:\n```{r, fig.show='hold', fig.height=4, fig.width=7}\n# testdl is a datalist provided by the package as a test. \n# It's a list containing different time series.\ndata(testdl)\na <- getAnnual(testdl)\n```\n\nAs shown above, the annual precipitation and the number of missing values are shown in the figure. Knowing how many missing values you have is alway important when calculating the mean annual precipitation. \n\nNow we want to get the mean annual precipitation.\n```{r, fig.show='hold', fig.height=4, fig.width=7}\na <- getAnnual(testdl, output = 'mean')\na\n```\n\nMean annual precipitation is calculated, but as we can see in the figure before, it's not reliable, since there are a lot of missing values in AAA and CCC, especially in AAA, in 1993, there are more than 30 missing values in a year. So we have to decide which is the threshold for the valid record. 
the default is 355, which means in a year (355 or 365 days), if the valid records (not missing) exceeds 355, then this year is taken into consideration in the mean annual preicipitation calculation.\n```{r, fig.height=4, fig.width=7}\ngetAnnual(testdl, output = 'mean', minRecords = 300)\ngetAnnual(testdl, output = 'mean', minRecords = 365)\n```\n\nIf you are not satisfied with the title and x axis and y axis, you can assign them yourself.\n```{r, fig.height=4, fig.width=7, results='hide'}\na <- getAnnual(testdl, output = 'mean', title = 'aaa', x = 'aaa', y = 'aaa')\n```\n\nIf you want to calculate annual rainfall for a single dataframe containing one time series. You can use the argument `dataframe =`. NOTE, if you don't put `dataframe =`, hyfo may take it as a list, which will give an error.\n```{r, fig.height=4, fig.width=7}\na <- getAnnual(dataframe = testdl[[1]])\n```\n\n`plotTS` is for you to plot time series, with missing values shown in the plot. And also you can use `plotTS_comb` to generate multiple time series plots\n\n```{r, fig.height=4, fig.width=7}\na1 <- plotTS(TS = testdl[[1]])\n\n# You can also choose 'bar' as time series type, default is 'line'. But most of time, \n# they are not # so different.\na2 <- plotTS(TS = testdl[[1]], type = 'bar')\n\n\n# To use comb function, you have to change output type to 'ggplot'\n\na1 <- plotTS(TS = testdl[[1]], output = 'ggplot', name = 1)\na2 <- plotTS(TS = testdl[[2]], output = 'ggplot', name = 2)\n```\n```{r, fig.height=8, fig.width=7}\nplotTS_comb(a1, a2, nrow = 2)\n```\n\n\n\n\n\n\n\n\n\n\n## 1.3 Further Process for Model Input\n\n### 1.3.1 Extract Common Period from Different Time Series\n\nNow we have the general information of the precipitation, if we want to use them in a model, we have to extract the common period of them, and use the common period precipitation to analyze.\n```{r, fig.height=4, fig.width=7, results='hide'}\ntestdl_new <- extractPeriod(testdl, commonPeriod = TRUE )\nstr(testdl_new)\n```\n\nIf we want to extract data from a certain period, we can assgin start and end date.\n\n```{r, fig.height=4, fig.width=7, results='hide'}\n# Extract period of the winter of 1994\ntestdl_new <- extractPeriod(testdl, startDate = '1994-12-01', endDate = '1995-03-01' )\nstr(testdl_new)\n```\n\nAbove is for us to extract period from different datalist, if we have a single time series, and we want to extract certain period from the single time series. We can make a small change to the argument : add `TS =`, a single time series can contain more than 1 column of value, e.g. 
the result from `list2dataframe`.\n\n```{r, fig.height=4, fig.width=7, results='hide'}\n# First change the list from above process to dataframe\ndataframe <- list2Dataframe(testdl_new)\n# not we have a dataframe to extract certain period.\ndataframe <- extractPeriod(dataframe = dataframe, startDate = '1994-12-01', endDate = '1995-03-01' )\nstr(testdl_new)\n```\n\n\n\n\n\n\n\n\n\n\n\n\n\n### 1.3.2 Fill Gaps (rainfall data gaps)\n\nAlthough we have got the precipitation of the common period, we can still see that there are some missing values inside, which we should fill.\n```{r, fig.show='hold', fig.height=4, fig.width=7}\ntestdl_new <- extractPeriod(testdl, commonPeriod = TRUE )\na <- getAnnual(testdl_new)\na\n```\n\nFirst we have to transform the datalist to dataframe, which can be done by the code below:\n```{r, fig.show='hold', fig.height=4, fig.width=7}\ndf <- list2Dataframe(testdl_new)\nhead(df)\n```\n\nFrom above, we can see that in the gauging station \"AAA\", there are some missing value marked as \"NA\". Now we are going to fill these gaps.\n\nThe gap filling is based on the correlation and linear regression between each two gauging stations, correlation table, correlation Order and Linear Coefficients are also printed when doing the calculation. Details can be found in `?fillGap`.\n```{r, fig.show='hold', fig.height=4, fig.width=7}\ndf_filled <- fillGap(df)\nhead(df_filled)\n```\n\nDefault correlation period is \"daily\", while sometimes the daily rainfall correlation of precipitation is not so strong, we can also select the correlation period.\n```{r, fig.show='hold', fig.height=4, fig.width=7}\ndf_filled <- fillGap(df, corPeriod = 'monthly')\nhead(df_filled)\ndf_filled <- fillGap(df, corPeriod = 'yearly')\nhead(df_filled)\n```\n\n\n\n\n\n\n\n\n\n\n\n### 1.3.3 Get Ensemble Hydrological Forecast from Historical Data (ESP method)\n\n\nThe basic forecasts are made from the historical data, to see, how the historical data act in the same situation. Using the same period from the historical data to generate an ensemble forcast. \n\nE.g., we have a period of data from 2000 to 2007, we assume 2004 to be the forecast year. Then, use 2004 as an example, the data in 2000, 2001, 2002, 2003, 2005, 2006, 2007 will be taken to generate an ensemble forecast of 6 members(except 2004).\n\n\nSet example year, e.g., year 1994.\n```{r, fig.show='hold', fig.height=4, fig.width=9}\ndata(testdl)\n\na <- testdl[[1]]\na1 <- getHisEnsem(a, example = c('1994-1-1', '1994-12-31'))\n\n```\n\nBoth cumulative and normal plot are provided, default is \"norm\", means normal plot without any process. If words other that \"norm\", \"plot\", there will be no plot. If there are missing values inside, cumulative plot will stop when finds missing values. As can be seen from below.\n\n```{r, fig.show='hold', fig.height=4, fig.width=9}\na2 <- getHisEnsem(a, example = c('1995-1-1', '1996-3-1'))# Default is plot = 'norm'\na3 <- getHisEnsem(a, example = c('1995-1-1', '1995-3-1'), plot = 'cum')\n```\n\n\nExample period can be any time, can be a year or some months.\n```{r, fig.show='hold', fig.height=4, fig.width=9}\na2 <- getHisEnsem(a, example = c('1995-1-1', '1996-3-1'))\na3 <- getHisEnsem(a, example = c('1995-1-1', '1995-8-11'))\n```\n\n`interval` means the interval between each member. Check `?getHisEnsem` for detailed instruction. 
Default is 365, representing one year.\n```{r, fig.height=4, fig.width=9, results='hide'}\n# If interval is two years.\na2 <- getHisEnsem(a, example = c('1995-1-1', '1996-3-1'), interval = 730)\nstr(a2)\n# If interval is three months.\na3 <- getHisEnsem(a, example = c('1995-1-1', '1995-8-11'), interval = 90)\nstr(a3)\n# If interval is 171 days.\na4 <- getHisEnsem(a, example = c('1995-1-1', '1995-8-11'), interval = 171)\nstr(a4)\n```\n\nFor some models, like MIKE NAM, it's necessary to run model a few days before the forecasting time, to warm up the model. In this case `buffer` is needed to generate the \"warm up period\".\n```{r, fig.height=4, fig.width=9, results='hide'}\n# If the model needs 14 days to warm up.\na2 <- getHisEnsem(a, example = c('1995-1-1', '1996-3-1'), interval = 730, buffer = 14, plot = 'cum')\nstr(a2)\n```\n\nFrom `str(a2)` we can see that the data has 14 more rows, and the start date is changed to \"1994-12-18\"\n\nAlso, if costomized title and xy axis are needed, you can set yourself.\n\n```{r, fig.height=4, fig.width=9}\na2 <- getHisEnsem(a, example = c('1995-1-1', '1996-3-1'), title = 'aaa', x = 'a')\n```\n\nIf you want to combine different ensemble together, there is a regular `_comb` function `getEnsem_comb` to combine different plots together.\n\n```{r, fig.show='hold', fig.height=4, fig.width=9}\na1 <- getHisEnsem(a, example = c('1994-1-1', '1994-12-31'), plot = 'cum', output = 'ggplot', name = 1)\na2 <- getHisEnsem(a, example = c('1994-3-1', '1995-3-31'), plot = 'cum', output = 'ggplot', name = 2)\na3 <- getHisEnsem(a, example = c('1994-5-1', '1995-4-30'), plot = 'cum', output = 'ggplot', name = 3)\n```\n\n```{r, fig.show='hold', fig.height=9, fig.width=9}\ngetEnsem_comb(a1, a2, a3, nrow = 3)\n```\n\n### 1.3.4 Monthly Data and Daily Data Conversion\n\nSometimes you have the monthly data, and want to generate the daily data, sometimes the opposite situation. `monDay` can help you with the conversion.\n\nIf you have daily data and want to convert it to a monthly data.\n```{r}\ndata(testdl)\nTS <- testdl[[2]] # Get daily data\nTS_new <- monDay(TS, method = 'day2mon')\n```\n\nIf you have monthly data and want to convert it to a daily data.\n\n```{r}\n# First generate a monthly data.\nTS <- data.frame(Date = seq(as.Date('1999-9-15'), length = 30, by = '1 month'), stats::runif(30, 3, 10))\nTS_new <- monDay(TS, method = 'mon2day')\n```\n\nMore information please check `?monDay`.\n\n\n\n\n\n\n\n\n\n## 1.4 Seasonal and Monthly Precipitation Analysis\n\nSometimes we need to know not only the annual precipitation, but also the precipitation of a certain month or certain season. `getPreciBar` is in charge of different analysis. It can analyze both grid file and singe timeseries. IF the input is a time series, the argument `TS =` must be put.\n\n`info` argument will give information about max, min, mean, and median, if selected TRUE.\n\n```{r, fig.height=5, fig.width=7}\ndata(testdl)\nTS <- testdl[[1]]\na <- getPreciBar(TS = TS, method = 'spring')\n# if info = T, the information will be given at the bottom.\na <- getPreciBar(TS = TS, method = 'spring', info = TRUE)\n\n```\nIf missing value is wanted, set `omitNA = FALSE`.\n```{r, fig.height=5, fig.width=7}\na <- getPreciBar(TS = TS, method = 'winter', omitNA = FALSE)\n```\nGet special month precipitation, e.g. 
march.\n```{r, fig.height=5, fig.width=7}\na <-getPreciBar(TS = TS, method = 3)\n```\nWe can also get annual precipitation, plot figure and assign title ans axis.\n```{r, fig.height=5, fig.width=7}\na <- getPreciBar(TS = TS, method = 'annual', x = 'aaa', title = 'aaa')\n```\n\n\nIf `output = 'ggplot'` is chosen, the the output can be used in `getPreciBar_comb`, to generate multiple plots.\n\n```{r, fig.height=5, fig.width=7}\na1 <- getPreciBar(TS = TS, method = 1, output = 'ggplot', name = 'Jan')\na2 <- getPreciBar(TS = TS, method = 2, output = 'ggplot', name = 'Feb')\ngetPreciBar_comb(a1, a2)\n```\n\n\n\n\n\n\n# 2. Climate Forecasting\n\n* For the climate forecasting part, `hyfo` mainly focuses on the post processing of the gridData derived from forecasts or other sources. The input is a list file, usually an NetCDF file. There are `getNcdfVar()`, `loadNcdf()` and `writeNcdf()` prepared in hyfo, for you to deal with NetCDF file. `loadNcdf()` will give a list based hyfo output file.\n\n\n\n##### Note\n\nIf an **ensemble forecast** data is loaded, there will be one dimension called \"member\", by default, `hyfo` will calculate the mean of different members. If you want to see a special member, add `member` argument to `getSpatialMap`, e.g., `getSpatialMap(tgridData, method = 'meanAnnual', member = 3)`, `getPreciBar(tgridData, method = 'annual', member = 14)`\n\n\n\n\n\n## 2.1 Load, write and downscale NetCDF file\n\nThere are three main functions dealing with `getNcdfVar()`, `loadNcdf()` and `writeNcdf()`. `getNcdfVar()` is for get the variable name if you don't know the name. Then you can load NetCDF file, and get a hyfo output, from which you will use in further analysis. Maybe you want to change some thing with the original NetCDF file. You can load first, then, make changes to hyfo output file and then write back to NetCDF file. Following examples shows the procedure.\n```{r, fig.height=6, fig.width=9}\n# First open the test NETcDF file.\nfilePath <- system.file(\"extdata\", \"tnc.nc\", package = \"hyfo\")\n \n# Then if you don't know the variable name, you can use \\code{getNcdfVar} to get variable name\nvarname <- getNcdfVar(filePath)\n\nnc <- loadNcdf(filePath, varname)\n\n# nc is a list based hyfo output file, with this file, you can make further analysis.\n\n# E.g. you want to make some changes to the data\nnc$Data <- nc$Data - 0.3\n\n```\n```{r, eval=FALSE}\n# Then write it back to file\nwriteNcdf(gridData = nc, filePath = 'test.nc')\n\n\n```\n\n\n`hyfo` can also do downscale job. When you load the file, you can directly assign the year, longitude and latitude. And if you already have a hyfo list file, you can use `downscaleNcdf` to downscale your list file. \n\n```{r, fig.height=6, fig.width=9}\n# First open the test NETcDF file.\nfilePath <- system.file(\"extdata\", \"tnc.nc\", package = \"hyfo\")\n \n# Then if you don't know the variable name, you can use \\code{getNcdfVar} to get variable name\nvarname <- getNcdfVar(filePath)\n\nnc <- loadNcdf(filePath, varname, year = 2005:2006, lon = c(-2.4, -1.6), lat = c(41, 43.5))\n\nnc_plot <- getSpatialMap(nc, 'mean') \n\n# if you want to further downscale nc, you can use the following function.\nnc1 <- downscaleNcdf(nc, year = 2005, lon = c(-2.2, -1.75), lat = c(43, 44))\nnc1_plot <- getSpatialMap(nc1, 'mean')\n```\n\n\n\n\n\n\n\n## 2.2 Spatial Map Plot\n\nAs described before, the following analysis is based the list based hyfo output file. 
You can call elements by `$`.\n\nIf we want to see the mean daily precipitation.\n```{r, fig.height=6, fig.width=9}\ndata(tgridData)\na <- getSpatialMap(tgridData, method = 'meanAnnual')\n\n# If a dataset is an ensemble forecast, you can use argument member to choose\nfilePath <- system.file(\"extdata\", \"tnc.nc\", package = \"hyfo\")\n \n# Then if you don't know the variable name, you can use \\code{getNcdfVar} to get variable name\nvarname <- getNcdfVar(filePath)\n\nnc <- loadNcdf(filePath, varname)\n\n# choose the 3rd member\na <- getSpatialMap(nc, method = 'mean', member = 2)\n# If member not assigned, the mean value of the members will be plotted.\na <- getSpatialMap(nc, method = 'mean')\n```\n\nThere are several methods to be seleted in the function, details can be found by `?getSpatialMap`.\n\nSometimes there exists a great difference in the whole map, e.g., the following value, `c(100, 2, 2,6, 1,7)`, since the maximum value is too large, so in the plot, by normal plot scale, we can only recognize value 100 and the rest, it's hard for us to tell the difference between 2, 2.6, and 1.7 from the plot. In this situation, the value needs to be processed before plotting. Here `scale` provides a way to decide the plot scale.\n\n`scale` passes the arguments to the `trans` argument in `ggplot2`. The most common scale is \"sqrt\" and \"log10\", which focus more on the minutiae. Default is \"identity\", which means no change to the plot scale.\n```{r, fig.height=6, fig.width=9}\na <- getSpatialMap(tgridData, method = 'meanAnnual', scale = 'sqrt')\n```\n\nHere in our example, because the region is too small, and the differences is not so big, so it's not so obvious to tell from the plot. But if in a map, both dry region and wet region is included, that will be more obvious to see the difference between the plot scales.\n\n\nAlso, if you are not satisfied with the title, x axis and y axis, you can assgin yourself.\n```{r, fig.height=6, fig.width=9}\na <- getSpatialMap(tgridData, method = 'meanAnnual', scale = 'sqrt', \n title = 'aaa', x = 'aaa', y = 'aaa')\n```\n\n\n\n\n\n\n\n\n\n\n## 2.3 Add Background (catchment and gauging stations)\n\n\n\n\n\n\nThe default background is the world map, while if you have other backgrounds like catchment shape file and station location file, you are welcome to import them as background.\n\n\n\n\n\n\n\n### 2.3.1 Add catchment shape file\n\nCatchment shape file needs to be processed with a very simple step. 
It's based on the package `rgdal`, details can be found by `?shp2cat`\n\n```{r, fig.height=6, fig.width=9}\n# Use the test file provided by hyfo\nfile <- system.file(\"extdata\", \"testCat.shp\", package = \"hyfo\")\ncat <- shp2cat(file)\n# cat is the catchment file.\n```\n\nThen the catchment file `cat` can be inputed as background.\n\n```{r, fig.height=6, fig.width=9}\na <- getSpatialMap(tgridData, method = 'meanAnnual', catchment = cat)\n```\n\n\n\n\n\n\n\n### 2.3.2 Add station locations\n\nPoints file needs to be read into dataframe, and special column has to be assigned, details can be found by `?getSpatialMap_mat`\n```{r, fig.height=6, fig.width=9, results='hide'}\n# Use the points file provided by hyfo\nfile <- system.file(\"extdata\", \"points.txt\", package = \"hyfo\")\npoints <- read.table(file, header = TRUE, sep = ',' )\ngetSpatialMap(tgridData, method = 'winter', points = points, catchment = cat)\n\n```\n\nAs can be seen above, the color of the points represents the elevation, the size of the points represents the value, e.g., rainfall value.\n\nYou can generate your own point file and use it as background, or you can also find the original file in the package, and replace the old information with your information.\n\n\n\n\n\n\n\n\n## 2.4 Variable Bar Plot \n\nBisides spatial map, bar plot can also be plotted. The value in the bar plot is spatially averaged, i.e. the value in the bar plot is the mean value over the region.\n\nAnnual precipitation.\n\n```{r, fig.height=6, fig.width=9}\ndata(tgridData)\na <- getPreciBar(tgridData, method = 'annual')\n```\n\nMean monthly precipitation over the whole period, with the ranges for each month. But not all kinds of bar plot have a plot range.\n\n```{r, fig.height=6, fig.width=9}\na <- getPreciBar(tgridData, method = 'meanMonthly')\na <- getPreciBar(tgridData, method = 'meanMonthly', plotRange = FALSE)\n```\n\nSeasonal precipitation, and monthly precipitation can also be plotted. \n\n```{r, fig.height=6, fig.width=9}\na <- getPreciBar(tgridData, method = 'spring')# spring precipitation for each year\na <- getPreciBar(tgridData, method = 3) # march precipitation for each year\n```\n\n\n\n\n\n\n\n\n## 2.5 Analysis and Comparison\n\nFor some cases, analysis and comparison are necesssary, which are also provided by `hyfo`. \n\nThere are three different kinds of output from `getSpatialMap` and `getPreciBar`, respectively, `output = 'data'`, `output = 'ggplot'` and `output = 'plot'`. \n\n`output = 'data'` is default in the function and do not need to be declare when input. It is mainly used in analyzing and replot the results.\n\n`output = 'ggplot'` is used when combining different plots.\n\n`output = 'plot'` is used when a layer output is needed. the output can be directly printed, and can be mannually combined by the plot arrange functions, e.g., `grid.arrange()`\n\n##### Note:\n**All the comparisons must be comparable, e.g.,**\n\n* For `getSpatialMap_comb`, the maps to be compared should be with same size and resolution, in other words, they should be fully overlapped by each other. Check `?getSpatialMap_comb` for details.\n* For `getPreciBar_comb`, the bar plots to be compared should belong to the same kind, e.g., spring and winter, January and December, and couldn't be spring and annual. Details can be found by `?getPreciBar_comb`\n\n\n\n\n\n\n### 2.5.1 Spatial Map\n\nThe default \"data\" output provides a matrix, representing the raster information of the spatial map. 
\n\n```{r, fig.height=6, fig.width=9}\na <- getSpatialMap(tgridData, method = 'meanAnnual')\na\n```\n\nThis matrix is upside down from what you can see from the plot. **DO NOT try to change this matrix.**\n`hyfo` can deal with it.\n\n```{r, fig.show= 'hold', fig.height=6, fig.width=9}\n# For re-plot the matrix, input the matrix, and then the map can be replot.\nb <- getSpatialMap_mat(a)\n\n# Without title and x and y, also you can assign yourself.\nb <- getSpatialMap_mat(a, title = 'aaa', x = 'aaa', y = '')\n```\n\nThe matrix can be used to make different analysis and plot again.\n\n##### Note\n**If the matrix doesn't come from `getSpatialMap`, dimension name of longitude and latitude needs to be provided to the matrix, in order to be plotted.**\n```{r, fig.show='hold', fig.height=6, fig.width=9, results='hide'}\n\na1 <- getSpatialMap(tgridData, method = 'mean')\n\n# To make some changes to mean value.\nb <- a1 * 3 -1\ngetSpatialMap_mat(b, title = '', x = '', y = '')\n\n# Bias, variation and other analysis can also be processed \n# the same way. \n# Just apply the analysis to the matrix and \n# use getSpatialMap_mat to plot.\n```\n\nIf multi-plot is needed, `hyfo` can also combine different plots together. Use `output = ggplot`, which gives back the a special format that can be easily used by `ggplot2`\n\n```{r, fig.show='hide', fig.height=6, fig.width=9}\n\na1 <- getSpatialMap(tgridData, method = 'spring', output = 'ggplot', name = 'spring')\na2 <- getSpatialMap(tgridData, method = 'summer', output = 'ggplot', name = 'summer')\na3 <- getSpatialMap(tgridData, method = 'autumn', output = 'ggplot', name = 'autumn')\na4 <- getSpatialMap(tgridData, method = 'winter', output = 'ggplot', name = 'winter')\n\n```\n\n```{r, fig.show='hold', fig.height=8, fig.width=12}\n\ngetSpatialMap_comb(a1, a2, a3, a4, nrow = 2)# you cannot assign title\n```\n\n```{r, fig.show='hold', fig.height=12, fig.width=9}\ngetSpatialMap_comb(a1, a2, a3, a4, nrow = 4)\n\n```\n\n`getSpatialMap_comb` accepts list (using `list =`) object too, which is easier for multi-plot. First list of 12 months are got. 
**NOTE: If input is a list, the argument should be `list = yourlist`, not directly put the list in the argument.**\n```{r, fig.show='hide'}\nc <- lapply(1:12, function(x) getSpatialMap(tgridData, method = x, output = 'ggplot', name = x))\n```\n\nThen they are combined.\n```{r, fig.show='hold', fig.height=10, fig.width=12}\ngetSpatialMap_comb(list = c, nrow = 4)\n```\n\n\n\n\n\n\n\n\n\n### 2.5.2 Bar Plot\n\nBasically, bar plot follows the same rule as part 2.4.1 spatial map, only a few cases that needs to pay attention.\n\n```{r, fig.show='hide', fig.height=6, fig.width=9, results='hide'}\nb1 <- getPreciBar(tgridData, method = 'spring', output = 'ggplot', name = 'spring')\nb2 <- getPreciBar(tgridData, method = 'summer', output = 'ggplot', name = 'summer')\nb3 <- getPreciBar(tgridData, method = 'autumn', output = 'ggplot', name = 'autumn')\nb4 <- getPreciBar(tgridData, method = 'winter', output = 'ggplot', name = 'winter')\n```\n\n```{r, fig.show='hold', fig.height=8, fig.width=9}\ngetPreciBar_comb(b1, b2, b3, b4, nrow = 2)\n```\n\n```{r, fig.show='hide', fig.height=6, fig.width=9, results='hide'}\nc <- lapply(1:12, function(x) getPreciBar(tgridData, method = x, output = 'ggplot', name = x))\n```\n\n```{r, fig.show='hold', fig.height=8, fig.width=9}\ngetPreciBar_comb(list = c, nrow = 4)\n```\n\n\n\n\n\n\n\n\n## 2.6 Model Input\n\n### 2.6.1 Extract time series from Forecasting Dataset\n\nIf there are different members existing in the dataset, `hyfo` can extract them and generate a dataframe for the easy input to the model. If the dataset doesn't have a member part, then `hyfo` will extract only one single time seres. \n\n\n```{r, fig.show='hold', fig.height=6, fig.width=9}\nfilePath <- system.file(\"extdata\", \"tnc.nc\", package = \"hyfo\")\n \n# Then if you don't know the variable name, you can use \\code{getNcdfVar} to get variable name\nvarname <- getNcdfVar(filePath)\n\nnc <- loadNcdf(filePath, varname)\n\na <- getFrcEnsem(nc)\n\n\n# If there is no member session in the dataset, a single time sereis will be extracted.\n\na1 <- getFrcEnsem(tgridData)\n```\n\nThe default output is spatially averaged, if there are more than one cells in the dataset, the mean value of the cells will be calculated. While if you are interested in special cell, you can assign the cell value, for how to assign, please check the details in `?getFrcEnsem`. You can also directly use longitude and latitude to extract time series, using `coord = `\n\n```{r, fig.height=6, fig.width=9, results='hide'}\ngetSpatialMap(nc, 'mean')\n\na <- getFrcEnsem(nc, cell = c(6,2))\n# From the map, cell = c(6, 2) means lon = -1.4, lat = 43.2, so you can use corrd to locate\n# your research area and extract time series.\nb <- getFrcEnsem(nc, coord = c(-1.4, 43.2))\n\n```\n\nIf you want to combine different plots together, you can use `_comb` function to combine plots.\n\n```{r, fig.show='hide', fig.height=4, fig.width=7}\na1 <- getFrcEnsem(nc, cell = c(2,3), output = 'ggplot', name = 'a1')\na2 <- getFrcEnsem(nc, cell = c(2,4), output = 'ggplot', name = 'a2')\na3 <- getFrcEnsem(nc, cell = c(2,5), output = 'ggplot', name = 'a3')\n```\n```{r, fig.show='hold', fig.height=9, fig.width=7}\ngetEnsem_comb(a1, a2, a3, nrow = 3)\n```\n\nPlot rules are just the same as described in 1.3.3, please check if needed.\n\n\n\n\n\n\n\n\n# 3. Anarbe Case\n\nThe functions with anarbe case end with `_anarbe`, all of them are used to collect different available published data in anarbe catchment in Spain. 
The data comes from two website: [here](http://meteo.navarra.es/estaciones/mapadeestaciones.cfm) and [here](http://www4.gipuzkoa.net/oohh/web/esp/02.asp), there are precipitation or discharge data on those website, and can be downloaded directly. \n\nSince the available files on those website are arranged by a year or five years, for long term data collection, a tools is necessary for collecting data from different files.\n\n##### Note:\nFor excel files, if you have access to the dam regulation excel file of the dam anarbe, you can use `collectData_excel_anarbe` in the package, but this function is commented in the original code, cannot be used directly. Go to original file in the library or go to github [here](https://github.com/Yuanchao-Xu/hyfo/blob/master/R/collectData_excel.R), copy the original code.\n\nThere are two csv files and txt files included in the package, which can be used as examples.\n\n```{r}\nfile <- system.file(\"extdata\", \"1999.csv\", package = \"hyfo\")\nfolder <- strsplit(file, '1999')[[1]][1]\n\na <- collectData_csv_anarbe(folder, output = TRUE)\nstr(a)\nb <- collectData_txt_anarbe(folder, output = TRUE)\nstr(b)\n```\n\n\n\n", + "contents" : "---\ntitle: '[hyfo Easy Start](http://yuanchao-xu.github.io/hyfo/)'\nauthor: \"Yuanchao Xu\"\ndate: '`r Sys.Date()`'\noutput:\n pdf_document:\n toc: yes\n toc_depth: 3\n html_vignette:\n toc: yes\n toc_depth: 3\n html_document:\n toc: yes\nvignette: |\n %\\VignetteIndexEntry{hyfo easy start} \n %\\VignetteEngine{knitr::rmarkdown} \n %\\VignetteEncoding{ASCII}\n---\n\n# Introduction\n\n**Official Website is [here](http://yuanchao-xu.github.io/hyfo/)**\n\nhyfo is designed for hydrology and climate forecasting anaylasis, containing a number of tools including data extration, data processing and data visulization.\n\n**If you feel hyfo is of a little help, please cite it as following:**\n\nXu, Yuanchao(2015). hyfo: Hydrology and Climate Forecasting R Package for Data Analysis and Visualization. Retrieved from http://yuanchao-xu.github.io/hyfo/\n\n#### TIPS\n* For the hydrology tools part, the minimum time unit is a day, i.e., it mainly focuses on water resource and some long term analysis. For flood analysis part, it will be added in future.\n\n\n* One important characteristic by which hyfo can be distinguished from others is its convenience in multiple plots and series plots. Most data visualization tool in hyfo provides the output that can be directly re-plot by `ggplot2`, if `output = 'ggplot'` is assigned in the argument of the function, which will be easier for the users to generated series/multiple plots afterwards. When `output = 'ggplot'` is selected, you also have to assigne a `name = 'yourname'` in the argument, for the convenience of generating multiplots in future. All the functions ending with `_comb` can generated series/multiple plots, details can be found in the user mannual. \n\n\n* For the forecasting tools part, `hyfo` mainly focuses on the post processing of the gridData derived from forecasts or other sources. The input is a list file, usually an NetCDF file. There are `getNcdfVar()`, `loadNcdf()` and `writeNcdf()` prepared in hyfo, for you to deal with NetCDF file. \n\n* If you don't like the tile, x axis, y axis of the plot, just set them as '', e.g. `title = ''`\n\n* For R beginners, R provides different functions to write to file. 
`write.table` is a popular choice, and after write the results to a file, you can directly copy paste to your model or to other uses.\n\n* The functions end with `_anarbe` are the functions designed specially for some case in Spain, those functions mostly are about data collection of the anarbe catchment, which will be introduced in the end of this mannual.\n\n##### Installation\n* You can go [here](http://yuanchao-xu.github.io/hyfo/) to download installation file, and use IDE like Rstudio to install from file, both `tar.gz` and `zip` formats are provided.\n\n* Also you can use the following code to install the latest version.\n```{r, eval=FALSE}\n install.packages('devtools')\n # Ignore the warning that Rtool is not installed, unless you want other function from devtools.\n # If you have \"devtools\" installed already, you just need to run the following code.\n devtools::install_github('Yuanchao-Xu/hyfo')\n```\n\n\n\n\n\n\n# 1. Hydrology\n\n##### Note\n\nIf you are an experienced R user, and know how to read data in R, deal with dataframe, generate date and list, please start from next charpter, \"1.2 Rainfall Analysis\"\n\n\n\n\n\n\n\n## 1.1 Start from Raw Data\n\n\n\n\n\n\n### 1.1.1 From File\n\n`hyfo` does provide a common tool for collecting data from different type of files, including \"txt\", \n\n\"csv\" and \"excel\", which has to be assigned to the argument `fileType`.\n\nNow let's use internal data as an example.\n\n```{r, fig.show = 'hold', fig.height = 4, fig.width = 5}\nlibrary(hyfo)#load the package.\n# get the folder containing different csv (or other type) files.\nfile <- system.file(\"extdata\", \"1999.csv\", package = \"hyfo\")\nfolder <- strsplit(file, '1999')[[1]][1]\n\n# Extract and combine content from different files and in each file, the extracted zone is \n# from row 10 to row 20, Column 1 to column2.\na <- collectData(folder, fileType = 'csv', range = c(10, 20, 1, 2))\nstr(a)\n```\n\n`a` cannot be directly inputed in `hyfo`, it still needs some process.\n```{r}\n# Check the date to see if it follows the format in ?as.Date(), if not, \n# use as.Date to convert. \na <- data.frame(a)\n#get date\ndate <- a[, 1]\n\n# The original format is d/m/year, convert to formal format.\ndate <- as.Date(date, format = '%d/%m/%Y')\na[, 1] <- date\n\n# Now a has become `a` time series dataframe, which is the atom element of the analysis. \n#`hyfo` deals with list containing different time series dataframe. In this example, \n#there is only one dataframe, and more examples please refer to the following chapter.\ndatalist <- list(a)\n\n# Use getAnnual as an example, here since `a` is not a complete time series, \n# the result is only base on the input.\n# getAnnual gives the annual precipitation of each year, \n# and will be introduced in the next chapter.\ngetAnnual(datalist)\n```\n\n\n\n\n\n\n\n\n\n### 1.1.2 Mannually\n\nFollowing example shows a simple way to generate dataframe with start date, end date, and the value. Here in the example, `sample()` is used to generate random values, while in real case it will be a vector containing time series values.\n```{r, fig.show = 'hold', fig.height = 4, fig.width = 7}\n# Generate timeseries datalist. 
Each data frame consists of a Date and a value.\nlibrary(hyfo)\nAAA <- data.frame(\n Date = seq(as.Date('1990-10-28'), as.Date('1997-4-1'), 1), # Date column\n AAA = sample(1:10, length(seq(as.Date('1990-10-28'), # value column\n as.Date('1997-4-1'), 1)), repl = TRUE))\n\nBBB <- data.frame(\n Date = seq(as.Date('1993-3-28'), as.Date('1999-1-1'),1), \n BBB = sample(1:10, length(seq(as.Date('1993-3-28'), \n as.Date('1999-1-1'),1)), repl = TRUE))\n\nCCC <- data.frame(\n Date = seq(as.Date('1988-2-2'), as.Date('1996-1-1'),1), \n CCC = sample(1:10, length(seq(as.Date('1988-2-2'), \n as.Date('1996-1-1'),1)), repl = TRUE)) \n\ndatalist <- list(AAA, BBB, CCC)# dput() and dget() can be used to save and load list file.\na <- getAnnual(datalist)\n```\n\n\n\n\n\n\n\n\n\n\n\n## 1.2 Raw Data Analysis\n\nAfter having the raw data, usually we need to have an overview of the rainfall in order to further process the data, `getAnnual` can provide the information based on annual rainfall for all the input time series.\n\nhyfo also provides time series plot `plotTS` and `plotTS_comb`, for you to plot single time series or multiple time series. And missing values will also be shown in the plot.\n\nAssuming we have three gauging stations named \"AAA\", \"BBB\", \"CCC\", the precipitation information can be get by the following:\n```{r, fig.show='hold', fig.height=4, fig.width=7}\n# testdl is a datalist provided by the package as a test. \n# It's a list containing different time series.\ndata(testdl)\na <- getAnnual(testdl)\n```\n\nAs shown above, the annual precipitation and the number of missing values are shown in the figure. Knowing how many missing values you have is alway important when calculating the mean annual precipitation. \n\nNow we want to get the mean annual precipitation.\n```{r, fig.show='hold', fig.height=4, fig.width=7}\na <- getAnnual(testdl, output = 'mean')\na\n```\n\nMean annual precipitation is calculated, but as we can see in the figure before, it's not reliable, since there are a lot of missing values in AAA and CCC, especially in AAA, in 1993, there are more than 30 missing values in a year. So we have to decide which is the threshold for the valid record. the default is 355, which means in a year (355 or 365 days), if the valid records (not missing) exceeds 355, then this year is taken into consideration in the mean annual preicipitation calculation.\n```{r, fig.height=4, fig.width=7}\ngetAnnual(testdl, output = 'mean', minRecords = 300)\ngetAnnual(testdl, output = 'mean', minRecords = 365)\n```\n\nIf you are not satisfied with the title and x axis and y axis, you can assign them yourself.\n```{r, fig.height=4, fig.width=7, results='hide'}\na <- getAnnual(testdl, output = 'mean', title = 'aaa', x = 'aaa', y = 'aaa')\n```\n\nIf you want to calculate annual rainfall for a single dataframe containing one time series. You can use the argument `dataframe =`. NOTE, if you don't put `dataframe =`, hyfo may take it as a list, which will give an error.\n```{r, fig.height=4, fig.width=7}\na <- getAnnual(dataframe = testdl[[1]])\n```\n\n`plotTS` is for you to plot time series, with missing values shown in the plot. And also you can use `plotTS_comb` to generate multiple time series plots\n\n```{r, fig.height=4, fig.width=7}\na1 <- plotTS(TS = testdl[[1]])\n\n# You can also choose 'bar' as time series type, default is 'line'. 
But most of time, \n# they are not # so different.\na2 <- plotTS(TS = testdl[[1]], type = 'bar')\n\n\n# To use comb function, you have to change output type to 'ggplot'\n\na1 <- plotTS(TS = testdl[[1]], output = 'ggplot', name = 1)\na2 <- plotTS(TS = testdl[[2]], output = 'ggplot', name = 2)\n```\n```{r, fig.height=8, fig.width=7}\nplotTS_comb(a1, a2, nrow = 2)\n```\n\n\n\n\n\n\n\n\n\n\n## 1.3 Further Process for Model Input\n\n### 1.3.1 Extract Common Period from Different Time Series\n\nNow we have the general information of the precipitation, if we want to use them in a model, we have to extract the common period of them, and use the common period precipitation to analyze.\n```{r, fig.height=4, fig.width=7, results='hide'}\ntestdl_new <- extractPeriod(testdl, commonPeriod = TRUE )\nstr(testdl_new)\n```\n\nIf we want to extract data from a certain period, we can assgin start and end date.\n\n```{r, fig.height=4, fig.width=7, results='hide'}\n# Extract period of the winter of 1994\ntestdl_new <- extractPeriod(testdl, startDate = '1994-12-01', endDate = '1995-03-01' )\nstr(testdl_new)\n```\n\nAbove is for us to extract period from different datalist, if we have a single time series, and we want to extract certain period from the single time series. We can make a small change to the argument : add `TS =`, a single time series can contain more than 1 column of value, e.g. the result from `list2dataframe`.\n\n```{r, fig.height=4, fig.width=7, results='hide'}\n# First change the list from above process to dataframe\ndataframe <- list2Dataframe(testdl_new)\n# now we have a dataframe to extract certain period.\ndataframe <- extractPeriod(dataframe = dataframe, startDate = '1994-12-01', endDate = '1995-03-01' )\nstr(testdl_new)\n```\n\n\n\n\n\n\n\n\n\n\n\n\n\n### 1.3.2 Fill Gaps (rainfall data gaps)\n\nAlthough we have got the precipitation of the common period, we can still see that there are some missing values inside, which we should fill.\n```{r, fig.show='hold', fig.height=4, fig.width=7}\ntestdl_new <- extractPeriod(testdl, commonPeriod = TRUE )\na <- getAnnual(testdl_new)\na\n```\n\nFirst we have to transform the datalist to dataframe, which can be done by the code below:\n```{r, fig.show='hold', fig.height=4, fig.width=7}\ndf <- list2Dataframe(testdl_new)\nhead(df)\n```\n\nFrom above, we can see that in the gauging station \"AAA\", there are some missing value marked as \"NA\". Now we are going to fill these gaps.\n\nThe gap filling is based on the correlation and linear regression between each two gauging stations, correlation table, correlation Order and Linear Coefficients are also printed when doing the calculation. Details can be found in `?fillGap`.\n```{r, fig.show='hold', fig.height=4, fig.width=7}\ndf_filled <- fillGap(df)\nhead(df_filled)\n```\n\nDefault correlation period is \"daily\", while sometimes the daily rainfall correlation of precipitation is not so strong, we can also select the correlation period.\n```{r, fig.show='hold', fig.height=4, fig.width=7}\ndf_filled <- fillGap(df, corPeriod = 'monthly')\nhead(df_filled)\ndf_filled <- fillGap(df, corPeriod = 'yearly')\nhead(df_filled)\n```\n\n\n\n\n\n\n\n\n\n\n\n### 1.3.3 Get Ensemble Hydrological Forecast from Historical Data (ESP method)\n\n\nThe basic forecasts are made from the historical data, to see, how the historical data act in the same situation. Using the same period from the historical data to generate an ensemble forcast. 
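\n\nAs a rough illustration of the idea in plain base R (the variable names below are made up for this sketch; this is not the code used inside `getHisEnsem`): every shift of the example window by `interval` days that still falls inside the historical record becomes one ensemble member.\n```{r}\n# Illustrative sketch: shift a one-year example window through the record by 'interval' days.\nexampleStart <- as.Date('2004-1-1')\ninterval <- 365\nshifts <- interval * (-4:3) # a few shifts backwards and forwards within a 2000--2007 record\nmemberStarts <- exampleStart + shifts\n# Drop the example year itself; the remaining dates are the member start dates.\n# They drift slightly across leap years because 'interval' counts plain days.\nmemberStarts <- memberStarts[memberStarts != exampleStart]\nmemberStarts\n```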
\n\nE.g., we have a period of data from 2000 to 2007, we assume 2004 to be the forecast year. Then, use 2004 as an example, the data in 2000, 2001, 2002, 2003, 2005, 2006, 2007 will be taken to generate an ensemble forecast of 6 members(except 2004).\n\n\nSet example year, e.g., year 1994.\n```{r, fig.show='hold', fig.height=4, fig.width=9}\ndata(testdl)\n\na <- testdl[[1]]\na1 <- getHisEnsem(a, example = c('1994-1-1', '1994-12-31'))\n\n```\n\nBoth cumulative and normal plot are provided, default is \"norm\", means normal plot without any process. If words other that \"norm\", \"plot\", there will be no plot. If there are missing values inside, cumulative plot will stop when finds missing values. As can be seen from below.\n\n```{r, fig.show='hold', fig.height=4, fig.width=9}\na2 <- getHisEnsem(a, example = c('1995-1-1', '1996-3-1'))# Default is plot = 'norm'\na3 <- getHisEnsem(a, example = c('1995-1-1', '1995-3-1'), plot = 'cum')\n```\n\n\nExample period can be any time, can be a year or some months.\n```{r, fig.show='hold', fig.height=4, fig.width=9}\na2 <- getHisEnsem(a, example = c('1995-1-1', '1996-3-1'))\na3 <- getHisEnsem(a, example = c('1995-1-1', '1995-8-11'))\n```\n\n`interval` means the interval between each member. Check `?getHisEnsem` for detailed instruction. Default is 365, representing one year.\n```{r, fig.height=4, fig.width=9, results='hide'}\n# If interval is two years.\na2 <- getHisEnsem(a, example = c('1995-1-1', '1996-3-1'), interval = 730)\nstr(a2)\n# If interval is three months.\na3 <- getHisEnsem(a, example = c('1995-1-1', '1995-8-11'), interval = 90)\nstr(a3)\n# If interval is 171 days.\na4 <- getHisEnsem(a, example = c('1995-1-1', '1995-8-11'), interval = 171)\nstr(a4)\n```\n\nFor some models, like MIKE NAM, it's necessary to run model a few days before the forecasting time, to warm up the model. In this case `buffer` is needed to generate the \"warm up period\".\n```{r, fig.height=4, fig.width=9, results='hide'}\n# If the model needs 14 days to warm up.\na2 <- getHisEnsem(a, example = c('1995-1-1', '1996-3-1'), interval = 730, buffer = 14, plot = 'cum')\nstr(a2)\n```\n\nFrom `str(a2)` we can see that the data has 14 more rows, and the start date is changed to \"1994-12-18\"\n\nAlso, if costomized title and xy axis are needed, you can set yourself.\n\n```{r, fig.height=4, fig.width=9}\na2 <- getHisEnsem(a, example = c('1995-1-1', '1996-3-1'), title = 'aaa', x = 'a')\n```\n\nIf you want to combine different ensemble together, there is a regular `_comb` function `getEnsem_comb` to combine different plots together.\n\n```{r, fig.show='hold', fig.height=4, fig.width=9}\na1 <- getHisEnsem(a, example = c('1994-1-1', '1994-12-31'), plot = 'cum', output = 'ggplot', name = 1)\na2 <- getHisEnsem(a, example = c('1994-3-1', '1995-3-31'), plot = 'cum', output = 'ggplot', name = 2)\na3 <- getHisEnsem(a, example = c('1994-5-1', '1995-4-30'), plot = 'cum', output = 'ggplot', name = 3)\n```\n\n```{r, fig.show='hold', fig.height=9, fig.width=9}\ngetEnsem_comb(a1, a2, a3, nrow = 3)\n```\n\n### 1.3.4 Monthly Data and Daily Data Conversion\n\nSometimes you have the monthly data, and want to generate the daily data, sometimes the opposite situation. 
`monDay` can help you with the conversion.\n\nIf you have daily data and want to convert it to a monthly data.\n```{r}\ndata(testdl)\nTS <- testdl[[2]] # Get daily data\nTS_new <- monDay(TS, method = 'day2mon')\n```\n\nIf you have monthly data and want to convert it to a daily data.\n\n```{r}\n# First generate a monthly data.\nTS <- data.frame(Date = seq(as.Date('1999-9-15'), length = 30, by = '1 month'), stats::runif(30, 3, 10))\nTS_new <- monDay(TS, method = 'mon2day')\n```\n\nMore information please check `?monDay`.\n\n\n\n\n\n\n\n\n\n## 1.4 Seasonal and Monthly Precipitation Analysis\n\nSometimes we need to know not only the annual precipitation, but also the precipitation of a certain month or certain season. `getPreciBar` is in charge of different analysis. It can analyze both grid file and singe timeseries. IF the input is a time series, the argument `TS =` must be put.\n\n`info` argument will give information about max, min, mean, and median, if selected TRUE.\n\n```{r, fig.height=5, fig.width=7}\ndata(testdl)\nTS <- testdl[[1]]\na <- getPreciBar(TS = TS, method = 'spring')\n# if info = T, the information will be given at the bottom.\na <- getPreciBar(TS = TS, method = 'spring', info = TRUE)\n\n```\nIf missing value is wanted, set `omitNA = FALSE`.\n```{r, fig.height=5, fig.width=7}\na <- getPreciBar(TS = TS, method = 'winter', omitNA = FALSE)\n```\nGet special month precipitation, e.g. march.\n```{r, fig.height=5, fig.width=7}\na <-getPreciBar(TS = TS, method = 3)\n```\nWe can also get annual precipitation, plot figure and assign title ans axis.\n```{r, fig.height=5, fig.width=7}\na <- getPreciBar(TS = TS, method = 'annual', x = 'aaa', title = 'aaa')\n```\n\n\nIf `output = 'ggplot'` is chosen, the the output can be used in `getPreciBar_comb`, to generate multiple plots.\n\n```{r, fig.height=5, fig.width=7}\na1 <- getPreciBar(TS = TS, method = 1, output = 'ggplot', name = 'Jan')\na2 <- getPreciBar(TS = TS, method = 2, output = 'ggplot', name = 'Feb')\ngetPreciBar_comb(a1, a2)\n```\n\n\n\n\n\n\n# 2. Climate Forecasting\n\n* For the climate forecasting part, `hyfo` mainly focuses on the post processing of the gridData derived from forecasts or other sources. The input is a list file, usually an NetCDF file. There are `getNcdfVar()`, `loadNcdf()` and `writeNcdf()` prepared in hyfo, for you to deal with NetCDF file. `loadNcdf()` will give a list based hyfo output file.\n\n\n\n##### Note\n\nIf an **ensemble forecast** data is loaded, there will be one dimension called \"member\", by default, `hyfo` will calculate the mean of different members. If you want to see a special member, add `member` argument to `getSpatialMap`, e.g., `getSpatialMap(tgridData, method = 'meanAnnual', member = 3)`, `getPreciBar(tgridData, method = 'annual', member = 14)`\n\n\n\n\n\n## 2.1 Load, write and downscale NetCDF file\n\nThere are three main functions dealing with `getNcdfVar()`, `loadNcdf()` and `writeNcdf()`. `getNcdfVar()` is for get the variable name if you don't know the name. Then you can load NetCDF file, and get a hyfo output, from which you will use in further analysis. Maybe you want to change some thing with the original NetCDF file. You can load first, then, make changes to hyfo output file and then write back to NetCDF file. 
Following examples shows the procedure.\n```{r, fig.height=6, fig.width=9}\n# First open the test NETcDF file.\nfilePath <- system.file(\"extdata\", \"tnc.nc\", package = \"hyfo\")\n \n# Then if you don't know the variable name, you can use \\code{getNcdfVar} to get variable name\nvarname <- getNcdfVar(filePath)\n\nnc <- loadNcdf(filePath, varname)\n\n# nc is a list based hyfo output file, with this file, you can make further analysis.\n\n# E.g. you want to make some changes to the data\nnc$Data <- nc$Data - 0.3\n\n```\n```{r, eval=FALSE}\n# Then write it back to file\nwriteNcdf(gridData = nc, filePath = 'test.nc')\n\n\n```\n\n\n`hyfo` can also do downscale job. When you load the file, you can directly assign the year, longitude and latitude. And if you already have a hyfo list file, you can use `downscaleNcdf` to downscale your list file. \n\n```{r, fig.height=6, fig.width=9}\n# First open the test NETcDF file.\nfilePath <- system.file(\"extdata\", \"tnc.nc\", package = \"hyfo\")\n \n# Then if you don't know the variable name, you can use \\code{getNcdfVar} to get variable name\nvarname <- getNcdfVar(filePath)\n\nnc <- loadNcdf(filePath, varname, year = 2005:2006, lon = c(-2.4, -1.6), lat = c(41, 43.5))\n\nnc_plot <- getSpatialMap(nc, 'mean') \n\n# if you want to further downscale nc, you can use the following function.\nnc1 <- downscaleNcdf(nc, year = 2005, lon = c(-2.2, -1.75), lat = c(43, 44))\nnc1_plot <- getSpatialMap(nc1, 'mean')\n```\n\n\n\n\n\n\n\n## 2.2 Spatial Map Plot\n\nAs described before, the following analysis is based the list based hyfo output file. You can call elements by `$`.\n\nIf we want to see the mean daily precipitation.\n```{r, fig.height=6, fig.width=9}\ndata(tgridData)\na <- getSpatialMap(tgridData, method = 'meanAnnual')\n\n# If a dataset is an ensemble forecast, you can use argument member to choose\nfilePath <- system.file(\"extdata\", \"tnc.nc\", package = \"hyfo\")\n \n# Then if you don't know the variable name, you can use \\code{getNcdfVar} to get variable name\nvarname <- getNcdfVar(filePath)\n\nnc <- loadNcdf(filePath, varname)\n\n# choose the 3rd member\na <- getSpatialMap(nc, method = 'mean', member = 2)\n# If member not assigned, the mean value of the members will be plotted.\na <- getSpatialMap(nc, method = 'mean')\n```\n\nThere are several methods to be seleted in the function, details can be found by `?getSpatialMap`.\n\nSometimes there exists a great difference in the whole map, e.g., the following value, `c(100, 2, 2,6, 1,7)`, since the maximum value is too large, so in the plot, by normal plot scale, we can only recognize value 100 and the rest, it's hard for us to tell the difference between 2, 2.6, and 1.7 from the plot. In this situation, the value needs to be processed before plotting. Here `scale` provides a way to decide the plot scale.\n\n`scale` passes the arguments to the `trans` argument in `ggplot2`. The most common scale is \"sqrt\" and \"log10\", which focus more on the minutiae. Default is \"identity\", which means no change to the plot scale.\n```{r, fig.height=6, fig.width=9}\na <- getSpatialMap(tgridData, method = 'meanAnnual', scale = 'sqrt')\n```\n\nHere in our example, because the region is too small, and the differences is not so big, so it's not so obvious to tell from the plot. 
But if in a map, both dry region and wet region is included, that will be more obvious to see the difference between the plot scales.\n\n\nAlso, if you are not satisfied with the title, x axis and y axis, you can assgin yourself.\n```{r, fig.height=6, fig.width=9}\na <- getSpatialMap(tgridData, method = 'meanAnnual', scale = 'sqrt', \n title = 'aaa', x = 'aaa', y = 'aaa')\n```\n\n\n\n\n\n\n\n\n\n\n## 2.3 Add Background (catchment and gauging stations)\n\n\n\n\n\n\nThe default background is the world map, while if you have other backgrounds like catchment shape file and station location file, you are welcome to import them as background.\n\n\n\n\n\n\n\n### 2.3.1 Add catchment shape file\n\nCatchment shape file needs to be processed with a very simple step. It's based on the package `rgdal`, details can be found by `?shp2cat`\n\n```{r, fig.height=6, fig.width=9}\n# Use the test file provided by hyfo\nfile <- system.file(\"extdata\", \"testCat.shp\", package = \"hyfo\")\ncat <- shp2cat(file)\n# cat is the catchment file.\n```\n\nThen the catchment file `cat` can be inputed as background.\n\n```{r, fig.height=6, fig.width=9}\na <- getSpatialMap(tgridData, method = 'meanAnnual', catchment = cat)\n```\n\n\n\n\n\n\n\n### 2.3.2 Add station locations\n\nPoints file needs to be read into dataframe, and special column has to be assigned, details can be found by `?getSpatialMap_mat`\n```{r, fig.height=6, fig.width=9, results='hide'}\n# Use the points file provided by hyfo\nfile <- system.file(\"extdata\", \"points.txt\", package = \"hyfo\")\npoints <- read.table(file, header = TRUE, sep = ',' )\ngetSpatialMap(tgridData, method = 'winter', points = points, catchment = cat)\n\n```\n\nAs can be seen above, the color of the points represents the elevation, the size of the points represents the value, e.g., rainfall value.\n\nYou can generate your own point file and use it as background, or you can also find the original file in the package, and replace the old information with your information.\n\n\n\n\n\n\n\n\n## 2.4 Variable Bar Plot \n\nBisides spatial map, bar plot can also be plotted. The value in the bar plot is spatially averaged, i.e. the value in the bar plot is the mean value over the region.\n\nAnnual precipitation.\n\n```{r, fig.height=6, fig.width=9}\ndata(tgridData)\na <- getPreciBar(tgridData, method = 'annual')\n```\n\nMean monthly precipitation over the whole period, with the ranges for each month. But not all kinds of bar plot have a plot range.\n\n```{r, fig.height=6, fig.width=9}\na <- getPreciBar(tgridData, method = 'meanMonthly')\na <- getPreciBar(tgridData, method = 'meanMonthly', plotRange = FALSE)\n```\n\nSeasonal precipitation, and monthly precipitation can also be plotted. \n\n```{r, fig.height=6, fig.width=9}\na <- getPreciBar(tgridData, method = 'spring')# spring precipitation for each year\na <- getPreciBar(tgridData, method = 3) # march precipitation for each year\n```\n\n\n\n\n\n\n\n\n## 2.5 Analysis and Comparison\n\nFor some cases, analysis and comparison are necesssary, which are also provided by `hyfo`. \n\nThere are three different kinds of output from `getSpatialMap` and `getPreciBar`, respectively, `output = 'data'`, `output = 'ggplot'` and `output = 'plot'`. \n\n`output = 'data'` is default in the function and do not need to be declare when input. It is mainly used in analyzing and replot the results.\n\n`output = 'ggplot'` is used when combining different plots.\n\n`output = 'plot'` is used when a layer output is needed. 
the output can be directly printed, and can be mannually combined by the plot arrange functions, e.g., `grid.arrange()`\n\n##### Note:\n**All the comparisons must be comparable, e.g.,**\n\n* For `getSpatialMap_comb`, the maps to be compared should be with same size and resolution, in other words, they should be fully overlapped by each other. Check `?getSpatialMap_comb` for details.\n* For `getPreciBar_comb`, the bar plots to be compared should belong to the same kind, e.g., spring and winter, January and December, and couldn't be spring and annual. Details can be found by `?getPreciBar_comb`\n\n\n\n\n\n\n### 2.5.1 Spatial Map\n\nThe default \"data\" output provides a matrix, representing the raster information of the spatial map. \n\n```{r, fig.height=6, fig.width=9}\na <- getSpatialMap(tgridData, method = 'meanAnnual')\na\n```\n\nThis matrix is upside down from what you can see from the plot. **DO NOT try to change this matrix.**\n`hyfo` can deal with it.\n\n```{r, fig.show= 'hold', fig.height=6, fig.width=9}\n# For re-plot the matrix, input the matrix, and then the map can be replot.\nb <- getSpatialMap_mat(a)\n\n# Without title and x and y, also you can assign yourself.\nb <- getSpatialMap_mat(a, title = 'aaa', x = 'aaa', y = '')\n```\n\nThe matrix can be used to make different analysis and plot again.\n\n##### Note\n**If the matrix doesn't come from `getSpatialMap`, dimension name of longitude and latitude needs to be provided to the matrix, in order to be plotted.**\n```{r, fig.show='hold', fig.height=6, fig.width=9, results='hide'}\n\na1 <- getSpatialMap(tgridData, method = 'mean')\n\n# To make some changes to mean value.\nb <- a1 * 3 -1\ngetSpatialMap_mat(b, title = '', x = '', y = '')\n\n# Bias, variation and other analysis can also be processed \n# the same way. \n# Just apply the analysis to the matrix and \n# use getSpatialMap_mat to plot.\n```\n\nIf multi-plot is needed, `hyfo` can also combine different plots together. Use `output = ggplot`, which gives back the a special format that can be easily used by `ggplot2`\n\n```{r, fig.show='hide', fig.height=6, fig.width=9}\n\na1 <- getSpatialMap(tgridData, method = 'spring', output = 'ggplot', name = 'spring')\na2 <- getSpatialMap(tgridData, method = 'summer', output = 'ggplot', name = 'summer')\na3 <- getSpatialMap(tgridData, method = 'autumn', output = 'ggplot', name = 'autumn')\na4 <- getSpatialMap(tgridData, method = 'winter', output = 'ggplot', name = 'winter')\n\n```\n\n```{r, fig.show='hold', fig.height=8, fig.width=12}\n\ngetSpatialMap_comb(a1, a2, a3, a4, nrow = 2)# you cannot assign title\n```\n\n```{r, fig.show='hold', fig.height=12, fig.width=9}\ngetSpatialMap_comb(a1, a2, a3, a4, nrow = 4)\n\n```\n\n`getSpatialMap_comb` accepts list (using `list =`) object too, which is easier for multi-plot. First list of 12 months are got. 
**NOTE: If input is a list, the argument should be `list = yourlist`, not directly put the list in the argument.**\n```{r, fig.show='hide'}\nc <- lapply(1:12, function(x) getSpatialMap(tgridData, method = x, output = 'ggplot', name = x))\n```\n\nThen they are combined.\n```{r, fig.show='hold', fig.height=10, fig.width=12}\ngetSpatialMap_comb(list = c, nrow = 4)\n```\n\n\n\n\n\n\n\n\n\n### 2.5.2 Bar Plot\n\nBasically, bar plot follows the same rule as part 2.4.1 spatial map, only a few cases that needs to pay attention.\n\n```{r, fig.show='hide', fig.height=6, fig.width=9, results='hide'}\nb1 <- getPreciBar(tgridData, method = 'spring', output = 'ggplot', name = 'spring')\nb2 <- getPreciBar(tgridData, method = 'summer', output = 'ggplot', name = 'summer')\nb3 <- getPreciBar(tgridData, method = 'autumn', output = 'ggplot', name = 'autumn')\nb4 <- getPreciBar(tgridData, method = 'winter', output = 'ggplot', name = 'winter')\n```\n\n```{r, fig.show='hold', fig.height=8, fig.width=9}\ngetPreciBar_comb(b1, b2, b3, b4, nrow = 2)\n```\n\n```{r, fig.show='hide', fig.height=6, fig.width=9, results='hide'}\nc <- lapply(1:12, function(x) getPreciBar(tgridData, method = x, output = 'ggplot', name = x))\n```\n\n```{r, fig.show='hold', fig.height=8, fig.width=9}\ngetPreciBar_comb(list = c, nrow = 4)\n```\n\n\n\n\n\n\n\n\n## 2.6 Model Input\n\n### 2.6.1 Extract time series from Forecasting Dataset\n\nIf there are different members existing in the dataset, `hyfo` can extract them and generate a dataframe for the easy input to the model. If the dataset doesn't have a member part, then `hyfo` will extract only one single time seres. \n\n\n```{r, fig.show='hold', fig.height=6, fig.width=9}\nfilePath <- system.file(\"extdata\", \"tnc.nc\", package = \"hyfo\")\n \n# Then if you don't know the variable name, you can use \\code{getNcdfVar} to get variable name\nvarname <- getNcdfVar(filePath)\n\nnc <- loadNcdf(filePath, varname)\n\na <- getFrcEnsem(nc)\n\n\n# If there is no member session in the dataset, a single time sereis will be extracted.\n\na1 <- getFrcEnsem(tgridData)\n```\n\nThe default output is spatially averaged, if there are more than one cells in the dataset, the mean value of the cells will be calculated. While if you are interested in special cell, you can assign the cell value, for how to assign, please check the details in `?getFrcEnsem`. You can also directly use longitude and latitude to extract time series, using `coord = `\n\n```{r, fig.height=6, fig.width=9, results='hide'}\ngetSpatialMap(nc, 'mean')\n\na <- getFrcEnsem(nc, cell = c(6,2))\n# From the map, cell = c(6, 2) means lon = -1.4, lat = 43.2, so you can use corrd to locate\n# your research area and extract time series.\nb <- getFrcEnsem(nc, coord = c(-1.4, 43.2))\n\n```\n\nIf you want to combine different plots together, you can use `_comb` function to combine plots.\n\n```{r, fig.show='hide', fig.height=4, fig.width=7}\na1 <- getFrcEnsem(nc, cell = c(2,3), output = 'ggplot', name = 'a1')\na2 <- getFrcEnsem(nc, cell = c(2,4), output = 'ggplot', name = 'a2')\na3 <- getFrcEnsem(nc, cell = c(2,5), output = 'ggplot', name = 'a3')\n```\n```{r, fig.show='hold', fig.height=9, fig.width=7}\ngetEnsem_comb(a1, a2, a3, nrow = 3)\n```\n\nPlot rules are just the same as described in 1.3.3, please check if needed.\n\n\n\n\n\n\n\n\n# 3. Anarbe Case\n\nThe functions with anarbe case end with `_anarbe`, all of them are used to collect different available published data in anarbe catchment in Spain. 
The data comes from two website: [here](http://meteo.navarra.es/estaciones/mapadeestaciones.cfm) and [here](http://www4.gipuzkoa.net/oohh/web/esp/02.asp), there are precipitation or discharge data on those website, and can be downloaded directly. \n\nSince the available files on those website are arranged by a year or five years, for long term data collection, a tools is necessary for collecting data from different files.\n\n##### Note:\nFor excel files, if you have access to the dam regulation excel file of the dam anarbe, you can use `collectData_excel_anarbe` in the package, but this function is commented in the original code, cannot be used directly. Go to original file in the library or go to github [here](https://github.com/Yuanchao-Xu/hyfo/blob/master/R/collectData_excel.R), copy the original code.\n\nThere are two csv files and txt files included in the package, which can be used as examples.\n\n```{r}\nfile <- system.file(\"extdata\", \"1999.csv\", package = \"hyfo\")\nfolder <- strsplit(file, '1999')[[1]][1]\n\na <- collectData_csv_anarbe(folder, output = TRUE)\nstr(a)\nb <- collectData_txt_anarbe(folder, output = TRUE)\nstr(b)\n```\n\n\n\n", "created" : 1440691741382.000, "dirty" : false, "encoding" : "ASCII", "folds" : "", - "hash" : "297121889", + "hash" : "727374094", "id" : "A9B56DE9", - "lastKnownWriteTime" : 1441354441, + "lastKnownWriteTime" : 1441707499, "path" : "~/hyfo/vignettes/hyfo.Rmd", "project_path" : "vignettes/hyfo.Rmd", "properties" : { diff --git a/.Rproj.user/132DF987/sdb/per/t/E6E4511D b/.Rproj.user/132DF987/sdb/per/t/E6E4511D new file mode 100644 index 0000000..081c623 --- /dev/null +++ b/.Rproj.user/132DF987/sdb/per/t/E6E4511D @@ -0,0 +1,17 @@ +{ + "contents" : "#' Extract period from list or dataframe.\n#' \n#' Extract common period or certain period from a list of different dataframes of time series, or from a \n#' dataframe.\n#' NOTE: all the dates in the datalist should follow the format in ?as.Date{base}.\n#' @param datalist A list of different dataframes of time series .\n#' @param startDate A Date showing the start of the extract period, default as NULL, check details.\n#' @param endDate A Date showing the end of the extract period, default as NULL, check details.\n#' @param commonPeriod A boolean showing whether the common period is extracted. If chosen, startDate and endDate\n#' should be NULL.\n#' @param dataframe A dataframe with first column Date, the rest columns value. If your input is a \n#' dataframe, not time series list, you can put \\code{dataframe = yourdataframe}. And certain period will be \n#' extracted. Note: if your input is a time series, that means all the columns share the same period of date.\n#' @param month extract certain months in a year. e.g. if you want to extract Jan, Feb of each year, \n#' set \\code{month = c(1, 2)}.\n#' @details \n#' \\strong{startDate and endDate}\n#' \n#' If startDate and endDate are assigned, then certain period between startDate and endDate will be returned, \n#' for both datalist input and dataframe input.\n#' \n#' If startDate and endDate are NOT assigned, then,\n#' \n#' if input is a datalist, the startDate and endDate of the common period of different datalists will be assigned\n#' to the startDate and endDate.\n#' \n#' if input is a dataframe, the startDate and endDate of the input dataframe will be assigned to the startDate\n#' and endDate . Since different value columns share a common Date column in a dataframe input. 
\n#' \n#' \n#' \n#' \n#' @return A list or a dataframe with all the time series inside containing the same period.\n#' @examples\n#' # Generate timeseries datalist. Each data frame consists of a Date and a value.\n#' \n#' AAA <- data.frame(\n#' # date column\n#' Date = seq(as.Date('1990-10-28'),as.Date('1997-4-1'),1),\n#' # value column\n#' AAA = sample(1:100,length(seq(as.Date('1990-10-28'),as.Date('1997-4-1'),1)), repl = TRUE))\n#' \n#' BBB <- data.frame(\n#' Date = seq(as.Date('1993-3-28'),as.Date('1999-1-1'),1), \n#' BBB = sample(1:100,length(seq(as.Date('1993-3-28'),as.Date('1999-1-1'),1)), repl = TRUE))\n#' \n#' CCC <- data.frame(\n#' Date = seq(as.Date('1988-2-2'),as.Date('1996-1-1'),1), \n#' CCC = sample(1:100,length(seq(as.Date('1988-2-2'),as.Date('1996-1-1'),1)), repl = TRUE)) \n#' \n#' list <- list(AAA, BBB, CCC)# dput() and dget() can be used to save and load list file.\n#' \n#' list_com <- extractPeriod(list, commonPeriod = TRUE)\n#' \n#' # list_com is the extracted datalist.\n#' str(list_com)\n#' \n#' # If startDate and endDate is provided, the record between them will be extracted.\n#' # make sure startDate is later than any startDate in each dataframe and endDate is \n#' # earlier than any endDate in each dataframe.\n#' \n#' data(testdl)\n#' datalist_com1 <- extractPeriod(testdl, startDate = '1994-1-1', endDate = '1995-10-1')\n#' \n#' \n#' dataframe <- list2Dataframe(datalist_com1)\n#' # ow we have a dataframe to extract certain months.\n#' dataframe_new <- extractPeriod(dataframe = dataframe, month = c(1,2,3) )\n#' \n#' \n#' @importFrom zoo as.Date\n#' @references \n#' Achim Zeileis and Gabor Grothendieck (2005). zoo: S3 Infrastructure for Regular and Irregular Time\n#' Series. Journal of Statistical Software, 14(6), 1-27. URL http://www.jstatsoft.org/v14/i06/\n#' @export\nextractPeriod <- function(datalist, startDate = NULL, endDate = NULL, commonPeriod = FALSE, \n dataframe = NULL, month = NULL) {\n if (!is.null(dataframe)) {\n dataset <- extractPeriod_dataframe(dataframe, startDate = startDate, endDate = endDate, month = month)\n \n \n } else {\n if (!is.null(startDate) & !is.null(endDate) & commonPeriod == FALSE) {\n dataset <- lapply(datalist, extractPeriod_dataframe, startDate = startDate, endDate = endDate, month = month)\n } else if (is.null(startDate) & is.null(endDate) & commonPeriod == TRUE) {\n \n Dates <- lapply(datalist, extractPeriod_getDate) \n Dates <- do.call('rbind', Dates)\n \n startDate <- as.Date(max(Dates[, 1]))\n endDate <- as.Date(min(Dates[, 2]))\n \n dataset <- lapply(datalist, extractPeriod_dataframe, startDate = startDate, endDate = endDate, month = month)\n \n } else {\n stop('Enter startDate and endDate, set commonPeriod as False, or simply set commonPeriod as TRUE')\n }\n }\n \n return(dataset)\n}\n\n\n\nextractPeriod_dataframe <- function(dataframe, startDate, endDate, month = NULL) {\n # to check whether first column is a date format\n if (!grepl('-|/', dataframe[1, 1])) {\n stop('First column is not date or Wrong Date formate, check the format in ?as.Date{base} \n and use as.Date to convert.')\n } \n dataframe[, 1] <- as.Date(dataframe[, 1])\n \n if (is.null(startDate)) startDate <- dataframe[1, 1]\n if (is.null(endDate)) endDate <- tail(dataframe[, 1], 1)\n \n startIndex <- which(dataframe[, 1] == startDate)\n endIndex <- which(dataframe[, 1] == endDate)\n if (length(startIndex) == 0 | length(endIndex) == 0) {\n stop('startDate and endDate exceeds the date limits in dataframe. 
Check datalsit please.')\n }\n output <- dataframe[startIndex:endIndex, ]\n \n if (!is.null(month)) {\n Date <- as.POSIXlt(output[, 1])\n mon <- Date$mon + 1\n \n # %in% can deal with multiple equalities\n DateIndex <- which(mon %in% month)\n \n output <- output[DateIndex, ]\n }\n \n return(output) \n}\n\n\n#' @importFrom utils tail\n#' @references \n#' R Core Team (2015). R: A language and environment for statistical computing. R Foundation for\n#' Statistical Computing, Vienna, Austria. URL http://www.R-project.org/.\nextractPeriod_getDate <- function(dataset) {\n \n if (!grepl('-|/', dataset[1, 1])) {\n stop('First column is not date or Wrong Date formate, check the format in ?as.Date{base}, \n and use as.Date to convert.')\n }\n start <- as.Date(dataset[1, 1])\n end <- as.Date(tail(dataset[, 1], 1))\n \n \n return(c(start, end))\n}\n", + "created" : 1441719940194.000, + "dirty" : false, + "encoding" : "ASCII", + "folds" : "", + "hash" : "2693849240", + "id" : "E6E4511D", + "lastKnownWriteTime" : 1441721046, + "path" : "~/hyfo/R/extractPeriod.R", + "project_path" : "R/extractPeriod.R", + "properties" : { + }, + "relative_order" : 2, + "source_on_save" : false, + "type" : "r_source" +} \ No newline at end of file diff --git a/.Rproj.user/132DF987/sdb/per/t/EAF9DB09 b/.Rproj.user/132DF987/sdb/per/t/EAF9DB09 deleted file mode 100644 index cd7b29d..0000000 --- a/.Rproj.user/132DF987/sdb/per/t/EAF9DB09 +++ /dev/null @@ -1,18 +0,0 @@ -{ - "contents" : "#' Get variable name of the NetCDF file.\n#' \n#' Get variable name in the NetCDF file. After knowning the name, you can use \\code{loadNcdf} to load\n#' the target variable.\n#' \n#' @param filePath A path pointing to the netCDF file.\n#' @return The names of the varialbes in the file.\n#' @examples \n#' # First open the test NETcDF file.\n#' filePath <- system.file(\"extdata\", \"tnc.nc\", package = \"hyfo\")\n#' \n#' # Then if you don't know the variable name, you can use \\code{getNcdfVar} to get variable name\n#' varname <- getNcdfVar(filePath)\n#' \n#' @import ncdf\n#' @references \n#' David Pierce (2014). ncdf: Interface to Unidata netCDF data files. R package version 1.6.8.\n#' http://CRAN.R-project.org/package=ncdf\n#' \n#' \n#' @export\ngetNcdfVar <- function(filePath) {\n nc <- open.ncdf(filePath)\n names <- names(nc$var)\n return(names)\n}\n\n\n#' Load NetCDF file\n#' \n#' @param filePath A path pointing to the NetCDF file, version3.\n#' @param varname A character representing the variable name, you can use \\code{getNcdfVar} to\n#' get the basic information about the variables and select the target.\n#' @param tz A string representing the time zone, default is GMT, if you know what time zone is \n#' you can assign it in the argument. If \\code{tz = ''}, current time zone will be taken.\n#' @param ... Arguments will be passed to \\code{downscaleNcdf}\n#' @return A list object from \\code{hyfo} containing the information to be used in the analysis, \n#' or biascorrection.\n#' @examples \n#' # First open the test NETcDF file.\n#' filePath <- system.file(\"extdata\", \"tnc.nc\", package = \"hyfo\")\n#' \n#' # Then if you don't know the variable name, you can use \\code{getNcdfVar} to get variable name\n#' varname <- getNcdfVar(filePath)\n#' \n#' nc <- loadNcdf(filePath, varname)\n#' \n#' @export\n#' @import ncdf\n#' @references \n#' David Pierce (2014). ncdf: Interface to Unidata netCDF data files. R package version 1.6.8.\n#' http://CRAN.R-project.org/package=ncdf\n#' \n#' file structure refers to\n#' \n#' Santander MetGroup (2015). 
ecomsUDG.Raccess: R interface to the ECOMS User Data Gateway. R package\n#' version 2.2-6. http://meteo.unican.es/ecoms-udg\n#' \nloadNcdf <- function(filePath, varname, tz = 'GMT', ...) {\n nc <- open.ncdf(filePath)\n \n var <- nc$var\n # Use name to locate the variable\n call_1 <- as.call(c(\n list(as.name('$'), var, varname)\n ))\n var <- eval(call_1)\n if(is.null(var)) stop('No such variable name, check source file.')\n \n # First needs to identify the variable name, load the right data\n message('Loading data...')\n nc_data <- get.var.ncdf(nc, var)\n message('Processing...')\n \n dimNames <- unlist(lapply(1:length(var$dim), function(x) var$dim[[x]]$name))\n \n # Only deals with the most common dimensions, futher dimensions will be added in future.\n dimIndex <- match(c('lon', 'lat', 'time', 'member'), dimNames)\n \n gridData <- list()\n gridData$Variable$varName <- varname\n gridData$xyCoords$x <- var$dim[[dimIndex[1]]]$vals\n gridData$xyCoords$y <- var$dim[[dimIndex[2]]]$vals\n \n # Time part needs to be taken seperately\n \n timeUnit <- strsplit(var$dim[[dimIndex[3]]]$units, split = ' since')[[1]][1]\n timeDiff <- var$dim[[dimIndex[3]]]$vals\n # To get real time, time since when has to be grabbed from the dataset.\n timeSince <- as.POSIXlt(strsplit(var$dim[[dimIndex[3]]]$units, split = 'since')[[1]][2], tz = tz)\n \n \n# Date <- rep(timeSince, length(timeDiff))\n \n \n unitDic <- data.frame(weeks = 'weeks', days = 'days', hours = 'hours',\n minutes = 'mins', seconds = 'secs')\n \n timeDiff <- as.difftime(timeDiff, units = as.character(unitDic[1, timeUnit]))\n \n# if (grepl('day', timeUnit)) {\n# Date$mday <- Date$mday + timeDiff\n# } else if (grepl('second', timeUnit)) {\n# Date$sec <- Date$sec + timeDiff\n# }\n Date <- timeSince + timeDiff\n \n if (length(Date) == 1) {\n warning(\"Only one time step is taken, time dimension is dropped in the original data.\n But after loading, the time dimension (with length : 1) will be added.\")\n }\n gridData$Dates$start <- as.character(Date)\n \n # Assing data to grid data\n # At leaset should be 3 dimensions, lon, lat, time. So if less than 3, \n \n if (length(dim(nc_data)) < 3) {\n dim(nc_data) <- c(dim(nc_data), 1) \n message('Time dimension is added, make sure in your original data, only time dimension is dropped.')\n }\n gridData$Data <- nc_data\n attributes(gridData$Data)$dimensions <- dimNames\n \n if (!is.na(dimIndex[4])) gridData$Members <- var$dim[[dimIndex[4]]]$vals\n \n gridData$Loaded <- 'by hyfo package, http://yuanchao-xu.github.io/hyfo/'\n close.ncdf(nc)\n \n output <- downscaleNcdf(gridData, ...)\n \n return(output)\n \n}\n\n\n\n\n#' Downscale NetCDF file\n#' @param gridData A hyfo list file or the list file from \\code{loadECOMS{ecomsUDG.Raccess}}\n#' or \\code{loadGridData{ecomsUDG.Raccess}}\n#' @param year A vector of the target year. e.g. \\code{year = 2000}, \\code{year = 1980:2000}\n#' @param lon A vector of the range of the downscaled longitude, should contain a max value\n#' and a min value. e.g. \\code{lon = c(-1.5, 2,5)}\n#' @param lat A vector of the range of the downscaled latitude, should contain a max value\n#' and a min value. e.g. 
\\code{lat = c(32,2, 36)}\n#' @return A downscaled hyfo list file.\n#' @examples \n#' # First open the test NETcDF file.\n#' filePath <- system.file(\"extdata\", \"tnc.nc\", package = \"hyfo\")\n#' \n#' \n#' # Then if you don't know the variable name, you can use \\code{getNcdfVar} to get variable name\n#' varname <- getNcdfVar(filePath)\n#' \n#' nc <- loadNcdf(filePath, varname)\n#' \n#' # Then write to your work directory\n#' \n#' nc1 <- downscaleNcdf(nc, year = 2006, lon = c(-2, -0.5), lat = c(43.2, 43.7))\n#' \n#' \n#' @export \n#' @references \n#' David Pierce (2014). ncdf: Interface to Unidata netCDF data files. R package version 1.6.8.\n#' http://CRAN.R-project.org/package=ncdf\n#' \n#' file structure refers to\n#' \n#' Santander MetGroup (2015). ecomsUDG.Raccess: R interface to the ECOMS User Data Gateway. R package\n#' version 2.2-6. http://meteo.unican.es/ecoms-udg\n#' \ndownscaleNcdf <- function(gridData, year = NULL, lon = NULL, lat = NULL) {\n \n if (!is.null(year)) {\n Dates <- as.POSIXlt(gridData$Dates$start)\n yearIndex <- Dates$year + 1900\n \n targetYearIndex <- unlist(lapply(year, function(x) which(yearIndex == x)))\n if (length(targetYearIndex) == 0) stop('Check your input year, it may exceed the years \n in the input dataset.')\n gridData$Dates$start <- gridData$Dates$start[targetYearIndex]\n gridData$Dates$end <- gridData$Dates$end[targetYearIndex]\n \n timeDim <- match('time', attributes(gridData$Data)$dimensions)\n \n gridData$Data <- chooseDim(gridData$Data, timeDim, targetYearIndex)\n }\n \n if (!is.null(lon)) {\n \n lonIndex <- gridData$xyCoords$x\n \n lonI1 <- which(abs(lonIndex - min(lon)) == min(abs(lonIndex - min(lon)), na.rm = TRUE)) \n lonI2 <- which(abs(lonIndex - max(lon)) == min(abs(lonIndex - max(lon)), na.rm = TRUE)) \n \n targetLonIndex <- lonI1:lonI2\n if (length(targetLonIndex) == 0) stop('Your input lon is too small, try to expand the \n longitude range.') \n gridData$xyCoords$x <- gridData$xyCoords$x[targetLonIndex]\n lonDim <- match('lon', attributes(gridData$Data)$dimensions)\n \n gridData$Data <- chooseDim(gridData$Data, lonDim, targetLonIndex)\n }\n \n \n if (!is.null(lat)) {\n latIndex <- gridData$xyCoords$y\n \n latI1 <- which(abs(latIndex - min(lat)) == min(abs(latIndex - min(lat)), na.rm = TRUE)) \n latI2 <- which(abs(latIndex - max(lat)) == min(abs(latIndex - max(lat)), na.rm = TRUE)) \n \n targetLatIndex <- latI1:latI2\n \n if (length(targetLonIndex) == 0) stop('Your input lat is too small, try to expand the \n latitude range.') \n gridData$xyCoords$y <- gridData$xyCoords$y[targetLatIndex]\n latDim <- match('lat', attributes(gridData$Data)$dimensions)\n gridData$Data <- chooseDim(gridData$Data, latDim, targetLatIndex)\n }\n \n return(gridData)\n \n}\n\n\n\n\n\n\n\n\n\n\n#' Write to NetCDF file using hyfo list file\n#' @param gridData A hyfo list file or the list file from \\code{loadECOMS{ecomsUDG.Raccess}}\n#' or \\code{loadGridData{ecomsUDG.Raccess}}\n#' @param filePath A path of the new NetCDF file, should end with \".nc\"\n#' @param missingValue A number representing the missing value in the NetCDF file, default\n#' is 1e20\n#' #' @param tz A string representing the time zone, default is GMT, if you know what time zone is \n#' you can assign it in the argument. If \\code{tz = ''}, current time zone will be taken.\n#' @param units A string showing in which unit you are putting in the NetCDF file, it can be \n#' seconds or days and so on. 
If not specified, the function will pick up the possible largest \n#' time units from \\code{c('weeks', 'days', 'hours', 'mins', 'secs')}\n#' @return An NetCDF version 3 file.\n#' @examples \n#' # First open the test NETcDF file.\n#' filePath <- system.file(\"extdata\", \"tnc.nc\", package = \"hyfo\")\n#' \n#' \n#' # Then if you don't know the variable name, you can use \\code{getNcdfVar} to get variable name\n#' varname <- getNcdfVar(filePath)\n#' \n#' nc <- loadNcdf(filePath, varname)\n#' \n#' # Then write to your work directory\n#' \n#' writeNcdf(nc, 'test.nc')\n#' \n#' \n#' @export \n#' @import ncdf\n#' @references \n#' David Pierce (2014). ncdf: Interface to Unidata netCDF data files. R package version 1.6.8.\n#' http://CRAN.R-project.org/package=ncdf\n#' \n#' file structure refers to\n#' \n#' Santander MetGroup (2015). ecomsUDG.Raccess: R interface to the ECOMS User Data Gateway. R package\n#' version 2.2-6. http://meteo.unican.es/ecoms-udg\n#' \n#' \nwriteNcdf <- function(gridData, filePath, missingValue = 1e20, tz = 'GMT', units = NULL) {\n \n name <- gridData$Variable$varName\n # First defines dimensions.\n dimLon <- dim.def.ncdf('lon', 'degree', gridData$xyCoords$x)\n dimLat <- dim.def.ncdf('lat', 'degree', gridData$xyCoords$y)\n dimMem <- NULL\n if (!is.null(gridData$Members)) {\n dimMem <- dim.def.ncdf('member', 'members', 1:length(gridData$Members))\n }\n \n \n # Time needs to be treated seperately\n dates <- as.POSIXlt(gridData$Dates$start, tz = tz)\n if (is.null(units)) {\n units <- getTimeUnit(dates)\n time <- difftime(dates, dates[1], units = units)\n } else {\n time <- difftime(dates, dates[1], units = units)\n }\n timeUnits <- paste(units, 'since', dates[1])\n dimTime <- dim.def.ncdf('time', timeUnits, time)\n \n \n # Depending on whether there is a member part of the dataset.\n \n dimList <- list(dimLon, dimLat, dimTime, dimMem)\n # delete the NULL list, in order that there is no member part in the data.\n dimList <- Filter(Negate(is.null), dimList)\n # Then difines data\n var <- var.def.ncdf( name, \"units\", dimList, missingValue)\n \n nc <- create.ncdf(filePath, var)\n \n # This part comes from the library downscaleR, can be deleted if you don't \n # use {ecomsUDG.Raccess}, by adding this, the file can be read by the package {ecomsUDG.Raccess}\n att.put.ncdf(nc, \"time\", \"standard_name\",\"time\")\n att.put.ncdf(nc, \"time\", \"axis\",\"T\")\n att.put.ncdf(nc, \"time\", \"_CoordinateAxisType\",\"Time\")\n #att.put.ncdf(nc, \"time\", \"_ChunkSize\",1)\n att.put.ncdf(nc, \"lon\", \"standard_name\",\"longitude\")\n att.put.ncdf(nc, \"lon\", \"_CoordinateAxisType\",\"Lon\")\n att.put.ncdf(nc, \"lat\", \"standard_name\",\"latitude\")\n att.put.ncdf(nc, \"lat\", \"_CoordinateAxisType\",\"Lat\")\n if (!is.null(dimMem)){\n att.put.ncdf(nc, \"member\", \"standard_name\",\"realization\")\n att.put.ncdf(nc, \"member\", \"_CoordinateAxisType\",\"Ensemble\")\n #att.put.ncdf(nc, \"member\", \"ref\",\"http://www.uncertml.org/samples/realisation\")\n }\n \n \n # This part has to be put\n att.put.ncdf(nc, 0, \"Conventions\",\"CF-1.4\")\n att.put.ncdf(nc, 0, 'WrittenBy', 'hyfo(http://yuanchao-xu.github.io/hyfo/)')\n \n dimIndex <- match(c('lon', 'lat', 'time', 'member'), attributes(gridData$Data)$dimensions)\n dimIndex <- na.omit(dimIndex)\n data <- aperm(gridData$Data, dimIndex)\n put.var.ncdf(nc, name, data)\n close.ncdf(nc)\n \n}\n\n# For internaluse by writeNcdf\ngetTimeUnit <- function(dates) {\n units <- c('weeks', 'days', 'hours', 'mins', 'secs')\n output <- NULL\n for (unit 
in units) {\n time <- difftime(dates, dates[1], units = unit)\n rem <- sapply(time, function(x) x%%1)\n if (!any(rem != 0)) {\n output <- unit\n break\n }\n } \n return(output)\n}\n\n\n# Save for future use. \n#' @import ncdf\n#' @references \n#' David Pierce (2014). ncdf: Interface to Unidata netCDF data files. R package version 1.6.8.\n#' http://CRAN.R-project.org/package=ncdf\ngetExtralDim <- function(...) {\n dimList <- list(...)\n \n \n}", - "created" : 1441265985784.000, - "dirty" : false, - "encoding" : "ASCII", - "folds" : "", - "hash" : "2249661437", - "id" : "EAF9DB09", - "lastKnownWriteTime" : 1440689176, - "path" : "~/hyfo/R/Ncdf_related.R", - "project_path" : "R/Ncdf_related.R", - "properties" : { - "tempName" : "Untitled1" - }, - "relative_order" : 4, - "source_on_save" : false, - "type" : "r_source" -} \ No newline at end of file diff --git a/DESCRIPTION b/DESCRIPTION index 1bc6649..7be3517 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: hyfo Type: Package Title: Hydrology and Climate Forecasting R Package for Data Analysis and Visualization -Version: 1.1.7 +Version: 1.2.0 Date: 2015-07-02 Authors@R: person("Yuanchao", "Xu", email = "xuyuanchao37@gmail.com", role = c("aut", "cre")) diff --git a/R/biasCorrect.R b/R/biasCorrect.R index d75bd1f..aea672c 100644 --- a/R/biasCorrect.R +++ b/R/biasCorrect.R @@ -39,6 +39,8 @@ #' Hindcast is also called re-forecast, is the forecast of the past. E.g. you have a forecast from year 2000-2010, assuming now you are in 2005. So from 2000-2005, this period #' is the hindcast period, and 2005-2010, this period is the forecast period. #' +#' Hindcast can be the same as forecast, i.e., you can use forecast itself as hindcast to train the bias correction. +#' #' #' \strong{How it works} #' diff --git a/R/extractPeriod.R b/R/extractPeriod.R index 98e3201..c100ecd 100644 --- a/R/extractPeriod.R +++ b/R/extractPeriod.R @@ -4,13 +4,41 @@ #' dataframe. #' NOTE: all the dates in the datalist should follow the format in ?as.Date{base}. #' @param datalist A list of different dataframes of time series . -#' @param startDate A Date showing the start of the extract period, default as NULL. -#' @param endDate A Date showing the end of the extract period, default as NULL. +#' @param startDate A Date showing the start of the extract period, default as NULL, check details. +#' @param endDate A Date showing the end of the extract period, default as NULL, check details. #' @param commonPeriod A boolean showing whether the common period is extracted. If chosen, startDate and endDate #' should be NULL. #' @param dataframe A dataframe with first column Date, the rest columns value. If your input is a #' dataframe, not time series list, you can put \code{dataframe = yourdataframe}. And certain period will be #' extracted. Note: if your input is a time series, that means all the columns share the same period of date. +#' @param year extract certain year in the entire time series. if you want to extract year 2000, set \code{year = 2000} +#' @param month extract certain months in a year. e.g. if you want to extract Jan, Feb of each year, +#' set \code{month = c(1, 2)}. +#' @details +#' \strong{startDate and endDate} +#' +#' If startDate and endDate are assigned, then certain period between startDate and endDate will be returned, +#' for both datalist input and dataframe input. 
+#' +#' If startDate and endDate are NOT assigned, then, +#' +#' if input is a datalist, the startDate and endDate of the common period of different datalists will be assigned +#' to the startDate and endDate. +#' +#' if input is a dataframe, the startDate and endDate of the input dataframe will be assigned to the startDate +#' and endDate . Since different value columns share a common Date column in a dataframe input. +#' +#' \strong{year and month} +#' +#' For year crossing month input, hyfo will take from the year before. E.g. if \code{month = c(10, 11, 12, 1)}, +#' and \code{year = 1999}, hyfo will take month 10, 11 and 12 from year 1998, and month 1 from 1999.You DO NOT +#' have to set \code{year = 1998 : 1999}. +#' +#' Well, if you set \code{year = 1998 : 1999}, hyfo will take month 10, 11 and 12 from year 1997, and month 1 from 1998, +#' then, take month 10, 11 and 12 from year 1998, month 1 from 1999. So you only have to care about the latter year. +#' +#' +#' #' @return A list or a dataframe with all the time series inside containing the same period. #' @examples #' # Generate timeseries datalist. Each data frame consists of a Date and a value. @@ -44,46 +72,49 @@ #' datalist_com1 <- extractPeriod(testdl, startDate = '1994-1-1', endDate = '1995-10-1') #' #' +#' dataframe <- list2Dataframe(datalist_com1) +#' # now we have a dataframe to extract certain months. +#' dataframe_new <- extractPeriod(dataframe = dataframe, month = c(1,2,3) ) +#' +#' #' @importFrom zoo as.Date #' @references #' Achim Zeileis and Gabor Grothendieck (2005). zoo: S3 Infrastructure for Regular and Irregular Time #' Series. Journal of Statistical Software, 14(6), 1-27. URL http://www.jstatsoft.org/v14/i06/ #' @export extractPeriod <- function(datalist, startDate = NULL, endDate = NULL, commonPeriod = FALSE, - dataframe = NULL) { + dataframe = NULL, year = NULL, month = NULL) { if (!is.null(dataframe)) { - dataset <- extractPeriod_dataframe(dataframe, startDate = startDate, endDate = endDate) - } else { + dataset <- extractPeriod_dataframe(dataframe, startDate = startDate, endDate = endDate, year = year, + month = month) + + } else { if (!is.null(startDate) & !is.null(endDate) & commonPeriod == FALSE) { - dataset <- lapply(datalist, extractPeriod_dataframe, startDate = startDate, endDate = endDate) - } else if (is.null(startDate) & is.null(endDate) & commonPeriod == TRUE) { + dataset <- lapply(datalist, extractPeriod_dataframe, startDate = startDate, endDate = endDate, year = year, + month = month) + } else if (is.null(startDate) & is.null(endDate) & commonPeriod == TRUE) { - Dates <- lapply(datalist, extractPeriod_getDate) - Dates <- do.call('rbind', Dates) + Dates <- lapply(datalist, extractPeriod_getDate) + Dates <- do.call('rbind', Dates) - startDate <- as.Date(max(Dates[, 1])) - endDate <- as.Date(min(Dates[, 2])) + startDate <- as.Date(max(Dates[, 1])) + endDate <- as.Date(min(Dates[, 2])) - dataset <- lapply(datalist, extractPeriod_dataframe, startDate = startDate, endDate = endDate) + dataset <- lapply(datalist, extractPeriod_dataframe, startDate = startDate, endDate = endDate, year = year, + month = month) - } else { - stop('Enter startDate and endDate, set commonPeriod as False, or simply set commonPeriod as TRUE') - } + } else { + stop('Enter startDate and endDate, set commonPeriod as False, or simply set commonPeriod as TRUE') + } } return(dataset) } -#' Extract data from a dataframe with startDate and endDate -#' -#' @param dataframe A time series with first column being a series of date or time. 
-#' @param startDate A date representing the start date. -#' @param endDate A date representing the end date. -#' @return The extracted dataframe between \code{startDate} and \code{endDate}. -# @export -extractPeriod_dataframe <- function(dataframe, startDate, endDate) { + +extractPeriod_dataframe <- function(dataframe, startDate, endDate, year = NULL, month = NULL) { # to check whether first column is a date format if (!grepl('-|/', dataframe[1, 1])) { stop('First column is not date or Wrong Date formate, check the format in ?as.Date{base} @@ -91,6 +122,9 @@ extractPeriod_dataframe <- function(dataframe, startDate, endDate) { } dataframe[, 1] <- as.Date(dataframe[, 1]) + if (is.null(startDate)) startDate <- dataframe[1, 1] + if (is.null(endDate)) endDate <- tail(dataframe[, 1], 1) + startIndex <- which(dataframe[, 1] == startDate) endIndex <- which(dataframe[, 1] == endDate) if (length(startIndex) == 0 | length(endIndex) == 0) { @@ -98,6 +132,40 @@ extractPeriod_dataframe <- function(dataframe, startDate, endDate) { } output <- dataframe[startIndex:endIndex, ] + # month needs to be firstly extracted, then year, otherwise, redundant months will be extracted. + if (!is.null(month)) { + Date <- as.POSIXlt(output[, 1]) + mon <- Date$mon + 1 + + # %in% can deal with multiple equalities + DateIndex <- which(mon %in% month) + + output <- output[DateIndex, ] + } + + + if (!is.null(year)) { + Date <- as.POSIXlt(output[, 1]) + yea <- Date$year + 1900 + mon <- Date$mon + 1 + + if (!any(sort(month) != month) | is.null(month)) { + DateIndex <- which(yea %in% year) + output <- output[DateIndex, ] + + # if year crossing than sort(month) != month + } else { + + startIndex <- which(yea == year - 1 & mon == month[1])[1] + endIndex <- tail(which(yea == year & mon == tail(month, 1)), 1) + + output <- output[startIndex:endIndex, ] + } + + } + + + return(output) } diff --git a/R/getSpatialMap.R b/R/getSpatialMap.R index 45b4f1b..902240d 100644 --- a/R/getSpatialMap.R +++ b/R/getSpatialMap.R @@ -53,6 +53,8 @@ getSpatialMap <- function(dataset, method = NULL, member = 'mean', ...) { att <- attributes(data)$dimensions dimIndex <- seq(1, length(att)) dimIndex1 <- match(c('lon', 'lat', 'time'), att)# match can apply to simple cases + + # for array this works, or setdiff can be used here to find the nomatch element. dimIndex2 <- dimIndex[-dimIndex1]# choose nomatch diff --git a/man/biasCorrect.Rd b/man/biasCorrect.Rd index 1ab8c93..a78fbf1 100644 --- a/man/biasCorrect.Rd +++ b/man/biasCorrect.Rd @@ -52,6 +52,8 @@ should have \strong{EVERY} attributes that forecast has. Hindcast is also called re-forecast, is the forecast of the past. E.g. you have a forecast from year 2000-2010, assuming now you are in 2005. So from 2000-2005, this period is the hindcast period, and 2005-2010, this period is the forecast period. +Hindcast can be the same as forecast, i.e., you can use forecast itself as hindcast to train the bias correction. 
+ \strong{How it works} diff --git a/man/extractPeriod.Rd b/man/extractPeriod.Rd index 1631ca2..3268eaf 100644 --- a/man/extractPeriod.Rd +++ b/man/extractPeriod.Rd @@ -5,14 +5,14 @@ \title{Extract period from list or dataframe.} \usage{ extractPeriod(datalist, startDate = NULL, endDate = NULL, - commonPeriod = FALSE, dataframe = NULL) + commonPeriod = FALSE, dataframe = NULL, year = NULL, month = NULL) } \arguments{ \item{datalist}{A list of different dataframes of time series .} -\item{startDate}{A Date showing the start of the extract period, default as NULL.} +\item{startDate}{A Date showing the start of the extract period, default as NULL, check details.} -\item{endDate}{A Date showing the end of the extract period, default as NULL.} +\item{endDate}{A Date showing the end of the extract period, default as NULL, check details.} \item{commonPeriod}{A boolean showing whether the common period is extracted. If chosen, startDate and endDate should be NULL.} @@ -20,6 +20,11 @@ should be NULL.} \item{dataframe}{A dataframe with first column Date, the rest columns value. If your input is a dataframe, not time series list, you can put \code{dataframe = yourdataframe}. And certain period will be extracted. Note: if your input is a time series, that means all the columns share the same period of date.} + +\item{year}{extract certain year in the entire time series. if you want to extract year 2000, set \code{year = 2000}} + +\item{month}{extract certain months in a year. e.g. if you want to extract Jan, Feb of each year, +set \code{month = c(1, 2)}.} } \value{ A list or a dataframe with all the time series inside containing the same period. @@ -29,6 +34,29 @@ Extract common period or certain period from a list of different dataframes of t dataframe. NOTE: all the dates in the datalist should follow the format in ?as.Date{base}. } +\details{ +\strong{startDate and endDate} + +If startDate and endDate are assigned, then certain period between startDate and endDate will be returned, +for both datalist input and dataframe input. + +If startDate and endDate are NOT assigned, then, + + if input is a datalist, the startDate and endDate of the common period of different datalists will be assigned + to the startDate and endDate. + + if input is a dataframe, the startDate and endDate of the input dataframe will be assigned to the startDate + and endDate . Since different value columns share a common Date column in a dataframe input. + +\strong{year and month} + +For year crossing month input, hyfo will take from the year before. E.g. if \code{month = c(10, 11, 12, 1)}, +and \code{year = 1999}, hyfo will take month 10, 11 and 12 from year 1998, and month 1 from 1999.You DO NOT +have to set \code{year = 1998 : 1999}. + +Well, if you set \code{year = 1998 : 1999}, hyfo will take month 10, 11 and 12 from year 1997, and month 1 from 1998, +then, take month 10, 11 and 12 from year 1998, month 1 from 1999. So you only have to care about the latter year. +} \examples{ # Generate timeseries datalist. Each data frame consists of a Date and a value. @@ -59,6 +87,11 @@ str(list_com) data(testdl) datalist_com1 <- extractPeriod(testdl, startDate = '1994-1-1', endDate = '1995-10-1') + + +dataframe <- list2Dataframe(datalist_com1) +# now we have a dataframe to extract certain months. +dataframe_new <- extractPeriod(dataframe = dataframe, month = c(1,2,3) ) } \references{ Achim Zeileis and Gabor Grothendieck (2005). 
zoo: S3 Infrastructure for Regular and Irregular Time diff --git a/man/extractPeriod_dataframe.Rd b/man/extractPeriod_dataframe.Rd deleted file mode 100644 index ecc2059..0000000 --- a/man/extractPeriod_dataframe.Rd +++ /dev/null @@ -1,22 +0,0 @@ -% Generated by roxygen2 (4.1.0.9001): do not edit by hand -% Please edit documentation in R/extractPeriod.R -\name{extractPeriod_dataframe} -\alias{extractPeriod_dataframe} -\title{Extract data from a dataframe with startDate and endDate} -\usage{ -extractPeriod_dataframe(dataframe, startDate, endDate) -} -\arguments{ -\item{dataframe}{A time series with first column being a series of date or time.} - -\item{startDate}{A date representing the start date.} - -\item{endDate}{A date representing the end date.} -} -\value{ -The extracted dataframe between \code{startDate} and \code{endDate}. -} -\description{ -Extract data from a dataframe with startDate and endDate -} - diff --git a/vignettes/hyfo.Rmd b/vignettes/hyfo.Rmd index 7382394..f0df5db 100644 --- a/vignettes/hyfo.Rmd +++ b/vignettes/hyfo.Rmd @@ -232,7 +232,7 @@ plotTS_comb(a1, a2, nrow = 2) ## 1.3 Further Process for Model Input -### 1.3.1 Extract Common Period from Different Time Series +### 1.3.1 Extract Certain Period or Months from Different Time Series Now we have the general information of the precipitation, if we want to use them in a model, we have to extract the common period of them, and use the common period precipitation to analyze. ```{r, fig.height=4, fig.width=7, results='hide'} @@ -253,7 +253,7 @@ Above is for us to extract period from different datalist, if we have a single t ```{r, fig.height=4, fig.width=7, results='hide'} # First change the list from above process to dataframe dataframe <- list2Dataframe(testdl_new) -# not we have a dataframe to extract certain period. +# now we have a dataframe to extract certain period. dataframe <- extractPeriod(dataframe = dataframe, startDate = '1994-12-01', endDate = '1995-03-01' ) str(testdl_new) ```
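
The sketch below pulls together the new `year` and `month` arguments that this patch adds to `extractPeriod()`. It is a minimal, not-run illustration: it only assumes `testdl` and `list2Dataframe()` as used in the vignette examples above, and the year-crossing behaviour is taken from the `extractPeriod` details documented in this patch.

```{r, eval=FALSE}
# A minimal sketch (not run), assuming testdl and list2Dataframe() as shown above.
data(testdl)

# Take the common period of the gauging stations and flatten the list to a dataframe.
testdl_com <- extractPeriod(testdl, commonPeriod = TRUE)
df <- list2Dataframe(testdl_com)

# Keep only January and February of every year.
df_janfeb <- extractPeriod(dataframe = df, month = c(1, 2))

# Year-crossing selection: months 10, 11 and 12 are taken from the year before,
# month 1 from the given year, as documented in the extractPeriod details.
df_winter <- extractPeriod(dataframe = df, month = c(10, 11, 12, 1), year = 1995)
str(df_winter)
```

Note on the design as documented above: inside `extractPeriod_dataframe()` the `month` filter is applied before the `year` filter, so a year-crossing `month` vector does not pick up redundant months, and only the later year needs to be given.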