diff --git a/.Rhistory b/.Rhistory index fb27a78..de58d86 100644 --- a/.Rhistory +++ b/.Rhistory @@ -1,512 +1,512 @@ -# Generate the call to [ -call <- as.call(c( -list(as.name("["), quote(array)), -indices, -list(drop = drop))) -# Print it, just to make it easier to see what's going on -# Print(call) -# Finally, evaluate it -output <- eval(call) -if (length(dim(output)) == length(dimnames)) { -attributes(output)$dimensions <- dimnames -} else if (length(dim(output)) < length(dimnames)){ -# In this case, one dimension is dropped, if value is a number -# and drop == T, this situation can appear. So the dropped dimemsion -# should be the chosen dimension. -i <- 1:length(dimnames) -# get rid of the dropped dimensin -i <- i[-dim] -attributes(output)$dimensions <- dimnames[i] +# For every column, it's biascorrected respectively. +frc_data <- lapply(2:n, function(x) biasCorrect_core(frc[, x], hindcast[, x], obs[, 2], method = method, +scaleType = scaleType, preci = preci)) +frc_data <- do.call('cbind', frc_data) +} else stop('Wrong TS input, check your TS dimension.') +} else if (input == 'hyfo') { +print('Under development...') } -return(output) +names <- colnames(frc) +frc <- data.frame(frc[, 1], frc_data) +colnames(frc) <- names +return(frc) } -reshapeMatrix <- function(matrix) { -# This is for the map plot to keep the ratio x : y == 4:3 -# mainly used in map plot in ggplot2. -# So the input matrix should be reshaped, add in some NA values, -# in order to be shown appropriately in ggplot. -lon <- as.numeric(colnames(matrix)) -lat <- as.numeric(rownames(matrix)) -dx <- mean(diff(lon)) -dy <- mean(diff(lat)) -lx <- max(lon) - min(lon) -ly <- max(lat) - min(lat) -if (0.75 * lx < ly) { -# In this case, x needs to be made longer -xhalf <- 0.67 * ly -xadd <- xhalf - lx / 2 -# calculate how many columns needs to be added. -nxadd <- abs(round(xadd / dx)) -madd1 <- matrix(data = NA, nrow = length(lat), ncol = nxadd) -madd2 <- madd1 -colnames(madd1) <- seq(to = min(lon) - dx, length = nxadd, by = dx) -colnames(madd2) <- seq(from = max(lon) + dx, length = nxadd, by = dx) -matrix_new <- cbind(madd1, matrix, madd2) -} else if (0.75 * lx > ly) { -yhalf <- 0.38 * lx -yadd <- yhalf - ly / 2 -nyadd <- abs(round(yadd / dy)) -madd1 <- matrix(data = NA, nrow = nyadd, ncol = length(lon)) -madd2 <- madd1 -rownames(madd1) <- seq(to = max(lat) + dy, length = nyadd, by = -dy) -rownames(madd2) <- seq(from = min(lat) - dx, length = nyadd, by = -dy) -matrix_new <- rbind(madd1, matrix, madd2) -} else { -matrix_new <- matrix +# this is only used to calculate the value column, +biasCorrect_core <- function(frc, hindcast, obs, method = 'delta', scaleType = 'multi', preci = FALSE){ +# default is the simplest method in biascorrection, just do simple addition and subtraction. +if (method == 'delta') { +# comes from downscaleR biascorrection method +frcMean <- mean(obs[, 2], na.rm = TRUE) +hindcastMean <- mean(hindcast[, 2], na.rm = TRUE) +frc <- obs - hindcastMean + frcMean +} else if (method == 'scaling') { +obsMean <- mean(obs, na.rm = TRUE) +hindcastMean <- mean(hindcast, na.rm = TRUE) +if (scaleType == 'multi') { +frc <- frc / hindcastMean * obsMean +} else if (scaleType == 'add') { +frc <- frc - hindcastMean + obsMean } -return(matrix_new) +} else if (method == 'eqm') { +# To be added, right now too complicated and not so much use. } -a1 <- getSpatialMap(tgridData, method = 'mean') -#' Get spatial map of the input dataset. +return(frc) +} +aaa <- biasCorrect(frc, hindcast, obs) +debug(biasCorrect) +aaa <- biasCorrect(frc, hindcast, obs) +as.POSIXlt(hindcast[, 1]) != as.POSIXlt(obs[, 1]) +hindcast[, 1] != obs[, 1] +as.POSIXlt(hindcast[, 1]) != as.POSIXlt(obs[, 1]) +as.POSIXlt(hindcast[, 1]) != as.POSIXlt(obs[, 1]) +aaa <- biasCorrect(frc, hindcast, obs) +hindcast[, 1] != obs[, 1] +as.POSIXlt(hindcast[, 1]) != as.POSIXlt(obs[, 1]) +obs +obs[, 2] +#' Biascorrect the input timeseries or hyfo dataset +#' +#' Biascorrect the input time series or dataset, the input time series or dataset should consist of observation, hindcast, and forecast. +#' observation and hindcast should belong to the same period, in order to calibrate. Then the modified forecast +#' will be returned. If the input is a time series, first column should be date column and rest columns should be +#' the value column. If the input is a hyfo dataset, the dataset should be the result of \code{loadNcdf}, or a list +#' file with the same format. #' -#' @param dataset A list containing different information, should be the result of reading netcdf file using -#' \code{library(ecomsUDG.Raccess)}. -#' @param method A string showing different calculating method for the map. More information please refer to +#' @param frc a hyfo grid data output or a dataframe (time series) consists of Date column and one or more value columns, +#' representing the forecast to be calibrated. +#' @param hindcast a hyfo grid data output or a dataframe(time series) consists of Date column and one or more value columns, +#' representing the hindcast data. This data will be used in the calibration of the forecast, so it's better to have the same date period as +#' observation data. Check details for more information. +#' @param obs a hyfo grid data output or a dataframe (time series) consists of Date column and one or more value columns, +#' representing the observation data. +#' @param method bias correct method, including 'delta', 'scaling'... +#' @param scaleType only when the method "scaling" is chosen, scaleType will be available. Two different types +#' of scaling method, 'add' and 'mult', which means additive and multiplicative scaling method. More info check #' details. -#' @param member A number showing which member is selected to get, if the dataset has a "member" dimension. Default -#' is NULL, if no member assigned, and there is a "member" in dimensions, the mean value of the members will be -#' taken. -#' @param ... Check \code{?getSpatialMap_mat} for details, e.g., x, y, title, catchment, -#' points, output, -#' @return A matrix representing the raster map is returned, and the map is plotted. +#' @param input If input is a time series, \code{input = 'TS'} needs to be assigned, or hyfo will take it as +#' an hyfo output grid file. Default is time series input, where in most of the cases we prefer. If your input +#' is a hyfo output file, \code{input = 'hyfo'}. +#' @param preci If the precipitation is biascorrected, then you have to assign \code{preci = TRUE}. Since for +#' precipitation, some biascorrect methods may not apply to, or some methods are specially for precipitation. +#' Default is FALSE. #' @details -#' There are following methods to be selected, -#' "meanAnnual": annual rainfall of each year is plotted. -#' "winter", "spring", "autumn", "summer": MEAN seasonal rainfall of each year is plotted. -#' Month(number 1 to 12): MEAN month rainfall of each year is plotted, e.g. MEAN march rainfall of each year. -#' "mean", "max", "min": mean daily, maximum daily, minimum daily precipitation. -#' @examples #' -#' #gridData provided in the package is the result of \code {loadGridData{ecomsUDG.Raccess}} -#' data(tgridData) -#' getSpatialMap(tgridData, method = 'meanAnnual') -#' getSpatialMap(tgridData, method = 'winter') +#' \strong {Hindcast} +#' +#' Since climate forecast is based on global condition, when downscaling to different regions, it may include +#' some bias, biascorrection is used then to fix the bias. In order to bias correct, we need to pick up some +#' data from the forecast to train with the observation, which is called hindcast in this function. Hindcast +#' should have \strong{EVERY} attributes that forecast has. +#' +#' Hindcast is also called re-forecast, is the forecast of the past. E.g. you have a forecast from year 2000-2010, assuming now you are in 2005. So from 2000-2005, this period +#' is the hindcast period, and 2005-2010, this period is the forecast period. +#' +#' +#' \strong {How it works} #' +#' Forecast product has to be calibrated, usually the system is doing forecast in real time. So, e.g., if the +#' forecast starts from year 2000, assuming you are in year 2003, then you will have 3 years' hindcast +#' data (year 2000 - 2003), which can be used to calibrate. And your forecast period is (2003-2004) #' -#' getSpatialMap(tgridData, method = 'winter', catchment = testCat) +#' E.g. you have observation from 2001 - 2002, this is your input obs. Then you can take the same +#' period (2001-2002) from the forecast, which is the hindcast period. For forecast, you can take any period. +#' The program will evaluate the obs and hindcast, to get the modification of the forecast, and then add the +#' modification to the forecast data. #' -#' file <- system.file("extdata", "points.txt", package = "hyfo") -#' points <- read.table(file, header = TRUE, sep = ',' ) -#' getSpatialMap(tgridData, method = 'winter', catchment = testCat, points = points) +#' \strong {method} #' +#' Different methods used in the bias correction. +#' +#' \strong {delta} +#' This method consists on adding to the observations the mean change signal (delta method). +#' This method is applicable to any kind of variable but it is preferable to avoid it for bounded variables +#' (e.g. precipitation, wind speed, etc.) because values out of the variable range could be obtained +#' (e.g. negative wind speeds...) +#' +#' +#' +#' @references +#' Bias correction methods come from \code{biasCorrection} from \code{dowscaleR} +#' Santander Meteorology Group (2015). downscaleR: Climate data manipulation and statistical downscaling. R +#' package version 0.6-0. https://github.com/SantanderMetGroup/downscaleR/wiki #' @export -getSpatialMap <- function(dataset, method = NULL, member = 'mean', ...) { -#check input dataset -checkWord <- c('Data', 'xyCoords', 'Dates') -if (any(is.na(match(checkWord, attributes(dataset)$names)))) { -stop('Input dataset is incorrect, it should contain "Data", "xyCoords", and "Dates", -check help for details.') +biasCorrect <- function(frc, hindcast, obs, method = 'delta', scaleType = 'multi', input = 'TS', preci = FALSE){ +if (input == 'TS') { +# First check if the first column is Date +if (!grepl('-|/', obs[1, 1]) | !grepl('-|/', hindcast[1, 1]) | !grepl('-|/', frc[1, 1])) { +stop('First column is not date or Wrong Date formate, check the format in ?as.Date{base} +and use as.Date to convert.If your input is a hyfo dataset, put input = "hyfo" as an +argument, check help for more info.') } -#range of the dataset just loaded -lon <- dataset$xyCoords$x -lat <- dataset$xyCoords$y -startTime <- as.POSIXlt(dataset$Dates$start, tz = 'GMT') -yearIndex <- startTime$year + 1900 -monthIndex <-startTime$mon + 1 -data <- dataset$Data -# Dimension needs to be arranged. Make sure first and second dimension is lat and lon. -att <- attributes(data)$dimensions -dimIndex <- seq(1, length(att)) -dimIndex1 <- match(c('lon', 'lat', 'time'), att)# match can apply to simple cases -dimIndex2 <- dimIndex[-dimIndex1]# choose nomatch -data <- aperm(data, c(dimIndex1, dimIndex2)) -attributes(data)$dimensions <- att[c(dimIndex1, dimIndex2)] -# Because in the following part, only 3 dimensions are allowed, so data has to be processed. -if (member == 'mean' & any(attributes(data)$dimensions == 'member')) { -dimIndex3 <- which(attributes(data)$dimensions != 'member') -data <- apply(data, MARGIN = dimIndex3, FUN = mean, na.rm = TRUE) -message('Mean value of the members are returned.') -} else if (member != 'mean' & any(attributes(data)$dimensions == 'member')) { -dimIndex3 <- which(attributes(data)$dimensions == 'member') -data <- chooseDim(data, dimIndex3, member, drop = TRUE) -} else if (member != 'mean' & !any(attributes(data)$dimensions == 'member')){ -stop('There is no member part in the dataset, but you choose one, check the input -dataset or change your arguments.') +# change to date type is easier, but in case in future the flood part is added, Date type doesn't have +# hour, min and sec, so, it's better to convert it into POSIxlt. +if (as.POSIXlt(hindcast[, 1]) != as.POSIXlt(obs[, 1])) { +warning('time of obs and time of hindcast are not the same, which may cause inaccuracy in +the calibration.') } -if (is.null(method)) { -warning('You should shoose a method, unless input is a matrix directly to be plotted.') -#in case the dataset is ready to plot and no need to calculate -} else if (method == 'meanAnnual') { -#mean value of the annual precipitation over the period of the data -#time <- proc.time() -if (length(unique(monthIndex)) < 12) { -warning ('There are less than 12 months in a year, the results may be inaccurate.') +if (ncol(frc) == 2) { +frc_data <- biasCorrect_core(frc[, 2], hindcast[, 2], obs[, 2], method = method, +scaleType = scaleType, preci = preci) +} else if (ncol(frc) > 2) { +# In this case more than one value columns exist in the dataset, both frc and hindcast. +n <- ncol(frc) +# For every column, it's biascorrected respectively. +frc_data <- lapply(2:n, function(x) biasCorrect_core(frc[, x], hindcast[, x], obs[, 2], method = method, +scaleType = scaleType, preci = preci)) +frc_data <- do.call('cbind', frc_data) +} else stop('Wrong TS input, check your TS dimension.') +} else if (input == 'hyfo') { +print('Under development...') } -data_new <- apply(data, MARGIN = c(2, 1), FUN = getMeanPreci, yearIndex = yearIndex, method = 'annual') -#newTime <- proc.time() - time -title_d <- 'Mean Annual Precipitation (mm / year)' -} else if (method == 'winter') { -#mean value of the seasonal precipitation, in this case, winter -#time <- proc.time() -wm <- match(c(12, 1, 2), unique(monthIndex)) -if (length(which(!is.na(wm))) < 3) { -stop ('Winter has less than 3 months, check data and try to calculate every month -seperately or choose another season.') +names <- colnames(frc) +frc <- data.frame(frc[, 1], frc_data) +colnames(frc) <- names +return(frc) } -data_new <- apply(data, MARGIN = c(2, 1), FUN = getMeanPreci, yearIndex = yearIndex, monthIndex = monthIndex, -method = 'winter') -#newTime <- proc.time() - time -title_d <- 'Mean Winter Precipitation (mm / winter)' -} else if (method == 'spring') { -wm <- match(c(3, 4, 5), unique(monthIndex)) -if (length(which(!is.na(wm))) < 3) { -stop ('Spring has less than 3 months, check data and try to calculate every month -seperately or choose another season.') +# this is only used to calculate the value column, +biasCorrect_core <- function(frc, hindcast, obs, method = 'delta', scaleType = 'multi', preci = FALSE){ +# default is the simplest method in biascorrection, just do simple addition and subtraction. +if (method == 'delta') { +# comes from downscaleR biascorrection method +frcMean <- mean(obs, na.rm = TRUE) +hindcastMean <- mean(hindcast, na.rm = TRUE) +frc <- obs - hindcastMean + frcMean +} else if (method == 'scaling') { +obsMean <- mean(obs, na.rm = TRUE) +hindcastMean <- mean(hindcast, na.rm = TRUE) +if (scaleType == 'multi') { +frc <- frc / hindcastMean * obsMean +} else if (scaleType == 'add') { +frc <- frc - hindcastMean + obsMean } -data_new <- apply(data, MARGIN = c(2, 1), FUN = getMeanPreci, yearIndex = yearIndex, monthIndex = monthIndex, -method = 'spring') -title_d <- 'Mean Spring Precipitation (mm / spring)' -} else if (method == 'summer') { -wm <- match(c(6, 7, 8), unique(monthIndex)) -if (length(which(!is.na(wm))) < 3) { -stop ('Summer has less than 3 months, check data and try to calculate every month -seperately or choose another season.') +} else if (method == 'eqm') { +# To be added, right now too complicated and not so much use. } -data_new <- apply(data, MARGIN = c(2, 1), FUN = getMeanPreci, yearIndex = yearIndex, monthIndex = monthIndex, -method = 'summer') -title_d <- 'Mean Summer Precipitation (mm / summer)' -} else if (method == 'autumn') { -wm <- match(c(9, 10, 11), unique(monthIndex)) -if (length(which(!is.na(wm))) < 3) { -stop ('Autumn has less than 3 months, check data and try to calculate every month -seperately or choose another season.') +return(frc) } -data_new <- apply(data, MARGIN = c(2, 1), FUN = getMeanPreci, yearIndex = yearIndex, monthIndex = monthIndex, -method = 'autumn') -title_d <- 'Mean Autumn Precipitation (mm / autumn)' -} else if (method == 'mean') { -#sum value of the dataset, this procedure is to get the mean value -data_new <- apply(data, MARGIN = c(2, 1), FUN = mean, na.rm = TRUE) -title_d <- 'Mean Daily Precipitation (mm / day)' -} else if (method == 'max') { -data_new <- apply(data, MARGIN = c(2, 1), FUN = suppressWarnings(max), na.rm = TRUE) -title_d <- 'Max Daily Precipitation (mm / day)' -} else if (method == 'min') { -data_new <- apply(data, MARGIN = c(2, 1), FUN = suppressWarnings(min), na.rm = TRUE) -title_d <- 'Min Daily Precipitation (mm / day)' -} else if (is.numeric(method)) { -data_new <- apply(data, MARGIN = c(2, 1), FUN = getMeanPreci, yearIndex = yearIndex, monthIndex = monthIndex, -method = method) -title_d <- paste(month.abb[method], 'Precipitation (mm / month)', sep = ' ') -} else { -wrongMethod <- method -stop(paste('no method called', wrongMethod)) -} -# This is to give attributes to the matrix and better be melted in ggplot. -colnames(data_new) <- round(lon, 2) -rownames(data_new) <- round(lat, 2) -# If ... also has a title argument, this will cause conflicts. so title has to be renamed as title_d -# This has to be paid a lot of attention when use ... to pass arguments. -output <- getSpatialMap_mat(matrix = data_new, title_d = title_d, ...) -return(output) -} -#' rePlot raster matrix -#' -#' replot the matrix output from \code{getSpatialMap}, when \code{output = 'data'} or output is default -#' value. -#' -#' @param matrix A matrix raster, should be the result of \code{getSpatialMap()}, output should be default -#' or 'data' -#' @param title_d A string showing the title of the plot, defaut is NULL. -#' @param catchment A catchment file geting from \code{shp2cat()} in the package, if a catchment is available for background. -#' @param points A dataframe, showing other information, e.g., location of the gauging stations. The -#' the data.frame should be with columes "name, lon, lat, z, value". -#' @param output A string showing the type of the output, if \code{output = 'ggplot'}, the returned -#' data can be used in ggplot and \code{getSpatialMap_comb()}; if \code{output = 'plot'}, the returned data is the plot containing all -#' layers' information, and can be plot directly or used in grid.arrange; if not set, the raster matrix data -#' will be returned. -#' @param name If \code{output = 'ggplot'}, name has to be assigned to your output, in order to differentiate -#' different outputs in the later multiplot using \code{getSpatialMap_comb}. -#' @param info A boolean showing whether the information of the map, e.g., max, mean ..., default is FALSE. -#' @param scale A string showing the plot scale, 'identity' or 'sqrt'. -#' @param colors Most of time you don't have to set this, but if you are not satisfied with the -#' default color, you can set your own palette here. e.g., \code{colors = c('red', 'blue')}, then -#' the value from lowest to highest, will have the color from red to blue. More info about colors, -#' please check ?palette(). -#' @param ... \code{title, x, y} showing the title and x and y axis of the plot. e.g. \code{title = 'aaa'} -#'default is about precipitation. -#' @return A matrix representing the raster map is returned, and the map is plotted. -#' @examples -#' data(tgridData)# the result of \code{loadNcdf} -#' #the output type of has to be default or 'data'. -#' a1 <- getSpatialMap(tgridData, method = 'mean') -#' a2 <- getSpatialMap(tgridData, method = 'max') -#' a3 <- getSpatialMap(tgridData, method = 'winter') -#' a4 <- getSpatialMap(tgridData, method = 'summer') -#' #For example, if we want to investigate the difference between mean value and max. -#' -#' a5 <- a2 - a1 -#' getSpatialMap_mat(a4) -#' -#' #Or to investigate the difference between winter value and summer value. -#' a6 <- a3 - a4 -#' getSpatialMap_mat(a6) +aaa <- biasCorrect(frc, hindcast, obs) +aaa +aaa1 <- biasCorrect(frc, hindcast, obs, method = 'scaling', scaleType = 'multi') +aaa1 +aaa1 == aaa +aaa1 <- biasCorrect(frc, hindcast, obs, method = 'scaling', scaleType = 'multi') +debug(biasCorrect) +aaa1 <- biasCorrect(frc, hindcast, obs, method = 'scaling', scaleType = 'multi') +as.POSIXlt(hindcast[, 1]) - as.POSIXlt(obs[, 1]) +typeof(as.POSIXlt(hindcast[, 1])) +typeof(as.POSIXlt(hindcast[1, 1])) +str(as.POSIXlt(hindcast[1, 1])) +str(as.Date(hindcast[1, 1])) +as.POSIXlt(hindcast[, 1]) != as.POSIXlt(obs[, 1]) +str(as.Date(hindcast[1, 1])) +str(as.Date(hindcast[1, 1])) +as.POSIXlt(hindcast[, 1]) == as.POSIXlt(obs[, 1]) +!(as.POSIXlt(hindcast[, 1]) == as.POSIXlt(obs[, 1])) +#' Biascorrect the input timeseries or hyfo dataset #' -#' @export -#' @import ggplot2 plyr maps maptools rgeos -#' @importFrom stats median -#' @importFrom reshape2 melt -#' @references -#' R Core Team (2015). R: A language and environment for statistical computing. R Foundation for -#' Statistical Computing, Vienna, Austria. URL http://www.R-project.org/. +#' Biascorrect the input time series or dataset, the input time series or dataset should consist of observation, hindcast, and forecast. +#' observation and hindcast should belong to the same period, in order to calibrate. Then the modified forecast +#' will be returned. If the input is a time series, first column should be date column and rest columns should be +#' the value column. If the input is a hyfo dataset, the dataset should be the result of \code{loadNcdf}, or a list +#' file with the same format. +#' +#' @param frc a hyfo grid data output or a dataframe (time series) consists of Date column and one or more value columns, +#' representing the forecast to be calibrated. +#' @param hindcast a hyfo grid data output or a dataframe(time series) consists of Date column and one or more value columns, +#' representing the hindcast data. This data will be used in the calibration of the forecast, so it's better to have the same date period as +#' observation data. Check details for more information. +#' @param obs a hyfo grid data output or a dataframe (time series) consists of Date column and one or more value columns, +#' representing the observation data. +#' @param method bias correct method, including 'delta', 'scaling'... +#' @param scaleType only when the method "scaling" is chosen, scaleType will be available. Two different types +#' of scaling method, 'add' and 'mult', which means additive and multiplicative scaling method. More info check +#' details. +#' @param input If input is a time series, \code{input = 'TS'} needs to be assigned, or hyfo will take it as +#' an hyfo output grid file. Default is time series input, where in most of the cases we prefer. If your input +#' is a hyfo output file, \code{input = 'hyfo'}. +#' @param preci If the precipitation is biascorrected, then you have to assign \code{preci = TRUE}. Since for +#' precipitation, some biascorrect methods may not apply to, or some methods are specially for precipitation. +#' Default is FALSE. +#' @details +#' +#' \strong {Hindcast} +#' +#' Since climate forecast is based on global condition, when downscaling to different regions, it may include +#' some bias, biascorrection is used then to fix the bias. In order to bias correct, we need to pick up some +#' data from the forecast to train with the observation, which is called hindcast in this function. Hindcast +#' should have \strong{EVERY} attributes that forecast has. +#' +#' Hindcast is also called re-forecast, is the forecast of the past. E.g. you have a forecast from year 2000-2010, assuming now you are in 2005. So from 2000-2005, this period +#' is the hindcast period, and 2005-2010, this period is the forecast period. #' -#' Hadley Wickham (2007). Reshaping Data with the reshape Package. Journal of Statistical Software, -#' 21(12), 1-20. URL http://www.jstatsoft.org/v21/i12/. #' -#' Hadley Wickham (2011). The Split-Apply-Combine Strategy for Data Analysis. Journal of Statistical -#' Software, 40(1), 1-29. URL http://www.jstatsoft.org/v40/i01/. +#' \strong {How it works} #' -#' Original S code by Richard A. Becker and Allan R. Wilks. R version by Ray Brownrigg. Enhancements -#' by Thomas P Minka (2015). maps: Draw Geographical Maps. R package version -#' 2.3-11. http://CRAN.R-project.org/package=maps +#' Forecast product has to be calibrated, usually the system is doing forecast in real time. So, e.g., if the +#' forecast starts from year 2000, assuming you are in year 2003, then you will have 3 years' hindcast +#' data (year 2000 - 2003), which can be used to calibrate. And your forecast period is (2003-2004) #' -#' Roger Bivand and Nicholas Lewin-Koh (2015). maptools: Tools for Reading and Handling Spatial -#' Objects. R package version 0.8-36. http://CRAN.R-project.org/package=maptools +#' E.g. you have observation from 2001 - 2002, this is your input obs. Then you can take the same +#' period (2001-2002) from the forecast, which is the hindcast period. For forecast, you can take any period. +#' The program will evaluate the obs and hindcast, to get the modification of the forecast, and then add the +#' modification to the forecast data. #' -#' Roger Bivand and Colin Rundel (2015). rgeos: Interface to Geometry Engine - Open Source (GEOS). R -#' package version 0.3-11. http://CRAN.R-project.org/package=rgeos +#' \strong {method} #' +#' Different methods used in the bias correction. #' +#' \strong {delta} +#' This method consists on adding to the observations the mean change signal (delta method). +#' This method is applicable to any kind of variable but it is preferable to avoid it for bounded variables +#' (e.g. precipitation, wind speed, etc.) because values out of the variable range could be obtained +#' (e.g. negative wind speeds...) #' -getSpatialMap_mat <- function(matrix, title_d = NULL, catchment = NULL, points = NULL, output = 'data', -name = NULL, info = FALSE, scale = 'identity', colors = NULL, ...) { -#check input -checkWord <- c('lon', 'lat', 'z', 'value') -if (is.null(attributes(matrix)$dimnames)) { -stop('Input matrix is incorrect, check help to know how to get the matrix.') -} else if (!is.null(catchment) & class(catchment) != "SpatialPolygonsDataFrame") { -stop('Catchment format is incorrect, check help to get more details. ') -} else if (!is.null(points) & any(is.na(match(checkWord, attributes(points)$names)))) { -stop('Points should be a dataframe with colnames "lon, lat, z, value".') +#' +#' +#' @references +#' Bias correction methods come from \code{biasCorrection} from \code{dowscaleR} +#' Santander Meteorology Group (2015). downscaleR: Climate data manipulation and statistical downscaling. R +#' package version 0.6-0. https://github.com/SantanderMetGroup/downscaleR/wiki +#' @export +biasCorrect <- function(frc, hindcast, obs, method = 'delta', scaleType = 'multi', input = 'TS', preci = FALSE){ +if (input == 'TS') { +# First check if the first column is Date +if (!grepl('-|/', obs[1, 1]) | !grepl('-|/', hindcast[1, 1]) | !grepl('-|/', frc[1, 1])) { +stop('First column is not date or Wrong Date formate, check the format in ?as.Date{base} +and use as.Date to convert.If your input is a hyfo dataset, put input = "hyfo" as an +argument, check help for more info.') +} +# change to date type is easier, but in case in future the flood part is added, Date type doesn't have +# hour, min and sec, so, it's better to convert it into POSIxlt. +if (! (as.POSIXlt(hindcast[, 1]) == as.POSIXlt(obs[, 1]))) { +warning('time of obs and time of hindcast are not the same, which may cause inaccuracy in +the calibration.') } -#ggplot -#for the aes option in ggplot, it's independent from any other command through all ggplot, and aes() function -#get data from the main dataset, in this case, data_ggplot. for other functions in ggplot, if it wants to use -#data from the main dataset as parameters, it has to use aes() function. if not, it has to use data available -#in the environment. -#in other words, all the parameters in aes(), they have to come from the main dataset. Otherwise, just put them -#outside aes() as normal parameters. -if (info == TRUE) { -plotMax <- round(max(matrix, na.rm = TRUE), 2) -plotMin <- round(min(matrix, na.rm = TRUE), 2) -plotMean <- round(mean(matrix, na.rm = TRUE), 2) -plotMedian <- round(median(matrix, na.rm = TRUE), 2) -word <- paste('\n\n', paste('Max', '=', plotMax), ',', paste('Min', '=', plotMin), ',', -paste('Mean', '=', plotMean), ',', paste('Median', '=', plotMedian)) -} else { -word <- NULL +if (ncol(frc) == 2) { +frc_data <- biasCorrect_core(frc[, 2], hindcast[, 2], obs[, 2], method = method, +scaleType = scaleType, preci = preci) +} else if (ncol(frc) > 2) { +# In this case more than one value columns exist in the dataset, both frc and hindcast. +n <- ncol(frc) +# For every column, it's biascorrected respectively. +frc_data <- lapply(2:n, function(x) biasCorrect_core(frc[, x], hindcast[, x], obs[, 2], method = method, +scaleType = scaleType, preci = preci)) +frc_data <- do.call('cbind', frc_data) +} else stop('Wrong TS input, check your TS dimension.') +} else if (input == 'hyfo') { +print('Under development...') } -x_word <- paste('Longitude', word) -world_map <- map_data('world') -# For some cases, matrix has to be reshaped, because it's too fat or too slim, to make -# it shown on the map, the ratio is x : y is 4 : 3. -matrix <- reshapeMatrix(matrix) -# cannot remove NA, or the matrix shape will be changed. -data_ggplot <- melt(matrix, na.rm = FALSE) -colnames(data_ggplot) <- c('lat', 'lon', 'value') -theme_set(theme_bw()) -# if (is.null(colors)) colors <- c('yellow', 'orange', 'red') -if (is.null(colors)) colors <- rev(rainbow(n = 20, end = 0.7)) -mainLayer <- with(data_ggplot, { -ggplot(data = data_ggplot) + -geom_tile(aes(x = lon, y = lat, fill = value)) + -#scale_fill_discrete()+ -scale_fill_gradientn(colours = colors, na.value = 'transparent') +#usually scale = 'sqrt' -#guide = guide_colorbar, colorbar and legend are not the same. -guides(fill = guide_colourbar(title='Rainfall (mm)', barheight = rel(9), trans = scale)) +#usually scale = 'sqrt' -geom_map(data = world_map, map = world_map, aes(map_id = region), fill = 'transparent', -color='black') + -# guides(fill = guide_colorbar(title='Rainfall (mm)', barheight = 15))+ -xlab(x_word) + -ylab('Latitude') + -ggtitle(title_d) + -labs(empty = NULL, ...) +#in order to pass "...", arguments shouldn't be empty. -theme(plot.title = element_text(size = rel(1.8), face = 'bold'), -axis.title.x = element_text(size = rel(1.7)), -axis.title.y = element_text(size = rel(1.7)), -axis.text.x = element_text(size = rel(1.9)), -axis.text.y = element_text(size = rel(1.9)), -legend.text = element_text(size = rel(1.3)), -legend.title = element_text(size = rel(1.3))) -# coord_fixed(ratio = 1, xlim = xlim, ylim = ylim) -# geom_rect(xmin=min(lon)+0.72*(max(lon)-min(lon)), -# xmax=min(lon)+0.99*(max(lon)-min(lon)), -# ymin=min(lat)+0.02*(max(lat)-min(lat)), -# ymax=min(lat)+0.28*(max(lat)-min(lat)), -# fill='white',colour='black')+ -# annotate('text', x = min(lon), y = min(lat), label=word, hjust = 0, vjust = -1) -}) -printLayer <- mainLayer -#catchment conversion -if (is.null(catchment) == FALSE) { -a <- catchment -a@data$id <- rownames(a@data) -b <- fortify(a, region = 'id') -c <- join(b, a@data, by = 'id') -catchmentLayer <- with(c, { -geom_polygon(data = c, aes(long, lat, group = group), color = 'black', -fill = 'transparent') -}) -printLayer <- printLayer + catchmentLayer +names <- colnames(frc) +frc <- data.frame(frc[, 1], frc_data) +colnames(frc) <- names +return(frc) } -#plot points -if (is.null(points) == FALSE) { -pointLayer <- with(points, { -geom_point(data = points, aes(x = lon, y = lat, size = value, colour = z), -guide = guide_legend(barheight = rel(3))) -}) -printLayer <- printLayer + pointLayer +# this is only used to calculate the value column, +biasCorrect_core <- function(frc, hindcast, obs, method = 'delta', scaleType = 'multi', preci = FALSE){ +# default is the simplest method in biascorrection, just do simple addition and subtraction. +if (method == 'delta') { +# comes from downscaleR biascorrection method +frcMean <- mean(obs, na.rm = TRUE) +hindcastMean <- mean(hindcast, na.rm = TRUE) +frc <- obs - hindcastMean + frcMean +} else if (method == 'scaling') { +obsMean <- mean(obs, na.rm = TRUE) +hindcastMean <- mean(hindcast, na.rm = TRUE) +if (scaleType == 'multi') { +frc <- frc / hindcastMean * obsMean +} else if (scaleType == 'add') { +frc <- frc - hindcastMean + obsMean } -print(printLayer) -if (output == 'ggplot') { -if (is.null(name)) stop('"name" argument not found, -If you choose "ggplot" as output, please assign a name.') -data_ggplot$Name <- rep(name, dim(data_ggplot)[1]) -return (data_ggplot) -} else if (output == 'plot') { -return(printLayer) -} else { -return(matrix) +} else if (method == 'eqm') { +# To be added, right now too complicated and not so much use. } +return(frc) } -#' Combine maps together -#' @param ... different maps generated by \code{getSpatialMap(, output = 'ggplot')}, see details. -#' @param nrow A number showing the number of rows. -#' @param list If input is a list containing different ggplot data, use \code{list = inputlist}. -#' @param x A string of x axis name. -#' @param y A string of y axis name. -#' @param title A string of the title. -#' @param output A boolean, if chosen TRUE, the output will be given. -#' @return A combined map. -#' @examples -#' data(tgridData)# the result of \code{loadGridData{ecomsUDG.Raccess}} -#' #The output should be 'ggplot' -#' a1 <- getSpatialMap(tgridData, method = 'summer', output = 'ggplot', name = 'a1') -#' a2 <- getSpatialMap(tgridData, method = 'winter', output = 'ggplot', name = 'a2') -#'# a3 <- getSpatialMap(tgridData, method = 'mean', output = 'ggplot', name = 'a3') -#'# a4 <- getSpatialMap(tgridData, method = 'max', output = 'ggplot', name = 'a4') -#' getSpatialMap_comb(a1, a2) -#' getSpatialMap_comb(a1, a2, nrow = 2) +debug(biasCorrect) +aaa1 <- biasCorrect(frc, hindcast, obs, method = 'scaling', scaleType = 'multi') +obsMean +#' Biascorrect the input timeseries or hyfo dataset +#' +#' Biascorrect the input time series or dataset, the input time series or dataset should consist of observation, hindcast, and forecast. +#' observation and hindcast should belong to the same period, in order to calibrate. Then the modified forecast +#' will be returned. If the input is a time series, first column should be date column and rest columns should be +#' the value column. If the input is a hyfo dataset, the dataset should be the result of \code{loadNcdf}, or a list +#' file with the same format. #' +#' @param frc a hyfo grid data output or a dataframe (time series) consists of Date column and one or more value columns, +#' representing the forecast to be calibrated. +#' @param hindcast a hyfo grid data output or a dataframe(time series) consists of Date column and one or more value columns, +#' representing the hindcast data. This data will be used in the calibration of the forecast, so it's better to have the same date period as +#' observation data. Check details for more information. +#' @param obs a hyfo grid data output or a dataframe (time series) consists of Date column and one or more value columns, +#' representing the observation data. +#' @param method bias correct method, including 'delta', 'scaling'... +#' @param scaleType only when the method "scaling" is chosen, scaleType will be available. Two different types +#' of scaling method, 'add' and 'mult', which means additive and multiplicative scaling method. More info check +#' details. +#' @param input If input is a time series, \code{input = 'TS'} needs to be assigned, or hyfo will take it as +#' an hyfo output grid file. Default is time series input, where in most of the cases we prefer. If your input +#' is a hyfo output file, \code{input = 'hyfo'}. +#' @param preci If the precipitation is biascorrected, then you have to assign \code{preci = TRUE}. Since for +#' precipitation, some biascorrect methods may not apply to, or some methods are specially for precipitation. +#' Default is FALSE. #' @details -#' For \code{getSpatialMap_comb}, the maps to be compared should be with same size and resolution, -#' in other words, they should be fully overlapped by each other. #' -#' If they have different resolutions, use \code{interpGridData{ecomsUDG.Raccess}} to interpolate. +#' \strong {Hindcast} +#' +#' Since climate forecast is based on global condition, when downscaling to different regions, it may include +#' some bias, biascorrection is used then to fix the bias. In order to bias correct, we need to pick up some +#' data from the forecast to train with the observation, which is called hindcast in this function. Hindcast +#' should have \strong{EVERY} attributes that forecast has. +#' +#' Hindcast is also called re-forecast, is the forecast of the past. E.g. you have a forecast from year 2000-2010, assuming now you are in 2005. So from 2000-2005, this period +#' is the hindcast period, and 2005-2010, this period is the forecast period. +#' +#' +#' \strong {How it works} +#' +#' Forecast product has to be calibrated, usually the system is doing forecast in real time. So, e.g., if the +#' forecast starts from year 2000, assuming you are in year 2003, then you will have 3 years' hindcast +#' data (year 2000 - 2003), which can be used to calibrate. And your forecast period is (2003-2004) +#' +#' E.g. you have observation from 2001 - 2002, this is your input obs. Then you can take the same +#' period (2001-2002) from the forecast, which is the hindcast period. For forecast, you can take any period. +#' The program will evaluate the obs and hindcast, to get the modification of the forecast, and then add the +#' modification to the forecast data. +#' +#' \strong {method} +#' +#' Different methods used in the bias correction. +#' +#' \strong {delta} +#' This method consists on adding to the observations the mean change signal (delta method). +#' This method is applicable to any kind of variable but it is preferable to avoid it for bounded variables +#' (e.g. precipitation, wind speed, etc.) because values out of the variable range could be obtained +#' (e.g. negative wind speeds...) +#' +#' #' -#' @export -#' @import ggplot2 maps #' @references -#' H. Wickham. ggplot2: elegant graphics for data analysis. Springer New York, 2009. -getSpatialMap_comb <- function(..., list = NULL, nrow = 1, x = '', y = '', title = '', -output = FALSE) { -if (!is.null(list)) { -data_ggplot <- do.call('rbind', list) -} else { -maps <- list(...) -checkBind(maps, 'rbind') -data_ggplot <- do.call('rbind', maps) -} -if (!class(data_ggplot) == 'data.frame') { -warning('Your input is probably a list, but you forget to add "list = " before it. -Try again, or check help for more information.') -} else if (is.null(data_ggplot$Name)) { -stop('No "Name" column in the input data, check the arguments in getSpatialMap(), if -output = "ggplot" is assigned, more info please check ?getSpatialMap().') +#' Bias correction methods come from \code{biasCorrection} from \code{dowscaleR} +#' Santander Meteorology Group (2015). downscaleR: Climate data manipulation and statistical downscaling. R +#' package version 0.6-0. https://github.com/SantanderMetGroup/downscaleR/wiki +#' @export +biasCorrect <- function(frc, hindcast, obs, method = 'delta', scaleType = 'multi', input = 'TS', preci = FALSE){ +if (input == 'TS') { +# First check if the first column is Date +if (!grepl('-|/', obs[1, 1]) | !grepl('-|/', hindcast[1, 1]) | !grepl('-|/', frc[1, 1])) { +stop('First column is not date or Wrong Date formate, check the format in ?as.Date{base} +and use as.Date to convert.If your input is a hyfo dataset, put input = "hyfo" as an +argument, check help for more info.') } -data_ggplot$Name <- factor(data_ggplot$Name, levels = data_ggplot$Name, ordered = TRUE) -# lim <- getLim(data_ggplot$lon, data_ggplot$lat) -# xlim <- lim[[1]] -# ylim <- lim[[2]] -world_map <- map_data('world') -theme_set(theme_bw()) -mainLayer <- with(data_ggplot, { -ggplot(data = data_ggplot) + -geom_tile(aes(x = lon, y = lat, fill = value)) + -#scale_fill_gradient(high = 'red', low = 'yellow')+ -scale_fill_gradientn(colours = c('yellow', 'orange', 'red'), na.value = 'transparent') +#usually scale = 'sqrt' -geom_map(data = world_map, map = world_map, aes(map_id = region), fill = 'transparent', color = 'black') + -# guides(fill = guide_colourbar(title='Rainfall (mm)', barheight = rel(9), trans = scale)) +# -facet_wrap(~ Name, nrow = nrow) + -theme(plot.title = element_text(size = rel(1.8), face = 'bold'), -axis.title.x = element_text(size = rel(1.7)), -axis.title.y = element_text(size = rel(1.7)), -axis.text.x = element_text(size = rel(1.9)), -axis.text.y = element_text(size = rel(1.9)), -legend.text = element_text(size = rel(1.3)), -legend.title = element_text(size = rel(1.3))) + -# no solultion for some very fat or very slim, in facet ggplot2, so, it's not buitiful. -#coord_equal() + -labs(x = x, y = y, title = title) -}) -suppressWarnings(print(mainLayer)) -if (output == TRUE) return(data_ggplot) +# change to date type is easier, but in case in future the flood part is added, Date type doesn't have +# hour, min and sec, so, it's better to convert it into POSIxlt. +# if condition only accepts one condition, for list comparison, there are a lot of conditions, better +# further process it, like using any. +if (any(as.POSIXlt(hindcast[, 1]) != as.POSIXlt(obs[, 1]))) { +warning('time of obs and time of hindcast are not the same, which may cause inaccuracy in +the calibration.') } -chooseDim <- function(array, dim, value, drop = FALSE) { -# Create list representing arguments supplied to [ -# bquote() creates an object corresponding to a missing argument -dimnames <- attributes(array)$dimensions -indices <- rep(list(bquote()), length(dim(array))) -indices[[dim]] <- value -if (dim(array)[dim] < max(value)) { -stop('Chosen member exceeds the member range of the dataset.') +if (ncol(frc) == 2) { +frc_data <- biasCorrect_core(frc[, 2], hindcast[, 2], obs[, 2], method = method, +scaleType = scaleType, preci = preci) +} else if (ncol(frc) > 2) { +# In this case more than one value columns exist in the dataset, both frc and hindcast. +n <- ncol(frc) +# For every column, it's biascorrected respectively. +frc_data <- lapply(2:n, function(x) biasCorrect_core(frc[, x], hindcast[, x], obs[, 2], method = method, +scaleType = scaleType, preci = preci)) +frc_data <- do.call('cbind', frc_data) +} else stop('Wrong TS input, check your TS dimension.') +} else if (input == 'hyfo') { +print('Under development...') } -# Generate the call to [ -call <- as.call(c( -list(as.name("["), quote(array)), -indices, -list(drop = drop))) -# Print it, just to make it easier to see what's going on -# Print(call) -# Finally, evaluate it -output <- eval(call) -if (length(dim(output)) == length(dimnames)) { -attributes(output)$dimensions <- dimnames -} else if (length(dim(output)) < length(dimnames)){ -# In this case, one dimension is dropped, if value is a number -# and drop == T, this situation can appear. So the dropped dimemsion -# should be the chosen dimension. -i <- 1:length(dimnames) -# get rid of the dropped dimensin -i <- i[-dim] -attributes(output)$dimensions <- dimnames[i] +names <- colnames(frc) +frc <- data.frame(frc[, 1], frc_data) +colnames(frc) <- names +return(frc) } -return(output) +# this is only used to calculate the value column, +biasCorrect_core <- function(frc, hindcast, obs, method = 'delta', scaleType = 'multi', preci = FALSE){ +# default is the simplest method in biascorrection, just do simple addition and subtraction. +if (method == 'delta') { +# comes from downscaleR biascorrection method +frcMean <- mean(obs, na.rm = TRUE) +hindcastMean <- mean(hindcast, na.rm = TRUE) +frc <- obs - hindcastMean + frcMean +} else if (method == 'scaling') { +obsMean <- mean(obs, na.rm = TRUE) +hindcastMean <- mean(hindcast, na.rm = TRUE) +if (scaleType == 'multi') { +frc <- frc / hindcastMean * obsMean +} else if (scaleType == 'add') { +frc <- frc - hindcastMean + obsMean } -reshapeMatrix <- function(matrix) { -# This is for the map plot to keep the ratio x : y == 4:3 -# mainly used in map plot in ggplot2. -# So the input matrix should be reshaped, add in some NA values, -# in order to be shown appropriately in ggplot. -lon <- as.numeric(colnames(matrix)) -lat <- as.numeric(rownames(matrix)) -dx <- mean(diff(lon)) -dy <- mean(diff(lat)) -lx <- max(lon) - min(lon) -ly <- max(lat) - min(lat) -if (0.75 * lx < ly) { -# In this case, x needs to be made longer -xhalf <- 0.67 * ly -xadd <- xhalf - lx / 2 -# calculate how many columns needs to be added. -nxadd <- abs(round(xadd / dx)) -madd1 <- matrix(data = NA, nrow = length(lat), ncol = nxadd) -madd2 <- madd1 -colnames(madd1) <- seq(to = min(lon) - dx, length = nxadd, by = dx) -colnames(madd2) <- seq(from = max(lon) + dx, length = nxadd, by = dx) -matrix_new <- cbind(madd1, matrix, madd2) -} else if (0.75 * lx > ly) { -yhalf <- 0.38 * lx -yadd <- yhalf - ly / 2 -nyadd <- abs(round(yadd / dy)) -madd1 <- matrix(data = NA, nrow = nyadd, ncol = length(lon)) -madd2 <- madd1 -rownames(madd1) <- seq(to = max(lat) + dy, length = nyadd, by = -dy) -rownames(madd2) <- seq(from = min(lat) - dx, length = nyadd, by = -dy) -matrix_new <- rbind(madd1, matrix, madd2) -} else { -matrix_new <- matrix +} else if (method == 'eqm') { +# To be added, right now too complicated and not so much use. } -return(matrix_new) +return(frc) } -a1 <- getSpatialMap(tgridData, method = 'mean') +aaa1 <- biasCorrect(frc, hindcast, obs, method = 'scaling', scaleType = 'multi') +debug(biasCorrect) +aaa1 <- biasCorrect(frc, hindcast, obs, method = 'scaling', scaleType = 'multi') +aaa1 <- biasCorrect(frc, hindcast, obs, method = 'scaling', scaleType = 'multi') +obsMean +hindcastMean +frc +frc1 <- frc / hindcastMean * obsMean +frc1 +frc1 - frc +frc1/obsMean +frc1/obsMean * hindcastMean +frc +frc1/obsMean * hindcastMean == frc +frc +frc * 3 +frc * 100 +frc * 100 /5 +frc +frc1 +obsMean +hindcastMean +obs / hindcastMean +frc1 +frc +0.09 * 0.108 +0.093 * 0.108 +frc1 +frc[35] +frc[35] /hindcastMean * obsMean +frc1[35] +frc1 +q +devtools::document() +?extractPeriod +data(testdl) +datalist <- extractPeriod(testdl, startDate = '1994-1-1', endDate = '1995-10-1') +frc <- datalist[[1]] +hindcast <- datalist[[2]] +obs <- datalist[[3]] +# default method is delta +frc_new <- biasCorrect(frc, hindcast, obs) +undebug(biasCorrect) +frc_new +frc_new <- biasCorrect(frc, hindcast, obs, method = 'scaling', scaleType = 'multi') +frc_new +devtools::build() +?biasCorrection +??biasCorrection +devtools::document() +devtools::check() +devtools::document() +devtools::check() +devtools::document() +devtools::check() +devtools::build() +devtools::build() +install.packages("~/hyfo_1.1.7.tar.gz", repos = NULL, type = "source") +?biasCorrection +library(hyfo) +?biasCorrection +?biasCorrect +devtools::document() +devtools::build() +install.packages("~/hyfo_1.1.7.tar.gz", repos = NULL, type = "source") +library(hyfo) +?biasCorrect +datalist <- extractPeriod(testdl, startDate = '1994-1-1', endDate = '1995-10-1') +frc <- datalist[[1]] +hindcast <- datalist[[2]] +obs <- datalist[[3]] +frc_new <- biasCorrect(frc, hindcast, obs) +frc_new +frc_new <- biasCorrect(frc, hindcast, obs, method = 'scaling', scaleType = 'multi') +frc_new +?getFrcEnsem +filePath <- system.file("extdata", "tnc.nc", package = "hyfo") +varname <- getNcdfVar(filePath) +nc <- loadNcdf(filePath, varname) +a <- getFrcEnsem(nc) +a1 <- getFrcEnsem(tgridData) +a <- getFrcEnsem(nc, cell = c(6,2)) +b <- getFrcEnsem(nc, coord = c(-1.4, 43.2)) diff --git a/.Rproj.user/132DF987/pcs/files-pane.pper b/.Rproj.user/132DF987/pcs/files-pane.pper index 9dd00d9..bd1c4f2 100644 --- a/.Rproj.user/132DF987/pcs/files-pane.pper +++ b/.Rproj.user/132DF987/pcs/files-pane.pper @@ -1,5 +1,5 @@ { - "path" : "~/hyfo/R", + "path" : "~/hyfo", "sortOrder" : [ { "ascending" : false, diff --git a/.Rproj.user/132DF987/pcs/source-pane.pper b/.Rproj.user/132DF987/pcs/source-pane.pper index 3249574..1743e40 100644 --- a/.Rproj.user/132DF987/pcs/source-pane.pper +++ b/.Rproj.user/132DF987/pcs/source-pane.pper @@ -1,3 +1,3 @@ { - "activeTab" : 2 + "activeTab" : 0 } \ No newline at end of file diff --git a/.Rproj.user/132DF987/pcs/windowlayoutstate.pper b/.Rproj.user/132DF987/pcs/windowlayoutstate.pper index ae73a17..03ec539 100644 --- a/.Rproj.user/132DF987/pcs/windowlayoutstate.pper +++ b/.Rproj.user/132DF987/pcs/windowlayoutstate.pper @@ -1,13 +1,13 @@ { "left" : { "panelheight" : 941, - "splitterpos" : 375, + "splitterpos" : 374, "topwindowstate" : "NORMAL", "windowheight" : 979 }, "right" : { "panelheight" : 941, - "splitterpos" : 577, + "splitterpos" : 576, "topwindowstate" : "NORMAL", "windowheight" : 979 } diff --git a/.Rproj.user/132DF987/pcs/workbench-pane.pper b/.Rproj.user/132DF987/pcs/workbench-pane.pper index dc53ceb..6785be6 100644 --- a/.Rproj.user/132DF987/pcs/workbench-pane.pper +++ b/.Rproj.user/132DF987/pcs/workbench-pane.pper @@ -1,4 +1,4 @@ { "TabSet1" : 0, - "TabSet2" : 1 + "TabSet2" : 3 } \ No newline at end of file diff --git a/.Rproj.user/132DF987/sdb/per/t/51993C6D b/.Rproj.user/132DF987/sdb/per/t/51993C6D new file mode 100644 index 0000000..b493ac7 --- /dev/null +++ b/.Rproj.user/132DF987/sdb/per/t/51993C6D @@ -0,0 +1,18 @@ +{ + "contents" : "#' Get ensemble forecast from historical data.\n#' \n#' getHisEnsem use historical data as the forecasting input time series.\n#' \n#' @param TS A time series dataframe, with first column Date, and second column value.\n#' @param example A vector containing two strings showing the start and end date, which represent the \n#' forecasting period. Check details for more information.\n#'\n#' the program will extract every possible period in TS you provided to generate the ensemble. Check details for \n#' more information.\n#' @param interval A number representing the interval of each ensemble member. NOTE: \"interval\" takes\n#' 365 as a year, and 30 as a month, regardless of leap year and months with 31 days. So if you want the interval \n#' to be 2 years, set \\code{interval = 730}, which equals 2 * 365 ; if two months, set \\code{interval = 60}; \n#' 2 days, \\code{interval = 2}, for other numbers that cannot be divided by 365 or 30 without remainder, it will treat the \n#' number as days.By defualt interval is set to be 365, a year.\n#' @param buffer A number showing how many days are used as buffer period for models. Check details for more\n#' information.\n#' \n#' @param plot A string showing whether the plot will be shown, e.g., 'norm' means normal plot (without any process), \n#' 'cum' means cummulative plot, default is 'norm'. For other words there will be no plot.\n#' @param output A string showing which type of output you want. Default is \"data\", if \"ggplot\", the \n#' data that can be directly plotted by ggplot2 will be returned, which is easier for you to make series\n#' plots afterwards. NOTE: If \\code{output = 'ggplot'}, the missing value in the data will\n#' be replaced by \\code{mv}, if assigned, default mv is 0.\n#' \n#' @param name If \\code{output = 'ggplot'}, name has to be assigned to your output, in order to differentiate\n#' different outputs in the later multiplot using \\code{getEnsem_comb}.\n#' \n#' @param mv A number showing representing the missing value. When calculating the cumulative value, \n#' missing value will be replaced by mv, default is 0.\n#' @param ... \\code{title, x, y} showing the title and x and y axis of the plot. e.g. \\code{title = 'aaa'}\n#' \n#' @details \n#' \n#' \\code{example} E.g., if you have a time series from 2000 to 2010. Assuming you are in 2003,\n#' you want to forecast the period from 2003-2-1 to 2003-4-1. Then for each year in your input\n#' time series, every year from 1st Feb to 1st Apr will be extracted to generate the ensemble\n#' forecasts. In this case your input example should be \\code{example = c('2003-2-1', '2003-4-1')}\n#' \n#' \\code{interval} doesn't care about leap year and the months with 31 days, it will take 365 as a year, and 30 as a month.\n#' e.g., if the interval is from 1999-2-1 to 1999-3-1, you should just set interval to 30, although the real interval is 28\n#' days.\n#' \n#' \\code{example} and \\code{interval} controls how the ensemble will be generated. e.g. if the time series is from \n#' 1990-1-1 to 2001-1-1.\n#' \n#' if \\code{example = c('1992-3-1', '1994-1-1')} and \\code{interval = 1095}, note, 1095 = 365 * 3, so the program treat\n#' this as 3 years.\n#' \n#' Then you are supposed to get the ensemble consisting of following part:\n#' \n#' 1. 1992-3-1 to 1994-1-1 first one is the example, and it's NOT start from 1990-3-1.\n#' 2. 1995-3-1 to 1997-1-1 second one starts from 1993, because \"interval\" is 3 years.\n#' 3. 1998-3-1 to 2000-1-1\n#' \n#' because the last one \"2000-3-1 to 2002-1-1\", 2002 exceeds the original TS range, so it will not be included.\n#' \n#' Sometimes, there are leap years and months with 31 days included in some ensemble part, in which case the length of the data will\n#' be different, e.g., 1999-1-1 to 1999-3-1 is 1 day less than 2000-1-1 to 2000-3-1. In this situation,\n#' the data will use example as a standard. If the example is 1999-1-1 to 1999-3-1, then the latter one\n#' will be changed to 2001-1-1 to 2000-2-29, which keeps the start Date and change the end Date.\n#' \n#' If the end date is so important that cannot be changed, try to solve this problem by resetting\n#' the example period, to make the event included in the example.\n#' \n#' Good set of example and interval can generate good ensemble.\n#' \n#' \\code{buffer}\n#' Sometimes the model needs to run for a few days to warm up, before the forecast. E.g., if a forecast starts at\n#' '1990-1-20', for some model like MIKE NAM model, the run needs to be started about 14 days. So the input timeseries\n#' should start from '1990-1-6'.\n#' \n#' Buffer is mainly used for the model hotstart. Sometimes the hot start file cannot contain all the parameters needed,\n#' only some important parameters. In this case, the model needs to run for some time, to make other parameters ready\n#' for the simulation.\n#' \n#' \n#' \\code{name}\n#' Assuming you have two ggplot outputs, you want to plot them together. In this situation, you\n#' need a name column to differentiate one ggplot output from the other. You can assigne this name\n#' by the argument directly, name has to be assigned if \\code{output = 'ggplot'} is selected,\n#' @return A ensemble time series using historical data as forecast.\n#' \n#' @examples\n#' \n#' data(testdl)\n#' \n#' a <- testdl[[1]]\n#' \n#' # Choose example from \"1994-2-4\" to \"1996-1-4\"\n#' b <- getHisEnsem(a, example = c('1994-2-4', '1996-1-4'))\n#' \n#' # Default interval is one year, can be set to other values, check help for information.\n#' \n#' # Take 7 months as interval\n#' b <- getHisEnsem(a, example = c('1994-2-4', '1996-1-4'), interval = 210, plot = 'cum') \n#' # Take 30 days as buffer\n#' b <- getHisEnsem(a, example = c('1994-2-4', '1996-1-4'), interval = 210, buffer = 30)\n#' @importFrom reshape2 melt \n#' @importFrom grDevices rainbow\n#' @import ggplot2\n#' @references \n#' Hadley Wickham (2007). Reshaping Data with the reshape Package. Journal of Statistical Software,\n#' 21(12), 1-20. URL http://www.jstatsoft.org/v21/i12/.\n#' \n#' H. Wickham. ggplot2: elegant graphics for data analysis. Springer New York, 2009.\n#' @export\n\ngetHisEnsem <- function (TS, example, interval = 365, buffer = 0, plot = 'norm', output = 'data', \n name = NULL, mv = 0, ...) {\n if (!grepl('-|/', TS[1, 1])) {\n stop('First column is not date or Wrong Date formate, check the format in ?as.Date{base} \n and use as.Date to convert.')\n } else if (!grepl('-|/', example[1]) | !grepl('-|/', example[1])) {\n stop('Wrong date format in the example, check the format in ?as.Date{base} \n and use as.Date to convert.')\n } else {\n \n \n \n TS[, 1] <- as.Date(TS[, 1])\n example <- as.Date(example ,tz = '')\n exL <- example[2] - example[1]\n # Test if example is in the range of the TS\n a <- which(TS[, 1] == example[1] | TS[, 1] == example[2])\n if (length(a) < 2) stop('Example is out of the time series, reset example.')\n \n \n \n if (interval %% 365 == 0) {\n d <- interval / 365\n \n # Get sequence of start and end date.\n \n startDate <- rev(seq(from = example[1], to = min(TS[, 1]), by = paste(-d, 'years')))\n endDate <- seq(from = example[2], to = max(TS[, 1]), by = paste(d, 'years'))\n\n n <- length(startDate) + length(endDate) - 1 # example is counted twice, should be subtracted. \n \n # Generate full start date series.\n startDate <- seq(min(startDate), length = n, by = paste(d, 'years'))\n endDate <- startDate + exL\n \n } else if (interval %% 30) {\n d <- interval / 30\n \n # Get sequence of start and end date.\n \n startDate <- rev(seq(from = example[1], to = min(TS[, 1]), by = paste(-d, 'months')))\n endDate <- seq(from = example[2], to = max(TS[, 1]), by = paste(d, 'months'))\n \n n <- length(startDate) + length(endDate) - 1\n \n startDate <- seq(min(startDate), length = n, by = paste(d, 'months'))\n endDate <- startDate + exL\n \n } else {\n d <- interval\n \n # Get sequence of start and end date.\n \n startDate <- rev(seq(from = example[1], to = min(TS[, 1]), by = paste(-d, 'days')))\n endDate <- seq(from = example[2], to = max(TS[, 1]), by = paste(d, 'days'))\n \n n <- length(startDate) + length(endDate) - 1\n \n startDate <- seq(min(startDate), length = n, by = paste(d, 'days'))\n endDate <- startDate + exL\n }\n \n data <- mapply(FUN = function(x, y) extractPeriod_dataframe(dataframe = TS, startDate = x, endDate = y),\n x = startDate, y = endDate)\n \n data <- lapply(1:n, function(x) data.frame(data[, x]))\n \n if (buffer > 0) {\n bufferStart <- example[1] - buffer\n bufferEnd <- example[1] - 1\n bufferTS <- extractPeriod_dataframe(TS, bufferStart, bufferEnd)\n \n data <- lapply(data, function(x) rbind(bufferTS, x))\n \n } else if (buffer < 0) {\n stop ('Buffer should be positive, or reset example.')\n }\n \n \n data_output <- list2Dataframe(data)\n colnames(data_output) <- c('Date', as.character(startDate))\n \n # Rearrange dataframe to make example the first column.\n ind <- match(c('Date', as.character(example[1])), colnames(data_output))\n # when use cbind, to ensure the output is also a dataframe, one inside cbind should be dataframe\n # Even output is alread a dataframe, but when ind is a single number, then output[ind] will\n # not be a dataframe, but an array.\n data_output <- cbind(data.frame(data_output[ind]), data_output[-ind])\n ex_date <- seq(from = example[1] - buffer, to = example[2], by = 1)\n data_output$Date <- ex_date\n colnames(data_output)[2] <- 'Observation'\n \n meanV <- apply(data_output[, 2:ncol(data_output)], MARGIN = 1, FUN = mean, na.rm = TRUE)\n \n data_output <- cbind(data.frame(Date = data_output[, 1]), Mean = meanV, \n data_output[, 2:ncol(data_output)])\n \n data_ggplot <- melt(data_output, id.var = 'Date')\n NAIndex <- is.na(data_ggplot$value)\n data_ggplot$nav <- rep(0, nrow(data_ggplot))\n data_ggplot$nav[NAIndex] <- 1\n \n if (plot == 'norm') {\n data_ggplot$value[NAIndex] <- mv\n \n } else if (plot == 'cum') {\n data_output[is.na(data_output)] <- mv\n cum <- cbind(data.frame(Date = data_output$Date), cumsum(data_output[2:ncol(data_output)]))\n \n data_ggplot <- melt(cum, id.var = 'Date')\n } else {\n stop('plot can only be \"norm\" or \"cum\", do not assign other words')\n }\n \n #generate different colors \n colors = c('brown1', 'dodgerblue3', rainbow(n = length(unique(data_ggplot$variable)) - 2,\n start = 0.1))\n \n theme_set(theme_bw())\n mainLayer <- with(data_ggplot, {\n ggplot(data = data_ggplot) +\n aes(x = Date, y = value, color = variable, group = variable) +\n geom_line(size = 0.5) +\n geom_line(data = data_ggplot[data_ggplot$variable == 'Observation', ], size = 1.6) +\n geom_line(data = data_ggplot[data_ggplot$variable == 'Mean', ], size = 1.6) +\n geom_point(data = data_ggplot[NAIndex, ], size = 3, shape = 4, color = 'black') +\n scale_color_manual(values = colors) +\n labs(empty = NULL, ...) +#in order to pass \"...\", arguments shouldn't be empty.\n theme(axis.text.x = element_text(size = rel(1.8)),\n axis.text.y = element_text(size = rel(1.8)),\n axis.title.x = element_text(size = rel(1.8)),\n axis.title.y = element_text(size = rel(1.8)))\n })\n print(mainLayer)\n \n if (output == 'ggplot') {\n if (is.null(name)) stop('\"name\" argument not found, \n If you choose \"ggplot\" as output, please assign a name.')\n data_ggplot$name <- rep(name, nrow(data_ggplot)) \n data_ggplot$nav <- rep(0, nrow(data_ggplot))\n data_ggplot$nav[NAIndex] <- 1\n\n return(data_ggplot)\n } else {\n return(data_output)\n }\n }\n}\n\n\n\n\n\n\n#' Extract time series from forecasting data.\n#' \n#' getFrcEnsem extract timeseries from forecasting data, if forecasting data has a member session\n#' an ensemble time sereis will be returned, if forecasting data doesn't have a member session, a singe time\n#' series will be returned.\n#' \n#' @param dataset A list containing different information, should be the result of reading netcdf file using\n#' \\code{library(ecomsUDG.Raccess)}.\n#' @param cell A vector containing the locaton of the cell, e.g. c(2, 3), default is \"mean\", representing\n#' the spatially averaged value. Check details for more information.\n#' @param plot A string showing whether the plot will be shown, e.g., 'norm' means normal plot (without any process), \n#' 'cum' means cummulative plot, default is 'norm'. For other words there will be no plot.\n#' @param output A string showing which type of output you want. Default is \"data\", if \"ggplot\", the \n#' data that can be directly plotted by ggplot2 will be returned, which is easier for you to make series\n#' plots afterwards. NOTE: If \\code{output = 'ggplot'}, the missing value in the data will\n#' be replaced by \\code{mv}, if assigned, default mv is 0.\n#' @param name If \\code{output = 'ggplot'}, name has to be assigned to your output, in order to differentiate\n#' different outputs in the later multiplot using \\code{getEnsem_comb}.\n#' @param mv A number showing representing the missing value. When calculating the cumulative value, \n#' missing value will be replaced by mv, default is 0.\n#' @param coord A coordinate of longitude and latitude. e.g. corrd = c(lon, lat). If coord is assigned,\n#' cell argument will no longer be used.\n#' @param ... \\code{title, x, y} showing the title and x and y axis of the plot. e.g. \\code{title = 'aaa'}\n#' \n#' @details \n#' \n#' \\code{cell} representing the location of the cell, NOTE: this location means the index of the cell,\n#' IT IS NOT THE LONGITUDE AND LATITUDE. e.g., \\code{cell = c(2, 3)}, the program will take the 2nd longitude\n#' and 3rd latitude, by the increasing order. Longitude comes first.\n#' \n#' \\code{name}\n#' Assuming you have two ggplot outputs, you want to plot them together. In this situation, you\n#' need a name column to differentiate one ggplot output from the other. You can assigne this name\n#' by the argument directly, If name is not assigned and \\code{output = 'ggplot'} is selected, then\n#' the system time will be selected as name column.\n#' \n#' @examples \n#' \n#' filePath <- system.file(\"extdata\", \"tnc.nc\", package = \"hyfo\")\n\n#' # Then if you don't know the variable name, you can use \\code{getNcdfVar} to get variable name\n#' varname <- getNcdfVar(filePath)\n#' nc <- loadNcdf(filePath, varname)\n#' a <- getFrcEnsem(nc)\n#' \n#' # If there is no member session in the dataset, a single time sereis will be extracted.\n#' a1 <- getFrcEnsem(tgridData)\n#' \n#' \n#' # The default output is spatially averaged, if there are more than one cells in the dataset, \n#' # the mean value of the cells will be calculated. While if you are interested in special cell, \n#' # you can assign the cell value. You can also directly use longitude and latitude to extract \n#' # time series.\n#' \n#' getSpatialMap(nc, 'mean')\n#' a <- getFrcEnsem(nc, cell = c(6,2))\n#' \n#' # From the map, cell = c(6, 2) means lon = -1.4, lat = 43.2, so you can use corrd to locate\n#' # your research area and extract time series.\n#' b <- getFrcEnsem(nc, coord = c(-1.4, 43.2))\n#' \n#' \n#' @return A ensemble time series extracted from forecating data.\n#' \n#' @import ggplot2\n#' @importFrom reshape2 melt\n#' @references \n#' H. Wickham. ggplot2: elegant graphics for data analysis. Springer New York, 2009.\n#' \n#' Hadley Wickham (2007). Reshaping Data with the reshape Package. Journal of Statistical Software,\n#' 21(12), 1-20. URL http://www.jstatsoft.org/v21/i12/.\n#' \n#' @export\ngetFrcEnsem <- function(dataset, cell = 'mean', plot = 'norm', output = 'data', name = NULL,\n mv = 0, coord = NULL, ...) {\n # cell should be a vector showing the location, or mean representing the loacation averaged.\n \n checkWord <- c('Data', 'xyCoords', 'Dates')\n if (any(is.na(match(checkWord, attributes(dataset)$names)))) {\n stop('Input dataset is incorrect, it should contain \"Data\", \"xyCoords\", and \"Dates\", \n check help for details.')\n }\n \n Date <- as.Date(dataset$Dates$start)\n data <- dataset$Data\n \n # Dimension needs to be arranged. Make sure first and second dimension is lat and lon.\n att <- attributes(data)$dimensions\n dimIndex <- seq(1, length(att))\n dimIndex1 <- match(c('lon', 'lat', 'time'), att)# match can apply to simple cases\n dimIndex2 <- dimIndex[-dimIndex1]# choose nomatch\n \n data <- aperm(data, c(dimIndex1, dimIndex2))\n attributes(data)$dimensions <- att[c(dimIndex1, dimIndex2)]\n \n if (!is.null(coord)) {\n if (length(coord) == 2) {\n # corrdinates\n lonC <- coord[1]\n latC <- coord[2]\n \n lon <- dataset$xyCoords$x\n lat <- dataset$xyCoords$y\n \n # Index\n lonI <- which(abs(lon - lonC) == min(abs(lon - lonC), na.rm = TRUE)) \n latI <- which(abs(lat - latC) == min(abs(lat - latC), na.rm = TRUE))\n \n cell <- c(max(lonI), max(latI))\n \n } else stop('Wrong coord input, should be c(lon, lat). Lon and lat should be within the dataset range.')\n } \n \n \n \n \n if (!any(attributes(data)$dimensions == 'member')){\n message('There is no member part in the dataset, there will be only one column of value\n returned.')\n \n if (length(cell) == 2) {\n data_ensem <- data[cell[1], cell[2], ]\n \n } else if (cell == 'mean') {\n data_ensem <- apply(data, MARGIN = 3, FUN = mean, na.rm = TRUE)\n # colnames <- 1:ncol(data_ensem)\n \n } else {\n stop('Wrong cell input, check help for information.')\n }\n \n } else {\n \n if (length(cell) == 2) {\n data_ensem <- data[cell[1], cell[2], , ]\n meanV <- apply(data_ensem, MARGIN = 1, FUN = mean, na.rm = TRUE)\n data_ensem <- data.frame('Mean' = meanV, data_ensem) \n \n } else if (cell == 'mean') {\n data_ensem <- apply(data, MARGIN = c(3, 4), FUN = mean, na.rm = TRUE)\n # colnames <- 1:ncol(data_ensem)\n meanV <- apply(data_ensem, MARGIN = 1, FUN = mean, na.rm = TRUE)\n data_ensem <- data.frame('Mean' = meanV, data_ensem)\n \n } else {\n stop('Wrong cell input, check help for information.')\n }\n }\n\n \n data_output <- data.frame(Date, data_ensem)\n data_ggplot <- melt(data_output, id.var = 'Date')\n NAIndex <- is.na(data_ggplot$value)\n \n \n if (plot == 'norm') {\n data_ggplot$value[NAIndex] <- mv\n } else if (plot == 'cum') {\n data_output[is.na(data_output)] <- mv\n cum <- cbind(data.frame(Date = data_output$Date), cumsum(data_output[2:ncol(data_output)]))\n \n data_ggplot <- melt(cum, id.var = 'Date')\n \n }\n \n colors = c('brown1', rainbow(n = length(unique(data_ggplot$variable)) - 1,\n start = 0.1))\n \n theme_set(theme_bw())\n mainLayer <- with(data_ggplot, {\n ggplot(data = data_ggplot) +\n aes(x = Date, y = value, color = variable) +\n geom_line(size = 0.5) +\n geom_line(data = data_ggplot[data_ggplot$variable == 'Mean', ], size = 1.6, color = 'red') +\n geom_point(data = data_ggplot[NAIndex, ], size = 2, shape = 4, color = 'black') +\n scale_color_manual(values = colors) +\n theme(axis.text.x = element_text(size = rel(1.8)),\n axis.text.y = element_text(size = rel(1.8)),\n axis.title.x = element_text(size = rel(1.8)),\n axis.title.y = element_text(size = rel(1.8))) +\n labs(empty = NULL, ...)#in order to pass \"...\", arguments shouldn't be empty.\n \n })\n print(mainLayer)\n \n if (output == 'ggplot') {\n if (is.null(name)) stop('\"name\" argument not found, \n If you choose \"ggplot\" as output, please assign a name.')\n \n data_ggplot$name <- rep(name, nrow(data_ggplot)) \n data_ggplot$nav <- rep(0, nrow(data_ggplot))\n data_ggplot$nav[NAIndex] <- 1\n return(data_ggplot)\n } else {\n return(data_output)\n }\n}\n\n\n\n#' Combine ensembles together\n#' @param ... different ensembles generated by \\code{getHisEnsem(, output = 'ggplot')} \n#' or \\code{getFrcEnsem(, output = 'ggplot')}, see details.\n#' @param nrow A number showing the number of rows.\n#' @param list If input is a list containing different ggplot data, use \\code{list = inputlist}.\n#' @param legend A boolean representing whether you want the legend. Sometimes when you combine\n#' plots, there will be a lot of legends, if you don't like it, you can turn it off by setting\n#' \\code{legend = FALSE}, default is TRUE.\n#' @param x A string of x axis name.\n#' @param y A string of y axis name.\n#' @param title A string of the title.\n#' @param output A boolean, if chosen TRUE, the output will be given.\n#' @return A combined ensemble plot.\n#' @export\n#' @import ggplot2\n#' @references \n#' H. Wickham. ggplot2: elegant graphics for data analysis. Springer New York, 2009.\n\ngetEnsem_comb <- function(..., list = NULL, nrow = 1, legend = TRUE, x = '', y = '', title = '', \n output = FALSE) {\n \n if (!is.null(list)) {\n data_ggplot <- do.call('rbind', list)\n } else {\n plots <- list(...)\n checkBind(plots, 'rbind')\n data_ggplot <- do.call('rbind', plots)\n } \n #data_ggplot$name <- factor(data_ggplot$name, levels = data_ggplot$name, ordered = TRUE)\n \n if (!class(data_ggplot) == 'data.frame') {\n warning('Your input is probably a list, but you forget to add \"list = \" before it.\n Try again, or check help for more information.')\n } else if (is.null(data_ggplot$name)) {\n stop('No \"Name\" column in the input data, check the arguments in getFreEnsem() or getHisEnsem(), if \n output = \"ggplot\" is assigned, more info please check ?getFreEnsem() or ?getHisEnsem().')\n }\n \n colors = c('brown1', 'dodgerblue3', rainbow(n = length(unique(data_ggplot$variable)) - 2,\n start = 0.1))\n \n theme_set(theme_bw())\n mainLayer <- with(data_ggplot, {\n ggplot(data = data_ggplot) +\n aes(x = Date, y = value, color = variable) +\n geom_line(size = 0.5) +\n geom_line(data = data_ggplot[data_ggplot$variable == 'Mean', ], size = 1.6) +\n geom_line(data = data_ggplot[data_ggplot$variable == 'Observation', ], size = 1.6) +\n geom_point(data = data_ggplot[data_ggplot$nav == 1, ], size = 2, shape = 4, color = 'black') +\n scale_color_manual(values = colors) +\n theme(axis.text.x = element_text(size = rel(1.8)),\n axis.text.y = element_text(size = rel(1.8)),\n axis.title.x = element_text(size = rel(1.8)),\n axis.title.y = element_text(size = rel(1.8))) +\n facet_wrap( ~ name, nrow = nrow) +\n labs(x = x, y = y, title = title)\n \n })\n if (legend == FALSE) {\n mainLayer <- mainLayer + \n theme(legend.position = 'none')\n# following ones are to add label, may be added in future.\n# geom_text(data = data_ggplot[data_ggplot$Date == '2003-12-10', ], aes(label = variable), hjust = 0.7, vjust = 1)\n# geom_text(data = data_ggplot[data_ggplot$variable == 'Mean', ], aes(label = variable), hjust = 0.7, vjust = 1)\n }\n \n \n print(mainLayer)\n \n if (output == TRUE) return(data_ggplot)\n \n}", + "created" : 1441271740923.000, + "dirty" : false, + "encoding" : "ASCII", + "folds" : "", + "hash" : "1081962038", + "id" : "51993C6D", + "lastKnownWriteTime" : 1441615395, + "path" : "~/hyfo/R/getEnsemble.R", + "project_path" : "R/getEnsemble.R", + "properties" : { + "tempName" : "Untitled1" + }, + "relative_order" : 5, + "source_on_save" : false, + "type" : "r_source" +} \ No newline at end of file diff --git a/.Rproj.user/132DF987/sdb/per/t/579C533F b/.Rproj.user/132DF987/sdb/per/t/579C533F new file mode 100644 index 0000000..5976ecf --- /dev/null +++ b/.Rproj.user/132DF987/sdb/per/t/579C533F @@ -0,0 +1,18 @@ +{ + "contents" : "\n\n#' Biascorrect the input timeseries or hyfo dataset\n#' \n#' Biascorrect the input time series or dataset, the input time series or dataset should consist of observation, hindcast, and forecast.\n#' observation and hindcast should belong to the same period, in order to calibrate. Then the modified forecast\n#' will be returned. If the input is a time series, first column should be date column and rest columns should be \n#' the value column. If the input is a hyfo dataset, the dataset should be the result of \\code{loadNcdf}, or a list\n#' file with the same format.\n#' \n#' @param frc a hyfo grid data output or a dataframe (time series) consists of Date column and one or more value columns, \n#' representing the forecast to be calibrated.\n#' @param hindcast a hyfo grid data output or a dataframe(time series) consists of Date column and one or more value columns, \n#' representing the hindcast data. This data will be used in the calibration of the forecast, so it's better to have the same date period as\n#' observation data. Check details for more information.\n#' @param obs a hyfo grid data output or a dataframe (time series) consists of Date column and one or more value columns, \n#' representing the observation data.\n#' @param method bias correct method, including 'delta', 'scaling'...\n#' @param scaleType only when the method \"scaling\" is chosen, scaleType will be available. Two different types\n#' of scaling method, 'add' and 'mult', which means additive and multiplicative scaling method. More info check \n#' details.\n#' @param input If input is a time series, \\code{input = 'TS'} needs to be assigned, or hyfo will take it as \n#' an hyfo output grid file. Default is time series input, where in most of the cases we prefer. If your input\n#' is a hyfo output file, \\code{input = 'hyfo'}.\n#' @param preci If the precipitation is biascorrected, then you have to assign \\code{preci = TRUE}. Since for\n#' precipitation, some biascorrect methods may not apply to, or some methods are specially for precipitation. \n#' Default is FALSE.\n#' @details \n#' \n#' Since climate forecast is based on global condition, when downscaling to different regions, it may include\n#' some bias, biascorrection is used then to fix the bias.\n#' \n#' \\strong{Hindcast}\n#' \n#' In order to bias correct, we need to pick up some\n#' data from the forecast to train with the observation, which is called hindcast in this function. Hindcast\n#' should have \\strong{EVERY} attributes that forecast has.\n#' \n#' Hindcast is also called re-forecast, is the forecast of the past. E.g. you have a forecast from year 2000-2010, assuming now you are in 2005. So from 2000-2005, this period\n#' is the hindcast period, and 2005-2010, this period is the forecast period.\n#'\n#'\n#' \\strong{How it works}\n#' \n#' Forecast product has to be calibrated, usually the system is doing forecast in real time. So, e.g., if the \n#' forecast starts from year 2000, assuming you are in year 2003, then you will have 3 years' hindcast \n#' data (year 2000 - 2003), which can be used to calibrate. And your forecast period is (2003-2004)\n#' \n#' E.g. you have observation from 2001 - 2002, this is your input obs. Then you can take the same \n#' period (2001-2002) from the forecast, which is the hindcast period. For forecast, you can take any period.\n#' The program will evaluate the obs and hindcast, to get the modification of the forecast, and then add the \n#' modification to the forecast data.\n#' \n#' \\strong{method}\n#' \n#' Different methods used in the bias correction.\n#' \n#' \\strong{delta}\n#' \n#' This method consists on adding to the observations the mean change signal (delta method). \n#' This method is applicable to any kind of variable but it is preferable to avoid it for bounded variables\n#' (e.g. precipitation, wind speed, etc.) because values out of the variable range could be obtained \n#' (e.g. negative wind speeds...)\n#' \n#' \\strong{scaling}\n#' \n#' This method consists on scaling the simulation with the difference (additive) or quotient (multiplicative) \n#' between the observed and simulated means in the train period. The \\code{additive} or \\code{multiplicative}\n#' correction is defined by parameter \\code{scaling.type} (default is \\code{additive}).\n#' The additive version is preferably applicable to unbounded variables (e.g. temperature) \n#' and the multiplicative to variables with a lower bound (e.g. precipitation, because it also preserves the frequency). \n#' \n#' \n#' @examples \n#' \n#' # Use testdl as an example, we take frc, hindcast and obs fro testdl.\n#' data(testdl)\n#' \n#' # common period has to be extracted in order to better train the forecast.\n#' \n#' datalist <- extractPeriod(testdl, startDate = '1994-1-1', endDate = '1995-10-1')\n#' \n#' frc <- datalist[[1]]\n#' hindcast <- datalist[[2]]\n#' obs <- datalist[[3]]\n#' \n#' # default method is delta\n#' frc_new <- biasCorrect(frc, hindcast, obs)\n#' \n#' # If the variable is precipitation, it cannot be negative value, so use multi scale method\n#' frc_new <- biasCorrect(frc, hindcast, obs, method = 'scaling', scaleType = 'multi')\n#' \n#' @references \n#' Bias correction methods come from \\code{biasCorrection} from \\code{dowscaleR}\n#' \n#' Santander Meteorology Group (2015). downscaleR: Climate data manipulation and statistical downscaling. R\n#' package version 0.6-0. https://github.com/SantanderMetGroup/downscaleR/wiki\n#' @export\n\nbiasCorrect <- function(frc, hindcast, obs, method = 'delta', scaleType = 'multi', input = 'TS', preci = FALSE){\n \n if (input == 'TS') {\n # First check if the first column is Date\n if (!grepl('-|/', obs[1, 1]) | !grepl('-|/', hindcast[1, 1]) | !grepl('-|/', frc[1, 1])) {\n stop('First column is not date or Wrong Date formate, check the format in ?as.Date{base} \n and use as.Date to convert.If your input is a hyfo dataset, put input = \"hyfo\" as an\n argument, check help for more info.')\n } \n \n \n # change to date type is easier, but in case in future the flood part is added, Date type doesn't have\n # hour, min and sec, so, it's better to convert it into POSIxlt.\n \n # if condition only accepts one condition, for list comparison, there are a lot of conditions, better\n # further process it, like using any.\n if (any(as.POSIXlt(hindcast[, 1]) != as.POSIXlt(obs[, 1]))) {\n warning('time of obs and time of hindcast are not the same, which may cause inaccuracy in \n the calibration.')\n }\n \n if (ncol(frc) == 2) {\n frc_data <- biasCorrect_core(frc[, 2], hindcast[, 2], obs[, 2], method = method, \n scaleType = scaleType, preci = preci)\n } else if (ncol(frc) > 2) {\n # In this case more than one value columns exist in the dataset, both frc and hindcast.\n \n n <- ncol(frc)\n \n # For every column, it's biascorrected respectively.\n frc_data <- lapply(2:n, function(x) biasCorrect_core(frc[, x], hindcast[, x], obs[, 2], method = method,\n scaleType = scaleType, preci = preci))\n frc_data <- do.call('cbind', frc_data)\n \n } else stop('Wrong TS input, check your TS dimension.')\n \n \n } else if (input == 'hyfo') {\n print('Under development...')\n }\n\n names <- colnames(frc)\n frc <- data.frame(frc[, 1], frc_data)\n colnames(frc) <- names\n \n return(frc)\n}\n\n\n# this is only used to calculate the value column, \nbiasCorrect_core <- function(frc, hindcast, obs, method = 'delta', scaleType = 'multi', preci = FALSE){\n \n\n # default is the simplest method in biascorrection, just do simple addition and subtraction.\n if (method == 'delta') {\n # comes from downscaleR biascorrection method\n frcMean <- mean(obs, na.rm = TRUE)\n hindcastMean <- mean(hindcast, na.rm = TRUE)\n frc <- obs - hindcastMean + frcMean\n \n } else if (method == 'scaling') {\n obsMean <- mean(obs, na.rm = TRUE)\n hindcastMean <- mean(hindcast, na.rm = TRUE)\n \n if (scaleType == 'multi') {\n frc <- frc / hindcastMean * obsMean\n \n } else if (scaleType == 'add') {\n frc <- frc - hindcastMean + obsMean\n }\n \n \n } else if (method == 'eqm') {\n \n # To be added, right now too complicated and not so much use.\n \n }\n \n \n return(frc)\n}", + "created" : 1440499519868.000, + "dirty" : false, + "encoding" : "ASCII", + "folds" : "", + "hash" : "2108272061", + "id" : "579C533F", + "lastKnownWriteTime" : 1441616505, + "path" : "~/hyfo/R/biasCorrect.R", + "project_path" : "R/biasCorrect.R", + "properties" : { + "tempName" : "Untitled1" + }, + "relative_order" : 1, + "source_on_save" : false, + "type" : "r_source" +} \ No newline at end of file diff --git a/.Rproj.user/132DF987/sdb/per/t/606BD0E b/.Rproj.user/132DF987/sdb/per/t/606BD0E new file mode 100644 index 0000000..e6832fe --- /dev/null +++ b/.Rproj.user/132DF987/sdb/per/t/606BD0E @@ -0,0 +1,17 @@ +{ + "contents" : "% Generated by roxygen2 (4.1.0.9001): do not edit by hand\n% Please edit documentation in R/biasCorrect.R\n\\name{biasCorrect}\n\\alias{biasCorrect}\n\\title{Biascorrect the input timeseries or hyfo dataset}\n\\usage{\nbiasCorrect(frc, hindcast, obs, method = \"delta\", scaleType = \"multi\",\n input = \"TS\", preci = FALSE)\n}\n\\arguments{\n\\item{frc}{a hyfo grid data output or a dataframe (time series) consists of Date column and one or more value columns,\nrepresenting the forecast to be calibrated.}\n\n\\item{hindcast}{a hyfo grid data output or a dataframe(time series) consists of Date column and one or more value columns,\nrepresenting the hindcast data. This data will be used in the calibration of the forecast, so it's better to have the same date period as\nobservation data. Check details for more information.}\n\n\\item{obs}{a hyfo grid data output or a dataframe (time series) consists of Date column and one or more value columns,\nrepresenting the observation data.}\n\n\\item{method}{bias correct method, including 'delta', 'scaling'...}\n\n\\item{scaleType}{only when the method \"scaling\" is chosen, scaleType will be available. Two different types\nof scaling method, 'add' and 'mult', which means additive and multiplicative scaling method. More info check\ndetails.}\n\n\\item{input}{If input is a time series, \\code{input = 'TS'} needs to be assigned, or hyfo will take it as\nan hyfo output grid file. Default is time series input, where in most of the cases we prefer. If your input\nis a hyfo output file, \\code{input = 'hyfo'}.}\n\n\\item{preci}{If the precipitation is biascorrected, then you have to assign \\code{preci = TRUE}. Since for\nprecipitation, some biascorrect methods may not apply to, or some methods are specially for precipitation.\nDefault is FALSE.}\n}\n\\description{\nBiascorrect the input time series or dataset, the input time series or dataset should consist of observation, hindcast, and forecast.\nobservation and hindcast should belong to the same period, in order to calibrate. Then the modified forecast\nwill be returned. If the input is a time series, first column should be date column and rest columns should be\nthe value column. If the input is a hyfo dataset, the dataset should be the result of \\code{loadNcdf}, or a list\nfile with the same format.\n}\n\\details{\nSince climate forecast is based on global condition, when downscaling to different regions, it may include\nsome bias, biascorrection is used then to fix the bias.\n\n\\strong{Hindcast}\n\nIn order to bias correct, we need to pick up some\ndata from the forecast to train with the observation, which is called hindcast in this function. Hindcast\nshould have \\strong{EVERY} attributes that forecast has.\n\nHindcast is also called re-forecast, is the forecast of the past. E.g. you have a forecast from year 2000-2010, assuming now you are in 2005. So from 2000-2005, this period\nis the hindcast period, and 2005-2010, this period is the forecast period.\n\n\n\\strong{How it works}\n\nForecast product has to be calibrated, usually the system is doing forecast in real time. So, e.g., if the\nforecast starts from year 2000, assuming you are in year 2003, then you will have 3 years' hindcast\ndata (year 2000 - 2003), which can be used to calibrate. And your forecast period is (2003-2004)\n\nE.g. you have observation from 2001 - 2002, this is your input obs. Then you can take the same\nperiod (2001-2002) from the forecast, which is the hindcast period. For forecast, you can take any period.\nThe program will evaluate the obs and hindcast, to get the modification of the forecast, and then add the\nmodification to the forecast data.\n\n\\strong{method}\n\nDifferent methods used in the bias correction.\n\n\\strong{delta}\n\nThis method consists on adding to the observations the mean change signal (delta method).\nThis method is applicable to any kind of variable but it is preferable to avoid it for bounded variables\n (e.g. precipitation, wind speed, etc.) because values out of the variable range could be obtained\n (e.g. negative wind speeds...)\n\n \\strong{scaling}\n\nThis method consists on scaling the simulation with the difference (additive) or quotient (multiplicative)\nbetween the observed and simulated means in the train period. The \\code{additive} or \\code{multiplicative}\ncorrection is defined by parameter \\code{scaling.type} (default is \\code{additive}).\nThe additive version is preferably applicable to unbounded variables (e.g. temperature)\nand the multiplicative to variables with a lower bound (e.g. precipitation, because it also preserves the frequency).\n}\n\\examples{\n# Use testdl as an example, we take frc, hindcast and obs fro testdl.\ndata(testdl)\n\n# common period has to be extracted in order to better train the forecast.\n\ndatalist <- extractPeriod(testdl, startDate = '1994-1-1', endDate = '1995-10-1')\n\nfrc <- datalist[[1]]\nhindcast <- datalist[[2]]\nobs <- datalist[[3]]\n\n# default method is delta\nfrc_new <- biasCorrect(frc, hindcast, obs)\n\n# If the variable is precipitation, it cannot be negative value, so use multi scale method\nfrc_new <- biasCorrect(frc, hindcast, obs, method = 'scaling', scaleType = 'multi')\n}\n\\references{\nBias correction methods come from \\code{biasCorrection} from \\code{dowscaleR}\n\nSantander Meteorology Group (2015). downscaleR: Climate data manipulation and statistical downscaling. R\npackage version 0.6-0. https://github.com/SantanderMetGroup/downscaleR/wiki\n}\n\n", + "created" : 1441615447496.000, + "dirty" : false, + "encoding" : "ASCII", + "folds" : "", + "hash" : "1599471990", + "id" : "606BD0E", + "lastKnownWriteTime" : 1441616512, + "path" : "~/hyfo/man/biasCorrect.Rd", + "project_path" : "man/biasCorrect.Rd", + "properties" : { + }, + "relative_order" : 6, + "source_on_save" : false, + "type" : "r_doc" +} \ No newline at end of file diff --git a/.Rproj.user/132DF987/sdb/per/t/A9B56DE9 b/.Rproj.user/132DF987/sdb/per/t/A9B56DE9 index 5a6be56..0fc932a 100644 --- a/.Rproj.user/132DF987/sdb/per/t/A9B56DE9 +++ b/.Rproj.user/132DF987/sdb/per/t/A9B56DE9 @@ -6,7 +6,7 @@ "folds" : "", "hash" : "297121889", "id" : "A9B56DE9", - "lastKnownWriteTime" : 1441195685, + "lastKnownWriteTime" : 1441354441, "path" : "~/hyfo/vignettes/hyfo.Rmd", "project_path" : "vignettes/hyfo.Rmd", "properties" : { diff --git a/.Rproj.user/132DF987/sdb/per/t/E7F036D3 b/.Rproj.user/132DF987/sdb/per/t/E7F036D3 new file mode 100644 index 0000000..18a6381 --- /dev/null +++ b/.Rproj.user/132DF987/sdb/per/t/E7F036D3 @@ -0,0 +1,17 @@ +{ + "contents" : "Package: hyfo\nType: Package\nTitle: Hydrology and Climate Forecasting R Package for Data Analysis and Visualization\nVersion: 1.1.7\nDate: 2015-07-02\nAuthors@R: person(\"Yuanchao\", \"Xu\", email = \"xuyuanchao37@gmail.com\",\n role = c(\"aut\", \"cre\"))\nDescription: This package can be used as a tool for hydrology and climate forecasting. \n There are several tools including data processing, data visualization and data analysis. \n For hydrological and hydraulic modellers, hyfo can be a good pre-processing and post-processing \n tool for you.\n hyfo has been tested stable on windows platform.\nLicense: GPL-2\nDepends: R (>= 3.1.0), stats (>= 3.1.3), utils(>= 3.1.3)\nImports: ggplot2 (>= 1.0.1),\n reshape2 (>= 1.4.1),\n zoo (>= 1.7-12),\n rgdal (>= 0.9-3),\n plyr (>= 1.8.3),\n moments (>= 0.14),\n lmom (>= 2.5),\n maps(>= 2.3-9),\n maptools (>= 0.8-36),\n rgeos (>= 0.3-8),\n ncdf (>= 1.6.8)\nSuggests: gridExtra,\n knitr\nVignetteBuilder: knitr\nLazyData: true\nURL: http://yuanchao-xu.github.io/hyfo/\nBugReports: https://github.com/Yuanchao-Xu/hyfo/issues\nrepository: github", + "created" : 1441616319813.000, + "dirty" : false, + "encoding" : "ASCII", + "folds" : "", + "hash" : "712805123", + "id" : "E7F036D3", + "lastKnownWriteTime" : 1441616324, + "path" : "~/hyfo/DESCRIPTION", + "project_path" : "DESCRIPTION", + "properties" : { + }, + "relative_order" : 7, + "source_on_save" : false, + "type" : "dcf" +} \ No newline at end of file diff --git a/.Rproj.user/132DF987/sdb/per/t/EAF9DB09 b/.Rproj.user/132DF987/sdb/per/t/EAF9DB09 new file mode 100644 index 0000000..cd7b29d --- /dev/null +++ b/.Rproj.user/132DF987/sdb/per/t/EAF9DB09 @@ -0,0 +1,18 @@ +{ + "contents" : "#' Get variable name of the NetCDF file.\n#' \n#' Get variable name in the NetCDF file. After knowning the name, you can use \\code{loadNcdf} to load\n#' the target variable.\n#' \n#' @param filePath A path pointing to the netCDF file.\n#' @return The names of the varialbes in the file.\n#' @examples \n#' # First open the test NETcDF file.\n#' filePath <- system.file(\"extdata\", \"tnc.nc\", package = \"hyfo\")\n#' \n#' # Then if you don't know the variable name, you can use \\code{getNcdfVar} to get variable name\n#' varname <- getNcdfVar(filePath)\n#' \n#' @import ncdf\n#' @references \n#' David Pierce (2014). ncdf: Interface to Unidata netCDF data files. R package version 1.6.8.\n#' http://CRAN.R-project.org/package=ncdf\n#' \n#' \n#' @export\ngetNcdfVar <- function(filePath) {\n nc <- open.ncdf(filePath)\n names <- names(nc$var)\n return(names)\n}\n\n\n#' Load NetCDF file\n#' \n#' @param filePath A path pointing to the NetCDF file, version3.\n#' @param varname A character representing the variable name, you can use \\code{getNcdfVar} to\n#' get the basic information about the variables and select the target.\n#' @param tz A string representing the time zone, default is GMT, if you know what time zone is \n#' you can assign it in the argument. If \\code{tz = ''}, current time zone will be taken.\n#' @param ... Arguments will be passed to \\code{downscaleNcdf}\n#' @return A list object from \\code{hyfo} containing the information to be used in the analysis, \n#' or biascorrection.\n#' @examples \n#' # First open the test NETcDF file.\n#' filePath <- system.file(\"extdata\", \"tnc.nc\", package = \"hyfo\")\n#' \n#' # Then if you don't know the variable name, you can use \\code{getNcdfVar} to get variable name\n#' varname <- getNcdfVar(filePath)\n#' \n#' nc <- loadNcdf(filePath, varname)\n#' \n#' @export\n#' @import ncdf\n#' @references \n#' David Pierce (2014). ncdf: Interface to Unidata netCDF data files. R package version 1.6.8.\n#' http://CRAN.R-project.org/package=ncdf\n#' \n#' file structure refers to\n#' \n#' Santander MetGroup (2015). ecomsUDG.Raccess: R interface to the ECOMS User Data Gateway. R package\n#' version 2.2-6. http://meteo.unican.es/ecoms-udg\n#' \nloadNcdf <- function(filePath, varname, tz = 'GMT', ...) {\n nc <- open.ncdf(filePath)\n \n var <- nc$var\n # Use name to locate the variable\n call_1 <- as.call(c(\n list(as.name('$'), var, varname)\n ))\n var <- eval(call_1)\n if(is.null(var)) stop('No such variable name, check source file.')\n \n # First needs to identify the variable name, load the right data\n message('Loading data...')\n nc_data <- get.var.ncdf(nc, var)\n message('Processing...')\n \n dimNames <- unlist(lapply(1:length(var$dim), function(x) var$dim[[x]]$name))\n \n # Only deals with the most common dimensions, futher dimensions will be added in future.\n dimIndex <- match(c('lon', 'lat', 'time', 'member'), dimNames)\n \n gridData <- list()\n gridData$Variable$varName <- varname\n gridData$xyCoords$x <- var$dim[[dimIndex[1]]]$vals\n gridData$xyCoords$y <- var$dim[[dimIndex[2]]]$vals\n \n # Time part needs to be taken seperately\n \n timeUnit <- strsplit(var$dim[[dimIndex[3]]]$units, split = ' since')[[1]][1]\n timeDiff <- var$dim[[dimIndex[3]]]$vals\n # To get real time, time since when has to be grabbed from the dataset.\n timeSince <- as.POSIXlt(strsplit(var$dim[[dimIndex[3]]]$units, split = 'since')[[1]][2], tz = tz)\n \n \n# Date <- rep(timeSince, length(timeDiff))\n \n \n unitDic <- data.frame(weeks = 'weeks', days = 'days', hours = 'hours',\n minutes = 'mins', seconds = 'secs')\n \n timeDiff <- as.difftime(timeDiff, units = as.character(unitDic[1, timeUnit]))\n \n# if (grepl('day', timeUnit)) {\n# Date$mday <- Date$mday + timeDiff\n# } else if (grepl('second', timeUnit)) {\n# Date$sec <- Date$sec + timeDiff\n# }\n Date <- timeSince + timeDiff\n \n if (length(Date) == 1) {\n warning(\"Only one time step is taken, time dimension is dropped in the original data.\n But after loading, the time dimension (with length : 1) will be added.\")\n }\n gridData$Dates$start <- as.character(Date)\n \n # Assing data to grid data\n # At leaset should be 3 dimensions, lon, lat, time. So if less than 3, \n \n if (length(dim(nc_data)) < 3) {\n dim(nc_data) <- c(dim(nc_data), 1) \n message('Time dimension is added, make sure in your original data, only time dimension is dropped.')\n }\n gridData$Data <- nc_data\n attributes(gridData$Data)$dimensions <- dimNames\n \n if (!is.na(dimIndex[4])) gridData$Members <- var$dim[[dimIndex[4]]]$vals\n \n gridData$Loaded <- 'by hyfo package, http://yuanchao-xu.github.io/hyfo/'\n close.ncdf(nc)\n \n output <- downscaleNcdf(gridData, ...)\n \n return(output)\n \n}\n\n\n\n\n#' Downscale NetCDF file\n#' @param gridData A hyfo list file or the list file from \\code{loadECOMS{ecomsUDG.Raccess}}\n#' or \\code{loadGridData{ecomsUDG.Raccess}}\n#' @param year A vector of the target year. e.g. \\code{year = 2000}, \\code{year = 1980:2000}\n#' @param lon A vector of the range of the downscaled longitude, should contain a max value\n#' and a min value. e.g. \\code{lon = c(-1.5, 2,5)}\n#' @param lat A vector of the range of the downscaled latitude, should contain a max value\n#' and a min value. e.g. \\code{lat = c(32,2, 36)}\n#' @return A downscaled hyfo list file.\n#' @examples \n#' # First open the test NETcDF file.\n#' filePath <- system.file(\"extdata\", \"tnc.nc\", package = \"hyfo\")\n#' \n#' \n#' # Then if you don't know the variable name, you can use \\code{getNcdfVar} to get variable name\n#' varname <- getNcdfVar(filePath)\n#' \n#' nc <- loadNcdf(filePath, varname)\n#' \n#' # Then write to your work directory\n#' \n#' nc1 <- downscaleNcdf(nc, year = 2006, lon = c(-2, -0.5), lat = c(43.2, 43.7))\n#' \n#' \n#' @export \n#' @references \n#' David Pierce (2014). ncdf: Interface to Unidata netCDF data files. R package version 1.6.8.\n#' http://CRAN.R-project.org/package=ncdf\n#' \n#' file structure refers to\n#' \n#' Santander MetGroup (2015). ecomsUDG.Raccess: R interface to the ECOMS User Data Gateway. R package\n#' version 2.2-6. http://meteo.unican.es/ecoms-udg\n#' \ndownscaleNcdf <- function(gridData, year = NULL, lon = NULL, lat = NULL) {\n \n if (!is.null(year)) {\n Dates <- as.POSIXlt(gridData$Dates$start)\n yearIndex <- Dates$year + 1900\n \n targetYearIndex <- unlist(lapply(year, function(x) which(yearIndex == x)))\n if (length(targetYearIndex) == 0) stop('Check your input year, it may exceed the years \n in the input dataset.')\n gridData$Dates$start <- gridData$Dates$start[targetYearIndex]\n gridData$Dates$end <- gridData$Dates$end[targetYearIndex]\n \n timeDim <- match('time', attributes(gridData$Data)$dimensions)\n \n gridData$Data <- chooseDim(gridData$Data, timeDim, targetYearIndex)\n }\n \n if (!is.null(lon)) {\n \n lonIndex <- gridData$xyCoords$x\n \n lonI1 <- which(abs(lonIndex - min(lon)) == min(abs(lonIndex - min(lon)), na.rm = TRUE)) \n lonI2 <- which(abs(lonIndex - max(lon)) == min(abs(lonIndex - max(lon)), na.rm = TRUE)) \n \n targetLonIndex <- lonI1:lonI2\n if (length(targetLonIndex) == 0) stop('Your input lon is too small, try to expand the \n longitude range.') \n gridData$xyCoords$x <- gridData$xyCoords$x[targetLonIndex]\n lonDim <- match('lon', attributes(gridData$Data)$dimensions)\n \n gridData$Data <- chooseDim(gridData$Data, lonDim, targetLonIndex)\n }\n \n \n if (!is.null(lat)) {\n latIndex <- gridData$xyCoords$y\n \n latI1 <- which(abs(latIndex - min(lat)) == min(abs(latIndex - min(lat)), na.rm = TRUE)) \n latI2 <- which(abs(latIndex - max(lat)) == min(abs(latIndex - max(lat)), na.rm = TRUE)) \n \n targetLatIndex <- latI1:latI2\n \n if (length(targetLonIndex) == 0) stop('Your input lat is too small, try to expand the \n latitude range.') \n gridData$xyCoords$y <- gridData$xyCoords$y[targetLatIndex]\n latDim <- match('lat', attributes(gridData$Data)$dimensions)\n gridData$Data <- chooseDim(gridData$Data, latDim, targetLatIndex)\n }\n \n return(gridData)\n \n}\n\n\n\n\n\n\n\n\n\n\n#' Write to NetCDF file using hyfo list file\n#' @param gridData A hyfo list file or the list file from \\code{loadECOMS{ecomsUDG.Raccess}}\n#' or \\code{loadGridData{ecomsUDG.Raccess}}\n#' @param filePath A path of the new NetCDF file, should end with \".nc\"\n#' @param missingValue A number representing the missing value in the NetCDF file, default\n#' is 1e20\n#' #' @param tz A string representing the time zone, default is GMT, if you know what time zone is \n#' you can assign it in the argument. If \\code{tz = ''}, current time zone will be taken.\n#' @param units A string showing in which unit you are putting in the NetCDF file, it can be \n#' seconds or days and so on. If not specified, the function will pick up the possible largest \n#' time units from \\code{c('weeks', 'days', 'hours', 'mins', 'secs')}\n#' @return An NetCDF version 3 file.\n#' @examples \n#' # First open the test NETcDF file.\n#' filePath <- system.file(\"extdata\", \"tnc.nc\", package = \"hyfo\")\n#' \n#' \n#' # Then if you don't know the variable name, you can use \\code{getNcdfVar} to get variable name\n#' varname <- getNcdfVar(filePath)\n#' \n#' nc <- loadNcdf(filePath, varname)\n#' \n#' # Then write to your work directory\n#' \n#' writeNcdf(nc, 'test.nc')\n#' \n#' \n#' @export \n#' @import ncdf\n#' @references \n#' David Pierce (2014). ncdf: Interface to Unidata netCDF data files. R package version 1.6.8.\n#' http://CRAN.R-project.org/package=ncdf\n#' \n#' file structure refers to\n#' \n#' Santander MetGroup (2015). ecomsUDG.Raccess: R interface to the ECOMS User Data Gateway. R package\n#' version 2.2-6. http://meteo.unican.es/ecoms-udg\n#' \n#' \nwriteNcdf <- function(gridData, filePath, missingValue = 1e20, tz = 'GMT', units = NULL) {\n \n name <- gridData$Variable$varName\n # First defines dimensions.\n dimLon <- dim.def.ncdf('lon', 'degree', gridData$xyCoords$x)\n dimLat <- dim.def.ncdf('lat', 'degree', gridData$xyCoords$y)\n dimMem <- NULL\n if (!is.null(gridData$Members)) {\n dimMem <- dim.def.ncdf('member', 'members', 1:length(gridData$Members))\n }\n \n \n # Time needs to be treated seperately\n dates <- as.POSIXlt(gridData$Dates$start, tz = tz)\n if (is.null(units)) {\n units <- getTimeUnit(dates)\n time <- difftime(dates, dates[1], units = units)\n } else {\n time <- difftime(dates, dates[1], units = units)\n }\n timeUnits <- paste(units, 'since', dates[1])\n dimTime <- dim.def.ncdf('time', timeUnits, time)\n \n \n # Depending on whether there is a member part of the dataset.\n \n dimList <- list(dimLon, dimLat, dimTime, dimMem)\n # delete the NULL list, in order that there is no member part in the data.\n dimList <- Filter(Negate(is.null), dimList)\n # Then difines data\n var <- var.def.ncdf( name, \"units\", dimList, missingValue)\n \n nc <- create.ncdf(filePath, var)\n \n # This part comes from the library downscaleR, can be deleted if you don't \n # use {ecomsUDG.Raccess}, by adding this, the file can be read by the package {ecomsUDG.Raccess}\n att.put.ncdf(nc, \"time\", \"standard_name\",\"time\")\n att.put.ncdf(nc, \"time\", \"axis\",\"T\")\n att.put.ncdf(nc, \"time\", \"_CoordinateAxisType\",\"Time\")\n #att.put.ncdf(nc, \"time\", \"_ChunkSize\",1)\n att.put.ncdf(nc, \"lon\", \"standard_name\",\"longitude\")\n att.put.ncdf(nc, \"lon\", \"_CoordinateAxisType\",\"Lon\")\n att.put.ncdf(nc, \"lat\", \"standard_name\",\"latitude\")\n att.put.ncdf(nc, \"lat\", \"_CoordinateAxisType\",\"Lat\")\n if (!is.null(dimMem)){\n att.put.ncdf(nc, \"member\", \"standard_name\",\"realization\")\n att.put.ncdf(nc, \"member\", \"_CoordinateAxisType\",\"Ensemble\")\n #att.put.ncdf(nc, \"member\", \"ref\",\"http://www.uncertml.org/samples/realisation\")\n }\n \n \n # This part has to be put\n att.put.ncdf(nc, 0, \"Conventions\",\"CF-1.4\")\n att.put.ncdf(nc, 0, 'WrittenBy', 'hyfo(http://yuanchao-xu.github.io/hyfo/)')\n \n dimIndex <- match(c('lon', 'lat', 'time', 'member'), attributes(gridData$Data)$dimensions)\n dimIndex <- na.omit(dimIndex)\n data <- aperm(gridData$Data, dimIndex)\n put.var.ncdf(nc, name, data)\n close.ncdf(nc)\n \n}\n\n# For internaluse by writeNcdf\ngetTimeUnit <- function(dates) {\n units <- c('weeks', 'days', 'hours', 'mins', 'secs')\n output <- NULL\n for (unit in units) {\n time <- difftime(dates, dates[1], units = unit)\n rem <- sapply(time, function(x) x%%1)\n if (!any(rem != 0)) {\n output <- unit\n break\n }\n } \n return(output)\n}\n\n\n# Save for future use. \n#' @import ncdf\n#' @references \n#' David Pierce (2014). ncdf: Interface to Unidata netCDF data files. R package version 1.6.8.\n#' http://CRAN.R-project.org/package=ncdf\ngetExtralDim <- function(...) {\n dimList <- list(...)\n \n \n}", + "created" : 1441265985784.000, + "dirty" : false, + "encoding" : "ASCII", + "folds" : "", + "hash" : "2249661437", + "id" : "EAF9DB09", + "lastKnownWriteTime" : 1440689176, + "path" : "~/hyfo/R/Ncdf_related.R", + "project_path" : "R/Ncdf_related.R", + "properties" : { + "tempName" : "Untitled1" + }, + "relative_order" : 4, + "source_on_save" : false, + "type" : "r_source" +} \ No newline at end of file diff --git a/.Rproj.user/132DF987/sdb/per/u/579C533F b/.Rproj.user/132DF987/sdb/per/u/579C533F deleted file mode 100644 index 355491b..0000000 --- a/.Rproj.user/132DF987/sdb/per/u/579C533F +++ /dev/null @@ -1,18 +0,0 @@ -{ - "contents" : "", - "created" : 1440499519868.000, - "dirty" : false, - "encoding" : "", - "folds" : "", - "hash" : "0", - "id" : "579C533F", - "lastKnownWriteTime" : 7011605692497750387, - "path" : null, - "project_path" : null, - "properties" : { - "tempName" : "Untitled1" - }, - "relative_order" : 1, - "source_on_save" : false, - "type" : "r_source" -} \ No newline at end of file diff --git a/.Rproj.user/132DF987/sdb/prop/C06E3597 b/.Rproj.user/132DF987/sdb/prop/C06E3597 new file mode 100644 index 0000000..32390ac --- /dev/null +++ b/.Rproj.user/132DF987/sdb/prop/C06E3597 @@ -0,0 +1,3 @@ +{ + "tempName" : "Untitled1" +} \ No newline at end of file diff --git a/.Rproj.user/132DF987/sdb/prop/D54D6462 b/.Rproj.user/132DF987/sdb/prop/D54D6462 new file mode 100644 index 0000000..7a73a41 --- /dev/null +++ b/.Rproj.user/132DF987/sdb/prop/D54D6462 @@ -0,0 +1,2 @@ +{ +} \ No newline at end of file diff --git a/.Rproj.user/132DF987/sdb/prop/INDEX b/.Rproj.user/132DF987/sdb/prop/INDEX index 3a96abf..2b14bd2 100644 --- a/.Rproj.user/132DF987/sdb/prop/INDEX +++ b/.Rproj.user/132DF987/sdb/prop/INDEX @@ -62,6 +62,7 @@ C%3A%2FUsers%2Fxyc%2FDownloads%2Flibrary%2Fncdf4%2FDESCRIPTION="1C2105A3" ~%2Fhyfo%2FR%2F.Rhistory="38F0EA43" ~%2Fhyfo%2FR%2FNcdf_related.R="D698D9D" ~%2Fhyfo%2FR%2FanalyzeTS.R="47CF58C1" +~%2Fhyfo%2FR%2FbiasCorrect.R="C06E3597" ~%2Fhyfo%2FR%2FcheckBind.R="86353E86" ~%2Fhyfo%2FR%2FcollectData.R="95B83DCB" ~%2Fhyfo%2FR%2FcollectData_csv.R="F9B29B00" @@ -84,6 +85,7 @@ C%3A%2FUsers%2Fxyc%2FDownloads%2Flibrary%2Fncdf4%2FDESCRIPTION="1C2105A3" ~%2Fhyfo%2FR%2Fshp2cat.R="69971FFB" ~%2Fhyfo%2FREADME.md="F2D6A8FB" ~%2Fhyfo%2FRprofile="C6D43478" +~%2Fhyfo%2Fman%2FbiasCorrect.Rd="D54D6462" ~%2Fhyfo%2Fman%2FcheckBind.Rd="8F7A1297" ~%2Fhyfo%2Fman%2FcollectData.Rd="6F850361" ~%2Fhyfo%2Fman%2FcollectData_excel_anarbe.Rd="DF686A58" diff --git a/DESCRIPTION b/DESCRIPTION index 92e5f91..1bc6649 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: hyfo Type: Package Title: Hydrology and Climate Forecasting R Package for Data Analysis and Visualization -Version: 1.1.6 +Version: 1.1.7 Date: 2015-07-02 Authors@R: person("Yuanchao", "Xu", email = "xuyuanchao37@gmail.com", role = c("aut", "cre")) diff --git a/NAMESPACE b/NAMESPACE index 49ec8ab..e393a29 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,6 @@ # Generated by roxygen2 (4.1.0.9001): do not edit by hand +export(biasCorrect) export(checkBind) export(collectData) export(collectData_csv_anarbe) diff --git a/R/biasCorrect.R b/R/biasCorrect.R new file mode 100644 index 0000000..d75bd1f --- /dev/null +++ b/R/biasCorrect.R @@ -0,0 +1,180 @@ + + +#' Biascorrect the input timeseries or hyfo dataset +#' +#' Biascorrect the input time series or dataset, the input time series or dataset should consist of observation, hindcast, and forecast. +#' observation and hindcast should belong to the same period, in order to calibrate. Then the modified forecast +#' will be returned. If the input is a time series, first column should be date column and rest columns should be +#' the value column. If the input is a hyfo dataset, the dataset should be the result of \code{loadNcdf}, or a list +#' file with the same format. +#' +#' @param frc a hyfo grid data output or a dataframe (time series) consists of Date column and one or more value columns, +#' representing the forecast to be calibrated. +#' @param hindcast a hyfo grid data output or a dataframe(time series) consists of Date column and one or more value columns, +#' representing the hindcast data. This data will be used in the calibration of the forecast, so it's better to have the same date period as +#' observation data. Check details for more information. +#' @param obs a hyfo grid data output or a dataframe (time series) consists of Date column and one or more value columns, +#' representing the observation data. +#' @param method bias correct method, including 'delta', 'scaling'... +#' @param scaleType only when the method "scaling" is chosen, scaleType will be available. Two different types +#' of scaling method, 'add' and 'mult', which means additive and multiplicative scaling method. More info check +#' details. +#' @param input If input is a time series, \code{input = 'TS'} needs to be assigned, or hyfo will take it as +#' an hyfo output grid file. Default is time series input, where in most of the cases we prefer. If your input +#' is a hyfo output file, \code{input = 'hyfo'}. +#' @param preci If the precipitation is biascorrected, then you have to assign \code{preci = TRUE}. Since for +#' precipitation, some biascorrect methods may not apply to, or some methods are specially for precipitation. +#' Default is FALSE. +#' @details +#' +#' Since climate forecast is based on global condition, when downscaling to different regions, it may include +#' some bias, biascorrection is used then to fix the bias. +#' +#' \strong{Hindcast} +#' +#' In order to bias correct, we need to pick up some +#' data from the forecast to train with the observation, which is called hindcast in this function. Hindcast +#' should have \strong{EVERY} attributes that forecast has. +#' +#' Hindcast is also called re-forecast, is the forecast of the past. E.g. you have a forecast from year 2000-2010, assuming now you are in 2005. So from 2000-2005, this period +#' is the hindcast period, and 2005-2010, this period is the forecast period. +#' +#' +#' \strong{How it works} +#' +#' Forecast product has to be calibrated, usually the system is doing forecast in real time. So, e.g., if the +#' forecast starts from year 2000, assuming you are in year 2003, then you will have 3 years' hindcast +#' data (year 2000 - 2003), which can be used to calibrate. And your forecast period is (2003-2004) +#' +#' E.g. you have observation from 2001 - 2002, this is your input obs. Then you can take the same +#' period (2001-2002) from the forecast, which is the hindcast period. For forecast, you can take any period. +#' The program will evaluate the obs and hindcast, to get the modification of the forecast, and then add the +#' modification to the forecast data. +#' +#' \strong{method} +#' +#' Different methods used in the bias correction. +#' +#' \strong{delta} +#' +#' This method consists on adding to the observations the mean change signal (delta method). +#' This method is applicable to any kind of variable but it is preferable to avoid it for bounded variables +#' (e.g. precipitation, wind speed, etc.) because values out of the variable range could be obtained +#' (e.g. negative wind speeds...) +#' +#' \strong{scaling} +#' +#' This method consists on scaling the simulation with the difference (additive) or quotient (multiplicative) +#' between the observed and simulated means in the train period. The \code{additive} or \code{multiplicative} +#' correction is defined by parameter \code{scaling.type} (default is \code{additive}). +#' The additive version is preferably applicable to unbounded variables (e.g. temperature) +#' and the multiplicative to variables with a lower bound (e.g. precipitation, because it also preserves the frequency). +#' +#' +#' @examples +#' +#' # Use testdl as an example, we take frc, hindcast and obs fro testdl. +#' data(testdl) +#' +#' # common period has to be extracted in order to better train the forecast. +#' +#' datalist <- extractPeriod(testdl, startDate = '1994-1-1', endDate = '1995-10-1') +#' +#' frc <- datalist[[1]] +#' hindcast <- datalist[[2]] +#' obs <- datalist[[3]] +#' +#' # default method is delta +#' frc_new <- biasCorrect(frc, hindcast, obs) +#' +#' # If the variable is precipitation, it cannot be negative value, so use multi scale method +#' frc_new <- biasCorrect(frc, hindcast, obs, method = 'scaling', scaleType = 'multi') +#' +#' @references +#' Bias correction methods come from \code{biasCorrection} from \code{dowscaleR} +#' +#' Santander Meteorology Group (2015). downscaleR: Climate data manipulation and statistical downscaling. R +#' package version 0.6-0. https://github.com/SantanderMetGroup/downscaleR/wiki +#' @export + +biasCorrect <- function(frc, hindcast, obs, method = 'delta', scaleType = 'multi', input = 'TS', preci = FALSE){ + + if (input == 'TS') { + # First check if the first column is Date + if (!grepl('-|/', obs[1, 1]) | !grepl('-|/', hindcast[1, 1]) | !grepl('-|/', frc[1, 1])) { + stop('First column is not date or Wrong Date formate, check the format in ?as.Date{base} + and use as.Date to convert.If your input is a hyfo dataset, put input = "hyfo" as an + argument, check help for more info.') + } + + + # change to date type is easier, but in case in future the flood part is added, Date type doesn't have + # hour, min and sec, so, it's better to convert it into POSIxlt. + + # if condition only accepts one condition, for list comparison, there are a lot of conditions, better + # further process it, like using any. + if (any(as.POSIXlt(hindcast[, 1]) != as.POSIXlt(obs[, 1]))) { + warning('time of obs and time of hindcast are not the same, which may cause inaccuracy in + the calibration.') + } + + if (ncol(frc) == 2) { + frc_data <- biasCorrect_core(frc[, 2], hindcast[, 2], obs[, 2], method = method, + scaleType = scaleType, preci = preci) + } else if (ncol(frc) > 2) { + # In this case more than one value columns exist in the dataset, both frc and hindcast. + + n <- ncol(frc) + + # For every column, it's biascorrected respectively. + frc_data <- lapply(2:n, function(x) biasCorrect_core(frc[, x], hindcast[, x], obs[, 2], method = method, + scaleType = scaleType, preci = preci)) + frc_data <- do.call('cbind', frc_data) + + } else stop('Wrong TS input, check your TS dimension.') + + + } else if (input == 'hyfo') { + print('Under development...') + } + + names <- colnames(frc) + frc <- data.frame(frc[, 1], frc_data) + colnames(frc) <- names + + return(frc) +} + + +# this is only used to calculate the value column, +biasCorrect_core <- function(frc, hindcast, obs, method = 'delta', scaleType = 'multi', preci = FALSE){ + + + # default is the simplest method in biascorrection, just do simple addition and subtraction. + if (method == 'delta') { + # comes from downscaleR biascorrection method + frcMean <- mean(obs, na.rm = TRUE) + hindcastMean <- mean(hindcast, na.rm = TRUE) + frc <- obs - hindcastMean + frcMean + + } else if (method == 'scaling') { + obsMean <- mean(obs, na.rm = TRUE) + hindcastMean <- mean(hindcast, na.rm = TRUE) + + if (scaleType == 'multi') { + frc <- frc / hindcastMean * obsMean + + } else if (scaleType == 'add') { + frc <- frc - hindcastMean + obsMean + } + + + } else if (method == 'eqm') { + + # To be added, right now too complicated and not so much use. + + } + + + return(frc) +} \ No newline at end of file diff --git a/R/extractPeriod.R b/R/extractPeriod.R index 4af1a3a..98e3201 100644 --- a/R/extractPeriod.R +++ b/R/extractPeriod.R @@ -84,10 +84,11 @@ extractPeriod <- function(datalist, startDate = NULL, endDate = NULL, commonPeri #' @return The extracted dataframe between \code{startDate} and \code{endDate}. # @export extractPeriod_dataframe <- function(dataframe, startDate, endDate) { + # to check whether first column is a date format if (!grepl('-|/', dataframe[1, 1])) { stop('First column is not date or Wrong Date formate, check the format in ?as.Date{base} and use as.Date to convert.') - } + } dataframe[, 1] <- as.Date(dataframe[, 1]) startIndex <- which(dataframe[, 1] == startDate) diff --git a/R/getEnsemble.R b/R/getEnsemble.R index 4801e96..30fb6a8 100644 --- a/R/getEnsemble.R +++ b/R/getEnsemble.R @@ -63,7 +63,7 @@ #' If the end date is so important that cannot be changed, try to solve this problem by resetting #' the example period, to make the event included in the example. #' -#' Good set of example and by can generate good ensemble. +#' Good set of example and interval can generate good ensemble. #' #' \code{buffer} #' Sometimes the model needs to run for a few days to warm up, before the forecast. E.g., if a forecast starts at @@ -296,6 +296,32 @@ getHisEnsem <- function (TS, example, interval = 365, buffer = 0, plot = 'norm', #' by the argument directly, If name is not assigned and \code{output = 'ggplot'} is selected, then #' the system time will be selected as name column. #' +#' @examples +#' +#' filePath <- system.file("extdata", "tnc.nc", package = "hyfo") + +#' # Then if you don't know the variable name, you can use \code{getNcdfVar} to get variable name +#' varname <- getNcdfVar(filePath) +#' nc <- loadNcdf(filePath, varname) +#' a <- getFrcEnsem(nc) +#' +#' # If there is no member session in the dataset, a single time sereis will be extracted. +#' a1 <- getFrcEnsem(tgridData) +#' +#' +#' # The default output is spatially averaged, if there are more than one cells in the dataset, +#' # the mean value of the cells will be calculated. While if you are interested in special cell, +#' # you can assign the cell value. You can also directly use longitude and latitude to extract +#' # time series. +#' +#' getSpatialMap(nc, 'mean') +#' a <- getFrcEnsem(nc, cell = c(6,2)) +#' +#' # From the map, cell = c(6, 2) means lon = -1.4, lat = 43.2, so you can use corrd to locate +#' # your research area and extract time series. +#' b <- getFrcEnsem(nc, coord = c(-1.4, 43.2)) +#' +#' #' @return A ensemble time series extracted from forecating data. #' #' @import ggplot2 diff --git a/man/biasCorrect.Rd b/man/biasCorrect.Rd new file mode 100644 index 0000000..1ab8c93 --- /dev/null +++ b/man/biasCorrect.Rd @@ -0,0 +1,110 @@ +% Generated by roxygen2 (4.1.0.9001): do not edit by hand +% Please edit documentation in R/biasCorrect.R +\name{biasCorrect} +\alias{biasCorrect} +\title{Biascorrect the input timeseries or hyfo dataset} +\usage{ +biasCorrect(frc, hindcast, obs, method = "delta", scaleType = "multi", + input = "TS", preci = FALSE) +} +\arguments{ +\item{frc}{a hyfo grid data output or a dataframe (time series) consists of Date column and one or more value columns, +representing the forecast to be calibrated.} + +\item{hindcast}{a hyfo grid data output or a dataframe(time series) consists of Date column and one or more value columns, +representing the hindcast data. This data will be used in the calibration of the forecast, so it's better to have the same date period as +observation data. Check details for more information.} + +\item{obs}{a hyfo grid data output or a dataframe (time series) consists of Date column and one or more value columns, +representing the observation data.} + +\item{method}{bias correct method, including 'delta', 'scaling'...} + +\item{scaleType}{only when the method "scaling" is chosen, scaleType will be available. Two different types +of scaling method, 'add' and 'mult', which means additive and multiplicative scaling method. More info check +details.} + +\item{input}{If input is a time series, \code{input = 'TS'} needs to be assigned, or hyfo will take it as +an hyfo output grid file. Default is time series input, where in most of the cases we prefer. If your input +is a hyfo output file, \code{input = 'hyfo'}.} + +\item{preci}{If the precipitation is biascorrected, then you have to assign \code{preci = TRUE}. Since for +precipitation, some biascorrect methods may not apply to, or some methods are specially for precipitation. +Default is FALSE.} +} +\description{ +Biascorrect the input time series or dataset, the input time series or dataset should consist of observation, hindcast, and forecast. +observation and hindcast should belong to the same period, in order to calibrate. Then the modified forecast +will be returned. If the input is a time series, first column should be date column and rest columns should be +the value column. If the input is a hyfo dataset, the dataset should be the result of \code{loadNcdf}, or a list +file with the same format. +} +\details{ +Since climate forecast is based on global condition, when downscaling to different regions, it may include +some bias, biascorrection is used then to fix the bias. + +\strong{Hindcast} + +In order to bias correct, we need to pick up some +data from the forecast to train with the observation, which is called hindcast in this function. Hindcast +should have \strong{EVERY} attributes that forecast has. + +Hindcast is also called re-forecast, is the forecast of the past. E.g. you have a forecast from year 2000-2010, assuming now you are in 2005. So from 2000-2005, this period +is the hindcast period, and 2005-2010, this period is the forecast period. + + +\strong{How it works} + +Forecast product has to be calibrated, usually the system is doing forecast in real time. So, e.g., if the +forecast starts from year 2000, assuming you are in year 2003, then you will have 3 years' hindcast +data (year 2000 - 2003), which can be used to calibrate. And your forecast period is (2003-2004) + +E.g. you have observation from 2001 - 2002, this is your input obs. Then you can take the same +period (2001-2002) from the forecast, which is the hindcast period. For forecast, you can take any period. +The program will evaluate the obs and hindcast, to get the modification of the forecast, and then add the +modification to the forecast data. + +\strong{method} + +Different methods used in the bias correction. + +\strong{delta} + +This method consists on adding to the observations the mean change signal (delta method). +This method is applicable to any kind of variable but it is preferable to avoid it for bounded variables + (e.g. precipitation, wind speed, etc.) because values out of the variable range could be obtained + (e.g. negative wind speeds...) + + \strong{scaling} + +This method consists on scaling the simulation with the difference (additive) or quotient (multiplicative) +between the observed and simulated means in the train period. The \code{additive} or \code{multiplicative} +correction is defined by parameter \code{scaling.type} (default is \code{additive}). +The additive version is preferably applicable to unbounded variables (e.g. temperature) +and the multiplicative to variables with a lower bound (e.g. precipitation, because it also preserves the frequency). +} +\examples{ +# Use testdl as an example, we take frc, hindcast and obs fro testdl. +data(testdl) + +# common period has to be extracted in order to better train the forecast. + +datalist <- extractPeriod(testdl, startDate = '1994-1-1', endDate = '1995-10-1') + +frc <- datalist[[1]] +hindcast <- datalist[[2]] +obs <- datalist[[3]] + +# default method is delta +frc_new <- biasCorrect(frc, hindcast, obs) + +# If the variable is precipitation, it cannot be negative value, so use multi scale method +frc_new <- biasCorrect(frc, hindcast, obs, method = 'scaling', scaleType = 'multi') +} +\references{ +Bias correction methods come from \code{biasCorrection} from \code{dowscaleR} + +Santander Meteorology Group (2015). downscaleR: Climate data manipulation and statistical downscaling. R +package version 0.6-0. https://github.com/SantanderMetGroup/downscaleR/wiki +} + diff --git a/man/getFrcEnsem.Rd b/man/getFrcEnsem.Rd index 4694582..ef0ceee 100644 --- a/man/getFrcEnsem.Rd +++ b/man/getFrcEnsem.Rd @@ -52,6 +52,29 @@ need a name column to differentiate one ggplot output from the other. You can as by the argument directly, If name is not assigned and \code{output = 'ggplot'} is selected, then the system time will be selected as name column. } +\examples{ +filePath <- system.file("extdata", "tnc.nc", package = "hyfo") +# Then if you don't know the variable name, you can use \\code{getNcdfVar} to get variable name +varname <- getNcdfVar(filePath) +nc <- loadNcdf(filePath, varname) +a <- getFrcEnsem(nc) + +# If there is no member session in the dataset, a single time sereis will be extracted. +a1 <- getFrcEnsem(tgridData) + + +# The default output is spatially averaged, if there are more than one cells in the dataset, +# the mean value of the cells will be calculated. While if you are interested in special cell, +# you can assign the cell value. You can also directly use longitude and latitude to extract +# time series. + +getSpatialMap(nc, 'mean') +a <- getFrcEnsem(nc, cell = c(6,2)) + +# From the map, cell = c(6, 2) means lon = -1.4, lat = 43.2, so you can use corrd to locate +# your research area and extract time series. +b <- getFrcEnsem(nc, coord = c(-1.4, 43.2)) +} \references{ H. Wickham. ggplot2: elegant graphics for data analysis. Springer New York, 2009. diff --git a/man/getHisEnsem.Rd b/man/getHisEnsem.Rd index ffc1131..7cc64c2 100644 --- a/man/getHisEnsem.Rd +++ b/man/getHisEnsem.Rd @@ -79,7 +79,7 @@ will be changed to 2001-1-1 to 2000-2-29, which keeps the start Date and change If the end date is so important that cannot be changed, try to solve this problem by resetting the example period, to make the event included in the example. -Good set of example and by can generate good ensemble. +Good set of example and interval can generate good ensemble. \code{buffer} Sometimes the model needs to run for a few days to warm up, before the forecast. E.g., if a forecast starts at