apply data.table

Yuanchao-Xu · Feb 20, 2017 · 4a6be13 · 4a6be13
1 parent 5422e8d
commit 4a6be13
Show file tree

Hide file tree

Showing 41 changed files with 529 additions and 301 deletions.
diff --git a/.Rhistory b/.Rhistory
diff --git a/.Rproj.user/D1D10CF6/pcs/find-in-files.pper b/.Rproj.user/D1D10CF6/pcs/find-in-files.pper
@@ -1,10 +1,10 @@
 {
  "dialog-state" : {
- "caseSensitive" : true,
+ "caseSensitive" : false,
  "filePatterns" : [
  ],
  "path" : "~/GitHub/hyfo",
- "query" : "newFrc",
+ "query" : "lmcoef",
  "regex" : false
  }
 }
diff --git a/.Rproj.user/D1D10CF6/pcs/source-pane.pper b/.Rproj.user/D1D10CF6/pcs/source-pane.pper
@@ -1,3 +1,3 @@
 {
- "activeTab" : 1
+ "activeTab" : 6
 }
diff --git a/.Rproj.user/D1D10CF6/pcs/workbench-pane.pper b/.Rproj.user/D1D10CF6/pcs/workbench-pane.pper
@@ -1,6 +1,6 @@
 {
  "TabSet1" : 0,
- "TabSet2" : 0,
+ "TabSet2" : 3,
  "TabZoom" : {
  }
 }
diff --git a/.Rproj.user/D1D10CF6/sdb/per/t/390DEBE1 b/.Rproj.user/D1D10CF6/sdb/per/t/390DEBE1
@@ -1,14 +1,14 @@
 {
  "collab_server" : "",
- "contents" : "hyfo 1.3.8\n==========\nDate: 2017-1-8\n\n- add changes to the new version of roxygen2\n- change biasCorrection's description, change default prThreshold to 0, since not every one is an expert and know how to set it, better keep the original unchanged.\n\n\n\nhyfo 1.3.7\n==========\nDate: 2016-3-1\n\n- add one more argument to plotTS, to cancel the marking of NA values. For some users, NA values are too many to be plotted.\n\n\n\nhyfo 1.3.6\n==========\nDate: 2015-12-15\n\n- transfer from ncdf to ncdf4\n- grepAndMatch created, for capturing dimension names.\n- minor bug fixed about the loadNcdf, when no dimension found, it will give an error indicating.\n- change most of the match function into grepAndMatch, in order to deal with different dimension names.\n- add name attributes to gridfile$xyCoords$x,y, when writeNcdf, the dim names will be taken from that attribute, which can be exactly the same with the original. \n- bug fixed for nc files without members.\n\nNOTE:\n====\n- for hyfo$Data part, when load and write using ncdf4, there will be very little differences compared to the original, which cannot be addressed. If you first load an ncdf file, then write it, then load it again. The data part may have very little difference, less than 10E-5.\n\n\n\nhyfo 1.3.5\n==========\nDate: 2015-12-6\n\n- travis check passed, change the rgdal version from 0.9-3 back to 0.8-16 due to the lack of packages on travis ubuntu.\n- changed .yml file to fix the problem with No repository set, so cyclic dependency check skipped.\n- on CRAN\n- fully supported for windows, Linux and OS.\n\n\n\nhyfo 1.3.3\n==========\nDate: 2015-11-27\n\n- Delete readData_folder, since it's only windows based, add information to get special version for windows users.\n- travis test added.\n\n\nhyfo 1.3.2\n==========\nDate: 2015-11-7\n\n- bug fixed about getPreciBar, signature('data.frame')\n- vignettes updated about bug and hided the warning information.\n- Add how to debug in the documentation for the generic functions.\n\n\n\nhyfo 1.3.1\n==========\nDate: 2015-11-3\n\n- new generic function biasCorrect, extractPeriod, resample, getAnnual, getPreciBar added. No need to designate input type any more, R will detect automatically.\n- coordinates conversion function extracted.\n- new user manual about real time bias correction and resample added.\n\n\n\nhyfo 1.2.9\n==========\nDate: 2015-10-30\n\n- new biasFactor S4 class added, to avoid set the input type every time.\n- operational bias correction has been changed to generic function.\n- news file added.\n\n\n\nhyfo 1.2.8\n==========\nDate: 2015.10.10\n\n- operational bias correction added, in normal function.",
+ "contents" : "hyfo 1.3.9\n==========\nDate: 2017-2-20\n\n- apply data.table package to facilitate data processing\n\n\n\nhyfo 1.3.8\n==========\nDate: 2017-1-8\n\n- add changes to the new version of roxygen2\n- change biasCorrection's description, change default prThreshold to 0, since not every one is an expert and know how to set it, better keep the original unchanged.\n\n\n\nhyfo 1.3.7\n==========\nDate: 2016-3-1\n\n- add one more argument to plotTS, to cancel the marking of NA values. For some users, NA values are too many to be plotted.\n\n\n\nhyfo 1.3.6\n==========\nDate: 2015-12-15\n\n- transfer from ncdf to ncdf4\n- grepAndMatch created, for capturing dimension names.\n- minor bug fixed about the loadNcdf, when no dimension found, it will give an error indicating.\n- change most of the match function into grepAndMatch, in order to deal with different dimension names.\n- add name attributes to gridfile$xyCoords$x,y, when writeNcdf, the dim names will be taken from that attribute, which can be exactly the same with the original. \n- bug fixed for nc files without members.\n\nNOTE:\n====\n- for hyfo$Data part, when load and write using ncdf4, there will be very little differences compared to the original, which cannot be addressed. If you first load an ncdf file, then write it, then load it again. The data part may have very little difference, less than 10E-5.\n\n\n\nhyfo 1.3.5\n==========\nDate: 2015-12-6\n\n- travis check passed, change the rgdal version from 0.9-3 back to 0.8-16 due to the lack of packages on travis ubuntu.\n- changed .yml file to fix the problem with No repository set, so cyclic dependency check skipped.\n- on CRAN\n- fully supported for windows, Linux and OS.\n\n\n\nhyfo 1.3.3\n==========\nDate: 2015-11-27\n\n- Delete readData_folder, since it's only windows based, add information to get special version for windows users.\n- travis test added.\n\n\nhyfo 1.3.2\n==========\nDate: 2015-11-7\n\n- bug fixed about getPreciBar, signature('data.frame')\n- vignettes updated about bug and hided the warning information.\n- Add how to debug in the documentation for the generic functions.\n\n\n\nhyfo 1.3.1\n==========\nDate: 2015-11-3\n\n- new generic function biasCorrect, extractPeriod, resample, getAnnual, getPreciBar added. No need to designate input type any more, R will detect automatically.\n- coordinates conversion function extracted.\n- new user manual about real time bias correction and resample added.\n\n\n\nhyfo 1.2.9\n==========\nDate: 2015-10-30\n\n- new biasFactor S4 class added, to avoid set the input type every time.\n- operational bias correction has been changed to generic function.\n- news file added.\n\n\n\nhyfo 1.2.8\n==========\nDate: 2015.10.10\n\n- operational bias correction added, in normal function.",
  "created" : 1483876849677.000,
  "dirty" : false,
  "encoding" : "ASCII",
  "folds" : "",
- "hash" : "395220279",
+ "hash" : "676496319",
  "id" : "390DEBE1",
- "lastKnownWriteTime" : 1483973453,
- "last_content_update" : 1483973453652,
+ "lastKnownWriteTime" : 1487522938,
+ "last_content_update" : 1487522938948,
  "path" : "~/GitHub/hyfo/NEWS",
  "project_path" : "NEWS",
  "properties" : {

diff --git a/.Rproj.user/D1D10CF6/sdb/per/t/47CB7F65 b/.Rproj.user/D1D10CF6/sdb/per/t/47CB7F65
@@ -0,0 +1,20 @@
+{
+ "collab_server" : "",
+ "contents" : "chooseDim <- function(array, dim, value, drop = FALSE) { \n # Create list representing arguments supplied to [\n # bquote() creates an object corresponding to a missing argument\n dimnames <- attributes(array)$dimensions\n \n indices <- rep(list(bquote()), length(dim(array)))\n indices[[dim]] <- value\n \n if (dim(array)[dim] < max(value)) {\n stop('Chosen member exceeds the member range of the dataset.')\n }\n \n # Generate the call to [\n call <- as.call(c(\n list(as.name(\"[\"), quote(array)),\n indices,\n list(drop = drop)))\n # Print it, just to make it easier to see what's going on\n # Print(call)\n \n # Finally, evaluate it\n output <- eval(call)\n \n if (length(dim(output)) == length(dimnames)) {\n attributes(output)$dimensions <- dimnames\n } else if (length(dim(output)) < length(dimnames)){\n \n # In this case, one dimension is dropped, if value is a number \n # and drop == T, this situation can appear. So the dropped dimemsion\n # should be the chosen dimension.\n i <- 1:length(dimnames)\n # get rid of the dropped dimensin\n i <- i[-dim]\n attributes(output)$dimensions <- dimnames[i]\n }\n \n return(output)\n}\n\n\nadjustDim <- function(data, ref = 'no') {\n # input data is an array\n # ref is the Data part of a hyfo file, used as reference\n # Further may be arranged into a seperate function\n # the input reference will be put at first, then the rest \n if (is.null(data)) return(NULL)\n if (identical(ref, 'no')) {\n # Default\n refOrder <- c('lon', 'lat', 'time')\n } else if (is.character(ref)) {\n refOrder <- ref\n } else {\n # Get dimension from input\n refOrder <- attributes(ref)$dimensions\n }\n \n att <- attributes(data)$dimensions\n if (is.null(att)) stop('No dimnames in the input data attributes, please use loadNcdf to load data.')\n if (identical(att, refOrder)) return(data)\n \n dimIndex <- seq(1, length(att))\n dimIndex1 <- grepAndMatch(refOrder, att)# match can apply to simple cases\n \n \n # for array this works, or setdiff can be used here to find the nomatch element.\n dimIndex2 <- dimIndex[-dimIndex1]# choose nomatch\n \n \n data <- aperm(data, c(dimIndex1, dimIndex2))\n attributes(data)$dimensions <- att[c(dimIndex1, dimIndex2)]\n \n return(data)\n}\n\n# Belong to checkDimLength\ncalcuDim <- function(data, dim) {\n dimIndex <- grepAndMatch(dim, attributes(data)$dimensions)\n dimLength <- dim(data)[dimIndex]\n return(dimLength)\n}\n",
+ "created" : 1487525892454.000,
+ "dirty" : false,
+ "encoding" : "ASCII",
+ "folds" : "",
+ "hash" : "2303557273",
+ "id" : "47CB7F65",
+ "lastKnownWriteTime" : 1483875653,
+ "last_content_update" : 1483875653,
+ "path" : "~/GitHub/hyfo/R/array_dimension.R",
+ "project_path" : "R/array_dimension.R",
+ "properties" : {
+ },
+ "relative_order" : 19,
+ "source_on_save" : false,
+ "source_window" : "",
+ "type" : "r_source"
+}
diff --git a/.Rproj.user/D1D10CF6/sdb/per/t/6511719A b/.Rproj.user/D1D10CF6/sdb/per/t/6511719A
@@ -0,0 +1,20 @@
+{
+ "collab_server" : "",
+ "contents" : "#' Fill gaps in the rainfall time series.\n#'\n#' @param dataset A dataframe with first column the time, the rest columns are rainfall data of different gauges\n#' @param corPeriod A string showing the period used in the correlation computing, \n#' e.g. daily, monthly, yearly.\n#' @return The filled dataframe\n#' @details\n#' the gap filler follows the rules below:\n#' \n#' 1. The correlation coefficient of every two columns (except time column) is calculated.\n#' the correlation coefficient calculation can be based on 'daily', 'monthly', 'annual',\n#' in each case, the daily data, the monthly mean daily data and annual mean daily data of \n#' each column will be taken in the correlation calculation.\n#' \n#' Then the correlation matrix is got, then based on the matrix, for each column, \n#' the 1st, 2nd, 3rd,... correlated column will be got. So if there is missing value in the\n#' column, it will get data from orderly 1st, 2nd, 3rd column.\n#' \n#' 2. The simple linear regress is calculated between every two columns. When generating the\n#' linear coefficient, the incept should be force to 0. i.e. y = a*x + b should be forec to \n#' y = a*x.\n#' \n#' 3. Gap filling. E.g., on a certain date, there is a missing value in column A, then the\n#' correlation order is column B, column C, column D, which means A should take values from\n#' B firstly, if B is also missing data, then C, then D.\n#' \n#' Assuming finally value from column C is taken. Then according to step 2, A = a*C, then the\n#' final value filled in column A is missing_in_A = a*value_in_C, a is the linear coeffcient.\n#' \n#' @examples\n#' b <- read.table(text = ' Date AAA BBB CCC DDD EEE\n#' 49 1999-12-15 24.8 21.4 25.6 35.0 17.4\n#' 50 1999-12-16 NA 0.6 1.5 6.3 2.5\n#' 51 1999-12-17 NA 16.3 20.3 NA 19.2\n#' 52 1999-12-18 13 1.6 NA 6.3 0.0\n#' 53 1999-12-19 10 36.4 12.5 26.8 24.9\n#' 54 1999-12-20 NA 0.0 0.0 0.2 0.0\n#' 55 1999-12-21 0.2 0.0 0.0 0.0 0.0\n#' 56 1999-12-22 0.0 0.0 0.0 0.0 0.0')\n#' \n#' b1 <- fillGap(b) # if corPeriod is missing, 'daily' is taken as default.\n#' \n#' data(testdl)\n#' a <- extractPeriod(testdl, commonPeriod = TRUE)\n#' a1 <- list2Dataframe(a)\n#' a2 <- fillGap(a1)\n#' a3 <- fillGap(a1, corPeriod = 'monthly')\n#' \n#' \n#' # More examples can be found in the user manual on http:https://yuanchao-xu.github.io/hyfo/\n#' \n#' @references\n#' Gap fiiling method based on correlation and linear regression.\n#' \n#' \\itemize{\n#' \\item Hirsch, Robert M., et al. \"Statistical analysis of hydrologic data.\" Handbook of hydrology. (1992): 17-1.\n#' Salas, Jose D. \"Analysis and modeling of hydrologic time series.\" Handbook of hydrology 19 (1993): 1-72.\n#' \n#' }\n#' \n#' \n#' @export\nfillGap <- function(dataset, corPeriod = 'daily') {\n \n if (!grepl('-|/', dataset[1, 1])) {\n stop('First column is not date or Wrong Date formate, check the format in ?as.Date{base} \n and use as.Date to convert.')\n }\n Date <- as.Date(dataset[, 1])\n data <- data.frame(dataset[, 2:dim(dataset)[2]])\n names <- colnames(data)\n \n corN <- fillGap_cor(data, corPeriod = corPeriod, Date = Date)\n cat('\\nCorrelation Coefficient\\n')\n print(corN)\n \n corOrder <- apply(corN, MARGIN = 1, FUN = function(x) order(-x))\n corOrder <- corOrder[2:dim(corOrder)[1], ]\n corOrderName <- t(apply(corOrder, MARGIN = 2, FUN = function(x) names[x]))\n \n cat('\\nCorrelation Order\\n')\n colnames(corOrderName) <- seq(1 : dim(corOrderName)[2])\n print(corOrderName)\n \n lmCoef <- fillGap_lmCoef(data, corOrder)\n cat('\\nLinear Coefficients\\n')\n rownames(lmCoef) <- seq(1 : dim(corOrderName)[2])\n print(t(lmCoef))\n \n output <- lapply(1:dim(data)[2], fillGap_column, data = data,\n corOrder = corOrder, lmCoef = lmCoef)\n output <- data.frame(output)\n colnames(output) <- names\n \n output <- cbind(Date, output)\n \n return(output)\n}\n\n\n#' Get monthly rainfall\n#' \n#' @param TS A rainfall time series.\n#' @param year A list showing the year index of the time series.\n#' @param mon A list showing the mon index of the time series.\n#' @return the monthly rainfall matrix of the rainfall time series.\nmonthlyPreci <- function(TS, year, mon) {\n \n # monthly daily mean is used in order not to affected by missing values.\n monTS <- tapply(TS, INDEX = list(year, mon), FUN = mean, na.rm = TRUE)\n output <- t(monTS)\n dim(output) <- c(dim(monTS)[1] * dim(monTS)[2], 1)\n return(output)\n}\n\n\nfillGap_column <- function(i, data, corOrder, lmCoef) {\n TS <- data[, i] # extract target column\n l <- dim(data)[2] # length\n \n for (j in 1:l) {\n if (!any(is.na(TS))) break\n NAindex <- which(is.na(TS))\n TS[NAindex] <- round(lmCoef[j, i] * data[NAindex, corOrder[j, i]], 3)\n \n if (j == l) stop('Error: One time consists of all NA values')\n }\n \n return(TS)\n}\n\n\n#' @importFrom stats cor na.omit\n#' @references \n#' \n#' \\itemize{\n#' \\item R Core Team (2015). R: A language and environment for statistical computing. R Foundation for\n#' Statistical Computing, Vienna, Austria. URL http:https://www.R-project.org/.\n#' }\n#' \n#' \n\nfillGap_cor <- function(data, corPeriod = 'daily', Date) {\n \n names <- colnames(data)\n year <- format(Date, '%Y')\n \n if (corPeriod == 'monthly') {\n #based on monthly rainfall\n mon <- format(Date, '%m')\n monthlyPreci <- lapply(data, FUN = monthlyPreci, year = year, mon = mon)\n corData <- do.call('cbind', monthlyPreci)\n } else if (corPeriod == 'yearly') {\n year <- format(Date, '%Y')\n # yearly daily mean is used in order not to affected by missing values.\n annualPreci <- lapply(data, FUN = function(x) tapply(x, INDEX = year, FUN = mean, na.rm = TRUE))\n corData <- do.call('cbind', annualPreci)\n } else if (corPeriod == 'daily') {\n corData <- data\n } else {\n stop('Pleas choose among \"daily\", \"monthly\", \"yearly\".')\n }\n \n corData <- data.frame(na.omit(corData))\n colnames(corData) <- names\n \n corN <- cor(corData)\n \n return(corN)\n \n} \n\n#' @importFrom utils combn\n#' @importFrom stats coef lm\n#' @references \n#' R Core Team (2015). R: A language and environment for statistical computing. R Foundation for\n#' Statistical Computing, Vienna, Austria. URL http:https://www.R-project.org/.\n#' \nfillGap_lmCoef <- function(data, corOrder) {\n l <- dim(data)[2]\n m <- diag(l)# m is the coeficients matrix\n m[lower.tri(m)] <- combn(data, 2, function(x) coef(lm(x[, 2] ~ x[, 1] + 0)))\n tm <- t(m)\n \n tm[lower.tri(tm)] <- combn(data, 2, function(x) coef(lm(x[, 1] ~ x[, 2] + 0)))\n \n m <- t(tm)\n \n lmCoef <- lapply(1 : l, function(x) m[x,corOrder[, x]])\n lmCoef <- do.call('rbind', lmCoef)\n rownames(lmCoef) <- colnames(data)\n \n return(t(lmCoef))\n}\n\n",
+ "created" : 1487522438368.000,
+ "dirty" : false,
+ "encoding" : "ASCII",
+ "folds" : "",
+ "hash" : "214900332",
+ "id" : "6511719A",
+ "lastKnownWriteTime" : 1487594210,
+ "last_content_update" : 1487594210897,
+ "path" : "~/GitHub/hyfo/R/fillGap.R",
+ "project_path" : "R/fillGap.R",
+ "properties" : {
+ },
+ "relative_order" : 16,
+ "source_on_save" : false,
+ "source_window" : "",
+ "type" : "r_source"
+}
diff --git a/.Rproj.user/D1D10CF6/sdb/per/t/6DDA2A7B b/.Rproj.user/D1D10CF6/sdb/per/t/6DDA2A7B