-
Notifications
You must be signed in to change notification settings - Fork 6
/
extractPeriod.R
189 lines (159 loc) · 7.5 KB
/
extractPeriod.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
#' Extract period from list or dataframe.
#'
#' Extract common period or certain period from a list of different dataframes of time series, or from a
#' dataframe.
#' NOTE: all the dates in the datalist should follow the format in ?as.Date{base}.
#' @param datalist A list of different dataframes of time series .
#' @param startDate A Date showing the start of the extract period, default as NULL, check details.
#' @param endDate A Date showing the end of the extract period, default as NULL, check details.
#' @param commonPeriod A boolean showing whether the common period is extracted. If chosen, startDate and endDate
#' should be NULL.
#' @param dataframe A dataframe with first column Date, the rest columns value. If your input is a
#' dataframe, not time series list, you can put \code{dataframe = yourdataframe}. And certain period will be
#' extracted. Note: if your input is a time series, that means all the columns share the same period of date.
#' @param year extract certain year in the entire time series. if you want to extract year 2000, set \code{year = 2000}
#' @param month extract certain months in a year. e.g. if you want to extract Jan, Feb of each year,
#' set \code{month = c(1, 2)}.
#' @details
#' \strong{startDate and endDate}
#'
#' If startDate and endDate are assigned, then certain period between startDate and endDate will be returned,
#' for both datalist input and dataframe input.
#'
#' If startDate and endDate are NOT assigned, then,
#'
#' if input is a datalist, the startDate and endDate of the common period of different datalists will be assigned
#' to the startDate and endDate.
#'
#' if input is a dataframe, the startDate and endDate of the input dataframe will be assigned to the startDate
#' and endDate . Since different value columns share a common Date column in a dataframe input.
#'
#' \strong{year and month}
#'
#' For year crossing month input, hyfo will take from the year before. E.g. if \code{month = c(10, 11, 12, 1)},
#' and \code{year = 1999}, hyfo will take month 10, 11 and 12 from year 1998, and month 1 from 1999.You DO NOT
#' have to set \code{year = 1998 : 1999}.
#'
#' Well, if you set \code{year = 1998 : 1999}, hyfo will take month 10, 11 and 12 from year 1997, and month 1 from 1998,
#' then, take month 10, 11 and 12 from year 1998, month 1 from 1999. So you only have to care about the latter year.
#'
#'
#'
#' @return A list or a dataframe with all the time series inside containing the same period.
#' @examples
#' # Generate timeseries datalist. Each data frame consists of a Date and a value.
#'
#' AAA <- data.frame(
#' # date column
#' Date = seq(as.Date('1990-10-28'),as.Date('1997-4-1'),1),
#' # value column
#' AAA = sample(1:100,length(seq(as.Date('1990-10-28'),as.Date('1997-4-1'),1)), repl = TRUE))
#'
#' BBB <- data.frame(
#' Date = seq(as.Date('1993-3-28'),as.Date('1999-1-1'),1),
#' BBB = sample(1:100,length(seq(as.Date('1993-3-28'),as.Date('1999-1-1'),1)), repl = TRUE))
#'
#' CCC <- data.frame(
#' Date = seq(as.Date('1988-2-2'),as.Date('1996-1-1'),1),
#' CCC = sample(1:100,length(seq(as.Date('1988-2-2'),as.Date('1996-1-1'),1)), repl = TRUE))
#'
#' list <- list(AAA, BBB, CCC)# dput() and dget() can be used to save and load list file.
#'
#' list_com <- extractPeriod(list, commonPeriod = TRUE)
#'
#' # list_com is the extracted datalist.
#' str(list_com)
#'
#' # If startDate and endDate is provided, the record between them will be extracted.
#' # make sure startDate is later than any startDate in each dataframe and endDate is
#' # earlier than any endDate in each dataframe.
#'
#' data(testdl)
#' datalist_com1 <- extractPeriod(testdl, startDate = '1994-1-1', endDate = '1995-10-1')
#'
#'
#' dataframe <- list2Dataframe(datalist_com1)
#' # now we have a dataframe to extract certain months and years.
#' dataframe_new <- extractPeriod(dataframe = dataframe, month = c(1,2,3))
#' dataframe_new <- extractPeriod(dataframe = dataframe, month = c(12,1,2), year = 1995)
#'
#'
#' @importFrom zoo as.Date
#' @references
#' Achim Zeileis and Gabor Grothendieck (2005). zoo: S3 Infrastructure for Regular and Irregular Time
#' Series. Journal of Statistical Software, 14(6), 1-27. URL http:https://www.jstatsoft.org/v14/i06/
#' @export
extractPeriod <- function(datalist, startDate = NULL, endDate = NULL, commonPeriod = FALSE,
dataframe = NULL, year = NULL, month = NULL) {
if (!is.null(dataframe)) {
dataset <- extractPeriod_dataframe(dataframe, startDate = startDate, endDate = endDate, year = year,
month = month)
} else {
if (!is.null(startDate) & !is.null(endDate) & commonPeriod == FALSE) {
dataset <- lapply(datalist, extractPeriod_dataframe, startDate = startDate, endDate = endDate, year = year,
month = month)
} else if (is.null(startDate) & is.null(endDate) & commonPeriod == TRUE) {
Dates <- lapply(datalist, extractPeriod_getDate)
Dates <- do.call('rbind', Dates)
startDate <- as.Date(max(Dates[, 1]))
endDate <- as.Date(min(Dates[, 2]))
dataset <- lapply(datalist, extractPeriod_dataframe, startDate = startDate, endDate = endDate, year = year,
month = month)
} else {
stop('Enter startDate and endDate, set commonPeriod as False, or simply set commonPeriod as TRUE')
}
}
return(dataset)
}
extractPeriod_dataframe <- function(dataframe, startDate, endDate, year = NULL, month = NULL) {
# to check whether first column is a date format
if (!grepl('-|/', dataframe[1, 1])) {
stop('First column is not date or Wrong Date formate, check the format in ?as.Date{base}
and use as.Date to convert.')
}
dataframe[, 1] <- as.Date(dataframe[, 1])
if (is.null(startDate)) startDate <- dataframe[1, 1]
if (is.null(endDate)) endDate <- tail(dataframe[, 1], 1)
startIndex <- which(dataframe[, 1] == startDate)
endIndex <- which(dataframe[, 1] == endDate)
if (length(startIndex) == 0 | length(endIndex) == 0) {
stop('startDate and endDate exceeds the date limits in dataframe. Check datalsit please.')
}
output <- dataframe[startIndex:endIndex, ]
# month needs to be firstly extracted, then year, otherwise, redundant months will be extracted.
if (!is.null(month)) {
Date <- as.POSIXlt(output[, 1])
mon <- Date$mon + 1
# %in% can deal with multiple equalities
DateIndex <- which(mon %in% month)
output <- output[DateIndex, ]
}
if (!is.null(year)) {
Date <- as.POSIXlt(output[, 1])
yea <- Date$year + 1900
mon <- Date$mon + 1
if (!any(sort(month) != month) | is.null(month)) {
DateIndex <- which(yea %in% year)
output <- output[DateIndex, ]
# if year crossing than sort(month) != month
} else {
startIndex <- which(yea == year - 1 & mon == month[1])[1]
endIndex <- tail(which(yea == year & mon == tail(month, 1)), 1)
output <- output[startIndex:endIndex, ]
}
}
return(output)
}
#' @importFrom utils tail
#' @references
#' R Core Team (2015). R: A language and environment for statistical computing. R Foundation for
#' Statistical Computing, Vienna, Austria. URL http:https://www.R-project.org/.
extractPeriod_getDate <- function(dataset) {
if (!grepl('-|/', dataset[1, 1])) {
stop('First column is not date or Wrong Date formate, check the format in ?as.Date{base},
and use as.Date to convert.')
}
start <- as.Date(dataset[1, 1])
end <- as.Date(tail(dataset[, 1], 1))
return(c(start, end))
}