forked from johnkerl/miller
-
Notifications
You must be signed in to change notification settings - Fork 0
/
mlrmap_flatten_unflatten.go
254 lines (222 loc) · 8.15 KB
/
mlrmap_flatten_unflatten.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
// ================================================================
// FLATTEN/UNFLATTEN
//
// These are used by the flatten/unflatten verbs and DSL functions. They are
// crucial to the operation of Miller 6 wherein records have full Mlrval
// values, i.e. they can be arrays/maps as well as int/float/string.
//
// When we read JSON and write (say) CSV, we have two choices for handling the
// fact that JSON handles multi-level data and CSV does not:
//
// (1) JSON-stringify values, using the json-stringify verb or json_stringify
// DSL function. For example, the array of ints [1,2,3] becomes the string
// "[1,2,3]" which works fine as a CSV field.
//
// (2) Flatten them by key-spreading. For example, the single field with key
// "x" with value {"a":1,"b":2} flattens to the *pair* of fields x:a=1 and
// x:b=2.
//
// The latter (flatten/unflatten) is used implicitly (i.e. unless the user
// explicitly requests otherwise) when we convert to/from JSON.
// ================================================================
package mlrval
import (
"strings"
"github.com/johnkerl/miller/internal/pkg/lib"
)
// ----------------------------------------------------------------
// Flatten key-spreads every array-valued or map-valued field of the record,
// in place, joining key components with the given separator. It is a special
// case of FlattenFields, kept separate so that the default path (which runs
// on ALL Miller records whenever we convert to/from JSON) need not test a
// field-name set.
//
// Examples:
// * The single field x = {"a": 7, "b": 8, "c": 9} becomes the three fields
//   x.a = 7, x.b = 8, x.c = 9.
// * The single field x = [7,8,9] becomes the three fields
//   x.1 = 7, x.2 = 8, x.3 = 9.
func (mlrmap *Mlrmap) Flatten(separator string) {
	// Fast path: a record with no collection-valued fields is left untouched.
	if !mlrmap.isFlattenable() {
		return
	}

	flattened := NewMlrmapAsRecord()
	for entry := mlrmap.Head; entry != nil; entry = entry.Next {
		// Non-collection values carry over by reference, in original order.
		if !entry.Value.IsArrayOrMap() {
			flattened.PutReference(entry.Key, entry.Value)
			continue
		}

		// Collection values are replaced by their key-spread pieces.
		spread := entry.Value.FlattenToMap(entry.Key, separator)
		for piece := spread.GetMap().Head; piece != nil; piece = piece.Next {
			flattened.PutReference(piece.Key, piece.Value)
		}
	}

	// Overwrite the receiver's contents with the rebuilt record.
	*mlrmap = *flattened
}
// ----------------------------------------------------------------
// FlattenFields is for mlr flatten -f. It key-spreads, in place, only those
// array-valued or map-valued fields whose names are present (with value true)
// in fieldNameSet; every other field is carried over by reference unchanged.
func (mlrmap *Mlrmap) FlattenFields(
	fieldNameSet map[string]bool,
	separator string,
) {
	// Fast path: nothing in the record is a collection, so nothing to do.
	if !mlrmap.isFlattenable() {
		return
	}

	result := NewMlrmapAsRecord()
	for entry := mlrmap.Head; entry != nil; entry = entry.Next {
		selected := entry.Value.IsArrayOrMap() && fieldNameSet[entry.Key]
		if !selected {
			result.PutReference(entry.Key, entry.Value)
			continue
		}

		spread := entry.Value.FlattenToMap(entry.Key, separator)
		for piece := spread.GetMap().Head; piece != nil; piece = piece.Next {
			result.PutReference(piece.Key, piece.Value)
		}
	}

	// Overwrite the receiver's contents with the rebuilt record.
	*mlrmap = *result
}
// ----------------------------------------------------------------
// isFlattenable reports whether at least one field value in the record is an
// array or a map. It is an optimization for Flatten and FlattenFields: when
// every value is a non-collection, they can return without moving any data.
func (mlrmap *Mlrmap) isFlattenable() bool {
	entry := mlrmap.Head
	for entry != nil {
		if entry.Value.IsArrayOrMap() {
			return true
		}
		entry = entry.Next
	}
	return false
}
// ----------------------------------------------------------------
// Unflatten is for mlr unflatten without -f. It undoes Flatten, and is used
// for conversion from non-JSON to JSON: the receiver's contents are replaced
// by the result of CopyUnflattened. If there are fields x.a, x.b, x.c, etc.,
// they're put into a single field x with map-valued value keyed by "a", "b",
// "c". Special case: if the resulting string keys are string representations
// of 1, 2, 3, etc. -- without gaps -- then the map is converted to an array.
//
// Examples:
//
// * The three fields x.a = 7, x.b = 8, x.c = 9 become
//   the single field x = {"a": 7, "b": 8, "c": 9}.
//
// * The three fields x.1 = 7, x.2 = 8, x.3 = 9 become
//   the single field x = [7,8,9].
//
// * The two fields x.1 = 7, x.3 = 9 become
//   the single field x = {"1": 7, "3": 9}
func (mlrmap *Mlrmap) Unflatten(
	separator string,
) {
	unflattened := mlrmap.CopyUnflattened(separator)
	*mlrmap = *unflattened
}
// CopyUnflattened builds and returns a new record in which every field whose
// name contains the separator has been folded into a nested value under its
// first name component; the receiver itself is not reassigned. Fields whose
// names do not contain the separator are carried over by reference, after
// the "{}"/"[]" terminal conversion done by unflattenTerminal.
func (mlrmap *Mlrmap) CopyUnflattened(
	separator string,
) *Mlrmap {
	result := NewMlrmapAsRecord()
	// Top-level names (e.g. the "x" of "x.a") that were assembled from
	// separator-bearing keys and so need the array check below.
	baseNames := make(map[string]bool)

	// We visit this loop once for x.a, again for x.b, and so on.
	for entry := mlrmap.Head; entry != nil; entry = entry.Next {
		// Plain field name with no separator: carry it over.
		if !strings.Contains(entry.Key, separator) {
			result.PutReference(entry.Key, unflattenTerminal(entry.Value))
			continue
		}

		indices := SplitAXHelper(entry.Key, separator)
		lib.InternalCodingErrorIf(len(indices.arrayval) < 1)

		// For an input field name "x.a", remember the "x".
		baseNames[indices.arrayval[0].String()] = true

		// Assign $x["a"] = 7, or $x["b"] = 8, etc.
		result.PutIndexed(
			CopyMlrvalArray(indices.arrayval),
			unflattenTerminal(entry.Value).Copy(),
		)
	}

	// For each name that was turned into a map, see whether its keys were
	// like "1", "2", etc., and if so convert it to an array. This undoes how
	// Flatten flattens arrays.
	for baseName := range baseNames {
		current := result.Get(baseName)
		lib.InternalCodingErrorIf(current == nil)
		result.PutReference(baseName, current.Arrayify())
	}

	return result
}
// ----------------------------------------------------------------
// UnflattenFields is for mlr unflatten -f. It replaces the receiver's
// contents with the result of CopyUnflattenFields. See the comments on
// Unflatten; this is largely a copy of that logic, split out separately
// since Unflatten needn't check a fieldNameSet.
func (mlrmap *Mlrmap) UnflattenFields(
	fieldNameSet map[string]bool,
	separator string,
) {
	unflattened := mlrmap.CopyUnflattenFields(fieldNameSet, separator)
	*mlrmap = *unflattened
}
// CopyUnflattenFields builds and returns a new record in which fields whose
// names contain the separator are folded into a nested value under their
// first name component -- but only when that first component is present
// (with value true) in fieldNameSet. All other fields are carried over by
// reference, after the "{}"/"[]" terminal conversion done by
// unflattenTerminal. The receiver itself is not reassigned.
func (mlrmap *Mlrmap) CopyUnflattenFields(
	fieldNameSet map[string]bool,
	separator string,
) *Mlrmap {
	result := NewMlrmapAsRecord()
	// Top-level names (e.g. the "x" of "x.a") that were assembled from
	// separator-bearing keys and so need the array check below.
	baseNames := make(map[string]bool)

	// We visit this loop once for x.a, again for x.b, and so on.
	for entry := mlrmap.Head; entry != nil; entry = entry.Next {
		// Is the field name something-separator-something?
		if strings.Contains(entry.Key, separator) {
			indices := SplitAXHelper(entry.Key, separator)
			lib.InternalCodingErrorIf(len(indices.arrayval) < 1)

			// For an input field name "x.a", the base name is "x".
			baseName := indices.arrayval[0].String()
			if fieldNameSet[baseName] {
				// Assign $x["a"] = 7, or $x["b"] = 8, etc.
				result.PutIndexed(
					CopyMlrvalArray(indices.arrayval),
					unflattenTerminal(entry.Value).Copy(),
				)
				baseNames[baseName] = true
				continue
			}
		}

		// Either there is no separator in the name, or the base name was
		// not requested: carry the field over as-is.
		result.PutReference(entry.Key, unflattenTerminal(entry.Value))
	}

	// For each name that was turned into a map, see whether its keys were
	// like "1", "2", etc., and if so convert it to an array. This undoes how
	// Flatten flattens arrays.
	for baseName := range baseNames {
		current := result.Get(baseName)
		lib.InternalCodingErrorIf(current == nil)
		result.PutReference(baseName, current.Arrayify())
	}

	return result
}
// ----------------------------------------------------------------
// unflattenTerminal reverses a special case of the flatten operation, in
// which an empty map and an empty array flatten to the strings "{}" and
// "[]". (Without that special case, key-spreading would make such fields
// disappear entirely: the field "x" -> {"a": 1, "b": 2} spreads to two
// fields, "x" -> {"a": 1} spreads to one, so "x" -> {} would spread to
// zero fields.)
//
// A string input whose text is exactly "{}" yields a new empty map value,
// and one whose text is exactly "[]" yields a new empty array value. Any
// other input, including every non-string, is returned as-is.
func unflattenTerminal(input *Mlrval) *Mlrval {
	if input.IsString() {
		switch input.printrep {
		case "{}":
			return FromMap(NewMlrmap())
		case "[]":
			return FromArray(make([]*Mlrval, 0))
		}
	}
	return input
}
// SplitAXHelper is split out for the benefit of BIF_splitax and
// BIF_unflatten. It splits input on separator using lib.SplitString and
// returns an array-valued Mlrval holding one string-valued element per
// resulting piece, in order.
func SplitAXHelper(input string, separator string) *Mlrval {
	pieces := lib.SplitString(input, separator)
	result := FromArray(make([]*Mlrval, len(pieces)))
	for idx := range pieces {
		result.arrayval[idx] = FromString(pieces[idx])
	}
	return result
}