-
Notifications
You must be signed in to change notification settings - Fork 0
/
Chapter04.sql
166 lines (139 loc) · 3.65 KB
/
Chapter04.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
-- Data Science with SQL Server Quick Start Guide
-- Chapter 04
USE AdventureWorksDW2017;
GO
SELECT DISTINCT CommuteDistance
FROM dbo.vTargetMail
ORDER BY CommuteDistance;
-- Frequencies with ordered CommuteDistance
WITH freqCTE AS
(
SELECT CASE v.CommuteDistance
WHEN '0-1 Miles' THEN '1 - 0-1 Miles'
WHEN '1-2 Miles' THEN '2 - 1-2 Miles'
WHEN '2-5 Miles' THEN '3 - 2-5 Miles'
WHEN '5-10 Miles' THEN '4 - 5-10 Miles'
WHEN '10+ Miles' THEN '5 - 10+ Miles'
END AS CommuteDistance,
COUNT(v.CommuteDistance) AS AbsFreq,
CAST(ROUND(100. * (COUNT(v.CommuteDistance)) /
(SELECT COUNT(*) FROM vTargetMail), 0) AS INT) AS AbsPerc
FROM dbo.vTargetMail AS v
GROUP BY v.CommuteDistance
)
SELECT CommuteDistance,
AbsFreq,
SUM(AbsFreq)
OVER(ORDER BY CommuteDistance
ROWS BETWEEN UNBOUNDED PRECEDING
AND CURRENT ROW) AS CumFreq,
AbsPerc,
SUM(AbsPerc)
OVER(ORDER BY CommuteDistance
ROWS BETWEEN UNBOUNDED PRECEDING
AND CURRENT ROW) AS CumPerc,
CAST(REPLICATE('*',AbsPerc) AS VARCHAR(50)) AS Histogram
FROM freqCTE
ORDER BY CommuteDistance;
GO
-- Centers
/* Make customers 15 years younger
USE AdventureWorksDW2017;
GO
SELECT MIN(Age), MAX(Age)
FROM dbo.vTargetMail;
UPDATE dbo.DimCustomer
SET BirthDate = DATEADD(year, 15, BirthDate)
SELECT MIN(Age), MAX(Age)
FROM dbo.vTargetMail;
GO
-- 16 TO 87
*/
-- Median
SELECT DISTINCT
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY Age) OVER () AS Median
FROM dbo.vTargetMail;
-- Arithmetic mean
SELECT AVG(1.0*Age) AS Mean
FROM dbo.vtargetMail;
-- Spread
-- Range
SELECT MAX(Age) - MIN(Age) AS Range
FROM dbo.vTargetMail;
-- IQR
SELECT DISTINCT
PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY 1.0*Age) OVER () -
PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY 1.0*Age) OVER () AS IQR
FROM dbo.vTargetMail;
-- Standard deviation, coefficient of variation
SELECT STDEV(1.0*Age) AS StDevAge,
STDEV(1.0*YearlyIncome) AS StDevIncome,
STDEV(1.0*Age) / AVG(1.0*Age) AS CVAge,
STDEV(1.0*YearlyIncome) / AVG(1.0*YearlyIncome) AS CVIncome
FROM dbo.vTargetMail;
GO
-- Higher population moments
-- Skewness
WITH SkewCTE AS
(
SELECT SUM(1.0*Age) AS rx,
SUM(POWER(1.0*Age,2)) AS rx2,
SUM(POWER(1.0*Age,3)) AS rx3,
COUNT(1.0*Age) AS rn,
STDEV(1.0*Age) AS stdv,
AVG(1.0*Age) AS av
FROM dbo.vTargetMail
)
SELECT
(rx3 - 3*rx2*av + 3*rx*av*av - rn*av*av*av)
/ (stdv*stdv*stdv) * rn / (rn-1) / (rn-2) AS Skewness
FROM SkewCTE;
-- Kurtosis
WITH KurtCTE AS
(
SELECT SUM(1.0*Age) AS rx,
SUM(POWER(1.0*Age,2)) AS rx2,
SUM(POWER(1.0*Age,3)) AS rx3,
SUM(POWER(1.0*Age,4)) AS rx4,
COUNT(1.0*Age) AS rn,
STDEV(1.0*Age) AS stdv,
AVG(1.*Age) AS av
FROM dbo.vTargetMail
)
SELECT
(rx4 - 4*rx3*av + 6*rx2*av*av - 4*rx*av*av*av + rn*av*av*av*av)
/ (stdv*stdv*stdv*stdv) * rn * (rn+1) / (rn-1) / (rn-2) / (rn-3)
- 3.0 * (rn-1) * (rn-1) / (rn-2) / (rn-3) AS Kurtosis
FROM KurtCTE;
GO
/***************************/
/* More code as a bonus:-) */
/***************************/
-- Mode
SELECT TOP (1) WITH TIES Age, COUNT(*) AS Number
FROM dbo.vTargetMail
GROUP BY Age
ORDER BY COUNT(*) DESC;
-- Mode with RANK()
WITH AgeCTE AS
(
SELECT Age, COUNT(*) AS Number
FROM dbo.vTargetMail
GROUP BY Age
),
AgeRankCTE AS
(
SELECT Age, Number,
RANK() OVER (ORDER BY Number DESC) AS AgeRank
FROM AgeCTE
)
SELECT Age, Number
FROM AgeRankCTE
WHERE AgeRank = 1;
-- Geometric mean
SELECT POWER(10.0000, SUM(LOG10(1.0*Age))/COUNT(*)) AS GeometricMean
FROM dbo.vtargetMail;
-- Harmonic mean
SELECT COUNT(*)/SUM(1.0/Age) AS HarmonicMean
FROM dbo.vtargetMail;
GO