READ_PATH?=s3a://grupozap-data-engineer-test/
WRITE_PATH?=output
USER_KEY?=anonymous_id
TIMESTAMP_KEY?=device_sent_timestamp
MAX_SESSION_SECONDS?=1800
# GROUP_KEY options: browser_family, device_family, os_family
GROUP_KEY?=device_family
HUB_PUBLISHER?=coqueirotree
HUB_PASSWORD?=$(shell cat .hub_password)
SPARK_VERSION?=2.4.4
HADOOP_VERSION?=3.1.2
# BUMP_LEVEL options: patch, minor, major
BUMP_LEVEL?=patch
APP_IMAGE=${HUB_PUBLISHER}/spark-session-calc${SPARK_VERSION}-hadoop${HADOOP_VERSION}-aws-support
SUBMIT_VERSION=$(shell cat docker-spark/submit/VERSION)
APP_VERSION=$(shell cat session-calc/VERSION)
GIT_BRANCH=$(shell git branch | sed -n -e 's/^\* \(.*\)/\1/p')
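
# All targets below are commands, not files; declaring them .PHONY keeps them
# runnable even if files with the same names ever appear.
.PHONY: test_app build_app login push_app bump_app release_app pull_app \
	build_docker_spark bump_docker_spark release_docker_spark pull_docker_spark \
	run_docker_spark run_app clean_app session_calc run_eda eda debug_app

# Every variable above can be overridden per invocation, e.g. (hypothetical
# bucket name, GROUP_KEY from the options listed above):
#   make session_calc READ_PATH=s3a://my-bucket/events/ GROUP_KEY=os_family
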
test_app:
	@docker run --rm --name docker-pyspark \
		-v $(shell pwd)/session-calc:/app:ro \
		-e READ_PATH=test/data/part0.json \
		-e WRITE_PATH=../output \
		-e USER_KEY=user_key \
		-e TIMESTAMP_KEY=timestamp_key \
		-e MAX_SESSION_SECONDS=3600 \
		-e GROUP_KEY=group_field1 \
		coqueirotree/docker-pyspark:0.0.1 \
		python3 -m unittest
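
# Build the application image from session-calc/Dockerfile, tagged with the
# contents of session-calc/VERSION and as :latest.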
build_app:
	@cd session-calc ; \
	docker build \
		--build-arg HUB_PUBLISHER="${HUB_PUBLISHER}" \
		--build-arg SPARK_VERSION="${SPARK_VERSION}" \
		--build-arg HADOOP_VERSION="${HADOOP_VERSION}" \
		--build-arg SUBMIT_VERSION="${SUBMIT_VERSION}" \
		-t "${APP_IMAGE}:${APP_VERSION}" .
	@docker tag ${APP_IMAGE}:${APP_VERSION} ${APP_IMAGE}:latest
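
# Log in to Docker Hub. Note that docker itself warns that passing --password
# on the command line is insecure; piping the secret to
# `docker login --password-stdin` is a safer alternative.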
login:
	@docker login --username ${HUB_PUBLISHER} --password ${HUB_PASSWORD}
push_app: login
	@docker push ${APP_IMAGE}:${APP_VERSION}
	@docker push ${APP_IMAGE}:latest
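
# Bump session-calc/VERSION and push the commit to the current branch.
# BUMP_LEVEL selects which component is incremented, e.g.:
#   make bump_app BUMP_LEVEL=minor
# The commit message re-reads the VERSION file so it records the new version.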
bump_app:
	@python3 -m pip install bumpversion==0.5.3
	@bumpversion --current-version ${APP_VERSION} ${BUMP_LEVEL} session-calc/VERSION
	@git add session-calc/VERSION
	@git commit -m "session-calc version bump to $(shell cat session-calc/VERSION)"
	@git push origin ${GIT_BRANCH}
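
# Convenience target: build the image and push both tags in one step.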
release_app: build_app push_app
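
# Fetch prebuilt images from the registry instead of building locally.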
pull_app:
	@docker pull ${APP_IMAGE}:${APP_VERSION}
	@docker pull ${APP_IMAGE}:latest
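
# The docker-spark targets delegate to docker-spark's own Makefile, once per
# cluster component (base, master, worker, submit).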
build_docker_spark:
	@cd docker-spark; CLUSTER_COMPONENT=base make build
	@cd docker-spark; CLUSTER_COMPONENT=master make build
	@cd docker-spark; CLUSTER_COMPONENT=worker make build
	@cd docker-spark; CLUSTER_COMPONENT=submit make build
bump_docker_spark:
	@cd docker-spark; CLUSTER_COMPONENT=base BUMP_LEVEL=${BUMP_LEVEL} make bump
	@cd docker-spark; CLUSTER_COMPONENT=master BUMP_LEVEL=${BUMP_LEVEL} make bump
	@cd docker-spark; CLUSTER_COMPONENT=worker BUMP_LEVEL=${BUMP_LEVEL} make bump
	@cd docker-spark; CLUSTER_COMPONENT=submit BUMP_LEVEL=${BUMP_LEVEL} make bump
release_docker_spark:
	@cd docker-spark; CLUSTER_COMPONENT=base make release
	@cd docker-spark; CLUSTER_COMPONENT=master make release
	@cd docker-spark; CLUSTER_COMPONENT=worker make release
	@cd docker-spark; CLUSTER_COMPONENT=submit make release
pull_docker_spark:
	@cd docker-spark; CLUSTER_COMPONENT=base make pull
	@cd docker-spark; CLUSTER_COMPONENT=master make pull
	@cd docker-spark; CLUSTER_COMPONENT=worker make pull
	@cd docker-spark; CLUSTER_COMPONENT=submit make pull
# Support is currently limited to local docker-spark execution.
run_docker_spark:
	@cd docker-spark; docker-compose -f docker-compose.yml up -d
	@echo "Waiting 10 seconds for the docker-spark cluster to start."; sleep 10
run_app: run_docker_spark
	@docker run --rm --name session-calc \
		-e ENABLE_INIT_DAEMON=false \
		-e READ_PATH=${READ_PATH} \
		-e WRITE_PATH=${WRITE_PATH} \
		-e USER_KEY=${USER_KEY} \
		-e TIMESTAMP_KEY=${TIMESTAMP_KEY} \
		-e MAX_SESSION_SECONDS=${MAX_SESSION_SECONDS} \
		-e GROUP_KEY=${GROUP_KEY} \
		-v $(shell pwd)/session-calc/output:/app/output:rw \
		--link spark-master:spark-master \
		--net docker-spark_default ${APP_IMAGE}:${APP_VERSION}
clean_app:
	@echo "Stopping the docker-spark cluster and removing its containers."
	@cd docker-spark; docker-compose -f docker-compose.yml down
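
# End-to-end run: bring the cluster up, compute sessions, then tear it down.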
session_calc: run_app clean_app
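
# Exploratory data analysis: mounts the eda/ directory read-only and runs it
# in the same application image against the cluster.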
run_eda: run_docker_spark
	@docker run --rm --name eda \
		-e ENABLE_INIT_DAEMON=false \
		-e READ_PATH=${READ_PATH} \
		-v $(shell pwd)/eda:/app:ro \
		--link spark-master:spark-master \
		--net docker-spark_default ${APP_IMAGE}:${APP_VERSION}
eda: run_eda clean_app
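
# Open an interactive bash shell inside the application image, with the
# output directory mounted read-write, for debugging.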
debug_app:
	@docker run -it \
		-v $(shell pwd)/session-calc/output:/app/output:rw \
		${APP_IMAGE}:${APP_VERSION} bash