[FLINK-12541][container][python] Add support for Python jobs in build script #8609

Closed · wants to merge 3 commits
20 changes: 16 additions & 4 deletions flink-container/docker/Dockerfile
@@ -25,25 +25,37 @@ RUN apk add --no-cache bash snappy libc6-compat
ENV FLINK_INSTALL_PATH=/opt
ENV FLINK_HOME $FLINK_INSTALL_PATH/flink
ENV FLINK_LIB_DIR $FLINK_HOME/lib
+ENV FLINK_OPT_DIR $FLINK_HOME/opt
+ENV FLINK_JOB_ARTIFACTS_DIR $FLINK_INSTALL_PATH/artifacts
ENV PATH $PATH:$FLINK_HOME/bin

# flink-dist can point to a directory or a tarball on the local system
ARG flink_dist=NOT_SET
-ARG job_jar=NOT_SET
+ARG job_artifacts=NOT_SET
+ARG python_version=NOT_SET
# hadoop jar is optional
ARG hadoop_jar=NOT_SET*

+# Install Python
+RUN \
+  if [ "$python_version" = "2" ]; then \
+    apk add --no-cache python; \
+  elif [ "$python_version" = "3" ]; then \
+    apk add --no-cache python3 && ln -s /usr/bin/python3 /usr/bin/python; \
+  fi

# Install build dependencies and flink
ADD $flink_dist $hadoop_jar $FLINK_INSTALL_PATH/
-ADD $job_jar $FLINK_INSTALL_PATH/job.jar
+ADD $job_artifacts/* $FLINK_JOB_ARTIFACTS_DIR/

RUN set -x && \
ln -s $FLINK_INSTALL_PATH/flink-[0-9]* $FLINK_HOME && \
-ln -s $FLINK_INSTALL_PATH/job.jar $FLINK_LIB_DIR && \
+for jar in $FLINK_JOB_ARTIFACTS_DIR/*.jar; do [ -f "$jar" ] || continue; ln -s $jar $FLINK_LIB_DIR; done && \
+if [ -n "$python_version" ]; then ln -s $FLINK_OPT_DIR/flink-python-*-java-binding.jar $FLINK_LIB_DIR; fi && \
if [ -f ${FLINK_INSTALL_PATH}/flink-shaded-hadoop* ]; then ln -s ${FLINK_INSTALL_PATH}/flink-shaded-hadoop* $FLINK_LIB_DIR; fi && \
addgroup -S flink && adduser -D -S -H -G flink -h $FLINK_HOME flink && \
chown -R flink:flink ${FLINK_INSTALL_PATH}/flink-* && \
-chown -R flink:flink ${FLINK_INSTALL_PATH}/job.jar && \
+chown -R flink:flink ${FLINK_JOB_ARTIFACTS_DIR}/ && \
chown -h flink:flink $FLINK_HOME

COPY docker-entrypoint.sh /
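
As a sketch of how the new build arguments fit together when invoking docker build by hand (the tarball name, artifacts directory, and image tag below are illustrative assumptions, not part of this PR; `build.sh` normally assembles this command):

    # Assumes a local flink-dist.tgz and an artifacts/ directory
    # holding the job jar and any Python files.
    docker build \
        --build-arg flink_dist=flink-dist.tgz \
        --build-arg job_artifacts=artifacts \
        --build-arg python_version=3 \
        -t flink-python-job .

The `hadoop_jar` argument can stay at its default; `NOT_SET*` is a glob that matches no files, so the ADD instruction effectively skips it.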
10 changes: 6 additions & 4 deletions flink-container/docker/README.md
@@ -13,26 +13,28 @@ Install the most recent stable version of [Docker](https://docs.docker.com/insta
Images are based on the official Java Alpine (OpenJDK 8) image.

Before building the image, one needs to build the user code jars for the job.
-Assume that the job jar is stored under `<PATH_TO_JOB_JAR>`
+A Flink job can consist of multiple artifacts. In order to specify the required artifacts, they need to be passed to `--job-artifacts` of the build script. The individual paths are comma separated.

If you want to build the Flink image from the version you have checked out locally run:

-build.sh --from-local-dist --job-jar <PATH_TO_JOB_JAR> --image-name <IMAGE_NAME>
+build.sh --from-local-dist --job-artifacts <COMMA_SEPARATED_PATH_TO_JOB_ARTIFACTS> [--with-python2|--with-python3] --image-name <IMAGE_NAME>

Note that you first need to call `mvn package -pl flink-dist -am` to build the Flink binaries.

If you want to build the Flink image from an archive stored under `<PATH_TO_ARCHIVE>` run:

-build.sh --from-archive <PATH_TO_ARCHIVE> --job-jar <PATH_TO_JOB_JAR> --image-name <IMAGE_NAME>
+build.sh --from-archive <PATH_TO_ARCHIVE> --job-artifacts <COMMA_SEPARATED_PATH_TO_JOB_ARTIFACTS> [--with-python2|--with-python3] --image-name <IMAGE_NAME>

If you want to build the Flink image for a specific version of Flink/Hadoop/Scala run:

-build.sh --from-release --flink-version 1.6.0 --hadoop-version 2.8 --scala-version 2.11 --image-name <IMAGE_NAME>
+build.sh --from-release --flink-version 1.6.0 --hadoop-version 2.8 --scala-version 2.11 --job-artifacts <COMMA_SEPARATED_PATH_TO_JOB_ARTIFACTS> [--with-python2|--with-python3] --image-name <IMAGE_NAME>

Please note that from Flink 1.8 on, the Hadoop version is optional and you can build the Flink image without providing any Hadoop version.

The script will try to download the released version from the Apache archive.

+The artifacts specified in <COMMA_SEPARATED_PATH_TO_JOB_ARTIFACTS> will be copied to directory /opt/artifacts of the built image.
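
To check where the artifacts end up, one can list that directory in the finished image. This is a hedged example: the image name is a placeholder, and `--entrypoint` is overridden in case the image defines its own:

    docker run --rm --entrypoint ls <IMAGE_NAME> /opt/artifacts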

## Deploying via Docker compose

The `docker-compose.yml` contains the following parameters:
30 changes: 22 additions & 8 deletions flink-container/docker/build.sh
@@ -20,9 +20,9 @@
usage() {
cat <<HERE
Usage:
-build.sh --job-jar <path-to-job-jar> --from-local-dist [--image-name <image>]
-build.sh --job-jar <path-to-job-jar> --from-archive <path-to-dist-archive> [--image-name <image>]
-build.sh --job-jar <path-to-job-jar> --from-release --flink-version <x.x.x> --scala-version <x.xx> [--hadoop-version <x.x>] [--image-name <image>]
+build.sh --job-artifacts <comma-separated-paths-to-job-artifacts> [--with-python2|--with-python3] --from-local-dist [--image-name <image>]
+build.sh --job-artifacts <comma-separated-paths-to-job-artifacts> [--with-python2|--with-python3] --from-archive <path-to-dist-archive> [--image-name <image>]
+build.sh --job-artifacts <comma-separated-paths-to-job-artifacts> [--with-python2|--with-python3] --from-release --flink-version <x.x.x> --scala-version <x.xx> [--hadoop-version <x.x>] [--image-name <image>]
build.sh --help

If the --image-name flag is not used the built image name will be 'flink-job'.
@@ -35,10 +35,16 @@ while [[ $# -ge 1 ]]
do
  key="$1"
  case $key in
-    --job-jar)
-      JOB_JAR_PATH="$2"
+    --job-artifacts)
+      JOB_ARTIFACTS_PATH="$2"
      shift
      ;;
+    --with-python2)
+      PYTHON_VERSION="2"
+      ;;
+    --with-python3)
+      PYTHON_VERSION="3"
+      ;;
    --from-local-dist)
      FROM_LOCAL="true"
      ;;
@@ -93,8 +99,16 @@ trap cleanup EXIT

mkdir -p "${TMPDIR}"

-JOB_JAR_TARGET="${TMPDIR}/job.jar"
-cp ${JOB_JAR_PATH} ${JOB_JAR_TARGET}
+JOB_ARTIFACTS_TARGET="${TMPDIR}/artifacts"
+mkdir -p ${JOB_ARTIFACTS_TARGET}
+
+OLD_IFS="$IFS"
+IFS=","
+job_artifacts_array=(${JOB_ARTIFACTS_PATH})
+IFS="$OLD_IFS"
+for artifact in ${job_artifacts_array[@]}; do
+  cp ${artifact} ${JOB_ARTIFACTS_TARGET}/
+done

checkUrlAvailable() {
curl --output /dev/null --silent --head --fail $1
@@ -170,4 +184,4 @@ else

fi

-docker build --build-arg flink_dist="${FLINK_DIST}" --build-arg job_jar="${JOB_JAR_TARGET}" --build-arg hadoop_jar="${SHADED_HADOOP}" -t "${IMAGE_NAME}" .
+docker build --build-arg flink_dist="${FLINK_DIST}" --build-arg job_artifacts="${JOB_ARTIFACTS_TARGET}" --build-arg hadoop_jar="${SHADED_HADOOP}" --build-arg python_version="${PYTHON_VERSION}" -t "${IMAGE_NAME}" .
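
The artifact list above is split on commas by temporarily overriding IFS. A minimal standalone sketch of that technique (the artifact paths are hypothetical examples, not files this PR ships):

    #!/bin/bash
    JOB_ARTIFACTS_PATH="/tmp/job.jar,/tmp/udf.py"

    OLD_IFS="$IFS"
    IFS=","                                     # split on commas only
    job_artifacts_array=(${JOB_ARTIFACTS_PATH}) # unquoted expansion word-splits into the array
    IFS="$OLD_IFS"                              # restore default splitting

    for artifact in "${job_artifacts_array[@]}"; do
      echo "would copy: ${artifact}"            # prints each path on its own line
    done

Note that, like the script itself, this assumes the individual paths contain no commas, spaces, or glob characters.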
2 changes: 1 addition & 1 deletion flink-container/kubernetes/README.md
@@ -29,7 +29,7 @@ In non HA mode, you should first start the job cluster service:

In order to deploy the job cluster entrypoint run:

-`FLINK_IMAGE_NAME=<IMAGE_NAME> FLINK_JOB=<JOB_NAME> FLINK_JOB_PARALLELISM=<PARALLELISM> envsubst < job-cluster-job.yaml.template | kubectl create -f -`
+`FLINK_IMAGE_NAME=<IMAGE_NAME> FLINK_JOB_PARALLELISM=<PARALLELISM> envsubst < job-cluster-job.yaml.template | kubectl create -f -`

Review thread on this change:

Contributor: Why did we remove FLINK_JOB here?

Contributor (Author): I found that FLINK_JOB had already been removed in commit 753e0c6, so this just corrects the documentation here. What do you think?

Contributor: Good catch. Now this makes sense.

Now you should see the `flink-job-cluster` job being started by calling `kubectl get job`.
