From 00d90a4fe3ff93d19ff6be3ed7b9cf9d0f5b0dfd Mon Sep 17 00:00:00 2001
From: Dian Fu
Date: Thu, 13 Jun 2019 10:55:38 +0200
Subject: [PATCH] [FLINK-12788][container] Add support for Python jobs in build script

This closes #8609.
---
 flink-container/docker/Dockerfile | 20 ++++++++++++++++----
 flink-container/docker/README.md  | 10 ++++++----
 flink-container/docker/build.sh   | 30 ++++++++++++++++++++++--------
 3 files changed, 44 insertions(+), 16 deletions(-)

diff --git a/flink-container/docker/Dockerfile b/flink-container/docker/Dockerfile
index 5839d1a1e85be..853ac3600d02a 100644
--- a/flink-container/docker/Dockerfile
+++ b/flink-container/docker/Dockerfile
@@ -25,25 +25,37 @@ RUN apk add --no-cache bash snappy libc6-compat
 ENV FLINK_INSTALL_PATH=/opt
 ENV FLINK_HOME $FLINK_INSTALL_PATH/flink
 ENV FLINK_LIB_DIR $FLINK_HOME/lib
+ENV FLINK_OPT_DIR $FLINK_HOME/opt
+ENV FLINK_JOB_ARTIFACTS_DIR $FLINK_INSTALL_PATH/artifacts
 ENV PATH $PATH:$FLINK_HOME/bin
 
 # flink-dist can point to a directory or a tarball on the local system
 ARG flink_dist=NOT_SET
-ARG job_jar=NOT_SET
+ARG job_artifacts=NOT_SET
+ARG python_version=NOT_SET
 # hadoop jar is optional
 ARG hadoop_jar=NOT_SET*
 
+# Install Python
+RUN \
+  if [ "$python_version" = "2" ]; then \
+    apk add --no-cache python; \
+  elif [ "$python_version" = "3" ]; then \
+    apk add --no-cache python3 && ln -s /usr/bin/python3 /usr/bin/python; \
+  fi
+
 # Install build dependencies and flink
 ADD $flink_dist $hadoop_jar $FLINK_INSTALL_PATH/
-ADD $job_jar $FLINK_INSTALL_PATH/job.jar
+ADD $job_artifacts/* $FLINK_JOB_ARTIFACTS_DIR/
 RUN set -x && \
   ln -s $FLINK_INSTALL_PATH/flink-[0-9]* $FLINK_HOME && \
-  ln -s $FLINK_INSTALL_PATH/job.jar $FLINK_LIB_DIR && \
+  for jar in $FLINK_JOB_ARTIFACTS_DIR/*.jar; do [ -f "$jar" ] || continue; ln -s $jar $FLINK_LIB_DIR; done && \
+  if [ -n "$python_version" ]; then ln -s $FLINK_OPT_DIR/flink-python-*-java-binding.jar $FLINK_LIB_DIR; fi && \
   if [ -f ${FLINK_INSTALL_PATH}/flink-shaded-hadoop* ]; then ln -s ${FLINK_INSTALL_PATH}/flink-shaded-hadoop* $FLINK_LIB_DIR; fi && \
   addgroup -S flink && adduser -D -S -H -G flink -h $FLINK_HOME flink && \
   chown -R flink:flink ${FLINK_INSTALL_PATH}/flink-* && \
-  chown -R flink:flink ${FLINK_INSTALL_PATH}/job.jar && \
+  chown -R flink:flink ${FLINK_JOB_ARTIFACTS_DIR}/ && \
   chown -h flink:flink $FLINK_HOME
 
 COPY docker-entrypoint.sh /
diff --git a/flink-container/docker/README.md b/flink-container/docker/README.md
index b1569a3052f43..42e49723909f7 100644
--- a/flink-container/docker/README.md
+++ b/flink-container/docker/README.md
@@ -13,26 +13,28 @@ Install the most recent stable version of [Docker](https://docs.docker.com/insta
 Images are based on the official Java Alpine (OpenJDK 8) image. Before building the image, one needs to build the user code jars for the job.
 
-Assume that the job jar is stored under `<path-to-job-jar>`
+A Flink job can consist of multiple artifacts. The required artifacts are passed to the `--job-artifacts` option of the build script as a comma-separated list of paths.
 
 If you want to build the Flink image from the version you have checked out locally run:
 
-    build.sh --from-local-dist --job-jar <path-to-job-jar> --image-name <image-name>
+    build.sh --from-local-dist --job-artifacts <comma-separated-paths-to-job-artifacts> [--with-python2|--with-python3] --image-name <image-name>
 
 Note that you first need to call `mvn package -pl flink-dist -am` to build the Flink binaries.
 If you want to build the Flink image from an archive stored under `<path-to-archive>` run:
 
-    build.sh --from-archive <path-to-archive> --job-jar <path-to-job-jar> --image-name <image-name>
+    build.sh --from-archive <path-to-archive> --job-artifacts <comma-separated-paths-to-job-artifacts> [--with-python2|--with-python3] --image-name <image-name>
 
 If you want to build the Flink image for a specific version of Flink/Hadoop/Scala run:
 
-    build.sh --from-release --flink-version 1.6.0 --hadoop-version 2.8 --scala-version 2.11 --image-name <image-name>
+    build.sh --from-release --flink-version 1.6.0 --hadoop-version 2.8 --scala-version 2.11 --job-artifacts <comma-separated-paths-to-job-artifacts> [--with-python2|--with-python3] --image-name <image-name>
 
 Please note that as of Flink 1.8 the Hadoop version is optional and you can build the Flink image without providing any Hadoop version. The script will try to download the released version from the Apache archive.
 
+The artifacts specified in <comma-separated-paths-to-job-artifacts> will be copied to the directory /opt/artifacts of the built image.
+
 ## Deploying via Docker compose
 
 The `docker-compose.yml` contains the following parameters:
diff --git a/flink-container/docker/build.sh b/flink-container/docker/build.sh
index 179260369971e..937cb6959daea 100755
--- a/flink-container/docker/build.sh
+++ b/flink-container/docker/build.sh
@@ -20,9 +20,9 @@
 usage() {
   cat <<HERE
 Usage:
-  build.sh --job-jar <path-to-job-jar> --from-local-dist [--image-name <image>]
-  build.sh --job-jar <path-to-job-jar> --from-archive <path-to-dist-archive> [--image-name <image>]
-  build.sh --job-jar <path-to-job-jar> --from-release --flink-version <x.x.x> --scala-version <x.xx> [--hadoop-version <x.x>] [--image-name <image>]
+  build.sh --job-artifacts <comma-separated-paths-to-job-artifacts> [--with-python2|--with-python3] --from-local-dist [--image-name <image>]
+  build.sh --job-artifacts <comma-separated-paths-to-job-artifacts> [--with-python2|--with-python3] --from-archive <path-to-dist-archive> [--image-name <image>]
+  build.sh --job-artifacts <comma-separated-paths-to-job-artifacts> [--with-python2|--with-python3] --from-release --flink-version <x.x.x> --scala-version <x.xx> [--hadoop-version <x.x>] [--image-name <image>]
   build.sh --help
 
   If the --image-name flag is not used the built image name will be 'flink-job'.
@@ -35,10 +35,16 @@ while [[ $# -ge 1 ]]
 do
   key="$1"
   case $key in
-    --job-jar)
-    JOB_JAR_PATH="$2"
+    --job-artifacts)
+    JOB_ARTIFACTS_PATH="$2"
     shift
     ;;
+    --with-python2)
+    PYTHON_VERSION="2"
+    ;;
+    --with-python3)
+    PYTHON_VERSION="3"
+    ;;
     --from-local-dist)
     FROM_LOCAL="true"
     ;;
@@ -93,8 +99,16 @@ trap cleanup EXIT
 
 mkdir -p "${TMPDIR}"
 
-JOB_JAR_TARGET="${TMPDIR}/job.jar"
-cp ${JOB_JAR_PATH} ${JOB_JAR_TARGET}
+JOB_ARTIFACTS_TARGET="${TMPDIR}/artifacts"
+mkdir -p ${JOB_ARTIFACTS_TARGET}
+
+OLD_IFS="$IFS"
+IFS=","
+job_artifacts_array=(${JOB_ARTIFACTS_PATH})
+IFS="$OLD_IFS"
+for artifact in ${job_artifacts_array[@]}; do
+  cp ${artifact} ${JOB_ARTIFACTS_TARGET}/
+done
 
 checkUrlAvailable() {
   curl --output /dev/null --silent --head --fail $1
@@ -170,4 +184,4 @@ else
 
 fi
 
-docker build --build-arg flink_dist="${FLINK_DIST}" --build-arg job_jar="${JOB_JAR_TARGET}" --build-arg hadoop_jar="${SHADED_HADOOP}" -t "${IMAGE_NAME}" .
+docker build --build-arg flink_dist="${FLINK_DIST}" --build-arg job_artifacts="${JOB_ARTIFACTS_TARGET}" --build-arg hadoop_jar="${SHADED_HADOOP}" --build-arg python_version="${PYTHON_VERSION}" -t "${IMAGE_NAME}" .
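
For reference, a minimal sketch of how the updated script could be invoked end to end; the artifact paths and the image name below are hypothetical placeholders chosen for illustration, not values taken from the patch:

    # Build an image from a locally built Flink distribution, bundling a
    # job jar plus a Python UDF module (hypothetical paths), and install
    # Python 3 inside the image via --with-python3.
    ./build.sh \
        --from-local-dist \
        --job-artifacts /path/to/my-job.jar,/path/to/my_udf.py \
        --with-python3 \
        --image-name my-flink-job

    # Bypass the image's entrypoint to confirm the artifacts were staged.
    docker run --rm --entrypoint ls my-flink-job /opt/artifacts

Per the Dockerfile changes above, both artifacts land under /opt/artifacts, any *.jar among them is additionally symlinked into Flink's lib directory, and python3 is installed with a python symlink.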
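The script splits the comma-separated --job-artifacts value by temporarily overriding IFS, as the build.sh hunk shows. A self-contained sketch of that same pattern, with a hypothetical input list:

    #!/usr/bin/env bash
    # Swap IFS to "," so word splitting turns the list into a bash array,
    # then restore the original IFS. The expansion is deliberately left
    # unquoted: that is where the splitting happens.
    JOB_ARTIFACTS_PATH="/tmp/a.jar,/tmp/b.py"   # hypothetical input

    OLD_IFS="$IFS"
    IFS=","
    job_artifacts_array=(${JOB_ARTIFACTS_PATH})
    IFS="$OLD_IFS"

    for artifact in "${job_artifacts_array[@]}"; do
        echo "would copy: ${artifact}"
    done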