diff --git a/flink-container/docker/Dockerfile b/flink-container/docker/Dockerfile
index 57454799dd8e4..5839d1a1e85be 100644
--- a/flink-container/docker/Dockerfile
+++ b/flink-container/docker/Dockerfile
@@ -30,16 +30,20 @@ ENV PATH $PATH:$FLINK_HOME/bin
 # flink-dist can point to a directory or a tarball on the local system
 ARG flink_dist=NOT_SET
 ARG job_jar=NOT_SET
+# hadoop jar is optional; NOTE(review): the default is a glob (NOT_SET*), presumably so that ADD tolerates a missing jar - confirm
+ARG hadoop_jar=NOT_SET*
 
 # Install build dependencies and flink
-ADD $flink_dist $FLINK_INSTALL_PATH
+ADD $flink_dist $hadoop_jar $FLINK_INSTALL_PATH/
 ADD $job_jar $FLINK_INSTALL_PATH/job.jar
 
 RUN set -x && \
-  ln -s $FLINK_INSTALL_PATH/flink-* $FLINK_HOME && \
+  ln -s $FLINK_INSTALL_PATH/flink-[0-9]* $FLINK_HOME && \
   ln -s $FLINK_INSTALL_PATH/job.jar $FLINK_LIB_DIR && \
+  if [ -f ${FLINK_INSTALL_PATH}/flink-shaded-hadoop* ]; then ln -s ${FLINK_INSTALL_PATH}/flink-shaded-hadoop* $FLINK_LIB_DIR; fi && \
   addgroup -S flink && adduser -D -S -H -G flink -h $FLINK_HOME flink && \
-  chown -R flink:flink $FLINK_INSTALL_PATH/flink-* && \
+  chown -R flink:flink ${FLINK_INSTALL_PATH}/flink-* && \
+  chown -R flink:flink ${FLINK_INSTALL_PATH}/job.jar && \
   chown -h flink:flink $FLINK_HOME
 
 COPY docker-entrypoint.sh /
diff --git a/flink-container/docker/README.md b/flink-container/docker/README.md
index 3ff70c6b3bd5f..b1569a3052f43 100644
--- a/flink-container/docker/README.md
+++ b/flink-container/docker/README.md
@@ -28,6 +28,8 @@ If you want to build the Flink image from an archive stored under `
+
+Please note that starting with Flink 1.8 the Hadoop version is optional, so you can build the Flink image without providing any Hadoop version.
 
 The script will try to download the released version from the Apache archive.
diff --git a/flink-container/docker/build.sh b/flink-container/docker/build.sh
index 614a9c3790368..179260369971e 100755
--- a/flink-container/docker/build.sh
+++ b/flink-container/docker/build.sh
@@ -22,10 +22,11 @@ usage() {
 Usage:
   build.sh --job-jar --from-local-dist [--image-name ]
   build.sh --job-jar --from-archive [--image-name ]
-  build.sh --job-jar --from-release --flink-version --hadoop-version --scala-version [--image-name ]
+  build.sh --job-jar --from-release --flink-version --scala-version [--hadoop-version ] [--image-name ]
   build.sh --help
 
-  If the --image-name flag is not used the built image name will be 'flink'.
+  If the --image-name flag is not used the built image name will be 'flink-job'.
+  Before Flink-1.8, the hadoop-version is required. And from Flink-1.8, the hadoop-version is optional and would download pre-bundled shaded Hadoop jar package if provided.
 HERE
   exit 1
 }
@@ -57,7 +58,8 @@ key="$1"
     shift
     ;;
     --hadoop-version)
-    HADOOP_VERSION="$(echo "$2" | sed 's/\.//')"
+    HADOOP_VERSION="$2"
+    HADOOP_MAJOR_VERSION="$(echo "${HADOOP_VERSION}" | sed 's/\.//')"
     shift
     ;;
     --scala-version)
@@ -94,15 +96,63 @@ mkdir -p "${TMPDIR}"
 JOB_JAR_TARGET="${TMPDIR}/job.jar"
 cp ${JOB_JAR_PATH} ${JOB_JAR_TARGET}
 
+# Probes the url given as $1 with an HTTP HEAD request; on failure prints a hint and
+# leaves through usage (which itself ends in 'exit 1', so 'exit 2' is only a safeguard).
+checkUrlAvailable() {
+    curl --output /dev/null --silent --head --fail "$1"
+    ret=$?
+    if [[ ${ret} -ne 0 ]]; then
+        echo "The url $1 not available, please check your parameters, exit..."
+        usage
+        exit 2
+    fi
+}
+
 if [ -n "${FROM_RELEASE}" ]; then
 
-  [[ -n "${FLINK_VERSION}" ]] && [[ -n "${HADOOP_VERSION}" ]] && [[ -n "${SCALA_VERSION}" ]] || usage
+  [[ -n "${FLINK_VERSION}" ]] && [[ -n "${SCALA_VERSION}" ]] || usage
+
+  FLINK_BASE_URL="$(curl -s https://www.apache.org/dyn/closer.cgi\?preferred\=true)flink/flink-${FLINK_VERSION}/"
+
+  # Encode major.minor as major*100+minor, so that 1.10 (110) compares greater than 1.8 (108)
+  FLINK_MAJOR_VERSION=$(echo "${FLINK_VERSION}" | awk -F. '{ print $1 * 100 + $2 }')
+
+  if [[ ${FLINK_MAJOR_VERSION} -ge 108 ]]; then
+
+    # From Flink 1.8 on the dist file name carries no Hadoop part; the shaded Hadoop uber jar is fetched separately when --hadoop-version is given
+    if [[ -n "${HADOOP_VERSION}" ]]; then
+        echo "After Flink-1.8, we would download pre-bundle hadoop jar package."
+        # List the published versions and pick the first one that contains the requested Hadoop version
+        SHADED_HADOOP_BASE_URL="https://repo.maven.apache.org/maven2/org/apache/flink/flink-shaded-hadoop2-uber/"
+        SHADED_HADOOP_VERSION="$(curl -s "${SHADED_HADOOP_BASE_URL}" | grep -o "title=\"[0-9.-]*/\"" | sed 's/title=\"//g; s/\/"//g' | grep -F -- "${HADOOP_VERSION}" | head -1)"
+        SHADED_HADOOP_FILE_NAME="flink-shaded-hadoop2-uber-${SHADED_HADOOP_VERSION}.jar"
+
+        CURL_OUTPUT_SHADED_HADOOP="${TMPDIR}/${SHADED_HADOOP_FILE_NAME}"
+
+        DOWNLOAD_SHADED_HADOOP_URL=${SHADED_HADOOP_BASE_URL}${SHADED_HADOOP_VERSION}/${SHADED_HADOOP_FILE_NAME}
+        checkUrlAvailable "${DOWNLOAD_SHADED_HADOOP_URL}"
+
+        echo "Downloading ${SHADED_HADOOP_FILE_NAME} from ${DOWNLOAD_SHADED_HADOOP_URL}"
+
+        curl -# "${DOWNLOAD_SHADED_HADOOP_URL}" --output "${CURL_OUTPUT_SHADED_HADOOP}"
+        SHADED_HADOOP="${CURL_OUTPUT_SHADED_HADOOP}"
+    fi
+    FLINK_DIST_FILE_NAME="flink-${FLINK_VERSION}-bin-scala_${SCALA_VERSION}.tgz"
+  elif [[ -z "${HADOOP_VERSION}" ]]; then
+    usage
+  else
+    FLINK_DIST_FILE_NAME="flink-${FLINK_VERSION}-bin-hadoop${HADOOP_MAJOR_VERSION}-scala_${SCALA_VERSION}.tgz"
+  fi
+
 
-  FLINK_DIST_FILE_NAME="flink-${FLINK_VERSION}-bin-hadoop${HADOOP_VERSION}-scala_${SCALA_VERSION}.tgz"
   CURL_OUTPUT="${TMPDIR}/${FLINK_DIST_FILE_NAME}"
 
-  echo "Downloading ${FLINK_DIST_FILE_NAME} from ${FLINK_BASE_URL}"
-  curl -# "https://archive.apache.org/dist/flink/flink-${FLINK_VERSION}/${FLINK_DIST_FILE_NAME}" --output ${CURL_OUTPUT}
+  DOWNLOAD_FLINK_URL=${FLINK_BASE_URL}${FLINK_DIST_FILE_NAME}
+  checkUrlAvailable "${DOWNLOAD_FLINK_URL}"
+
+  echo "Downloading ${FLINK_DIST_FILE_NAME} from ${DOWNLOAD_FLINK_URL}"
+
+  curl -# "${DOWNLOAD_FLINK_URL}" --output "${CURL_OUTPUT}"
 
   FLINK_DIST="${CURL_OUTPUT}"
 
@@ -123,4 +173,4 @@ else
 
 fi
 
-docker build --build-arg flink_dist="${FLINK_DIST}" --build-arg job_jar="${JOB_JAR_TARGET}" -t "${IMAGE_NAME}" .
+docker build --build-arg flink_dist="${FLINK_DIST}" --build-arg job_jar="${JOB_JAR_TARGET}" --build-arg hadoop_jar="${SHADED_HADOOP}" -t "${IMAGE_NAME}" .
diff --git a/flink-contrib/docker-flink/Dockerfile b/flink-contrib/docker-flink/Dockerfile
index d24a68c9235a3..78ece73d5082b 100644
--- a/flink-contrib/docker-flink/Dockerfile
+++ b/flink-contrib/docker-flink/Dockerfile
@@ -28,11 +28,14 @@ ENV PATH $PATH:$FLINK_HOME/bin
 
 # flink-dist can point to a directory or a tarball on the local system
 ARG flink_dist=NOT_SET
+# hadoop jar is optional
+ARG hadoop_jar=NOT_SET*
 
 # Install build dependencies and flink
-ADD $flink_dist $FLINK_INSTALL_PATH
+ADD $flink_dist $hadoop_jar $FLINK_INSTALL_PATH/
 RUN set -x && \
-  ln -s $FLINK_INSTALL_PATH/flink-* $FLINK_HOME && \
+  ln -s $FLINK_INSTALL_PATH/flink-[0-9]* $FLINK_HOME && \
+  if [ -f ${FLINK_INSTALL_PATH}/flink-shaded-hadoop* ]; then ln -s ${FLINK_INSTALL_PATH}/flink-shaded-hadoop* $FLINK_LIB_DIR; fi && \
   addgroup -S flink && adduser -D -S -H -G flink -h $FLINK_HOME flink && \
   chown -R flink:flink $FLINK_INSTALL_PATH/flink-* && \
   chown -h flink:flink $FLINK_HOME
diff --git a/flink-contrib/docker-flink/README.md b/flink-contrib/docker-flink/README.md
index c7d94bbcfcfee..990244a77b37e 100644
--- a/flink-contrib/docker-flink/README.md
+++ b/flink-contrib/docker-flink/README.md
@@ -35,7 +35,7 @@ or
 If you want to build the container for a specific version of flink/hadoop/scala
 you can configure it in the respective args:
 
-    docker build --build-arg FLINK_VERSION=1.0.3 --build-arg
HADOOP_VERSION=26 --build-arg SCALA_VERSION=2.10 -t "flink:1.0.3-hadoop2.6-scala_2.10" flink
+    build.sh --from-release --flink-version 1.8.0 --hadoop-version 2.8 --scala-version 2.11 --image-name
 
 # Deploy
 
diff --git a/flink-contrib/docker-flink/build.sh b/flink-contrib/docker-flink/build.sh
index 26557a2337da9..4bbc65a344486 100755
--- a/flink-contrib/docker-flink/build.sh
+++ b/flink-contrib/docker-flink/build.sh
@@ -22,10 +22,11 @@ usage() {
 cat <]
-  build.sh --from-release --flink-version --hadoop-version --scala-version [--image-name ]
+  build.sh --from-release --flink-version --scala-version [--hadoop-version ] [--image-name ]
   build.sh --help
 
   If the --image-name flag is not used the built image name will be 'flink'.
+  Before Flink-1.8, the hadoop-version is required. And from Flink-1.8, the hadoop-version is optional and would download pre-bundled shaded Hadoop jar package if provided.
 HERE
   exit 1
 }
@@ -49,7 +50,8 @@ key="$1"
     shift
     ;;
     --hadoop-version)
-    HADOOP_VERSION="$(echo "$2" | sed 's/\.//')"
+    HADOOP_VERSION="$2"
+    HADOOP_MAJOR_VERSION="$(echo "${HADOOP_VERSION}" | sed 's/\.//')"
     shift
     ;;
     --scala-version)
@@ -79,16 +81,63 @@ trap cleanup EXIT
 
 mkdir -p "${TMPDIR}"
 
+# Probes the url given as $1 with an HTTP HEAD request; on failure prints a hint and
+# leaves through usage (which itself ends in 'exit 1', so 'exit 2' is only a safeguard).
+checkUrlAvailable() {
+    curl --output /dev/null --silent --head --fail "$1"
+    ret=$?
+    if [[ ${ret} -ne 0 ]]; then
+        echo "The url $1 not available, please check your parameters, exit..."
+        usage
+        exit 2
+    fi
+}
+
 if [ -n "${FROM_RELEASE}" ]; then
 
-  [[ -n "${FLINK_VERSION}" ]] && [[ -n "${HADOOP_VERSION}" ]] && [[ -n "${SCALA_VERSION}" ]] || usage
+  [[ -n "${FLINK_VERSION}" ]] && [[ -n "${SCALA_VERSION}" ]] || usage
 
   FLINK_BASE_URL="$(curl -s https://www.apache.org/dyn/closer.cgi\?preferred\=true)flink/flink-${FLINK_VERSION}/"
-  FLINK_DIST_FILE_NAME="flink-${FLINK_VERSION}-bin-hadoop${HADOOP_VERSION}-scala_${SCALA_VERSION}.tgz"
+
+  # Encode major.minor as major*100+minor, so that 1.10 (110) compares greater than 1.8 (108)
+  FLINK_MAJOR_VERSION=$(echo "${FLINK_VERSION}" | awk -F. '{ print $1 * 100 + $2 }')
+
+  if [[ ${FLINK_MAJOR_VERSION} -ge 108 ]]; then
+
+    # From Flink 1.8 on the dist file name carries no Hadoop part; the shaded Hadoop uber jar is fetched separately when --hadoop-version is given
+    if [[ -n "${HADOOP_VERSION}" ]]; then
+        echo "After Flink-1.8, we would download pre-bundle hadoop jar package."
+        # List the published versions and pick the first one that contains the requested Hadoop version
+        SHADED_HADOOP_BASE_URL="https://repo.maven.apache.org/maven2/org/apache/flink/flink-shaded-hadoop2-uber/"
+        SHADED_HADOOP_VERSION="$(curl -s "${SHADED_HADOOP_BASE_URL}" | grep -o "title=\"[0-9.-]*/\"" | sed 's/title=\"//g; s/\/"//g' | grep -F -- "${HADOOP_VERSION}" | head -1)"
+        SHADED_HADOOP_FILE_NAME="flink-shaded-hadoop2-uber-${SHADED_HADOOP_VERSION}.jar"
+
+        CURL_OUTPUT_SHADED_HADOOP="${TMPDIR}/${SHADED_HADOOP_FILE_NAME}"
+
+        DOWNLOAD_SHADED_HADOOP_URL=${SHADED_HADOOP_BASE_URL}${SHADED_HADOOP_VERSION}/${SHADED_HADOOP_FILE_NAME}
+        checkUrlAvailable "${DOWNLOAD_SHADED_HADOOP_URL}"
+
+        echo "Downloading ${SHADED_HADOOP_FILE_NAME} from ${DOWNLOAD_SHADED_HADOOP_URL}"
+
+        curl -# "${DOWNLOAD_SHADED_HADOOP_URL}" --output "${CURL_OUTPUT_SHADED_HADOOP}"
+        SHADED_HADOOP="${CURL_OUTPUT_SHADED_HADOOP}"
+    fi
+    FLINK_DIST_FILE_NAME="flink-${FLINK_VERSION}-bin-scala_${SCALA_VERSION}.tgz"
+  elif [[ -z "${HADOOP_VERSION}" ]]; then
+    usage
+  else
+    FLINK_DIST_FILE_NAME="flink-${FLINK_VERSION}-bin-hadoop${HADOOP_MAJOR_VERSION}-scala_${SCALA_VERSION}.tgz"
+  fi
+
+
   CURL_OUTPUT="${TMPDIR}/${FLINK_DIST_FILE_NAME}"
 
-  echo "Downloading ${FLINK_DIST_FILE_NAME} from ${FLINK_BASE_URL}"
-  curl -s ${FLINK_BASE_URL}${FLINK_DIST_FILE_NAME} --output ${CURL_OUTPUT}
+  DOWNLOAD_FLINK_URL=${FLINK_BASE_URL}${FLINK_DIST_FILE_NAME}
+  checkUrlAvailable "${DOWNLOAD_FLINK_URL}"
+
+  echo "Downloading ${FLINK_DIST_FILE_NAME} from ${DOWNLOAD_FLINK_URL}"
+
+  curl -# "${DOWNLOAD_FLINK_URL}" --output "${CURL_OUTPUT}"
 
   FLINK_DIST="${CURL_OUTPUT}"
 
@@ -105,4 +154,4 @@ else
 
 fi
 
-docker build --build-arg flink_dist="${FLINK_DIST}" -t "${IMAGE_NAME}" .
+docker build --build-arg flink_dist="${FLINK_DIST}" --build-arg hadoop_jar="${SHADED_HADOOP}" -t "${IMAGE_NAME}" .