Skip to content

Commit

Permalink
[FLINK-12416] Fix docker build scripts on Flink-1.8
Browse files Browse the repository at this point in the history
This closes apache#8391.
  • Loading branch information
Myasuka authored and tillrohrmann committed May 24, 2019
1 parent 3050957 commit f1c3ac4
Show file tree
Hide file tree
Showing 6 changed files with 122 additions and 20 deletions.
10 changes: 7 additions & 3 deletions flink-container/docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -30,16 +30,20 @@ ENV PATH $PATH:$FLINK_HOME/bin
# flink-dist can point to a directory or a tarball on the local system
ARG flink_dist=NOT_SET
ARG job_jar=NOT_SET
# hadoop jar is optional
ARG hadoop_jar=NOT_SET*

# Install build dependencies and flink
ADD $flink_dist $FLINK_INSTALL_PATH
ADD $flink_dist $hadoop_jar $FLINK_INSTALL_PATH/
ADD $job_jar $FLINK_INSTALL_PATH/job.jar

RUN set -x && \
ln -s $FLINK_INSTALL_PATH/flink-* $FLINK_HOME && \
ln -s $FLINK_INSTALL_PATH/flink-[0-9]* $FLINK_HOME && \
ln -s $FLINK_INSTALL_PATH/job.jar $FLINK_LIB_DIR && \
if [ -f ${FLINK_INSTALL_PATH}/flink-shaded-hadoop* ]; then ln -s ${FLINK_INSTALL_PATH}/flink-shaded-hadoop* $FLINK_LIB_DIR; fi && \
addgroup -S flink && adduser -D -S -H -G flink -h $FLINK_HOME flink && \
chown -R flink:flink $FLINK_INSTALL_PATH/flink-* && \
chown -R flink:flink ${FLINK_INSTALL_PATH}/flink-* && \
chown -R flink:flink ${FLINK_INSTALL_PATH}/job.jar && \
chown -h flink:flink $FLINK_HOME

COPY docker-entrypoint.sh /
Expand Down
2 changes: 2 additions & 0 deletions flink-container/docker/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ If you want to build the Flink image from an archive stored under `<PATH_TO_ARCH
If you want to build the Flink image for a specific version of Flink/Hadoop/Scala run:

build.sh --from-release --flink-version 1.6.0 --hadoop-version 2.8 --scala-version 2.11 --image-name <IMAGE_NAME>

Please note that starting with Flink 1.8 the Hadoop version is optional: you can build the Flink image without providing any Hadoop version.

The script will try to download the released version from the Apache archive.

Expand Down
63 changes: 55 additions & 8 deletions flink-container/docker/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,11 @@ usage() {
Usage:
build.sh --job-jar <path-to-job-jar> --from-local-dist [--image-name <image>]
build.sh --job-jar <path-to-job-jar> --from-archive <path-to-dist-archive> [--image-name <image>]
build.sh --job-jar <path-to-job-jar> --from-release --flink-version <x.x.x> --hadoop-version <x.x> --scala-version <x.xx> [--image-name <image>]
build.sh --job-jar <path-to-job-jar> --from-release --flink-version <x.x.x> --scala-version <x.xx> [--hadoop-version <x.x>] [--image-name <image>]
build.sh --help
If the --image-name flag is not used the built image name will be 'flink'.
If the --image-name flag is not used the built image name will be 'flink-job'.
Before Flink-1.8, the hadoop-version is required. And from Flink-1.8, the hadoop-version is optional and would download pre-bundled shaded Hadoop jar package if provided.
HERE
exit 1
}
Expand Down Expand Up @@ -57,7 +58,8 @@ key="$1"
shift
;;
--hadoop-version)
HADOOP_VERSION="$(echo "$2" | sed 's/\.//')"
HADOOP_VERSION="$2"
HADOOP_MAJOR_VERSION="$(echo ${HADOOP_VERSION} | sed 's/\.//')"
shift
;;
--scala-version)
Expand Down Expand Up @@ -94,15 +96,60 @@ mkdir -p "${TMPDIR}"
JOB_JAR_TARGET="${TMPDIR}/job.jar"
cp ${JOB_JAR_PATH} ${JOB_JAR_TARGET}

# Verifies that a URL can be fetched before a download is attempted.
# Arguments: $1 - URL to probe
# Outputs:   on failure, a diagnostic naming the URL on stderr
# Returns:   0 when curl's HEAD request succeeds; otherwise calls usage and,
#            should usage return, exits with status 2
checkUrlAvailable() {
  local url="$1"
  # The URL is quoted so that characters such as '?', '*' or spaces are passed
  # to curl verbatim instead of being word-split or glob-expanded by the shell.
  # --head only requests headers; --fail makes curl return non-zero on HTTP errors.
  if ! curl --output /dev/null --silent --head --fail "${url}"; then
    echo "The url ${url} not available, please check your parameters, exit..." >&2
    usage
    exit 2
  fi
}

if [ -n "${FROM_RELEASE}" ]; then

[[ -n "${FLINK_VERSION}" ]] && [[ -n "${HADOOP_VERSION}" ]] && [[ -n "${SCALA_VERSION}" ]] || usage
[[ -n "${FLINK_VERSION}" ]] && [[ -n "${SCALA_VERSION}" ]] || usage

FLINK_BASE_URL="$(curl -s https://www.apache.org/dyn/closer.cgi\?preferred\=true)flink/flink-${FLINK_VERSION}/"

# Kept in case other parts of the script read it. It is NOT used for the
# decision below because it keeps only two digits (1.10.0 becomes "11").
FLINK_MAJOR_VERSION=$(echo "$FLINK_VERSION" | sed -e 's/\.//;s/\(..\).*/\1/')

# Tells whether a Flink version is 1.8 or newer.
# Arguments: $1 - Flink version of the form x.y[.z]
# Returns:   0 if the version is >= 1.8, 1 otherwise (including empty input)
flinkVersionAtLeast18() {
  local major minor
  IFS=. read -r major minor _ <<< "$1"
  # Drop any non-numeric suffix so that only the leading digits are compared.
  major="${major%%[!0-9]*}"
  minor="${minor%%[!0-9]*}"
  [[ -n "${major}" ]] || return 1
  minor="${minor:-0}"
  # 10# forces base 10 so a component with a leading zero is not read as octal.
  (( 10#${major} > 1 || (10#${major} == 1 && 10#${minor} >= 8) ))
}

if flinkVersionAtLeast18 "${FLINK_VERSION}"; then

  # From Flink 1.8 on the distribution file name carries no Hadoop version; a
  # shaded Hadoop uber jar is downloaded separately, and only when requested.
  if [[ -n "${HADOOP_VERSION}" ]]; then
    echo "After Flink-1.8, we would download pre-bundle hadoop jar package."
    # Read the repository listing and take the first version directory whose
    # name contains the requested Hadoop version (matched literally, not as a regex).
    SHADED_HADOOP_BASE_URL="https://repo.maven.apache.org/maven2/org/apache/flink/flink-shaded-hadoop2-uber/"
    SHADED_HADOOP_VERSION="$(curl -s "${SHADED_HADOOP_BASE_URL}" | grep -o "title=\"[0-9.-]*/\"" | sed 's/title=\"//g; s/\/"//g' | grep -F -- "${HADOOP_VERSION}" | head -1)"
    SHADED_HADOOP_FILE_NAME="flink-shaded-hadoop2-uber-${SHADED_HADOOP_VERSION}.jar"

    CURL_OUTPUT_SHADED_HADOOP="${TMPDIR}/${SHADED_HADOOP_FILE_NAME}"

    DOWNLOAD_SHADED_HADOOP_URL="${SHADED_HADOOP_BASE_URL}${SHADED_HADOOP_VERSION}/${SHADED_HADOOP_FILE_NAME}"
    checkUrlAvailable "${DOWNLOAD_SHADED_HADOOP_URL}"

    echo "Downloading ${SHADED_HADOOP_FILE_NAME} from ${DOWNLOAD_SHADED_HADOOP_URL}"

    curl -# "${DOWNLOAD_SHADED_HADOOP_URL}" --output "${CURL_OUTPUT_SHADED_HADOOP}"
    SHADED_HADOOP="${CURL_OUTPUT_SHADED_HADOOP}"
  fi
  FLINK_DIST_FILE_NAME="flink-${FLINK_VERSION}-bin-scala_${SCALA_VERSION}.tgz"
elif [[ -z "${HADOOP_VERSION}" ]]; then
  # Before Flink 1.8 the Hadoop version is part of the distribution file name,
  # so it cannot be omitted.
  usage
else
  FLINK_DIST_FILE_NAME="flink-${FLINK_VERSION}-bin-hadoop${HADOOP_MAJOR_VERSION}-scala_${SCALA_VERSION}.tgz"
fi


FLINK_DIST_FILE_NAME="flink-${FLINK_VERSION}-bin-hadoop${HADOOP_VERSION}-scala_${SCALA_VERSION}.tgz"
CURL_OUTPUT="${TMPDIR}/${FLINK_DIST_FILE_NAME}"

echo "Downloading ${FLINK_DIST_FILE_NAME} from ${FLINK_BASE_URL}"
curl -# "https://archive.apache.org/dist/flink/flink-${FLINK_VERSION}/${FLINK_DIST_FILE_NAME}" --output ${CURL_OUTPUT}
DOWNLOAD_FLINK_URL=${FLINK_BASE_URL}${FLINK_DIST_FILE_NAME}
checkUrlAvailable ${DOWNLOAD_FLINK_URL}

echo "Downloading ${FLINK_DIST_FILE_NAME} from ${DOWNLOAD_FLINK_URL}"

curl -# ${DOWNLOAD_FLINK_URL} --output ${CURL_OUTPUT}

FLINK_DIST="${CURL_OUTPUT}"

Expand All @@ -123,4 +170,4 @@ else

fi

docker build --build-arg flink_dist="${FLINK_DIST}" --build-arg job_jar="${JOB_JAR_TARGET}" -t "${IMAGE_NAME}" .
docker build --build-arg flink_dist="${FLINK_DIST}" --build-arg job_jar="${JOB_JAR_TARGET}" --build-arg hadoop_jar="${SHADED_HADOOP}" -t "${IMAGE_NAME}" .
5 changes: 4 additions & 1 deletion flink-contrib/docker-flink/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,14 @@ ENV PATH $PATH:$FLINK_HOME/bin

# flink-dist can point to a directory or a tarball on the local system
ARG flink_dist=NOT_SET
# hadoop jar is optional
ARG hadoop_jar=NOT_SET*

# Install build dependencies and flink
ADD $flink_dist $FLINK_INSTALL_PATH
ADD $flink_dist $hadoop_jar $FLINK_INSTALL_PATH/
RUN set -x && \
ln -s $FLINK_INSTALL_PATH/flink-* $FLINK_HOME && \
if [ -f ${FLINK_INSTALL_PATH}/flink-shaded-hadoop* ]; then ln -s ${FLINK_INSTALL_PATH}/flink-shaded-hadoop* $FLINK_LIB_DIR; fi && \
addgroup -S flink && adduser -D -S -H -G flink -h $FLINK_HOME flink && \
chown -R flink:flink $FLINK_INSTALL_PATH/flink-* && \
chown -h flink:flink $FLINK_HOME
Expand Down
2 changes: 1 addition & 1 deletion flink-contrib/docker-flink/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ or
If you want to build the container for a specific version of flink/hadoop/scala
you can configure it in the respective args:

docker build --build-arg FLINK_VERSION=1.0.3 --build-arg HADOOP_VERSION=26 --build-arg SCALA_VERSION=2.10 -t "flink:1.0.3-hadoop2.6-scala_2.10" flink
build.sh --from-release --flink-version 1.8.0 --hadoop-version 2.8 --scala-version 2.11 --image-name <IMAGE_NAME>

# Deploy

Expand Down
60 changes: 53 additions & 7 deletions flink-contrib/docker-flink/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,11 @@ usage() {
cat <<HERE
Usage:
build.sh --from-local-dist [--image-name <image>]
build.sh --from-release --flink-version <x.x.x> --hadoop-version <x.x> --scala-version <x.xx> [--image-name <image>]
build.sh --from-release --flink-version <x.x.x> --scala-version <x.xx> --hadoop-version <x.x> [--image-name <image>]
build.sh --help
If the --image-name flag is not used the built image name will be 'flink'.
Before Flink-1.8, the hadoop-version is required. And from Flink-1.8, the hadoop-version is optional and would download pre-bundled shaded Hadoop jar package if provided.
HERE
exit 1
}
Expand All @@ -49,7 +50,8 @@ key="$1"
shift
;;
--hadoop-version)
HADOOP_VERSION="$(echo "$2" | sed 's/\.//')"
HADOOP_VERSION="$2"
HADOOP_MAJOR_VERSION="$(echo ${HADOOP_VERSION} | sed 's/\.//')"
shift
;;
--scala-version)
Expand Down Expand Up @@ -79,16 +81,60 @@ trap cleanup EXIT

mkdir -p "${TMPDIR}"

# Verifies that a URL can be fetched before a download is attempted.
# Arguments: $1 - URL to probe
# Outputs:   on failure, a diagnostic naming the URL on stderr
# Returns:   0 when curl's HEAD request succeeds; otherwise calls usage and,
#            should usage return, exits with status 2
checkUrlAvailable() {
  local url="$1"
  # The URL is quoted so that characters such as '?', '*' or spaces are passed
  # to curl verbatim instead of being word-split or glob-expanded by the shell.
  # --head only requests headers; --fail makes curl return non-zero on HTTP errors.
  if ! curl --output /dev/null --silent --head --fail "${url}"; then
    echo "The url ${url} not available, please check your parameters, exit..." >&2
    usage
    exit 2
  fi
}

if [ -n "${FROM_RELEASE}" ]; then

[[ -n "${FLINK_VERSION}" ]] && [[ -n "${HADOOP_VERSION}" ]] && [[ -n "${SCALA_VERSION}" ]] || usage
[[ -n "${FLINK_VERSION}" ]] && [[ -n "${SCALA_VERSION}" ]] || usage

FLINK_BASE_URL="$(curl -s https://www.apache.org/dyn/closer.cgi\?preferred\=true)flink/flink-${FLINK_VERSION}/"
FLINK_DIST_FILE_NAME="flink-${FLINK_VERSION}-bin-hadoop${HADOOP_VERSION}-scala_${SCALA_VERSION}.tgz"

# Kept in case other parts of the script read it. It is NOT used for the
# decision below because it keeps only two digits (1.10.0 becomes "11").
FLINK_MAJOR_VERSION=$(echo "$FLINK_VERSION" | sed -e 's/\.//;s/\(..\).*/\1/')

# Tells whether a Flink version is 1.8 or newer.
# Arguments: $1 - Flink version of the form x.y[.z]
# Returns:   0 if the version is >= 1.8, 1 otherwise (including empty input)
flinkVersionAtLeast18() {
  local major minor
  IFS=. read -r major minor _ <<< "$1"
  # Drop any non-numeric suffix so that only the leading digits are compared.
  major="${major%%[!0-9]*}"
  minor="${minor%%[!0-9]*}"
  [[ -n "${major}" ]] || return 1
  minor="${minor:-0}"
  # 10# forces base 10 so a component with a leading zero is not read as octal.
  (( 10#${major} > 1 || (10#${major} == 1 && 10#${minor} >= 8) ))
}

if flinkVersionAtLeast18 "${FLINK_VERSION}"; then

  # From Flink 1.8 on the distribution file name carries no Hadoop version; a
  # shaded Hadoop uber jar is downloaded separately, and only when requested.
  if [[ -n "${HADOOP_VERSION}" ]]; then
    echo "After Flink-1.8, we would download pre-bundle hadoop jar package."
    # Read the repository listing and take the first version directory whose
    # name contains the requested Hadoop version (matched literally, not as a regex).
    SHADED_HADOOP_BASE_URL="https://repo.maven.apache.org/maven2/org/apache/flink/flink-shaded-hadoop2-uber/"
    SHADED_HADOOP_VERSION="$(curl -s "${SHADED_HADOOP_BASE_URL}" | grep -o "title=\"[0-9.-]*/\"" | sed 's/title=\"//g; s/\/"//g' | grep -F -- "${HADOOP_VERSION}" | head -1)"
    SHADED_HADOOP_FILE_NAME="flink-shaded-hadoop2-uber-${SHADED_HADOOP_VERSION}.jar"

    CURL_OUTPUT_SHADED_HADOOP="${TMPDIR}/${SHADED_HADOOP_FILE_NAME}"

    DOWNLOAD_SHADED_HADOOP_URL="${SHADED_HADOOP_BASE_URL}${SHADED_HADOOP_VERSION}/${SHADED_HADOOP_FILE_NAME}"
    checkUrlAvailable "${DOWNLOAD_SHADED_HADOOP_URL}"

    echo "Downloading ${SHADED_HADOOP_FILE_NAME} from ${DOWNLOAD_SHADED_HADOOP_URL}"

    curl -# "${DOWNLOAD_SHADED_HADOOP_URL}" --output "${CURL_OUTPUT_SHADED_HADOOP}"
    SHADED_HADOOP="${CURL_OUTPUT_SHADED_HADOOP}"
  fi
  FLINK_DIST_FILE_NAME="flink-${FLINK_VERSION}-bin-scala_${SCALA_VERSION}.tgz"
elif [[ -z "${HADOOP_VERSION}" ]]; then
  # Before Flink 1.8 the Hadoop version is part of the distribution file name,
  # so it cannot be omitted.
  usage
else
  FLINK_DIST_FILE_NAME="flink-${FLINK_VERSION}-bin-hadoop${HADOOP_MAJOR_VERSION}-scala_${SCALA_VERSION}.tgz"
fi


CURL_OUTPUT="${TMPDIR}/${FLINK_DIST_FILE_NAME}"

echo "Downloading ${FLINK_DIST_FILE_NAME} from ${FLINK_BASE_URL}"
curl -s ${FLINK_BASE_URL}${FLINK_DIST_FILE_NAME} --output ${CURL_OUTPUT}
DOWNLOAD_FLINK_URL=${FLINK_BASE_URL}${FLINK_DIST_FILE_NAME}
checkUrlAvailable ${DOWNLOAD_FLINK_URL}

echo "Downloading ${FLINK_DIST_FILE_NAME} from ${DOWNLOAD_FLINK_URL}"

curl -# ${DOWNLOAD_FLINK_URL} --output ${CURL_OUTPUT}

FLINK_DIST="${CURL_OUTPUT}"

Expand All @@ -105,4 +151,4 @@ else

fi

docker build --build-arg flink_dist="${FLINK_DIST}" -t "${IMAGE_NAME}" .
docker build --build-arg flink_dist="${FLINK_DIST}" --build-arg hadoop_jar="${SHADED_HADOOP}" -t "${IMAGE_NAME}" .

0 comments on commit f1c3ac4

Please sign in to comment.