Skip to content

Commit

Permalink
Add tensorflow-gcs-config to the tensorflow-whl build (Kaggle#784)
Browse files Browse the repository at this point in the history
Add a copy of tensorflow-gcs-config source and modify tensorflow-whl/Dockerfile to build the tensorflow_gcs_config wheel.

http://b/152051681
  • Loading branch information
mcollins42 committed Apr 22, 2020
1 parent 28d87fe commit b47e95c
Show file tree
Hide file tree
Showing 18 changed files with 962 additions and 1 deletion.
3 changes: 2 additions & 1 deletion tensorflow-whl/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,5 @@
* `2.1.0-py36`: TensorFlow 2.1.0 with Python 3.6
* `2.1.0-py36-2`: TensorFlow 2.1.0 with CUDA 10.1
* `2.1.0-py37`: TensorFlow 2.1.0 with Python 3.7
* `2.1.0-py37-2`: TensorFlow 2.1.0 with Python 3.7 & DLVM base image.
* `2.1.0-py37-2`: TensorFlow 2.1.0 with Python 3.7 & DLVM base image.
* `2.1.0-py37-3`: TensorFlow 2.1.0 with Python 3.7, DLVM base image, tensorflow-gcs-config.
9 changes: 9 additions & 0 deletions tensorflow-whl/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -100,5 +100,14 @@ RUN cd /usr/local/src/tensorflow && \
bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/tensorflow_gpu && \
bazel clean

# Copy the vendored tensorflow-gcs-config sources into the image.
# COPY (not ADD) because this is a plain local directory: no archive extraction
# or remote fetch is wanted.
COPY tensorflow-gcs-config /usr/local/src/tensorflow_gcs_config/

# Build tensorflow_gcs_config library against the tensorflow_cpu build.
# The wheel is written to /tmp/tensorflow_gcs_config.
# `apt-get update` runs in the same layer as the install so the package index
# is never stale or missing when libcurl4-openssl-dev is resolved.
RUN cd /usr/local/src/tensorflow_gcs_config && \
    apt-get update && \
    apt-get install -y libcurl4-openssl-dev && \
    pip install /tmp/tensorflow_cpu/tensorflow*.whl && \
    python setup.py bdist_wheel -d /tmp/tensorflow_gcs_config && \
    bazel clean

# Print out the built .whl files: recursively list every /tmp/tensorflow* path
# so the produced artifacts show up in the build log.
RUN ls -R /tmp/tensorflow*
12 changes: 12 additions & 0 deletions tensorflow-whl/tensorflow-gcs-config/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Bazel output directories / convenience symlinks
bazel-bin
bazel-genfiles
bazel-out
bazel-tensorflow-gcs-config
bazel-testlogs

# Python build and packaging artifacts
build
dist
__pycache__
*.egg-info

# Compiled op library (build.py copies it into tensorflow_gcs_config/)
*.so

# Generated by build.py on every build
.bazelrc
62 changes: 62 additions & 0 deletions tensorflow-whl/tensorflow-gcs-config/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# Development image for building and testing the tensorflow-gcs-config custom op.
FROM tensorflow/tensorflow:custom-op

# Build arguments. TF_VERSION, UID and GID have no defaults; UID and GID are
# consumed by groupadd/useradd below, so pass them with --build-arg.
ARG TF_VERSION
ARG UID
ARG GID
ARG USERNAME="build"
ARG CONDA_ADD_PACKAGES=""
ARG BAZEL_VERSION=0.24.1
ARG BAZEL_OS=linux

# OS-level tooling. The package lists are removed in the same layer so they do
# not persist in the image.
RUN apt-get update && \
    apt-get install -y \
        curl \
        dnsutils \
        ffmpeg \
        git \
        nano \
        unzip && \
    rm -rf /var/lib/apt/lists/*

# Create the build user/group with the caller-supplied ids and switch to it.
RUN groupadd -g ${GID} ${USERNAME}
RUN useradd -d /home/${USERNAME} -ms /bin/bash -g ${USERNAME} -G root -u $UID ${USERNAME}
USER ${USERNAME}

WORKDIR /home/${USERNAME}

# Install the pinned Bazel release for the build user.
# -f makes curl fail on HTTP errors instead of saving an error page that would
# then be fed to bash.
# NOTE(review): `--user` installs under the user's home directory; confirm that
# location is on PATH, otherwise `bazel` below resolves to whatever the base
# image already provides.
RUN curl -fsSL https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/bazel-${BAZEL_VERSION}-installer-${BAZEL_OS}-x86_64.sh -o bazel-install.sh && \
    bash -x bazel-install.sh --user && \
    rm bazel-install.sh

ARG CONDA_OS=Linux

# Miniconda - latest 64-bit x86 installer, installed into ./miniconda relative
# to WORKDIR (i.e. /home/${USERNAME}/miniconda). The Python 3.6 environment is
# created separately below.
RUN curl -fsSL https://repo.continuum.io/miniconda/Miniconda3-latest-${CONDA_OS}-x86_64.sh -o mconda-install.sh && \
    bash -x mconda-install.sh -b -p miniconda && \
    rm mconda-install.sh

ENV PATH="/home/${USERNAME}/miniconda/bin:$PATH"

RUN conda create -y -q -n tensorflow-gcs-config python=3.6 ${CONDA_ADD_PACKAGES}

# Activate the environment in interactive shells. The conda.sh path must match
# the install prefix used above (/home/${USERNAME}/miniconda), not /miniconda.
RUN echo ". /home/${USERNAME}/miniconda/etc/profile.d/conda.sh" >> ~/.bashrc && \
    echo "source activate tensorflow-gcs-config" >> ~/.bashrc

ARG PIP_ADD_PACKAGES=""

# Python tooling and dependencies, installed inside the conda environment.
RUN /bin/bash -c "source activate tensorflow-gcs-config && python -m pip install -U \
    pytest \
    pylint \
    boto3 \
    twine \
    google-cloud-pubsub==0.39.1 \
    pandas \
    fastavro \
    'tensorflow>=2' \
    ${PIP_ADD_PACKAGES} \
    "

# This just forces a new fetch of the latest TF binary if the version changes.
RUN /bin/bash -c "echo ${TF_VERSION}"
RUN /bin/bash -c "source activate tensorflow-gcs-config && python -m pip install -U 'tensorflow>=2'"

# Run bazel once at image build time; output is discarded.
# NOTE(review): presumably this warms up / unpacks the Bazel installation so the
# first real build is faster - confirm.
RUN bazel help > /dev/null
2 changes: 2 additions & 0 deletions tensorflow-whl/tensorflow-gcs-config/MANIFEST.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Files from the tensorflow_gcs_config package directory to include in the
# distribution: the Python sources and any compiled .so op library.
include tensorflow_gcs_config/*.py
include tensorflow_gcs_config/*.so
10 changes: 10 additions & 0 deletions tensorflow-whl/tensorflow-gcs-config/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# TensorFlow GCS Configuration Ops

This package provides TF 2.X compatible versions of the
`tf.contrib.cloud.configure_gcs()` operations.

This is a slightly modified and repackaged version of the GCS code in TensorFlow I/O,
in particular the [tfio.gcs](https://www.tensorflow.org/io/api_docs/python/tfio/gcs) module.

This is a copy of the internal source released as the
[tensorflow-gcs-config](https://pypi.org/project/tensorflow-gcs-config/) package.
44 changes: 44 additions & 0 deletions tensorflow-whl/tensorflow-gcs-config/WORKSPACE
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
workspace(name = "tensorflow_gcs_config")

load("//third_party/tensorflow:tf_configure.bzl", "tf_configure")
load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")

# Exposes the installed TensorFlow as @local_config_tf, which
# tensorflow_gcs_config/BUILD depends on for headers and the framework library.
tf_configure(
    name = "local_config_tf",
)

# gRPC 1.21.1; grpc_deps() below is loaded from this archive.
http_archive(
    name = "com_github_grpc_grpc",
    sha256 = "1bf082fb3016154d3f806da8eb5876caf05743da4b2e8130fadd000df74b5bb6",
    strip_prefix = "grpc-1.21.1",
    urls = [
        "https://mirror.bazel.build/github.com/grpc/grpc/archive/v1.21.1.tar.gz",
        "https://github.com/grpc/grpc/archive/v1.21.1.tar.gz",
    ],
)

# 3.7.1 with a fix to BUILD file
http_archive(
    name = "com_google_protobuf",
    sha256 = "1c020fafc84acd235ec81c6aac22d73f23e85a700871466052ff231d69c1b17a",
    strip_prefix = "protobuf-5902e759108d14ee8e6b0b07653dac2f4e70ac73",
    urls = [
        # The mirror URL previously carried a malformed "http:https://" scheme.
        "http://mirror.tensorflow.org/github.com/protocolbuffers/protobuf/archive/5902e759108d14ee8e6b0b07653dac2f4e70ac73.tar.gz",
        "https://github.com/protocolbuffers/protobuf/archive/5902e759108d14ee8e6b0b07653dac2f4e70ac73.tar.gz",
    ],
)

load("@com_github_grpc_grpc//bazel:grpc_deps.bzl", "grpc_deps")

grpc_deps()

# jsoncpp 1.8.4, built with the BUILD file under //third_party; referenced as
# @jsoncpp_git//:jsoncpp by tensorflow_gcs_config/BUILD.
http_archive(
    name = "jsoncpp_git",
    build_file = "//third_party:jsoncpp.BUILD",
    sha256 = "c49deac9e0933bcb7044f08516861a2d560988540b23de2ac1ad443b219afdb6",
    strip_prefix = "jsoncpp-1.8.4",
    urls = [
        # The mirror URL previously carried a malformed "http:https://" scheme.
        "http://mirror.tensorflow.org/github.com/open-source-parsers/jsoncpp/archive/1.8.4.tar.gz",
        "https://github.com/open-source-parsers/jsoncpp/archive/1.8.4.tar.gz",
    ],
)
98 changes: 98 additions & 0 deletions tensorflow-whl/tensorflow-gcs-config/build.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =============================================================================

from __future__ import print_function

import os
import re
import sys
import tensorflow as tf

def _split_flags(flags, prefixes, kind):
    """Bucket ``flags`` by their leading prefix.

    Args:
        flags: iterable of flag strings (e.g. ``"-I/some/dir"``).
        prefixes: sequence of prefixes to recognise; a flag goes into the
            bucket of the first prefix it starts with.
        kind: label used in the warning printed for unrecognised flags.

    Returns:
        A list of lists, one per entry of ``prefixes`` and in the same order.
    """
    buckets = {prefix: [] for prefix in prefixes}
    for flag in flags:
        for prefix in prefixes:
            if flag.startswith(prefix):
                buckets[prefix].append(flag)
                break
        else:
            # Not fatal: report the flag and carry on, as before.
            print("WARNING: Unexpected {} item {}".format(kind, flag))
    return [buckets[prefix] for prefix in prefixes]


def write_config():
    """Retrieve compile and link information from tensorflow and write to .bazelrc.

    Reads ``tf.sysconfig.get_compile_flags()`` and
    ``tf.sysconfig.get_link_flags()`` and writes a ``.bazelrc`` in the current
    working directory containing:

    * one ``build --copt`` line per ``-D`` compile flag,
    * ``TF_HEADER_DIR`` from the single ``-I`` flag,
    * ``TF_SHARED_LIBRARY_DIR`` from the single ``-L`` flag,
    * ``TF_SHARED_LIBRARY_NAME`` derived from the single ``-l`` flag.

    Exits the process with status 1 (``SystemExit``) when there is not exactly
    one include directory, library and library directory, or when the file
    cannot be written.
    """
    include_list, opt_list = _split_flags(
        tf.sysconfig.get_compile_flags(), ("-I", "-D"), "cflag")

    if len(include_list) != 1:
        print("ERROR: Expected a single include directory in " +
              "tf.sysconfig.get_compile_flags()")
        sys.exit(1)

    library_list, libdir_list = _split_flags(
        tf.sysconfig.get_link_flags(), ("-l", "-L"), "link flag")

    if len(library_list) != 1 or len(libdir_list) != 1:
        # The trailing space after "in" was missing, which glued the words
        # together in the printed message.
        print("ERROR: Expected exactly one lib and one libdir in " +
              "tf.sysconfig.get_link_flags()")
        sys.exit(1)

    # Strip the two-character "-l" prefix. "-l:<file>" names the library file
    # verbatim; a plain "-l<name>" expands to "lib<name>.so".
    library_name = library_list[0][2:]
    if library_name.startswith(":"):
        library_name = library_name[1:]
    else:
        library_name = "lib" + library_name + ".so"

    lines = ['build --copt="{}"\n'.format(opt) for opt in opt_list]
    lines.append('build --action_env TF_HEADER_DIR="{}"\n'
                 .format(include_list[0][2:]))
    lines.append('build --action_env TF_SHARED_LIBRARY_DIR="{}"\n'
                 .format(libdir_list[0][2:]))
    lines.append('build --action_env TF_SHARED_LIBRARY_NAME="{}"\n'
                 .format(library_name))

    try:
        # The context manager closes the file; no explicit close() is needed.
        with open(".bazelrc", "w") as bazel_rc:
            bazel_rc.writelines(lines)
    except OSError:
        print("ERROR: Writing .bazelrc")
        sys.exit(1)


def compile_bazel():
    """Generate .bazelrc, then build the op library and copy it into the package.

    Runs a single shell pipeline that removes any previously copied ``.so``
    files, builds ``//tensorflow_gcs_config:_gcs_config_ops.so`` with Bazel and
    copies the result into ``tensorflow_gcs_config/``.

    Raises:
        Exception: if the shell pipeline returns a non-zero status.
    """
    write_config()

    build_steps = (
        'rm -f tensorflow_gcs_config/*.so',
        'bazel build -c dbg //tensorflow_gcs_config:_gcs_config_ops.so',
        'cp bazel-bin/tensorflow_gcs_config/_gcs_config_ops.so tensorflow_gcs_config/',
    )
    # Joined with "&&" so a failing step stops the pipeline.
    status = os.system(' && '.join(build_steps))
    if status != 0:
        raise Exception('Failed to build C extension.')
20 changes: 20 additions & 0 deletions tensorflow-whl/tensorflow-gcs-config/setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
"""Packaging script for tensorflow-gcs-config.

The Bazel build of the op library runs at import time, so every invocation of
this script (e.g. ``python setup.py bdist_wheel``) compiles the shared library
before ``setup()`` packages it.
"""
from setuptools import setup

from build import compile_bazel

# Build the shared library and copy it into tensorflow_gcs_config/ so that
# MANIFEST.in / include_package_data can pick it up.
compile_bazel()

# GCS stands for Google Cloud Storage (the text previously said "Compute").
_DESCRIPTION = ('TensorFlow operations for configuring access to GCS '
                '(Google Cloud Storage) resources.')

setup(
    name='tensorflow-gcs-config',
    version='2.1.7',
    description=_DESCRIPTION,
    long_description=_DESCRIPTION,
    author='Google, Inc.',
    author_email=None,
    url=None,
    packages=['tensorflow_gcs_config'],
    include_package_data=True,
)
22 changes: 22 additions & 0 deletions tensorflow-whl/tensorflow-gcs-config/tensorflow_gcs_config/BUILD
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
licenses(["notice"]) # Apache 2.0

package(default_visibility = ["//visibility:public"])

# Shared library containing the GCS configuration ops and their kernels.
# build.py builds this target and copies the resulting .so into the
# tensorflow_gcs_config/ package directory.
cc_binary(
name = "_gcs_config_ops.so",
srcs = [
"gcs_config_op_kernels.cc",
"gcs_config_ops.cc",
],
copts = [
"-pthread",
"-std=c++11",
"-DNDEBUG",
],
# Produce a shared object rather than an executable.
linkshared = 1,
# @jsoncpp_git and @local_config_tf are the external repositories declared in
# the WORKSPACE file.
deps = [
"@jsoncpp_git//:jsoncpp",
"@local_config_tf//:libtensorflow_framework",
"@local_config_tf//:tf_header_lib",
],
)
Loading

0 comments on commit b47e95c

Please sign in to comment.