Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[RUNTIME][uTVM] AutoTVM + uTVM for Cortex-M7 #5417

Merged
merged 11 commits into from
Apr 30, 2020
Prev Previous commit
Next Next commit
Cleanup and sync micro tvm prototype.
  • Loading branch information
areusch committed Apr 27, 2020
commit 91e132d56503b3f93a5343656274048e7e61e37c
2 changes: 1 addition & 1 deletion 3rdparty/dmlc-core
Submodule dmlc-core updated 54 files
+0 −38 .github/workflows/githubci.yml
+1 −0 .gitignore
+82 −0 .travis.yml
+31 −51 CMakeLists.txt
+13 −201 LICENSE
+1 −1 README.md
+6 −19 appveyor.yml
+0 −13 cmake/Modules/FindASan.cmake
+0 −13 cmake/Modules/FindLSan.cmake
+0 −13 cmake/Modules/FindTSan.cmake
+0 −13 cmake/Modules/FindUBSan.cmake
+0 −63 cmake/Sanitizer.cmake
+1 −4 cmake/build_config.h.in
+1 −1 cmake/gtest_cmake.in
+16 −1 doc/Doxyfile
+1 −16 include/dmlc/base.h
+1 −4 include/dmlc/build_config_default.h
+0 −4 include/dmlc/concurrency.h
+18 −18 include/dmlc/concurrentqueue.h
+2 −3 include/dmlc/json.h
+3 −20 include/dmlc/logging.h
+1 −1 include/dmlc/omp.h
+0 −10 include/dmlc/optional.h
+23 −106 include/dmlc/parameter.h
+3 −1 include/dmlc/thread_group.h
+2 −4 include/dmlc/thread_local.h
+46 −74 include/dmlc/threadediter.h
+2 −0 make/dmlc.mk
+2 −2 scripts/lint.py
+19 −12 scripts/packages.mk
+32 −0 scripts/setup_nvcc.sh
+0 −65 scripts/test_script.sh
+0 −0 scripts/travis/s390x/Dockerfile
+0 −0 scripts/travis/s390x/build_via_cmake.sh
+1 −1 scripts/travis/s390x/ci_build.sh
+0 −0 scripts/travis/s390x/entrypoint.sh
+3 −0 scripts/travis/travis_before_cache.sh
+9 −0 scripts/travis/travis_osx_install.sh
+57 −0 scripts/travis/travis_script.sh
+40 −0 scripts/travis/travis_setup_env.sh
+16 −0 src/build_config.cc
+3 −7 src/data/csv_parser.h
+1 −1 test/logging_test.cc
+0 −4 test/unittest/CMakeLists.txt
+1 −2 test/unittest/unittest_env.cc
+0 −30 test/unittest/unittest_param.cc
+56 −80 test/unittest/unittest_parser.cc
+1 −0 test/unittest/unittest_thread_group.cc
+2 −2 test/unittest/unittest_threaditer.cc
+15 −19 test/unittest/unittest_threaditer_exc_handling.cc
+0 −4 tracker/dmlc_tracker/launcher.py
+0 −7 tracker/dmlc_tracker/ssh.py
+0 −13 tracker/dmlc_tracker/util.py
+2 −4 tracker/dmlc_tracker/yarn.py
5 changes: 4 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,10 @@ build/libtvm_web_runtime.js: build/libtvm_web_runtime.bc
cpplint:
python3 3rdparty/dmlc-core/scripts/lint.py vta cpp vta/include vta/src
python3 3rdparty/dmlc-core/scripts/lint.py topi cpp topi/include;
python3 3rdparty/dmlc-core/scripts/lint.py tvm cpp include src \
# Note: exclude src/runtime/micro/host_driven because it contains C99 files.
python3 3rdparty/dmlc-core/scripts/lint.py tvm cpp \
--exclude_path=src/runtime/micro/host_driven \
tmoreau89 marked this conversation as resolved.
Show resolved Hide resolved
include src \
examples/extension/src examples/graph_executor/src

pylint:
Expand Down
2 changes: 2 additions & 0 deletions include/tvm/tir/stmt.h
Original file line number Diff line number Diff line change
Expand Up @@ -930,6 +930,8 @@ constexpr const char* loop_scope = "loop_scope";
constexpr const char* reduce_scope = "reduce_scope";
/*! \brief Mark region is guarded by the pragma extension */
constexpr const char* pragma_scope_prefix = "pragma_";
/*! \brief Import C source or file into the final code gen module */
constexpr const char* pragma_import_c = "pragma_import_c";
/*! \brief Import llvm source or file into the final code gen module */
constexpr const char* pragma_import_llvm = "pragma_import_llvm";
/*! \brief Try to modify the AST to support Tensor Core */
Expand Down
13 changes: 6 additions & 7 deletions python/tvm/autotvm/measure/measure_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,11 +245,12 @@ def get_build_kwargs(self):

if 'cuda' in self.task.target.keys:
kwargs["cuda_arch"] = "sm_" + "".join(ctx.compute_version.split('.'))
if self.task.target.device_name == 'micro_dev':
kwargs.setdefault('build_option', {})['disable_vectorize'] = True

return kwargs

def run(self, measure_inputs, build_results):
print('[RPCRunner.run]')
results = []
remote_args = (self.key, self.host, self.port, self.priority, self.timeout)

Expand All @@ -274,10 +275,9 @@ def run(self, measure_inputs, build_results):
if isinstance(res, Exception): # executor error or timeout
results.append(MeasureResult((str(res),), MeasureErrorNo.RUN_TIMEOUT,
self.timeout, time.time()))
#raise Exception(f'encountered exception during measurement: {results}')
else:
print(f' got a result: {res}')
results.append(res)
raise Exception(f'encountered exception during measurement: {results}')

results.append(res)

return results

Expand Down Expand Up @@ -511,8 +511,7 @@ def run_through_rpc(measure_input, build_result,
msg = msg[:msg.index("Stack trace returned")]
if "CUDA Source" in msg:
msg = msg[:msg.index("CUDA Source")]
#costs = (RuntimeError(msg[:1024]),)
costs = (RuntimeError(msg),)
costs = (RuntimeError(msg[:1024]),)
errno = MeasureErrorNo.RUNTIME_DEVICE
tstamp = time.time()
time.sleep(cooldown_interval)
Expand Down
1 change: 1 addition & 0 deletions python/tvm/autotvm/task/relay_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ def _lower(mod,
grc = graph_runtime_codegen.GraphRuntimeCodegen(None, target)
grc.codegen(mod["main"])
return

# default case
# Try graph codegen first to extract autotvm tasks.
# If failed to compile, then fallback to use VM compiler.
Expand Down
6 changes: 2 additions & 4 deletions python/tvm/autotvm/task/space.py
Original file line number Diff line number Diff line change
Expand Up @@ -544,10 +544,8 @@ def apply(self, sch, op, axes, axis_lens=None,
if ann == 'none':
pass
elif ann == 'unroll':
#if max_unroll and axis_lens[i] > max_unroll:
# cfg.raise_error("Too large factor for unrolling")
#if max_unroll and axis_lens[i] < max_unroll:
# cfg.raise_error("Too large factor for unrolling")
if max_unroll and axis_lens[i] > max_unroll:
cfg.raise_error("Too large factor for unrolling")
sch[op].unroll(axes[i])
elif ann == 'vec':
if vec_size and axis_lens[i] not in vec_size:
Expand Down
2 changes: 1 addition & 1 deletion python/tvm/autotvm/tuner/callback.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ def _callback(tuner, inputs, results):
if res.error_no == 0:
flops = inp.task.flop / np.mean(res.costs)

if logger.level < logging.DEBUG: # only print progress bar in non-debug mode
if logger.level > logging.DEBUG: # only print progress bar in non-debug mode
ctx.cur_flops = flops
ctx.best_flops = tuner.best_flops

Expand Down
8 changes: 6 additions & 2 deletions python/tvm/autotvm/tuner/ga_tuner.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,11 @@ def __init__(self, task, pop_size=100, elite_num=3, mutation_prob=0.1):

# space info
self.space = task.config_space
self.dims = [len(x) for x in self.space.space_map.values()]
self.dim_keys = []
self.dims = []
for k, v in self.space.space_map.items():
self.dim_keys.append(k)
self.dims.append(len(v))

self.visited = set([])

Expand Down Expand Up @@ -123,7 +127,7 @@ def update(self, inputs, results):
if len(self.visited) < len(self.space):
while knob2point(tmp_gene, self.dims) in self.visited:
j = np.random.randint(len(self.dims))
tmp_gene[j] = np.random.randint(self.dims[j])
tmp_gene[j] = np.random.randint(self.dims[j]) # pylint: disable=invalid-sequence-index
next_genes.append(tmp_gene)
self.visited.add(knob2point(tmp_gene, self.dims))
else:
Expand Down
1 change: 0 additions & 1 deletion python/tvm/autotvm/tuner/model_based_tuner.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,6 @@ def update(self, inputs, results):
# if we have enough new training samples
if len(self.xs) >= self.plan_size * (self.train_ct + 1) \
and self.flops_max > 1e-6:
import pdb; pdb.set_trace()
self.cost_model.fit(self.xs, self.ys, self.plan_size)
if self.diversity_filter_ratio:
candidate = self.model_optimizer.find_maximums(
Expand Down
12 changes: 7 additions & 5 deletions python/tvm/autotvm/tuner/tuner.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,13 +150,15 @@ def tune(self, n_trial, measure_option, early_stopping=None, callbacks=(), si_pr
i + k + 1, si_prefix, format_si_prefix(flops, si_prefix),
format_si_prefix(self.best_flops, si_prefix), res, config)

num_successes = 0
for result in results:
if isinstance(result.costs[0], float):
i += 1
else:
print('[Tuner.tune]')
print(' not counting failure towards trial count')
#i += len(results)
num_successes += 1
if num_successes != len(results):
logger.debug('not counting %d failures towards trial count',
len(results) - num_successes)
i += num_successes

self.ttl = min(early_stopping + self.best_iter, n_trial) - i

self.update(inputs, results)
Expand Down
107 changes: 17 additions & 90 deletions python/tvm/contrib/binutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,80 +72,6 @@
}}
"""

def run_cmd(cmd):
    """Runs `cmd` in a subprocess and awaits its completion.

    Parameters
    ----------
    cmd : List[str]
        list of command-line arguments

    Returns
    -------
    output : str
        resulting stdout capture from the subprocess
    """
    # Interleave stderr with stdout so a failing command's full transcript
    # ends up in the captured output (and thus in the error message below).
    completed = subprocess.run(
        cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT)
    captured = completed.stdout.decode('utf-8')
    if completed.returncode != 0:
        joined = ' '.join(cmd)
        raise RuntimeError(
            f'error while running command "{joined}":\n{captured}')
    return captured


RELOCATION_LD_SCRIPT_TEMPLATE = """
/* linker symbol for use in UTVMInit */
_utvm_stack_pointer_init = 0x{stack_pointer_init:x};

SECTIONS
{{
. = 0x{text_start:x};
. = ALIGN({word_size});
.text :
{{
. = ALIGN({word_size});
KEEP(*(.text))
KEEP(*(.text*))
. = ALIGN({word_size});
}}

. = 0x{rodata_start:x};
. = ALIGN({word_size});
.rodata :
{{
. = ALIGN({word_size});
KEEP(*(.rodata))
KEEP(*(.rodata*))
. = ALIGN({word_size});
}}

. = 0x{data_start:x};
. = ALIGN({word_size});
.data :
{{
. = ALIGN({word_size});
KEEP(*(.data))
KEEP(*(.data*))
. = ALIGN({word_size});
}}

. = 0x{bss_start:x};
. = ALIGN({word_size});
.bss :
{{
. = ALIGN({word_size});
KEEP(*(.bss))
KEEP(*(.bss*))
. = ALIGN({word_size});
}}
}}
"""

def run_cmd(cmd):
"""Runs `cmd` in a subprocess and awaits its completion.

Expand Down Expand Up @@ -236,11 +162,11 @@ def tvm_callback_get_section_size(binary_path, section_name, toolchain_prefix):
# padding for most cases, but symbols can be arbitrarily large, so this
# isn't bulletproof.
return section_size + 32
# TODO remove this arbitrary addition once we figure out why section sizes
# are being undercalculated.
# maybe stop relying on `*size` to give us the size and instead read the
# section with `*objcopy` and count the bytes.
return section_size + 8

# NOTE: in the past, section_size has been wrong on x86. it may be
# inconsistent. TODO: maybe stop relying on `*size` to give us the size and
# instead read the section with `*objcopy` and count the bytes.
return section_size


@tvm._ffi.register_func("tvm_callback_relocate_binary")
Expand Down Expand Up @@ -315,17 +241,18 @@ def tvm_callback_relocate_binary(
with open(rel_obj_path, 'rb') as f:
rel_bin = bytearray(f.read())

gdb_init_dir = os.environ['MICRO_GDB_INIT_DIR']
gdb_init_path = f'{gdb_init_dir}/.gdbinit'
with open(gdb_init_path, 'r') as f:
gdbinit_contents = f.read().split('\n')
new_contents = []
for line in gdbinit_contents:
new_contents.append(line)
if line.startswith('target'):
new_contents.append(f'add-symbol-file {rel_obj_path}')
with open(gdb_init_path, 'w') as f:
f.write('\n'.join(new_contents))
gdb_init_dir = os.environ.get('MICRO_GDB_INIT_DIR')
if gdb_init_dir is not None:
gdb_init_path = f'{gdb_init_dir}/.gdbinit'
with open(gdb_init_path, 'r') as f:
gdbinit_contents = f.read().split('\n')
new_contents = []
for line in gdbinit_contents:
new_contents.append(line)
if line.startswith('target'):
new_contents.append(f'add-symbol-file {rel_obj_path}')
with open(gdb_init_path, 'w') as f:
f.write('\n'.join(new_contents))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It might be worth splitting these lines into a separate µTVM debugging tools PR

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think that's also going to change soon, so would prefer to fix then


return rel_bin

Expand Down
7 changes: 4 additions & 3 deletions python/tvm/exec/rpc_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,9 +122,10 @@ def server_shutdown():
'--utvm-dev-config-args is specified.'))
parser.add_argument('--utvm-dev-config-args', type=str,
help=("Arguments to the device module's generate_config function. "
'Must be a python literal parseable by literal_eval. If specified, the '
"device configuration is generated using the device module's generate_config. "
"Can't be specified when --utvm-dev-config is specified."))
'Must be a python literal parseable by literal_eval. If specified, '
"the device configuration is generated using the device module's "
"generate_config. Can't be specified when --utvm-dev-config is "
"specified."))
parser.add_argument('--utvm-dev-id', type=str,
help=('Unique ID for the target device (if using MicroTVM). Should '
'match the name of a module underneath tvm.micro.device).'))
Expand Down
Loading