Skip to content

Commit

Permalink
Merge pull request #37 from octu0/v1.19.3
Browse files: browse the repository at this point in the history
v1.19.3
  • Loading branch information
octu0 committed Apr 18, 2022
2 parents ec681a0 + e144526 commit d408641
Show file tree
Hide file tree
Showing 239 changed files with 989 additions and 271 deletions.
8 changes: 4 additions & 4 deletions Dockerfile.generator
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@ WORKDIR /halide
RUN set -eux && \
apt update && \
apt install -y wget clang g++ binutils libpng-dev libjpeg-dev && \
wget https://github.com/halide/Halide/releases/download/v13.0.1/Halide-13.0.1-x86-64-linux-fb39d7e3149c0ee1e848bb9957be2ea18765c35d.tar.gz && \
tar xzf Halide-13.0.1-x86-64-linux-fb39d7e3149c0ee1e848bb9957be2ea18765c35d.tar.gz && \
rm Halide-13.0.1-x86-64-linux-fb39d7e3149c0ee1e848bb9957be2ea18765c35d.tar.gz && \
mv Halide-13.0.1-x86-64-linux Halide-Runtime
wget https://github.com/halide/Halide/releases/download/v14.0.0/Halide-14.0.0-x86-64-linux-6b9ed2afd1d6d0badf04986602c943e287d44e46.tar.gz && \
tar xzf Halide-14.0.0-x86-64-linux-6b9ed2afd1d6d0badf04986602c943e287d44e46.tar.gz && \
rm Halide-14.0.0-x86-64-linux-6b9ed2afd1d6d0badf04986602c943e287d44e46.tar.gz && \
mv Halide-14.0.0-x86-64-linux Halide-Runtime

COPY docker-entrypoint.generator.sh /usr/local/bin/docker-entrypoint.generator.sh
ENTRYPOINT [ "docker-entrypoint.generator.sh" ]
18 changes: 9 additions & 9 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ _NAME = $(shell grep -o 'AppName string = "[^"]*"' $(VERSION_GO) | cut -d
_VERSION = $(shell grep -oE 'Version string = "[0-9]+\.[0-9]+\.[0-9]+"' $(VERSION_GO) | cut -d '"' -f2)

_HALIDE = "generator"
_HALIDE_VER = "13.0.1"
_HALIDE_VER = "14.0.0"

.PHONY: vet
vet:
Expand All @@ -27,21 +27,21 @@ setup-halide-runtime_linux:
ifeq ($(shell [ -d detector/Halide-Runtime ] && echo "1"),1)
@echo "detector/Halide-Runtime exists"
else
curl -O -sSL https://github.com/halide/Halide/releases/download/v13.0.1/Halide-13.0.1-x86-64-linux-fb39d7e3149c0ee1e848bb9957be2ea18765c35d.tar.gz
tar xzf Halide-13.0.1-x86-64-linux-fb39d7e3149c0ee1e848bb9957be2ea18765c35d.tar.gz
mv Halide-13.0.1-x86-64-linux detector/Halide-Runtime
rm Halide-13.0.1-x86-64-linux-fb39d7e3149c0ee1e848bb9957be2ea18765c35d.tar.gz
curl -O -sSL https://github.com/halide/Halide/releases/download/v14.0.0/Halide-14.0.0-x86-64-linux-6b9ed2afd1d6d0badf04986602c943e287d44e46.tar.gz
tar xzf Halide-14.0.0-x86-64-linux-6b9ed2afd1d6d0badf04986602c943e287d44e46.tar.gz
mv Halide-14.0.0-x86-64-linux detector/Halide-Runtime
rm Halide-14.0.0-x86-64-linux-6b9ed2afd1d6d0badf04986602c943e287d44e46.tar.gz
endif

.PHONY: setup-halide-runtime_darwin
setup-halide-runtime_darwin:
ifeq ($(shell [ -d detector/Halide-Runtime ] && echo "1"),1)
@echo "detector/Halide-Runtime exists"
else
curl -O -sSL https://github.com/halide/Halide/releases/download/v13.0.1/Halide-13.0.1-x86-64-osx-fb39d7e3149c0ee1e848bb9957be2ea18765c35d.tar.gz
tar xzf Halide-13.0.1-x86-64-osx-fb39d7e3149c0ee1e848bb9957be2ea18765c35d.tar.gz
mv Halide-13.0.1-x86-64-osx ./Halide-Runtime
rm Halide-13.0.1-x86-64-osx-fb39d7e3149c0ee1e848bb9957be2ea18765c35d.tar.gz
curl -O -sSL https://github.com/halide/Halide/releases/download/v14.0.0/Halide-14.0.0-x86-64-osx-6b9ed2afd1d6d0badf04986602c943e287d44e46.tar.gz
tar xzf Halide-14.0.0-x86-64-osx-6b9ed2afd1d6d0badf04986602c943e287d44e46.tar.gz
mv Halide-14.0.0-x86-64-osx ./Halide-Runtime
rm Halide-14.0.0-x86-64-osx-6b9ed2afd1d6d0badf04986602c943e287d44e46.tar.gz
endif

.PHONY: setup-halide-runtime
Expand Down
102 changes: 51 additions & 51 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,57 +19,57 @@ darwin/amd64 Intel(R) Core(TM) i7-8569U CPU @ 2.80GHz

```
src 320x240
BenchmarkJIT/cloneimg : 0.00752ms
BenchmarkJIT/convert_from_argb : 0.02369ms
BenchmarkJIT/convert_from_abgr : 0.03711ms
BenchmarkJIT/convert_from_bgra : 0.02472ms
BenchmarkJIT/convert_from_rabg : 0.03139ms
BenchmarkJIT/convert_from_yuv_420 : 0.02957ms
BenchmarkJIT/convert_from_yuv_444 : 0.02586ms
BenchmarkJIT/convert_to_yuv_420 : 0.05634ms
BenchmarkJIT/convert_to_yuv_444 : 0.06963ms
BenchmarkJIT/rotate0 : 0.00739ms
BenchmarkJIT/rotate90 : 0.02580ms
BenchmarkJIT/rotate180 : 0.00746ms
BenchmarkJIT/rotate270 : 0.02557ms
BenchmarkJIT/crop : 0.06071ms
BenchmarkJIT/scale : 0.14003ms
BenchmarkJIT/scale_box : 0.19936ms
BenchmarkJIT/scale_linear : 0.19754ms
BenchmarkJIT/scale_gaussian : 0.22766ms
BenchmarkJIT/blend_normal : 0.08383ms
BenchmarkJIT/blend_sub : 0.08447ms
BenchmarkJIT/blend_add : 0.08394ms
BenchmarkJIT/blend_diff : 0.08423ms
BenchmarkJIT/grayscale : 0.03839ms
BenchmarkJIT/invert : 0.04330ms
BenchmarkJIT/brightness : 0.04931ms
BenchmarkJIT/gammacorrection : 0.08158ms
BenchmarkJIT/contrast : 0.01506ms
BenchmarkJIT/boxblur : 0.12091ms
BenchmarkJIT/gaussianblur : 0.32293ms
BenchmarkJIT/blockmozaic : 0.27398ms
BenchmarkJIT/erosion : 0.12039ms
BenchmarkJIT/dilation : 0.12439ms
BenchmarkJIT/morphology_open : 0.10255ms
BenchmarkJIT/morphology_close : 0.10472ms
BenchmarkJIT/morphology_gradient : 0.08321ms
BenchmarkJIT/emboss$1 : 0.05385ms
BenchmarkJIT/laplacian : 0.03204ms
BenchmarkJIT/highpass : 0.03783ms
BenchmarkJIT/gradient : 0.03303ms
BenchmarkJIT/edgedetect : 0.02638ms
BenchmarkJIT/sobel : 0.06399ms
BenchmarkJIT/canny : 0.29472ms
BenchmarkJIT/canny_dilate : 0.36258ms
BenchmarkJIT/canny_morphology_open : 0.39542ms
BenchmarkJIT/canny_morphology_close : 0.40479ms
BenchmarkJIT/match_template_sad : 6.64854ms
BenchmarkJIT/match_template_ssd : 4.76639ms
BenchmarkJIT/match_template_ncc : 9.37937ms
BenchmarkJIT/prepared_match_template_ncc : 6.85107ms
BenchmarkJIT/match_template_zncc : 13.29085ms
BenchmarkJIT/prepared_match_template_zncc : 12.07535ms
BenchmarkJIT/cloneimg : 0.00767ms
BenchmarkJIT/convert_from_argb : 0.02328ms
BenchmarkJIT/convert_from_abgr : 0.03573ms
BenchmarkJIT/convert_from_bgra : 0.02430ms
BenchmarkJIT/convert_from_rabg : 0.03159ms
BenchmarkJIT/convert_from_yuv_420 : 0.02964ms
BenchmarkJIT/convert_from_yuv_444 : 0.02652ms
BenchmarkJIT/convert_to_yuv_420 : 0.05654ms
BenchmarkJIT/convert_to_yuv_444 : 0.07356ms
BenchmarkJIT/rotate0 : 0.00828ms
BenchmarkJIT/rotate90 : 0.02600ms
BenchmarkJIT/rotate180 : 0.00792ms
BenchmarkJIT/rotate270 : 0.02560ms
BenchmarkJIT/crop : 0.06128ms
BenchmarkJIT/scale : 0.13941ms
BenchmarkJIT/scale_box : 0.20701ms
BenchmarkJIT/scale_linear : 0.20637ms
BenchmarkJIT/scale_gaussian : 0.31937ms
BenchmarkJIT/blend_normal : 0.09480ms
BenchmarkJIT/blend_sub : 0.08381ms
BenchmarkJIT/blend_add : 0.08439ms
BenchmarkJIT/blend_diff : 0.08445ms
BenchmarkJIT/grayscale : 0.03752ms
BenchmarkJIT/invert : 0.03684ms
BenchmarkJIT/brightness : 0.04142ms
BenchmarkJIT/gammacorrection : 0.07600ms
BenchmarkJIT/contrast : 0.01532ms
BenchmarkJIT/boxblur : 0.10674ms
BenchmarkJIT/gaussianblur : 0.31972ms
BenchmarkJIT/blockmozaic : 0.27346ms
BenchmarkJIT/erosion : 0.11407ms
BenchmarkJIT/dilation : 0.11997ms
BenchmarkJIT/morphology_open : 0.14157ms
BenchmarkJIT/morphology_close : 0.10427ms
BenchmarkJIT/morphology_gradient : 0.07612ms
BenchmarkJIT/emboss : 0.06083ms
BenchmarkJIT/laplacian : 0.04251ms
BenchmarkJIT/highpass : 0.03843ms
BenchmarkJIT/gradient : 0.03320ms
BenchmarkJIT/edgedetect : 0.02701ms
BenchmarkJIT/sobel : 0.06392ms
BenchmarkJIT/canny : 0.28839ms
BenchmarkJIT/canny_dilate : 0.34880ms
BenchmarkJIT/canny_morphology_open : 0.38943ms
BenchmarkJIT/canny_morphology_close : 0.39011ms
BenchmarkJIT/match_template_sad : 5.69188ms
BenchmarkJIT/match_template_ssd : 4.75666ms
BenchmarkJIT/match_template_ncc : 8.98426ms
BenchmarkJIT/prepared_match_template_ncc : 6.23328ms
BenchmarkJIT/match_template_zncc : 12.64066ms
BenchmarkJIT/prepared_match_template_zncc : 11.67131ms
```

## AOT benchmarks
Expand Down
50 changes: 33 additions & 17 deletions blurry.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -608,7 +608,8 @@ Func filter2d_gray(
);

conv.compute_at(gradient, yi)
.vectorize(x);
.vectorize(x)
.update(0).unscheduled();

gradient.compute_at(in, ti)
.tile(x, y, xo, yo, xi, yi, 32, 32)
Expand Down Expand Up @@ -707,7 +708,8 @@ Func gaussian(Func in, Expr sigma, RDom rd, const char *name) {
gaussian(x, y) += cast<uint8_t>(val / center_val);

sum_kernel.compute_at(gaussian, y)
.vectorize(x);
.vectorize(x)
.update(0).unscheduled();
return gaussian;
}

Expand Down Expand Up @@ -781,11 +783,14 @@ Func canny(Func in, Param<int32_t> threshold_max, Param<int32_t> threshold_min)

gauss.compute_at(hy, ti)
.vectorize(y)
.vectorize(x);
.vectorize(x)
.update(0).unscheduled();
gy.compute_at(hy, ti)
.vectorize(x);
.vectorize(x)
.update(0).unscheduled();
gx.compute_at(hy, ti)
.vectorize(x);
.vectorize(x)
.update(0).unscheduled();

nms.compute_at(hy, ti)
.vectorize(x);
Expand Down Expand Up @@ -1971,10 +1976,12 @@ Func sobel_fn(Func input, Param<int32_t> width, Param<int32_t> height){

gy.compute_at(sobel, yi)
.parallel(y)
.vectorize(x);
.vectorize(x)
.update(0).unscheduled();
gx.compute_at(sobel, yi)
.parallel(y)
.vectorize(x);
.vectorize(x)
.update(0).unscheduled();

sobel.compute_at(in, ti)
.tile(x, y, xo, yo, xi, yi, 32, 32)
Expand Down Expand Up @@ -2119,7 +2126,8 @@ Func emboss_fn(Func input, Param<int32_t> width, Param<int32_t> height){
);

conv.compute_at(emboss, yi)
.vectorize(x);
.vectorize(x)
.update(0).unscheduled();

emboss.compute_at(in, ti)
.tile(x, y, xo, yo, xi, yi, 32, 32)
Expand Down Expand Up @@ -2552,7 +2560,7 @@ Func linearsum_xy(Func in, Expr size, Expr xfactor, Expr yfactor) {
Var x("x"), y("y");

RDom rd = RDom(0, size, "rd_linearsum");
Func f = Func("linearsum");
Func f = Func("linearsum_xy");
f(x, y) += in(x + (rd * xfactor), y + (rd * yfactor));
return f;
}
Expand Down Expand Up @@ -2869,28 +2877,36 @@ Func contour_line(Func binary_input, Expr width, Expr height, Expr size) {

next_top.compute_at(f, ti)
.vectorize(y)
.vectorize(x);
.vectorize(x)
.update(0).unscheduled();
next_top_right.compute_at(f, ti)
.vectorize(y)
.vectorize(x);
.vectorize(x)
.update(0).unscheduled();
next_right.compute_at(f, ti)
.vectorize(y)
.vectorize(x);
.vectorize(x)
.update(0).unscheduled();
next_bottom_right.compute_at(f, ti)
.vectorize(y)
.vectorize(x);
.vectorize(x)
.update(0).unscheduled();
next_bottom.compute_at(f, ti)
.vectorize(y)
.vectorize(x);
.vectorize(x)
.update(0).unscheduled();
next_bottom_left.compute_at(f, ti)
.vectorize(y)
.vectorize(x);
.vectorize(x)
.update(0).unscheduled();
next_left.compute_at(f, ti)
.vectorize(y)
.vectorize(x);
.vectorize(x)
.update(0).unscheduled();
next_top_left.compute_at(f, ti)
.vectorize(y)
.vectorize(x);
.vectorize(x)
.update(0).unscheduled();

nb.compute_at(f, ti)
.vectorize(y, 8)
Expand Down
Loading

0 comments on commit d408641

Please sign in to comment.