Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

v1.15.0 #18

Merged
merged 11 commits into from
Apr 12, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
100 changes: 67 additions & 33 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,38 @@ for _, img := range images {
}
```

#### ZNCC

```go
scores, err := blurry.MatchTemplateZNCC(input, template, 0.1)
```

| filter | input | template | Result |
| :--------------------------: | :----------------------------------------------: | :----------------------------------------------: | :-------------------------------------------------: |
| `none` | ![example](testdata/src.png) | ![example](testdata/tpl.png) | ![example](testdata/mt_zncc.png) |
| `grayscale` | ![example](testdata/grayscale.png) | ![example](testdata/tpl_gray.png) | ![example](testdata/mt_zncc_gray.png) |
| `sobel` | ![example](testdata/sobel.png) | ![example](testdata/tpl_sobel.png) | ![example](testdata/mt_zncc_sobel.png) |
| `canny dilate:3 morph:open` | ![example](testdata/src_canny_morph_open_d3.png) | ![example](testdata/tpl_canny_morph_open_d3.png) | ![example](testdata/mt_zncc_canny_morph_open_d3.png) |

#### Prepared ZNCC

Improves processing speed by pre-calculating the template-dependent part of the ZNCC computation once and reusing it for every input image.

```go
p, err := blurry.PrepareZNCCTemplate(template)
if err != nil {
panic(err)
}
defer blurry.FreePreparedZNCCTemplate(p)

for _, img := range images {
scores, err := blurry.PreparedMatchTemplateZNCC(img, p, 0.1)
if err != nil {
panic(err)
}
}
```

## CLI usage

Run it via docker.
Expand Down Expand Up @@ -309,7 +341,7 @@ USAGE:
blurry [global options] command [command options] [arguments...]

VERSION:
1.14.0
1.15.0

COMMANDS:
blockmozaic
Expand Down Expand Up @@ -351,38 +383,40 @@ darwin/amd64 Intel(R) Core(TM) i7-8569U CPU @ 2.80GHz

```
src 320x240
BenchmarkJIT/cloneimg : 0.01866ms
BenchmarkJIT/rotate0 : 0.01959ms
BenchmarkJIT/rotate90 : 0.08530ms
BenchmarkJIT/rotate180 : 0.02128ms
BenchmarkJIT/rotate270 : 0.09410ms
BenchmarkJIT/grayscale : 0.08568ms
BenchmarkJIT/invert : 0.06168ms
BenchmarkJIT/brightness : 0.06807ms
BenchmarkJIT/gammacorrection : 0.11157ms
BenchmarkJIT/contrast : 0.06726ms
BenchmarkJIT/boxblur : 0.22119ms
BenchmarkJIT/gaussianblur : 0.18089ms
BenchmarkJIT/blockmozaic : 0.31144ms
BenchmarkJIT/erosion : 0.09030ms
BenchmarkJIT/dilation : 0.09327ms
BenchmarkJIT/morphology_open : 0.13934ms
BenchmarkJIT/morphology_close : 0.13051ms
BenchmarkJIT/morphology_gradient : 0.07682ms
BenchmarkJIT/emboss$1 : 0.15839ms
BenchmarkJIT/laplacian : 0.12597ms
BenchmarkJIT/highpass : 0.12796ms
BenchmarkJIT/gradient : 0.12204ms
BenchmarkJIT/edge : 0.10505ms
BenchmarkJIT/sobel : 0.10904ms
BenchmarkJIT/canny : 0.63846ms
BenchmarkJIT/canny_dilate : 0.62961ms
BenchmarkJIT/canny_morphology_open : 0.71912ms
BenchmarkJIT/canny_morphology_close : 0.71065ms
BenchmarkJIT/match_template_sad : 5.93616ms
BenchmarkJIT/match_template_ssd : 4.40269ms
BenchmarkJIT/match_template_ncc : 8.12906ms
BenchmarkJIT/prepared_match_template_ncc : 5.86860ms
BenchmarkJIT/cloneimg : 0.01952ms
BenchmarkJIT/rotate0 : 0.01952ms
BenchmarkJIT/rotate90 : 0.08169ms
BenchmarkJIT/rotate180 : 0.02266ms
BenchmarkJIT/rotate270 : 0.09420ms
BenchmarkJIT/grayscale : 0.08012ms
BenchmarkJIT/invert : 0.06201ms
BenchmarkJIT/brightness : 0.06828ms
BenchmarkJIT/gammacorrection : 0.11261ms
BenchmarkJIT/contrast : 0.06815ms
BenchmarkJIT/boxblur : 0.18756ms
BenchmarkJIT/gaussianblur : 0.16728ms
BenchmarkJIT/blockmozaic : 0.31110ms
BenchmarkJIT/erosion : 0.10399ms
BenchmarkJIT/dilation : 0.09312ms
BenchmarkJIT/morphology_open : 0.13103ms
BenchmarkJIT/morphology_close : 0.12874ms
BenchmarkJIT/morphology_gradient : 0.07516ms
BenchmarkJIT/emboss$1 : 0.15934ms
BenchmarkJIT/laplacian : 0.13013ms
BenchmarkJIT/highpass : 0.13032ms
BenchmarkJIT/gradient : 0.12489ms
BenchmarkJIT/edge : 0.10574ms
BenchmarkJIT/sobel : 0.11102ms
BenchmarkJIT/canny : 0.59503ms
BenchmarkJIT/canny_dilate : 0.62175ms
BenchmarkJIT/canny_morphology_open : 0.72361ms
BenchmarkJIT/canny_morphology_close : 0.70342ms
BenchmarkJIT/match_template_sad : 5.26705ms
BenchmarkJIT/match_template_ssd : 4.14713ms
BenchmarkJIT/match_template_ncc : 8.03261ms
BenchmarkJIT/prepared_match_template_ncc : 5.83674ms
BenchmarkJIT/match_template_zncc : 11.69895ms
BenchmarkJIT/prepared_match_template_zncc : 10.88168ms
```

## AOT benchmarks
Expand Down
165 changes: 121 additions & 44 deletions blurry.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1167,14 +1167,14 @@ Func match_template_sad_fn(
match.compute_root()
.tile(x, y, xo, yo, xi, yi, 32, 32)
.fuse(xo, yo, ti)
.parallel(ti, 4)
.parallel(ti)
.vectorize(xi, 32);
in.compute_at(match, ti)
in.compute_root()
.unroll(y, 4)
.vectorize(x, 16);
t.compute_at(match, ti)
.vectorize(x, 32);
t.compute_root()
.unroll(y, 4)
.vectorize(x, 16);
.vectorize(x, 32);
return match;
}

Expand Down Expand Up @@ -1206,12 +1206,12 @@ Func match_template_ssd_fn(
match.compute_root()
.tile(x, y, xo, yo, xi, yi, 32, 32)
.fuse(xo, yo, ti)
.parallel(ti, 4)
.parallel(ti)
.vectorize(xi, 32);
in.compute_at(match, ti)
in.compute_root()
.unroll(y, 4)
.vectorize(x, 16);
t.compute_at(match, ti)
t.compute_root()
.unroll(y, 4)
.vectorize(x, 16);
return match;
Expand Down Expand Up @@ -1243,7 +1243,7 @@ Func prepare_ncc_template_fn(
return serialize;
}

Func prepated_match_template_ncc_fn(
Func prepared_match_template_ncc_fn(
Func input, Param<int32_t> width, Param<int32_t> height,
Func buf_tpl_val, Func buf_tpl_sum,
Param<int32_t> tpl_width, Param<int32_t> tpl_height
Expand All @@ -1270,11 +1270,9 @@ Func prepated_match_template_ncc_fn(
match.compute_root()
.tile(x, y, xo, yo, xi, yi, 32, 32)
.fuse(xo, yo, ti)
.parallel(ti, 4)
.parallel(ti)
.vectorize(xi, 32);
in.compute_at(match, ti)
.unroll(y, 4)
.vectorize(x, 16);
in.compute_root();
buf_tpl_val.compute_root();
buf_tpl_sum.compute_root();
return match;
Expand Down Expand Up @@ -1320,36 +1318,109 @@ Func match_template_ncc_fn(
return match;
}

Expr zncc_avg(Func in, RDom rd, Var x, Var y, Expr size) {
Func zncc_avg(Func in, RDom rd, Expr size) {
Var x("x"), y("y");
Func avg = Func("zncc_avg");
Expr val = cast<float>(in(x + rd.x, y + rd.y));
Expr avg = cast<float>(sum(val) / size);
avg(x, y) = cast<float>(sum(val)) / size;
return avg;
}

Expr zncc_avg_tpl(Func in, RDom rd, Expr size) {
Func zncc_avg_tpl(Func in, RDom rd, Expr size) {
Func avg = Func("zncc_avg_tpl");
Expr val = cast<float>(in(rd.x, rd.y));
Expr avg = cast<float>(sum(val) / size);
avg(_) = cast<float>(sum(val)) / size;
return avg;
}

Tuple zncc_stddev(Func in, RDom rd, Var x, Var y, Expr size) {
Expr avg = zncc_avg(in, rd, x, y, size);
Func zncc_stddev(Func in, RDom rd, Expr size, Func avg) {
Var x("x"), y("y");
Func stddev = Func("zncc_stddev");
Expr val = cast<float>(in(x + rd.x, y + rd.y));
Expr s = cast<float>(sum(val - avg));
return Tuple(
sqrt(s) / size,
avg
);
Expr s = sum(fast_pow(val - avg(x, y), 2));
stddev(x, y) = cast<float>(s);
return stddev;
}

Tuple zncc_stddev_tpl(Func in, RDom rd, Expr size) {
Expr avg = zncc_avg_tpl(in, rd, size);
Func zncc_stddev_tpl(Func in, RDom rd, Expr size, Func avg) {
Func stddev = Func("zncc_stddev_tpl");
Expr val = cast<float>(in(rd.x, rd.y));
Expr s = cast<float>(sum(val - avg));
return Tuple(
sqrt(s) / size,
avg
Expr s = sum(fast_pow(val - avg(_), 2));
stddev(_) = cast<float>(s);
return stddev;
}

// Builds the stage that pre-computes the template-only part of ZNCC matching.
//
// The template is padded with a constant 0 outside
// {0..tpl_width, 0..tpl_height, 0..4} and passed through gray_xy_uint8.
// The returned Func yields a two-element Tuple at every (x, y):
//   [0] the template sample at (x, y) minus the value of zncc_avg_tpl,
//       cast to float;
//   [1] the value of zncc_stddev_tpl, which is reduced over the whole
//       template window and therefore does not depend on (x, y).
Func prepare_zncc_template_fn(
    Func tpl, Param<int32_t> tpl_width, Param<int32_t> tpl_height
) {
    Region tpl_bounds = {{0, tpl_width}, {0, tpl_height}, {0, 4}};
    Func t = gray_xy_uint8(
        BoundaryConditions::constant_exterior(tpl, 0, tpl_bounds),
        "tpl"
    );

    Var x("x"), y("y"), ch("ch");
    Var xo("xo"), yo("yo");
    Var xi("xi"), yi("yi");
    Var tile("ti");

    Func out = Func("prepare_zncc_template");
    Expr area = cast<float>(tpl_width * tpl_height);
    RDom window = RDom(0, tpl_width, 0, tpl_height, "rd_template");
    Func mean = zncc_avg_tpl(t, window, area);
    Func spread = zncc_stddev_tpl(t, window, area, mean);

    // Centre each template sample around the template-wide mean.
    Expr centered = cast<float>(t(x, y) - mean(_));
    out(x, y) = Tuple(centered, spread(_));

    // Materialise the result once, in 16x16 tiles processed in parallel.
    out.compute_root();
    out.tile(x, y, xo, yo, xi, yi, 16, 16)
        .fuse(xo, yo, tile)
        .parallel(tile)
        .vectorize(xi, 16);
    return out;
}

// Builds the ZNCC matching stage that consumes a pre-processed template.
//
// Parameters:
//   input                 - source image; padded with a constant 0 outside
//                           {0..width, 0..height, 0..4} and passed through
//                           gray_xy_uint8.
//   width, height         - source extents used for the padding region.
//   buf_tpl_val           - per-pixel template values, read at every point of
//                           the reduction window and used as-is.
//                           NOTE(review): presumably element [0] of
//                           prepare_zncc_template_fn's output (already
//                           mean-centred) - confirm against the caller.
//   buf_tpl_stddev        - template normaliser; only the element at (0, 0)
//                           is read.
//   tpl_width, tpl_height - extents of the reduction window.
//
// Returns a Func producing, for every (x, y), a double score
//   sum((src - src_avg) * tpl_val) / sqrt(src_stddev * tpl_stddev)
// where src_avg / src_stddev come from zncc_avg / zncc_stddev over the
// window anchored at (x, y).
//
// When the denominator is not strictly positive (e.g. a perfectly flat source
// window or a flat template) the score is defined as 0 instead of letting the
// 0/0 division emit NaN.
Func prepared_match_template_zncc_fn(
    Func input, Param<int32_t> width, Param<int32_t> height,
    Func buf_tpl_val, Func buf_tpl_stddev,
    Param<int32_t> tpl_width, Param<int32_t> tpl_height
) {
    Region src_bounds = {{0, width}, {0, height}, {0, 4}};
    Func in = gray_xy_uint8(BoundaryConditions::constant_exterior(input, 0, src_bounds), "in");

    Var x("x"), y("y");
    Var xo("xo"), xi("xi");
    Var yo("yo"), yi("yi");
    Var ti("ti");

    RDom rd_template = RDom(0, tpl_width, 0, tpl_height, "rd_template");

    Func match = Func("prepared_match_template_zncc");
    Expr tpl_size = cast<float>(tpl_width * tpl_height);
    Func src_avg = zncc_avg(in, rd_template, tpl_size);
    Func src_stddev = zncc_stddev(in, rd_template, tpl_size, src_avg);

    // Cross-correlation of the mean-centred source window with the template.
    Expr src_val = cast<float>(in(x + rd_template.x, y + rd_template.y)) - src_avg(x, y);
    Expr tpl_val = cast<float>(buf_tpl_val(rd_template.x, rd_template.y));
    Expr s = cast<float>(sum(src_val * tpl_val));

    // Normalise; guard against a zero (or NaN) denominator so flat regions
    // yield a neutral score of 0 rather than NaN/Inf.
    Expr denom = sqrt(src_stddev(x, y) * buf_tpl_stddev(0, 0));
    Expr v = select(denom > 0, s / denom, 0);
    match(x, y) = cast<double>(v);

    match.compute_root()
        .tile(x, y, xo, yo, xi, yi, 32, 32)
        .fuse(xo, yo, ti)
        .parallel(ti)
        .vectorize(xi, 32);
    // The per-window statistics are read once per output pixel, so compute
    // them up front instead of inside the match reduction.
    src_avg.compute_root()
        .parallel(y)
        .vectorize(x, 32);
    src_stddev.compute_root()
        .parallel(y)
        .vectorize(x, 32);
    in.compute_root();
    buf_tpl_val.compute_root();
    buf_tpl_stddev.compute_root();
    return match;
}

Func match_template_zncc_fn(
Expand All @@ -1370,25 +1441,31 @@ Func match_template_zncc_fn(

Func match = Func("match_template_zncc");
Expr tpl_size = cast<float>(tpl_width * tpl_height);
Tuple src_std = zncc_stddev(in, rd_template, x, y, tpl_size);
Tuple tpl_std = zncc_stddev_tpl(t, rd_template, tpl_size);

Expr src_val = cast<float>(in(x + rd_template.x, y + rd_template.y)) - src_std[1];
Expr tpl_val = cast<float>(t(rd_template.x, rd_template.y)) - tpl_std[1];
Expr s = sum(src_val * tpl_val);
Func src_avg = zncc_avg(in, rd_template, tpl_size);
Func src_stddev = zncc_stddev(in, rd_template, tpl_size, src_avg);
Func tpl_avg = zncc_avg_tpl(t, rd_template, tpl_size);
Func tpl_stddev = zncc_stddev_tpl(t, rd_template, tpl_size, tpl_avg);

match(x, y) = cast<double>(s / fast_pow(tpl_size, 2) * (src_std[0] * tpl_std[0]));
Expr src_val = cast<float>(in(x + rd_template.x, y + rd_template.y)) - src_avg(x, y);
Expr tpl_val = cast<float>(t(rd_template.x, rd_template.y)) - tpl_avg(_);
Expr s = cast<float>(sum(src_val * tpl_val));
Expr v = s / sqrt(src_stddev(x, y) * tpl_stddev(_));
match(x, y) = cast<double>(v);

match.compute_root()
.tile(x, y, xo, yo, xi, yi, 32, 32)
.fuse(xo, yo, ti)
.parallel(ti, 4)
.parallel(ti)
.vectorize(xi, 32);
in.compute_at(match, ti)
.unroll(y, 4)
.vectorize(x, 16);
t.compute_at(match, ti)
.unroll(y, 4)
.vectorize(x, 16);
src_avg.compute_root()
.parallel(y)
.vectorize(x, 32);
src_stddev.compute_root()
.parallel(y)
.vectorize(x, 32);
tpl_avg.compute_root();
tpl_stddev.compute_root();
in.compute_root();
t.compute_root();
return match;
}
12 changes: 11 additions & 1 deletion blurry.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ Func prepare_ncc_template_fn(
Func tpl, Param<int32_t> tpl_width, Param<int32_t> tpl_height
);

Func prepated_match_template_ncc_fn(
Func prepared_match_template_ncc_fn(
Func input, Param<int32_t> width, Param<int32_t> height,
Func buf_tpl_val, Func buf_tpl_sum,
Param<int32_t> tpl_width, Param<int32_t> tpl_height
Expand All @@ -136,4 +136,14 @@ Func match_template_zncc_fn(
Func input, Param<int32_t> width, Param<int32_t> height,
Func tpl, Param<int32_t> tpl_width, Param<int32_t> tpl_height
);

// Builds the Func that pre-computes the template-only part of ZNCC matching.
// tpl is the template image; tpl_width / tpl_height are its extents.
// The result is a Tuple-valued Func over (x, y); see the definition in
// blurry.cpp for the meaning of each element.
Func prepare_zncc_template_fn(
Func tpl, Param<int32_t> tpl_width, Param<int32_t> tpl_height
);

// Builds the ZNCC matching Func for a template that was pre-processed ahead
// of time. input / width / height describe the source image;
// buf_tpl_val and buf_tpl_stddev carry the pre-computed template data
// (NOTE(review): presumably the two Tuple elements produced by
// prepare_zncc_template_fn - confirm against the caller);
// tpl_width / tpl_height are the template extents.
// The result is a Func over (x, y) holding one score per position.
Func prepared_match_template_zncc_fn(
Func input, Param<int32_t> width, Param<int32_t> height,
Func buf_tpl_val, Func buf_tpl_stddev,
Param<int32_t> tpl_width, Param<int32_t> tpl_height
);
#endif // BLURRY_H_
Loading