Skip to content

Commit

Permalink
Merge pull request #38 from octu0/v1.20.0
Browse files Browse the repository at this point in the history
v1.20.0
  • Loading branch information
octu0 authored May 25, 2022
2 parents d408641 + 72632ef commit f29497f
Show file tree
Hide file tree
Showing 24 changed files with 677 additions and 61 deletions.
1 change: 1 addition & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
out
*.out
/Halide-Runtime*
.git
Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ ifeq ($(shell [ -d detector/Halide-Runtime ] && echo "1"),1)
else
curl -O -sSL https://github.com/halide/Halide/releases/download/v14.0.0/Halide-14.0.0-x86-64-linux-6b9ed2afd1d6d0badf04986602c943e287d44e46.tar.gz
tar xzf Halide-14.0.0-x86-64-linux-6b9ed2afd1d6d0badf04986602c943e287d44e46.tar.gz
mv Halide-14.0.0-x86-64-linux detector/Halide-Runtime
mv Halide-14.0.0-x86-64-linux ./Halide-Runtime
rm Halide-14.0.0-x86-64-linux-6b9ed2afd1d6d0badf04986602c943e287d44e46.tar.gz
endif

Expand Down
117 changes: 66 additions & 51 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,57 +19,58 @@ darwin/amd64 Intel(R) Core(TM) i7-8569U CPU @ 2.80GHz

```
src 320x240
BenchmarkJIT/cloneimg : 0.00767ms
BenchmarkJIT/convert_from_argb : 0.02328ms
BenchmarkJIT/convert_from_abgr : 0.03573ms
BenchmarkJIT/convert_from_bgra : 0.02430ms
BenchmarkJIT/convert_from_rabg : 0.03159ms
BenchmarkJIT/convert_from_yuv_420 : 0.02964ms
BenchmarkJIT/convert_from_yuv_444 : 0.02652ms
BenchmarkJIT/convert_to_yuv_420 : 0.05654ms
BenchmarkJIT/convert_to_yuv_444 : 0.07356ms
BenchmarkJIT/rotate0 : 0.00828ms
BenchmarkJIT/rotate90 : 0.02600ms
BenchmarkJIT/rotate180 : 0.00792ms
BenchmarkJIT/rotate270 : 0.02560ms
BenchmarkJIT/crop : 0.06128ms
BenchmarkJIT/scale : 0.13941ms
BenchmarkJIT/scale_box : 0.20701ms
BenchmarkJIT/scale_linear : 0.20637ms
BenchmarkJIT/scale_gaussian : 0.31937ms
BenchmarkJIT/blend_normal : 0.09480ms
BenchmarkJIT/blend_sub : 0.08381ms
BenchmarkJIT/blend_add : 0.08439ms
BenchmarkJIT/blend_diff : 0.08445ms
BenchmarkJIT/grayscale : 0.03752ms
BenchmarkJIT/invert : 0.03684ms
BenchmarkJIT/brightness : 0.04142ms
BenchmarkJIT/gammacorrection : 0.07600ms
BenchmarkJIT/contrast : 0.01532ms
BenchmarkJIT/boxblur : 0.10674ms
BenchmarkJIT/gaussianblur : 0.31972ms
BenchmarkJIT/blockmozaic : 0.27346ms
BenchmarkJIT/erosion : 0.11407ms
BenchmarkJIT/dilation : 0.11997ms
BenchmarkJIT/morphology_open : 0.14157ms
BenchmarkJIT/morphology_close : 0.10427ms
BenchmarkJIT/morphology_gradient : 0.07612ms
BenchmarkJIT/emboss : 0.06083ms
BenchmarkJIT/laplacian : 0.04251ms
BenchmarkJIT/highpass : 0.03843ms
BenchmarkJIT/gradient : 0.03320ms
BenchmarkJIT/edgedetect : 0.02701ms
BenchmarkJIT/sobel : 0.06392ms
BenchmarkJIT/canny : 0.28839ms
BenchmarkJIT/canny_dilate : 0.34880ms
BenchmarkJIT/canny_morphology_open : 0.38943ms
BenchmarkJIT/canny_morphology_close : 0.39011ms
BenchmarkJIT/match_template_sad : 5.69188ms
BenchmarkJIT/match_template_ssd : 4.75666ms
BenchmarkJIT/match_template_ncc : 8.98426ms
BenchmarkJIT/prepared_match_template_ncc : 6.23328ms
BenchmarkJIT/match_template_zncc : 12.64066ms
BenchmarkJIT/prepared_match_template_zncc : 11.67131ms
BenchmarkJIT/cloneimg : 0.00788ms
BenchmarkJIT/convert_from_argb : 0.02375ms
BenchmarkJIT/convert_from_abgr : 0.03884ms
BenchmarkJIT/convert_from_bgra : 0.02470ms
BenchmarkJIT/convert_from_rabg : 0.03655ms
BenchmarkJIT/convert_from_yuv_420 : 0.03168ms
BenchmarkJIT/convert_from_yuv_444 : 0.02686ms
BenchmarkJIT/convert_to_yuv_420 : 0.06742ms
BenchmarkJIT/convert_to_yuv_444 : 0.07209ms
BenchmarkJIT/rotate0 : 0.00774ms
BenchmarkJIT/rotate90 : 0.02599ms
BenchmarkJIT/rotate180 : 0.00802ms
BenchmarkJIT/rotate270 : 0.02582ms
BenchmarkJIT/crop : 0.06126ms
BenchmarkJIT/scale : 0.14009ms
BenchmarkJIT/scale_box : 0.20598ms
BenchmarkJIT/scale_linear : 0.20440ms
BenchmarkJIT/scale_gaussian : 0.31444ms
BenchmarkJIT/blend_normal : 0.08443ms
BenchmarkJIT/blend_sub : 0.08398ms
BenchmarkJIT/blend_add : 0.08364ms
BenchmarkJIT/blend_diff : 0.08453ms
BenchmarkJIT/grayscale : 0.03687ms
BenchmarkJIT/invert : 0.03730ms
BenchmarkJIT/brightness : 0.04703ms
BenchmarkJIT/gammacorrection : 0.08013ms
BenchmarkJIT/contrast : 0.01549ms
BenchmarkJIT/boxblur : 0.11211ms
BenchmarkJIT/gaussianblur : 0.33058ms
BenchmarkJIT/blockmozaic : 0.27455ms
BenchmarkJIT/erosion : 0.11934ms
BenchmarkJIT/dilation : 0.12013ms
BenchmarkJIT/morphology_open : 0.10370ms
BenchmarkJIT/morphology_close : 0.10435ms
BenchmarkJIT/morphology_gradient : 0.07684ms
BenchmarkJIT/emboss : 0.04402ms
BenchmarkJIT/laplacian : 0.03192ms
BenchmarkJIT/highpass : 0.03847ms
BenchmarkJIT/gradient : 0.03322ms
BenchmarkJIT/edgedetect : 0.02705ms
BenchmarkJIT/sobel : 0.06276ms
BenchmarkJIT/canny : 0.29922ms
BenchmarkJIT/canny_dilate : 0.35114ms
BenchmarkJIT/canny_morphology_open : 0.44488ms
BenchmarkJIT/canny_morphology_close : 0.40522ms
BenchmarkJIT/match_template_sad : 5.75482ms
BenchmarkJIT/match_template_ssd : 4.48363ms
BenchmarkJIT/match_template_ncc : 8.32420ms
BenchmarkJIT/prepared_match_template_ncc : 6.22423ms
BenchmarkJIT/match_template_zncc : 12.73780ms
BenchmarkJIT/prepared_match_template_zncc : 11.38906ms
BenchmarkJIT/pcm16_decibel : 0.00257ms
```

## AOT benchmarks
Expand Down Expand Up @@ -620,6 +621,20 @@ ycbcr, err := blurry.ConvertToYUV444(rgba)
| 420 | `blurry.ConvertToYUV420(*image.RGBA)` |
| 444 | `blurry.ConvertToYUV444(*image.RGBA)` |

#### PCM16 Decibel

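Returns the RMS level of the given 16-bit PCM samples in decibels relative to full scale, i.e. 20 × log10 of the RMS of the samples after scaling them to the -1.0 .. 1.0 range.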

```go
var data []byte
decibel, err := blurry.PCM16Decibel(data, length)

// or

var input []int16
decibel, err := blurry.PCM16DecibelFromInt16(input)
```

## CLI usage

Run it via docker.
Expand Down
67 changes: 67 additions & 0 deletions blurry.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ const Expr degree0 = cast<uint8_t>(0);
const Expr degree45 = cast<uint8_t>(45);
const Expr degree90 = cast<uint8_t>(90);
const Expr degree135 = cast<uint8_t>(135);
// Divisor used by pcm16_decibel_fn to scale int16_t PCM samples into the -1.0 .. 1.0 range.
const Expr pcm16_max = cast<float>(32768.0f); // 2^15
// Natural logarithm of 10; log10() divides fast_log(v) by it to change the base from e to 10.
// The literal carries far more digits than a float can hold and is rounded to the nearest float.
const Expr ln10 = cast<float>(2.30258509299404568401799145468436420760110148862877297603332790f); // https://oeis.org/A002392

const Func kernel_sobel_x = kernel_sobel3x3_x();
const Func kernel_sobel_y = kernel_sobel3x3_y();
Expand Down Expand Up @@ -145,6 +147,34 @@ Func wrapFunc_xy(Buffer<double> buf, const char* name) {
return f;
}

// Wraps a uint8_t buffer in a Func called `name`.
// The Func takes a single coordinate x and yields buf(x) unchanged.
Func wrapFunc_x(Buffer<uint8_t> buf, const char* name) {
  Var pos("x");
  Func wrapped(name);
  wrapped(pos) = buf(pos);
  return wrapped;
}

// Wraps an int16_t buffer in a Func called `name`.
// The Func takes a single coordinate x and yields buf(x) unchanged.
Func wrapFunc_x(Buffer<int16_t> buf, const char* name) {
  Var pos("x");
  Func wrapped(name);
  wrapped(pos) = buf(pos);
  return wrapped;
}

// Wraps a float buffer in a Func called `name`.
// The Func takes a single coordinate x and yields buf(x) unchanged.
Func wrapFunc_x(Buffer<float> buf, const char* name) {
  Var pos("x");
  Func wrapped(name);
  wrapped(pos) = buf(pos);
  return wrapped;
}

// Wraps a double buffer in a Func called `name`.
// The Func takes a single coordinate x and yields buf(x) unchanged.
Func wrapFunc_x(Buffer<double> buf, const char* name) {
  Var pos("x");
  Func wrapped(name);
  wrapped(pos) = buf(pos);
  return wrapped;
}

Func read(Func clamped, const char *name) {
Var x("x"), y("y"), ch("ch");
Func read = Func(name);
Expand Down Expand Up @@ -2982,3 +3012,40 @@ Func contour_fn(
f(x, y) = value;
return f;
}

// Base-10 logarithm of v as a Halide expression.
// Built from Halide's fast_log through the change-of-base identity
// log10(v) = ln(v) / ln(10).
Expr log10(Expr v) {
  const Expr natural_log = fast_log(v);
  return natural_log / ln10;
}

// Builds the pipeline that measures the level of 16-bit PCM audio as
// 20 * log10(RMS), where RMS is the root mean square of the samples after
// dividing them by pcm16_max.
//
// input:  Func indexed by sample position; every value is cast to int16_t.
// length: number of samples, used both as the extent of the reduction and as
//         the divisor of the mean.
// Returns the Func "pcm16_decibel", which is defined over implicit variables only.
Func pcm16_decibel_fn(
  Func input, Param<int32_t> length
) {
  Var x("x");

  Func normalize = Func("pcm16_normalize");
  Expr in = cast<int16_t>(input(x));
  normalize(x) = cast<float>(in) / pcm16_max; // -1.0 .. x .. 1.0

  // Square by plain multiplication instead of fast_pow(v, 2): normalized
  // samples are negative for half of the waveform, and Halide documents
  // fast_pow as returning nonsense for a negative base.
  Func sq = Func("square");
  Expr sample = normalize(x);
  sq(x) = sample * sample;

  RDom rd = RDom(0, length, "length");
  Func rms = Func("root_mean_square");
  Expr sum_values = sum(sq(rd));
  Expr mean = sum_values / cast<float>(length);
  rms(_) = sqrt(mean);

  Func decibel = Func("pcm16_decibel");
  decibel(_) = 20 * (log10(rms(_)));

  // schedule

  normalize.compute_at(sq, x)
    .store_root()
    .vectorize(x, 64);
  sq.compute_root()
    .store_root()
    .vectorize(x, 64);

  return decibel;
}
11 changes: 11 additions & 0 deletions blurry.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,14 @@ Func wrapFunc_xy(Buffer<float> buf, const char* name);

Func wrapFunc_xy(Buffer<double> buf, const char* name);

// wrapFunc_x: wraps a buffer in a Func named `name` that is indexed by a
// single variable x, with f(x) = buf(x). One overload per element type.
Func wrapFunc_x(Buffer<uint8_t> buf, const char* name);

Func wrapFunc_x(Buffer<int16_t> buf, const char* name);

Func wrapFunc_x(Buffer<float> buf, const char* name);

Func wrapFunc_x(Buffer<double> buf, const char* name);

Func cloneimg_fn(Func input, Param<int32_t> width, Param<int32_t> height);

Func convert_from_abgr_fn(Func input, Param<int32_t> width, Param<int32_t> height);
Expand Down Expand Up @@ -225,4 +233,7 @@ Func contour_fn(
Param<uint8_t> threshold, Param<uint8_t> size
);

// Builds the pipeline computing 20 * log10(RMS) over `length` 16-bit PCM
// samples read from `input`, after scaling the samples by 1 / 32768.
Func pcm16_decibel_fn(
Func input, Param<int32_t> length
);
#endif // BLURRY_H_
32 changes: 32 additions & 0 deletions blurry_benchmark.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,16 @@ int jit_benchmark_bounds(Func fn, int32_t width, int32_t height, std::string nam
return 0;
}

// JIT-compiles fn for the target taken from the environment, times realizing
// it over `length` elements and prints the measurement, scaled by 1e3 and
// labelled as milliseconds, under the benchmark `name`. Always returns 0.
int jit_benchmark_pcm16_bounds(Func fn, int32_t length, std::string name) {
  fn.compile_jit(get_jit_target_from_environment());

  const auto realize_once = [&]() {
    fn.realize({length});
  };
  const double elapsed = benchmark(100, 10, realize_once);
  const double elapsed_ms = elapsed * 1e3;

  printf("BenchmarkJIT/%-30s: %-3.5fms\n", name.c_str(), elapsed_ms);
  return 0;
}

// Benchmarks fn over the full extent of buf_src by forwarding the buffer's
// width and height to jit_benchmark_bounds.
int jit_benchmark(Func fn, Buffer<uint8_t> buf_src, std::string name) {
  auto *image = buf_src.get();
  const int32_t image_width = image->width();
  const int32_t image_height = image->height();
  return jit_benchmark_bounds(fn, image_width, image_height, name);
}
Expand Down Expand Up @@ -640,6 +650,27 @@ int benchmark_prepared_match_template_zncc(
), buf_src, "prepared_match_template_zncc");
}

// Benchmarks pcm16_decibel_fn on the first 1024 samples of
// ./testdata/pcm16_1.data.
// Returns 1 when the file cannot be opened, the sample buffer cannot be
// allocated or reading the file fails; otherwise returns the result of
// jit_benchmark_pcm16_bounds.
int benchmark_pcm16_decibel(){
  FILE *const f = fopen("./testdata/pcm16_1.data", "rb");
  if(f == nullptr) {
    return 1;
  }
  const int32_t length = 1024;
  int16_t *data = static_cast<int16_t *>(calloc(static_cast<size_t>(length), sizeof(int16_t)));
  if(data == nullptr) {
    fclose(f);
    return 1;
  }

  // A short read without an I/O error is tolerated: calloc zero-filled the
  // buffer, so the samples that are missing simply stay at zero.
  const size_t samples_read = fread(data, sizeof(int16_t), static_cast<size_t>(length), f);
  const bool read_failed = (samples_read < static_cast<size_t>(length)) && (ferror(f) != 0);
  fclose(f);
  if(read_failed) {
    free(data);
    return 1;
  }

  int result = 0;
  {
    // Scope the Halide objects so that nothing refers to `data` once it is freed.
    Buffer<int16_t> buf_src = Buffer<int16_t>::make_interleaved(data, length, 0, 1);
    // Reshape the wrapper into a dense one-dimensional view over `length` samples.
    buf_src.raw_buffer()->dimensions = 1;
    buf_src.raw_buffer()->dim[0].extent = length;
    buf_src.raw_buffer()->dim[0].stride = 1;
    Param<int32_t> _length{"length", length};

    result = jit_benchmark_pcm16_bounds(pcm16_decibel_fn(
      wrapFunc_x(buf_src, "buf_src"), _length
    ), length, "pcm16_decibel");
  }

  // The original never released this buffer, which was allocated here with calloc.
  free(data);
  return result;
}

int benchmark(char **argv) {
printf("benchmark...\n");
Buffer<uint8_t> buf_src = load_and_convert_image(argv[2]);
Expand Down Expand Up @@ -702,6 +733,7 @@ int benchmark(char **argv) {
benchmark_prepared_match_template_ncc(buf_src, width, height, buf_tpl, tpl_width, tpl_height);
benchmark_match_template_zncc(buf_src, width, height, buf_tpl, tpl_width, tpl_height);
benchmark_prepared_match_template_zncc(buf_src, width, height, buf_tpl, tpl_width, tpl_height);
benchmark_pcm16_decibel();
return 0;
}

Expand Down
27 changes: 27 additions & 0 deletions blurry_gen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,11 @@ void init_input_yuv_444(ImageParam in_y, ImageParam in_u, ImageParam in_v, Param
in_v.dim(1).set_stride(width);
}

// Constrains the strides of an input array of `size` elements:
// dimension 0 is dense (stride 1) and, when the parameter has a second
// dimension, that dimension advances by `size` elements.
void init_input_array(ImageParam in, Param<int32_t> size) {
  in.dim(0).set_stride(1);
  // A one-dimensional parameter has no dim(1), and requesting a dimension
  // that does not exist is an error in Halide, so only constrain it if present.
  if (in.dimensions() > 1) {
    in.dim(1).set_stride(size);
  }
}

void init_input_array(ImageParam in, Param<int32_t> width, Param<int32_t> height) {
in.dim(0).set_stride(1);
in.dim(1).set_stride(width);
Expand All @@ -155,6 +160,11 @@ void init_output_yuv_444(OutputImageParam out, Param<int32_t> width, Param<int32
out.dim(1).set_stride(width);
}

// Constrains the strides of an output array of `size` elements:
// dimension 0 is dense (stride 1) and, when the parameter has a second
// dimension, that dimension advances by `size` elements.
void init_output_array(OutputImageParam out, Param<int32_t> size) {
  out.dim(0).set_stride(1);
  // A one-dimensional parameter has no dim(1), and requesting a dimension
  // that does not exist is an error in Halide, so only constrain it if present.
  if (out.dimensions() > 1) {
    out.dim(1).set_stride(size);
  }
}

void init_output_array(OutputImageParam out, Param<int32_t> width, Param<int32_t> height) {
out.dim(0).set_stride(1);
out.dim(1).set_stride(width);
Expand Down Expand Up @@ -1665,6 +1675,22 @@ void generate_contour(std::vector<Target::Feature> features) {
// }}} contour
//

//
// {{{ pcm16_decibel
//
// Generates the statically linked "pcm16_decibel" pipeline.
// Its arguments are a one-dimensional int16_t image parameter "src" and an
// int32_t parameter "length" that is created with the value 1024.
void generate_pcm16_decibel(std::vector<Target::Feature> features) {
  Param<int32_t> length{"length", 1024};
  ImageParam src(type_of<int16_t>(), 1, "src");

  Func decibel = pcm16_decibel_fn(src.in(), length);
  generate_static_link(features, decibel, { src, length }, "pcm16_decibel");
}
//
// }}} pcm16_decibel
//

int generate(char **argv){
printf("generate...\n");

Expand Down Expand Up @@ -1735,6 +1761,7 @@ int generate(char **argv){
generate_prepare_zncc_template(features);
generate_prepared_match_template_zncc(features);
generate_contour(features);
generate_pcm16_decibel(features);

return 0;
}
Expand Down
Loading

0 comments on commit f29497f

Please sign in to comment.