Skip to content

Commit d80fc0e

Browse files
committed
Current status of reduction generalization and small-destination
support.
1 parent d838f6a commit d80fc0e

File tree

3 files changed

+1541
-349
lines changed

3 files changed

+1541
-349
lines changed

src/gpuarray/array.h

+102-13
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,27 @@ typedef enum _ga_order {
123123
GA_F_ORDER=1
124124
} ga_order;
125125

126+
/**
127+
* Supported array reduction operations.
128+
*/
129+
130+
typedef enum _ga_reduce_op {
131+
GA_REDUCE_SUM, /* + */
132+
GA_REDUCE_PROD, /* * */
133+
GA_REDUCE_PRODNZ, /* * (!=0) */
134+
GA_REDUCE_MIN, /* min() */
135+
GA_REDUCE_MAX, /* max() */
136+
GA_REDUCE_ARGMIN, /* argmin() */
137+
GA_REDUCE_ARGMAX, /* argmax() */
138+
GA_REDUCE_MINANDARGMIN, /* min(), argmin() */
139+
GA_REDUCE_MAXANDARGMAX, /* max(), argmax() */
140+
GA_REDUCE_AND, /* & */
141+
GA_REDUCE_OR, /* | */
142+
GA_REDUCE_XOR, /* ^ */
143+
GA_REDUCE_ALL, /* &&/all() */
144+
GA_REDUCE_ANY, /* ||/any() */
145+
} ga_reduce_op;
146+
126147
/**
127148
* Checks if all the specified flags are set.
128149
*
@@ -614,26 +635,31 @@ GPUARRAY_PUBLIC void GpuArray_fprintf(FILE *fd, const GpuArray *a);
614635

615636
GPUARRAY_PUBLIC int GpuArray_fdump(FILE *fd, const GpuArray *a);
616637

638+
617639
/**
618-
* @brief Computes simultaneously the maxima and the arguments of maxima over
619-
* specified axes of the tensor.
640+
* @brief Compute a reduction over a list of axes. The supported reductions are
641+
* sum (+), product (*), non-zero product (* != 0), min, max, argmin, argmax,
642+
* min-and-argmin, max-and-argmax, bitwise and (&), bitwise or (|), bitwise xor (^), all (&&) and any (||).
620643
*
621-
* Returns two tensors of identical shape. Both tensors' axes are a subset of
622-
* the axes of the original tensor. The axes to be reduced are specified by
623-
* the caller, and the maxima and arguments of maxima are computed over them.
644+
* Produces one destination tensor (two for min-and-argmin/max-and-argmax).
645+
* The destination tensor(s)' axes are a strict subset of the axes of the
646+
* source tensor. The axes to be reduced are specified by the caller, and the
647+
* reduction is performed over these axes, which are then removed in the
648+
* destination.
624649
*
625-
* @param [out] dstMax The resulting tensor of maxima
626-
* @param [out] dstArgmax the resulting tensor of arguments at maxima
650+
* @param [out] dst The destination tensor. Has the same type as the source.
651+
* @param [out] dstArg The destination tensor of arguments (indices) of minima/maxima, for the argmin/argmax family of operations. Has type int64.
627652
* @param [in] src The source tensor.
628653
* @param [in] reduxLen The number of axes reduced. Must be >= 1 and
629654
* <= src->nd.
630655
* @param [in] reduxList A list of integers of length reduxLen, indicating
631656
* the axes to be reduced. The order of the axes
632-
* matters for dstArgmax index calculations. All
633-
* entries in the list must be unique, >= 0 and
634-
* < src->nd.
657+
* matters for dstArg index calculations (GpuArray_argmin,
658+
* GpuArray_argmax, GpuArray_minandargmin,
659+
* GpuArray_maxandargmax). All entries in the list must be
660+
* unique, >= 0 and < src->nd.
635661
*
636-
* For example, if a 5D-tensor is reduced with an axis
662+
* For example, if a 5D-tensor is max-reduced with an axis
637663
* list of [3,4,1], then reduxLen shall be 3, and the
638664
* index calculation in every point shall take the form
639665
*
@@ -647,11 +673,74 @@ GPUARRAY_PUBLIC int GpuArray_fdump(FILE *fd, const GpuArray *a);
647673
* code otherwise.
648674
*/
649675

650-
GPUARRAY_PUBLIC int GpuArray_maxandargmax(GpuArray* dstMax,
651-
GpuArray* dstArgmax,
676+
GPUARRAY_PUBLIC int GpuArray_sum (GpuArray* dst,
652677
const GpuArray* src,
653678
unsigned reduxLen,
654679
const unsigned* reduxList);
680+
GPUARRAY_PUBLIC int GpuArray_prod (GpuArray* dst,
681+
const GpuArray* src,
682+
unsigned reduxLen,
683+
const unsigned* reduxList);
684+
GPUARRAY_PUBLIC int GpuArray_prodnz (GpuArray* dst,
685+
const GpuArray* src,
686+
unsigned reduxLen,
687+
const unsigned* reduxList);
688+
GPUARRAY_PUBLIC int GpuArray_min (GpuArray* dst,
689+
const GpuArray* src,
690+
unsigned reduxLen,
691+
const unsigned* reduxList);
692+
GPUARRAY_PUBLIC int GpuArray_max (GpuArray* dst,
693+
const GpuArray* src,
694+
unsigned reduxLen,
695+
const unsigned* reduxList);
696+
GPUARRAY_PUBLIC int GpuArray_argmin (GpuArray* dstArg,
697+
const GpuArray* src,
698+
unsigned reduxLen,
699+
const unsigned* reduxList);
700+
GPUARRAY_PUBLIC int GpuArray_argmax (GpuArray* dstArg,
701+
const GpuArray* src,
702+
unsigned reduxLen,
703+
const unsigned* reduxList);
704+
GPUARRAY_PUBLIC int GpuArray_minandargmin(GpuArray* dst,
705+
GpuArray* dstArg,
706+
const GpuArray* src,
707+
unsigned reduxLen,
708+
const unsigned* reduxList);
709+
GPUARRAY_PUBLIC int GpuArray_maxandargmax(GpuArray* dst,
710+
GpuArray* dstArg,
711+
const GpuArray* src,
712+
unsigned reduxLen,
713+
const unsigned* reduxList);
714+
GPUARRAY_PUBLIC int GpuArray_and (GpuArray* dst,
715+
const GpuArray* src,
716+
unsigned reduxLen,
717+
const unsigned* reduxList);
718+
GPUARRAY_PUBLIC int GpuArray_or (GpuArray* dst,
719+
const GpuArray* src,
720+
unsigned reduxLen,
721+
const unsigned* reduxList);
722+
GPUARRAY_PUBLIC int GpuArray_xor (GpuArray* dst,
723+
const GpuArray* src,
724+
unsigned reduxLen,
725+
const unsigned* reduxList);
726+
GPUARRAY_PUBLIC int GpuArray_all (GpuArray* dst,
727+
const GpuArray* src,
728+
unsigned reduxLen,
729+
const unsigned* reduxList);
730+
GPUARRAY_PUBLIC int GpuArray_any (GpuArray* dst,
731+
const GpuArray* src,
732+
unsigned reduxLen,
733+
const unsigned* reduxList);
734+
GPUARRAY_PUBLIC int GpuArray_reduction (ga_reduce_op op,
735+
GpuArray* dst,
736+
GpuArray* dstArg,
737+
const GpuArray* src,
738+
unsigned reduxLen,
739+
const unsigned* reduxList);
740+
741+
742+
743+
655744

656745
#ifdef __cplusplus
657746
}

0 commit comments

Comments
 (0)