@@ -123,6 +123,27 @@ typedef enum _ga_order {
123
123
GA_F_ORDER = 1
124
124
} ga_order ;
125
125
126
+ /**
127
+ * Supported array reduction operations.
128
+ */
129
+
130
+ typedef enum _ga_reduce_op {
130
+ GA_REDUCE_SUM , /* sum (+) */
131
+ GA_REDUCE_PROD , /* product (*) */
132
+ GA_REDUCE_PRODNZ , /* non-zero product (* != 0) */
133
+ GA_REDUCE_MIN , /* minimum: min() */
134
+ GA_REDUCE_MAX , /* maximum: max() */
135
+ GA_REDUCE_ARGMIN , /* argument of the minimum: argmin() */
136
+ GA_REDUCE_ARGMAX , /* argument of the maximum: argmax() */
137
+ GA_REDUCE_MINANDARGMIN , /* both min() and argmin() */
138
+ GA_REDUCE_MAXANDARGMAX , /* both max() and argmax() */
139
+ GA_REDUCE_AND , /* and (&) */
140
+ GA_REDUCE_OR , /* or (|) */
141
+ GA_REDUCE_XOR , /* xor (^) */
142
+ GA_REDUCE_ALL , /* all (&&): all() */
143
+ GA_REDUCE_ANY , /* any (||): any() */
144
+ } ga_reduce_op ;
146
+
126
147
/**
127
148
* Checks if all the specified flags are set.
128
149
*
@@ -614,26 +635,31 @@ GPUARRAY_PUBLIC void GpuArray_fprintf(FILE *fd, const GpuArray *a);
614
635
615
636
GPUARRAY_PUBLIC int GpuArray_fdump (FILE * fd , const GpuArray * a );
616
637
638
+
617
639
/**
618
- * @brief Computes simultaneously the maxima and the arguments of maxima over
619
- * specified axes of the tensor.
640
+ * @brief Compute a reduction -- sum (+), product (*), non-zero product (* != 0),
641
+ * min, max, argmin, argmax, min-and-argmin, max-and-argmax, and (&),
642
+ * or (|), xor (^), all (&&) or any (||) -- over a list of axes.
620
643
*
621
- * Returns two tensors of identical shape. Both tensors' axes are a subset of
622
- * the axes of the original tensor. The axes to be reduced are specified by
623
- * the caller, and the maxima and arguments of maxima are computed over them.
644
+ * Produces one destination tensor (two in the case of min-and-argmin and
645
+ * max-and-argmax). The axes of the destination tensor(s) are a strict subset of
646
+ * the axes of the source tensor. The axes to be reduced are specified by the
647
+ * caller; the reduction is performed over these axes, which are then removed
648
+ * in the destination.
624
649
*
625
- * @param [out] dstMax The resulting tensor of maxima
626
- * @param [out] dstArgmax the resulting tensor of arguments at maxima
650
+ * @param [out] dst The destination tensor. Has the same type as the source.
651
+ * @param [out] dstArg For argument of minima/maxima operations. Has type int64.
627
652
* @param [in] src The source tensor.
628
653
* @param [in] reduxLen The number of axes reduced. Must be >= 1 and
629
654
* <= src->nd.
630
655
* @param [in] reduxList A list of integers of length reduxLen, indicating
631
656
* the axes to be reduced. The order of the axes
632
- * matters for dstArgmax index calculations. All
633
- * entries in the list must be unique, >= 0 and
634
- * < src->nd.
657
+ * matters for dstArg index calculations (GpuArray_argmin,
658
+ * GpuArray_argmax, GpuArray_minandargmin,
659
+ * GpuArray_maxandargmax). All entries in the list must be
660
+ * unique, >= 0 and < src->nd.
635
661
*
636
- * For example, if a 5D-tensor is reduced with an axis
662
+ * For example, if a 5D-tensor is max-reduced with an axis
637
663
* list of [3,4,1], then reduxLen shall be 3, and the
638
664
* index calculation in every point shall take the form
639
665
*
@@ -647,11 +673,74 @@ GPUARRAY_PUBLIC int GpuArray_fdump(FILE *fd, const GpuArray *a);
647
673
* code otherwise.
648
674
*/
649
675
650
- GPUARRAY_PUBLIC int GpuArray_maxandargmax (GpuArray * dstMax ,
651
- GpuArray * dstArgmax ,
676
+ GPUARRAY_PUBLIC int GpuArray_sum (GpuArray * dst ,
652
677
const GpuArray * src ,
653
678
unsigned reduxLen ,
654
679
const unsigned * reduxList );
680
+ GPUARRAY_PUBLIC int GpuArray_prod (GpuArray * dst ,
681
+ const GpuArray * src ,
682
+ unsigned reduxLen ,
683
+ const unsigned * reduxList );
684
+ GPUARRAY_PUBLIC int GpuArray_prodnz (GpuArray * dst ,
685
+ const GpuArray * src ,
686
+ unsigned reduxLen ,
687
+ const unsigned * reduxList );
688
+ GPUARRAY_PUBLIC int GpuArray_min (GpuArray * dst ,
689
+ const GpuArray * src ,
690
+ unsigned reduxLen ,
691
+ const unsigned * reduxList );
692
+ GPUARRAY_PUBLIC int GpuArray_max (GpuArray * dst ,
693
+ const GpuArray * src ,
694
+ unsigned reduxLen ,
695
+ const unsigned * reduxList );
696
+ GPUARRAY_PUBLIC int GpuArray_argmin (GpuArray * dstArg ,
697
+ const GpuArray * src ,
698
+ unsigned reduxLen ,
699
+ const unsigned * reduxList );
700
+ GPUARRAY_PUBLIC int GpuArray_argmax (GpuArray * dstArg ,
701
+ const GpuArray * src ,
702
+ unsigned reduxLen ,
703
+ const unsigned * reduxList );
704
+ GPUARRAY_PUBLIC int GpuArray_minandargmin (GpuArray * dst ,
705
+ GpuArray * dstArg ,
706
+ const GpuArray * src ,
707
+ unsigned reduxLen ,
708
+ const unsigned * reduxList );
709
+ GPUARRAY_PUBLIC int GpuArray_maxandargmax (GpuArray * dst ,
710
+ GpuArray * dstArg ,
711
+ const GpuArray * src ,
712
+ unsigned reduxLen ,
713
+ const unsigned * reduxList );
714
+ GPUARRAY_PUBLIC int GpuArray_and (GpuArray * dst ,
715
+ const GpuArray * src ,
716
+ unsigned reduxLen ,
717
+ const unsigned * reduxList );
718
+ GPUARRAY_PUBLIC int GpuArray_or (GpuArray * dst ,
719
+ const GpuArray * src ,
720
+ unsigned reduxLen ,
721
+ const unsigned * reduxList );
722
+ GPUARRAY_PUBLIC int GpuArray_xor (GpuArray * dst ,
723
+ const GpuArray * src ,
724
+ unsigned reduxLen ,
725
+ const unsigned * reduxList );
726
+ GPUARRAY_PUBLIC int GpuArray_all (GpuArray * dst ,
727
+ const GpuArray * src ,
728
+ unsigned reduxLen ,
729
+ const unsigned * reduxList );
730
+ GPUARRAY_PUBLIC int GpuArray_any (GpuArray * dst ,
731
+ const GpuArray * src ,
732
+ unsigned reduxLen ,
733
+ const unsigned * reduxList );
734
+ GPUARRAY_PUBLIC int GpuArray_reduction (ga_reduce_op op ,
735
+ GpuArray * dst ,
736
+ GpuArray * dstArg ,
737
+ const GpuArray * src ,
738
+ unsigned reduxLen ,
739
+ const unsigned * reduxList );
740
+
741
+
742
+
743
+
655
744
656
745
#ifdef __cplusplus
657
746
}
0 commit comments