-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathfp_cpy.cuh
65 lines (56 loc) · 1.26 KB
/
fp_cpy.cuh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
// bls12_381: Arithmetic for BLS12-381
// Copyright 2022-2023 Dag Arne Osvik
// Copyright 2022-2023 Luan Cardoso dos Santos
#ifndef FP_CPY
/**
* @brief PTX macro for copying from x to z.
*
*/
#define FP_CPY(Z, X) \
/* z = x */ \
\
"\n\tmov.u64 "#Z"0, "#X"0;" \
"\n\tmov.u64 "#Z"1, "#X"1;" \
"\n\tmov.u64 "#Z"2, "#X"2;" \
"\n\tmov.u64 "#Z"3, "#X"3;" \
"\n\tmov.u64 "#Z"4, "#X"4;" \
"\n\tmov.u64 "#Z"5, "#X"5;"
__forceinline__
__device__ void fp_cpy(
uint64_t &z0,
uint64_t &z1,
uint64_t &z2,
uint64_t &z3,
uint64_t &z4,
uint64_t &z5,
uint64_t x0,
uint64_t x1,
uint64_t x2,
uint64_t x3,
uint64_t x4,
uint64_t x5) {
asm volatile (
"\n\t{"
"\n\t.reg .u64 z<6>, x<6>;"
"\n\tmov.u64 x0, %6;"
"\n\tmov.u64 x1, %7;"
"\n\tmov.u64 x2, %8;"
"\n\tmov.u64 x3, %9;"
"\n\tmov.u64 x4, %10;"
"\n\tmov.u64 x5, %11;"
FP_CPY(z, x)
"\n\tmov.u64 %0, z0;"
"\n\tmov.u64 %1, z1;"
"\n\tmov.u64 %2, z2;"
"\n\tmov.u64 %3, z3;"
"\n\tmov.u64 %4, z4;"
"\n\tmov.u64 %5, z5;"
"\n\t}"
:
"=l"(z0), "=l"(z1), "=l"(z2), "=l"(z3), "=l"(z4), "=l"(z5)
:
"l"(x0), "l"(x1), "l"(x2), "l"(x3), "l"(x4), "l"(x5)
);
}
#endif
// vim: ts=4 et sw=4 si