-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathfunctionpacking.cu
160 lines (157 loc) · 4.27 KB
/
functionpacking.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
#define FALSE 0
#define TRUE 1
__global__ void rand_init(long *a,long seed)
{
int tid=threadIdx.x+blockDim.x*blockIdx.x;
//long a = 100001;
a[tid] = seed + tid;
}
// returns random integer from 1 to lim
__device__ int rand1(long *a, int lim)
{
int tid=threadIdx.x+blockDim.x*blockIdx.x;
a[tid] = (a[tid] * 125) % 2796203;
return ((a[tid] % lim) + 1);
}
// returns random integer from 1 to lim (Gerhard's generator)
__device__ int rand2(long *a, int lim)
{
int tid=threadIdx.x+blockDim.x*blockIdx.x;
a[tid] = (a[tid] * 32719 + 3) % 32749;
return ((a[tid] % lim) + 1);
}
// returns random integer from 1 to lim (Bill's generator)
__device__ int rand3(long *a, int lim)
{
int tid=threadIdx.x+blockDim.x*blockIdx.x;
a[tid] = (((a[tid] * 214013L + 2531011L) >> 16) & 32767);
return ((a[tid] % lim) + 1);
}
__host__ __device__ float lencalc(int *sx, int *sy, float *w, float *xpos, bool axis)
{
//int cost=0;
auto int matchy[N];//,matchx[N];
auto float L[N];
for(int i=0;i<N;i++) {
//matchx[sx[i]-1]=i;
matchy[sy[i]-1]=i;
L[i]=0; xpos[i]=0;
}
int b,p;
float t;
for(int i=0;i<N;i++){
if(axis) b=sx[i]-1;
else b=sx[N-1-i]-1;
p=matchy[b];
xpos[b]=L[p];
t=xpos[b]+w[b];
for(int j=p; j<N && t>L[j] ;j++) L[j]=t;
}
return L[N-1];
}
__device__ void neighbourhood(int *sx,int *sy, int *sxnew, int *synew, long *seed){
int randxi=rand1(seed,N-1),randxj=rand1(seed,N-1); //In 1D gridding, use N-1,N. for 2D gridding use N-1,N-1
int randyi=rand1(seed,N ),randyj=rand1(seed,N ); //for 1D N, 2D N-1
//swap
for(int i=0;i<N;i++){
sxnew[i]=sx[i]; synew[i]=sy[i];
}
sxnew[randxi]=sx[randxj];
sxnew[randxj]=sx[randxi];
synew[randxi]=sy[randxj];
synew[randxj]=sy[randxi];
}
__device__ void newneighbourhood(int *sx,int *sy, int *sxnew, int *synew, long *seed){
int randxi=rand1(seed,N-1),randxj=rand1(seed,N-1); //In 1D gridding, use N-1,N. for 2D gridding use N-1,N-1
int randyi=rand1(seed,N ),randyj=rand1(seed,N ); //for 1D N, 2D N-1
int choice=rand1(seed,3 );
for(int i=0;i<N;i++){
sxnew[i]=sx[i]; synew[i]=sy[i];
}
if(choice==1){
//swap sx
sxnew[randxi]=sx[randxj];
sxnew[randxj]=sx[randxi];
}
else if(choice==2){
synew[randxi]=sy[randxj];
synew[randxj]=sy[randxi];
}
else{
sxnew[randxi]=sx[randxj];
sxnew[randxj]=sx[randxi];
synew[randxi]=sy[randxj];
synew[randxj]=sy[randxi];
}
}
__global__ void annealing(float *w, float *h, int *perms , long *seed, int totalarea, float *returnArea){
int tid=threadIdx.x, uid=blockIdx.x;
int index1d=threadIdx.x+blockDim.x*blockIdx.x;
__shared__ float minarea[N];
__shared__ int minid[N];
auto int sxA[N], syA[N], sxnewA[N], synewA[N]; //Arrays
auto int *sx=sxA, *sy=syA, *sxnew=sxnewA, *synew=synewA; //pointer seperately declared to tackle lvalue error
for(int i=0;i<N;i++){
sx[i]=perms[uid*N+i];
sy[i]=perms[tid*N+i]; //use uid, if 1D grid, else 2D grid
//perms[uid*N+i]=-sy[i];
}
float t0=0.00000001, ts=0.01,T,delta; //dead space %4.868905504 in python, %8.26 in CUDA.
int accepted=0;
float Wid=0,Len=0,Area=0;
auto float xpos[N],ypos[N];
float sdWid=0,sdLen=0,sdArea=0;
float minsofar=0;
Wid=lencalc(sx,sy,w,xpos,FALSE); //X-axis
Len=lencalc(sx,sy,h,ypos,TRUE ); //Y-axis
Area=Wid*Len;
returnArea[index1d]=Area;
for(int ancnt=0;ancnt<5000;ancnt++) //20000
{
newneighbourhood(sx,sy,sxnew,synew,seed);
sdWid=lencalc(sxnew,synew,w,xpos,FALSE); //X-axis
sdLen=lencalc(sxnew,synew,h,ypos,TRUE ); //Y-axis
sdArea=sdWid*sdLen;
auto int *temp;
if(sdArea<Area){
temp=sx;
sx=sxnew;
sxnew=temp;
temp=sy;
sy=synew;
synew=temp;
Area=sdArea;
accepted=accepted+1;
minsofar=Area;
}
else{ //take risk (uses random fn)
float p= float(rand1(seed,N))/float(N);
T = 1.0/(t0+ts*accepted);
delta=sdArea-Area;
if(p<exp(-delta/T)){
temp=sx;
sx=sxnew;
sxnew=temp;
temp=sy;
sy=synew;
synew=temp;
Area=sdArea;
accepted=accepted+1;
}
}
}
minarea[threadIdx.x]=minsofar;
minid[threadIdx.x]=threadIdx.x;
for(int ruled=N/2;ruled>0;ruled/=2)
if(threadIdx.x<ruled)
if(minarea[threadIdx.x]>minarea[threadIdx.x+ruled]){
minarea[threadIdx.x]=minarea[threadIdx.x+ruled];
minid[threadIdx.x]=minid[threadIdx.x+ruled];
}
if(threadIdx.x==minid[0])
for(int i=0;i<N;i++){
perms[uid*N+i]=sx[i];
//perms[uid*N+i]=sy[i]; //use uid, if 1D grid, else tid for 2D grid
}
returnArea[index1d]=minsofar;
}