-
Notifications
You must be signed in to change notification settings - Fork 1
/
SolveStrategy.m
274 lines (256 loc) · 9.33 KB
/
SolveStrategy.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
global alphaPower maxB rou numOfStates superOverride lowerBoundPolicy;
more = maxB+1;
numOfStates=((((maxB*more+maxB)*more+maxB)*2+1)*2+1)*more+maxB;
disp(['numOfStates: ' num2str(numOfStates)]);
choices = 5;
adopt = 1; override = 2; match = 3; even = 4; hide = 5;
lastH = 0; lastS = 1;
P = cell(1,choices);
% Rs is the reward for selfish miner
Rs = cell(1,choices);
% Rh is the reward for honest miners
Rh = cell(1,choices);
Wrou = cell(1,choices);
for i = 1:choices
P{i} = sparse(numOfStates, numOfStates);
Rs{i} = sparse(numOfStates, numOfStates);
Rh{i} = sparse(numOfStates, numOfStates);
Wrou{i} = sparse(numOfStates, numOfStates);
end
Psb = alphaPower;
Phb = 1-alphaPower;
baseState = st2stnum(1, 0, 1, 0, lastH, 0);
baseStateS = st2stnum(0, 1, 0, 0, lastS, 0);
% define adopt
P{adopt}(:, baseState) = Phb;
P{adopt}(:, baseStateS) = Psb;
for i = 1:numOfStates
if mod(i, 2000)==0
disp(['processing state: ' num2str(i)]);
end
[Bh, Bs, Dw, luck, last, published] = stnum2st(i);
Rh{adopt}(i, baseState) = Bh;
Rh{adopt}(i, baseStateS) = Bh;
% illegal states, force adoption
% always decide whether to publish the latest selfish block at the
% beginning of the next step!
% ensure that if lucky == 1, Bh>0, Bs>=Bh and when lastS, Bs>Bh
if published > Bs-last || Dw > Bh+1 || last == lastH && Bh == 0 ...
|| last == lastS && Bs == 0 || (luck == 1 && (Bh == 0 || Bs < Bh ...
|| Bs == Bh && last == lastS))
P{override}(i, baseState) = 1;
Rh{override}(i, baseState) = 10000;
P{match}(i, baseState) = 1;
Rh{match}(i, baseState) = 10000;
P{even}(i, baseState) = 1;
Rh{even}(i, baseState) = 10000;
P{hide}(i, baseState) = 1;
Rh{hide}(i, baseState) = 10000;
continue;
end
% luckyOne: the selfish miner earns two points in weight by publishing this one
% newLuckHnext: if the next block is honest, whether luck remains 1
luckyOne = -1; newLuckHnext = 0;
if luck == 1
if last == lastH
luckyOne = Bh;
else
luckyOne = Bh+1;
newLuckHnext = 1;
end
end
% newLuckSnext: if the next block is selfish, whether it could be a lucky
% one
if Bs == Bh && Bh > 0
newLuckSnext = 1;
else
newLuckSnext = 0;
end
% calculate if the selfish miner publishes everything, the minDw
% next: the next selfish block that can earn weight
next = published+1;
if next < Bh && last == lastH
next = Bh;
elseif next < Bh+1 && last == lastS
next = Bh+1;
end
if Bs < next % nothing to publish
minDw = Dw;
else
minDw = Dw-(Bs-next+1+luck);
end
% define override
if minDw < 0 || Bs-Bh >= superOverride
publishUntil = Bs+minDw+1;
if publishUntil-Bh > superOverride
publishUntil = Bh+superOverride;
end
% impossible to override without publishing the lucky one
if publishUntil < luckyOne
publishUntil = luckyOne;
end
P{override}(i, st2stnum(0, Bs-publishUntil+1, 0, 0, lastS, 0)) = Psb;
Rs{override}(i, st2stnum(0, Bs-publishUntil+1, 0, 0, lastS, 0)) = publishUntil;
P{override}(i, st2stnum(1, Bs-publishUntil, 1, 0, lastH, 0)) = Phb;
Rs{override}(i, st2stnum(1, Bs-publishUntil, 1, 0, lastH, 0)) = publishUntil;
else
P{override}(i, baseState) = 1;
Rh{override}(i, baseState) = 10000;
end
if minDw <= 0
matchPublishUntil = Bs+minDw;
if matchPublishUntil == Bh-1 && luckyOne == Bh+1
matchPublishUntil = Bh;
elseif luck == 1 && matchPublishUntil == luckyOne-1
matchPublishUntil = -1;
end
else
matchPublishUntil = -1;
end
% define match: make two chains the same weight or at least try
if matchPublishUntil ~= -1 && Bs < maxB && Bh < maxB
% honestLuck: if the Bh-th selfish block is published, the new honest
% block would earn two points
if matchPublishUntil >= Bh && Bh > 0
honestLuck = 2;
else
honestLuck = 1;
end
% my block on my branch
if luckyOne == Bh+1 && matchPublishUntil == Bh % preserve the secret luckyOne
P{match}(i, st2stnum(Bh, Bs+1, 0, 1, lastS, matchPublishUntil)) = Psb;
else
P{match}(i, st2stnum(Bh, Bs+1, 0, newLuckSnext, lastS, matchPublishUntil)) = Psb;
end
% honest block on honest branch
P{match}(i, st2stnum(Bh+1, Bs, honestLuck, newLuckHnext, lastH, matchPublishUntil)) = Phb*0.5;
% honest block on my branch
if Bs > matchPublishUntil
P{match}(i, st2stnum(1, Bs-matchPublishUntil, honestLuck, 0, lastH, 0)) = ...
P{match}(i, st2stnum(1, Bs-matchPublishUntil, honestLuck, 0, lastH, 0)) + Phb*0.5;
Rs{match}(i, st2stnum(1, Bs-matchPublishUntil, honestLuck, 0, lastH, 0)) = matchPublishUntil;
else % the next selfish block can still be lucky, no point to introduce "honestLuck"
P{match}(i, st2stnum(1, 0, 1, 0, lastH, 0)) = ...
P{match}(i, st2stnum(1, 0, 1, 0, lastH, 0)) + Phb*0.5;
Rs{match}(i, st2stnum(1, 0, 1, 0, lastH, 0)) = matchPublishUntil;
end
elseif matchPublishUntil == -1 && Bs < maxB && Bh < maxB % impossible to maintain same weight, publish everything
P{match}(i, st2stnum(Bh, Bs+1, minDw, newLuckSnext, lastS, Bs)) = Psb;
P{match}(i, st2stnum(Bh+1, Bs, minDw+honestLuck, 0, lastH, Bs)) = Phb;
else
P{match}(i, baseState) = 1;
Rh{match}(i, baseState) = 10000;
end
evenPublishUntil = -1;
if Bs >= Bh && Dw > 0 % enough blocks to publish, possible to have honest miners work on own chain
if published >= Bh % no more blocks need to be published
evenPublishUntil = published;
newDw = Dw;
elseif last == lastH
evenPublishUntil = Bh;
newDw = Dw-1;
if luckyOne == Bh
newDw = newDw -1;
end
else % if last == lastS, publishing till Bh won't get the selfish miner any reward
evenPublishUntil = Bh;
newDw = Dw;
end
end
% define even: publish until at least Bh but make sure Dw>0
if evenPublishUntil ~= -1 && Bs < maxB
if Bh > 0
honestLuck = 2;
else
honestLuck = 1;
end
if luckyOne == Bh+1 % preserve the secret luckyOne
P{even}(i, st2stnum(Bh, Bs+1, newDw, 1, lastS, evenPublishUntil)) = Psb;
else
P{even}(i, st2stnum(Bh, Bs+1, newDw, newLuckSnext, lastS, evenPublishUntil)) = Psb;
end
P{even}(i, st2stnum(Bh+1, Bs, newDw+honestLuck, newLuckHnext, lastH, evenPublishUntil)) = Phb;
else
P{even}(i, baseState) = 1;
Rh{even}(i, baseState) = 10000;
end
% define hide: publish until Bh-1
if published < Bh && Bh >= 1 && Bs < maxB && Bh < maxB
if Bs >= Bh-1
hidePublishUntil = Bh-1;
else
hidePublishUntil = Bs;
end
if luckyOne == Bh+1 % preserve the secret luckyOne
P{hide}(i, st2stnum(Bh, Bs+1, Dw, 1, lastS, hidePublishUntil)) = Psb;
else
P{hide}(i, st2stnum(Bh, Bs+1, Dw, newLuckSnext, lastS, hidePublishUntil)) = Psb;
end
P{hide}(i, st2stnum(Bh+1, Bs, Dw+1, newLuckHnext, lastH, hidePublishUntil)) = Phb;
else
P{hide}(i, baseState) = 1;
Rh{hide}(i, baseState) = 10000;
end
end
% for i=1:5
% sumP2 = sum(P{i},2);
% for j=1:numOfStates
% if sumP2(j) ~= 1
% disp([num2str(i) ' ' num2str(j) ' sum ' num2str(sumP2(j))]);
% end
% end
% end
disp(mdp_check(P, Rs));
epsilon = 0.0001;
lowRou = 0;
highRou = 1;
while(highRou - lowRou > epsilon/8)
rou = (highRou + lowRou) / 2;
for i = 1:choices
Wrou{i} = (1-rou).*Rs{i} - rou.*Rh{i};
end
[lowerBoundPolicy reward cpuTime] = mdp_relative_value_iteration(P, Wrou, epsilon/8);
if(reward > 0)
lowRou = rou;
else
highRou = rou;
end
end
format long
disp(['alpha: ' num2str(alphaPower) ' lowerBoundReward: ']);
disp(rou);
% lowRou = rou;
% highRou = min(rou + 0.1, 1);
% while(highRou - lowRou > epsilon/8)
% rou = (highRou + lowRou) / 2;
% for i=1:numOfStates
% [Bh, Bs, Dw, luck, last, published] = stnum2st(i);
% if Bs == maxB
% mid1 = (1-rou)*alphaPower*(1-alphaPower)/(1-2*alphaPower)^2+0.5*((Bs-Bh)/(1-2*alphaPower)+Bs+Bh);
% Rs{adopt}(i, baseState) = mid1;
% Rs{adopt}(i, baseStateS) = mid1;
% Rh{adopt}(i, baseState) = 0;
% Rh{adopt}(i, baseStateS) = 0;
% elseif Bh == maxB
% mid1=alphaPower*(1-alphaPower)/((1-2*alphaPower)^2);
% mid2=(alphaPower/(1-alphaPower))^(Bh-Bs);
% mid3=(1-mid2)*(0-rou)*Bh+mid2*(1-rou)*(mid1+(Bh-Bs)/(1-2*alphaPower));
% Rs{adopt}(i, baseState) = mid3;
% Rs{adopt}(i, baseStateS) = mid3;
% Rh{adopt}(i, baseState) = 0;
% Rh{adopt}(i, baseStateS) = 0;
% end
% end
% for i = 1:choices
% Wrou{i} = (1-rou).*Rs{i} - rou.*Rh{i};
% end
% rouPrime = max(lowRou-epsilon/4, 0);
% [upperBoundPolicy reward cpuTime] = mdp_relative_value_iteration(P, Wrou, epsilon/8);
% if(reward > 0)
% lowRou = rou;
% else
% highRou = rou;
% end
% end
% disp('upperBoundReward: ');
% disp(rou);