// main.js: LSTM + DDPG trading system (Bun runtime)
import { LSTM } from './models/lstm.js';
import { DDPG } from './models/ddpg.js';
import { DataProcessor } from './utils/dataProcessor.js';
const INPUT_SIZE = 20;  // Size of the OHLCV-derived input feature vector
const HIDDEN_SIZE = 64;
const ACTION_SIZE = 3;  // [position, sl, tp]
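// Architecture sketch (as wired below): the LSTM encodes a window of
// normalized market features into a HIDDEN_SIZE vector, and the DDPG
// actor maps that hidden state to three continuous actions in [-1, 1]:
// position size/direction, stop-loss distance, and take-profit distance.
// A critic scores (hiddenState, action) pairs during training.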
// Replay buffer: fixed-capacity FIFO store of (state, action, reward, nextState, done) transitions.
export class ReplayBuffer {
  constructor(maxSize = 100000) {
    this.buffer = [];
    this.maxSize = maxSize;
  }

  add(state, action, reward, nextState, done) {
    // Evict the oldest transition once capacity is reached (FIFO).
    if (this.buffer.length >= this.maxSize) {
      this.buffer.shift();
    }
    this.buffer.push({ state, action, reward, nextState, done });
  }

  sample(batchSize) {
    // Uniform random sampling, with replacement.
    const samples = [];
    for (let i = 0; i < batchSize; i++) {
      const index = Math.floor(Math.random() * this.buffer.length);
      samples.push(this.buffer[index]);
    }
    return samples;
  }
}
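
// Usage sketch (hypothetical values, for illustration only):
//   const buf = new ReplayBuffer(1000);
//   buf.add(stateVec, [0.5, -0.2, 0.3], 0.01, nextStateVec, false);
//   const batch = buf.sample(32); // array of 32 transitions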
export class TradingSystem {
  constructor() {
    this.lstm = new LSTM(INPUT_SIZE, HIDDEN_SIZE);
    this.ddpg = new DDPG(HIDDEN_SIZE, ACTION_SIZE);
    this.replayBuffer = new ReplayBuffer();
    this.batchSize = 1000;
    this.gamma = 0.99;      // Discount factor
    this.tau = 0.001;       // Soft update parameter
    this.barsPredicted = 5; // Forward horizon (bars) used for reward evaluation
    // Create target networks
    this.targetLSTM = new LSTM(INPUT_SIZE, HIDDEN_SIZE);
    this.targetDDPG = new DDPG(HIDDEN_SIZE, ACTION_SIZE);
    // Copy initial weights (tau = 1.0 performs a hard copy)
    this.updateTargetNetworks(1.0);
  }
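
  // Why target networks: the TD target y = r + gamma * Q'(s', mu'(s')) is
  // computed with slowly-moving copies (Q', mu') of the online networks.
  // Bootstrapping against a near-frozen target damps the feedback loop in
  // which the networks chase their own rapidly changing estimates.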
  async loadData() {
    const data = await Bun.file('./data/EURUSD_D1.json').json();
    this.dataProcessor = new DataProcessor(data);
    return data;
  }
  predict(state, addNoise = false) {
    console.warn('PREDICTION');
    // Reject malformed or non-numeric states early.
    if (!Array.isArray(state) || state.some(val => isNaN(val))) {
      console.log("state: ", state);
      throw new Error('Invalid state values detected');
    }
    const { hiddenState } = this.lstm.forward(state);
    let actions = this.ddpg.actorForward(hiddenState);
    if (addNoise) {
      // OU-style exploration noise (a stateless approximation of an
      // Ornstein-Uhlenbeck process; see the note after this method).
      actions = actions.map(a => {
        const theta = 0.15;
        const sigma = 0.2;
        const noise = -theta * a + sigma * (Math.random() * 2 - 1);
        return Math.max(-1, Math.min(1, a + noise)); // Clip to [-1, 1]
      });
    }
    // Map the raw actor output to a position size. The sign encodes
    // direction (buy/sell); magnitude is kept between 0.01 and 1.
    let positionSize = actions[0] * 50;
    if (positionSize < 0.01 && positionSize >= 0) {
      positionSize = 0.01;
    } else if (positionSize > -0.01 && positionSize < 0) {
      positionSize = -0.01;
    } else {
      positionSize = Math.max(-1, Math.min(1, positionSize));
      positionSize = Number(positionSize.toFixed(2)); // toFixed returns a string; keep it numeric
    }
    // Stop-loss and take-profit stay in [-1, 1]; calculateReward converts
    // them to price distances using the recent range.
    const stopLoss = Math.max(-1, Math.min(1, actions[1]));
    const takeProfit = Math.max(-1, Math.min(1, actions[2]));
    console.log("[position] [SL] [TP]", positionSize, stopLoss, takeProfit);
    return {
      positionSize,
      stopLoss,
      takeProfit,
      hiddenState
    };
  }
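
  // Note on exploration noise: a true Ornstein-Uhlenbeck process keeps
  // persistent noise state across steps,
  //   x_{t+1} = x_t + theta * (mu - x_t) + sigma * N(0, 1),
  // which yields temporally correlated exploration that suits momentum-like
  // action spaces. The loop above instead derives noise from the current
  // action with fresh uniform randomness each call, so it is stateless; a
  // faithful OU implementation would hold x_t as a class field.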
  calculateReward(action, nextPrice, currentPrice, index, currentState) {
    if (isNaN(nextPrice) || isNaN(currentPrice) || isNaN(action.positionSize)) {
      console.error('Invalid inputs in calculateReward:', { nextPrice, currentPrice, action });
      return 0;
    }
    // SELL guard: a valid short needs stopLoss >= 0, takeProfit <= 0, and stopLoss >= takeProfit
    else if (action.positionSize < 0 && (action.stopLoss < action.takeProfit || action.stopLoss < 0 || action.takeProfit > 0)) {
      console.log("INVALID TRADE");
      return 0;
    }
    // BUY guard: a valid long needs stopLoss <= 0, takeProfit >= 0, and stopLoss <= takeProfit
    else if (action.positionSize > 0 && (action.stopLoss > action.takeProfit || action.stopLoss > 0 || action.takeProfit < 0)) {
      console.log("INVALID TRADE");
      return 0;
    }
    else {
      const checkStoploss = (indx) => {
        // Candle at the end of the prediction horizon: [o, h, l, c, v]
        const newCandle = this.dataProcessor.normalized[indx + this.barsPredicted - 1];
        const range = this.dataProcessor.calculateRange(currentState);
        console.log("range ", range);
        // Convert the signed SL/TP actions into price levels scaled by the recent range.
        const sl = action.positionSize > 0
          ? currentPrice - range * Math.abs(action.stopLoss)
          : currentPrice + range * Math.abs(action.stopLoss);
        const tp = action.positionSize > 0
          ? currentPrice + range * Math.abs(action.takeProfit)
          : currentPrice - range * Math.abs(action.takeProfit);
        // Highs and lows over the bars the trade is open.
        let tradeHighs = [], tradeLows = [];
        for (let i = indx; i < indx + this.barsPredicted; i++) {
          tradeHighs.push(this.dataProcessor.normalized[i][1]);
          tradeLows.push(this.dataProcessor.normalized[i][2]);
        }
        console.log("trade bounds(H,L): \n", tradeHighs, "\n", tradeLows);
        console.log("Entry: ", currentPrice, " LastClose: ", newCandle ? newCandle[3] : 0, " SL: ", sl, " TP: ", tp);
        if (action.positionSize > 0 && newCandle) { // BUY
          if (Math.min(...tradeLows) < sl) { // Low breached the stop-loss
            console.log("SL HIT");
            const priceChange = (sl - currentPrice) / currentPrice; // negative for a long
            const scaledPriceChange = Math.tanh(priceChange * 10); // Bound to (-1, 1)
            return scaledPriceChange * action.positionSize;
          }
          if (Math.max(...tradeHighs) > tp || newCandle[3] > tp) { // TP hit
            console.log("TP HIT");
            const priceChange = (tp - currentPrice) / currentPrice; // positive for a long
            const scaledPriceChange = Math.tanh(priceChange * 10); // Bound to (-1, 1)
            return scaledPriceChange * action.positionSize;
          }
          if (Math.max(...tradeHighs) < tp && Math.min(...tradeLows) > sl) { // Floating BUY
            console.log("FLOATING BUY");
            // Mark-to-market PnL; sign convention matches the SL/TP branches
            // (price above entry is a gain for a long position).
            const priceChange = (nextPrice - currentPrice) / currentPrice;
            const scaledPriceChange = Math.tanh(priceChange * 10); // Bound to (-1, 1)
            return scaledPriceChange * action.positionSize;
          }
        } else if (action.positionSize < 0 && newCandle) { // SELL
          if (Math.max(...tradeHighs) > sl) { // High breached the stop-loss
            console.log("SL HIT");
            const priceChange = (sl - currentPrice) / currentPrice; // positive; size < 0 makes it a loss
            const scaledPriceChange = Math.tanh(priceChange * 10); // Bound to (-1, 1)
            return scaledPriceChange * action.positionSize;
          }
          if (Math.min(...tradeLows) < tp || newCandle[3] < tp) { // TP hit
            console.log("TP HIT");
            const priceChange = (currentPrice - tp) / currentPrice; // positive for a short
            const scaledPriceChange = Math.tanh(priceChange * 10); // Bound to (-1, 1)
            return scaledPriceChange * Math.abs(action.positionSize);
          }
          if (Math.max(...tradeHighs) < sl && Math.min(...tradeLows) > tp) { // Floating SELL
            console.log("FLOATING SELL");
            // Mark-to-market PnL; the negative positionSize flips the sign so
            // a falling price is rewarded.
            const priceChange = (nextPrice - currentPrice) / currentPrice;
            const scaledPriceChange = Math.tanh(priceChange * 10); // Bound to (-1, 1)
            return scaledPriceChange * action.positionSize;
          }
        } else {
          // Missing forward candle: fall back to rewarding raw price drift.
          console.log("FLOATING PROFIT");
          const priceChange = (nextPrice - currentPrice) / currentPrice;
          const scaledPriceChange = Math.tanh(priceChange * 10); // Bound to (-1, 1)
          return scaledPriceChange * action.positionSize;
        }
        // Fallback: an edge case touched no branch (e.g. a high exactly at TP).
        return 0;
      };
      const pnl = checkStoploss(index);
      // Risk management penalties (note: stopLoss/takeProfit are the signed
      // actions in [-1, 1], so these band checks act on the raw values).
      const slPenalty = action.stopLoss < 0.1 ? -0.1 :
                        action.stopLoss > 0.5 ? -0.1 : 0;
      const tpPenalty = action.takeProfit < 0.1 ? -0.1 :
                        action.takeProfit > 1.0 ? -0.1 : 0;
      const sizePenalty = Math.abs(action.positionSize) > 0.8 ? -0.05 : 0;
      // Combine rewards and clip to a bounded range
      const totalReward = pnl + slPenalty + tpPenalty + sizePenalty;
      return Math.max(-1, Math.min(2, totalReward));
    }
  }
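
  // Worked example of the reward scaling: tanh(10 * dp/p) bounds any price
  // move smoothly into (-1, 1). A 0.5% favorable move gives
  //   tanh(10 * 0.005) = tanh(0.05) ≈ 0.050,
  // while a 20% move saturates near tanh(2.0) ≈ 0.964, so outsized moves
  // cannot dominate the replay buffer.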
  async train(epochs = 100, stepsPerEpoch = 1000) {
    await this.loadData();
    this.dataProcessor.normalizeData();
    console.log(this.dataProcessor.normalized.length);
    let bestReward = -Infinity;
    let noImprovementCount = 0;
    const patience = 20; // Epochs without validation improvement before stopping
    for (let epoch = 0; epoch < epochs; epoch++) {
      let totalReward = 0;
      let validSteps = 0;
      for (let step = 0; step < stepsPerEpoch; step++) {
        try {
          // Pick a random window that leaves room for lookback and forward bars.
          const startIdx = this.getRandomIndex();
          const currentState = await this.dataProcessor.getState(startIdx);
          console.log("\nSTEP\nstartIdx ", startIdx);
          const action = this.predict(currentState, true); // Explore with noise
          const nextState = await this.dataProcessor.getState(startIdx + this.barsPredicted - 1);
          const reward = this.calculateReward(
            action,
            nextState[nextState.length - 1],
            currentState[currentState.length - 1],
            startIdx,
            currentState
          );
          console.log('step: ', step, " reward: ", reward, "\n\n");
          if (!isNaN(reward)) {
            totalReward += reward;
            validSteps++;
            this.replayBuffer.add(
              currentState,
              [action.positionSize, action.stopLoss, action.takeProfit],
              reward,
              nextState,
              false
            );
          }
          if (this.replayBuffer.buffer.length >= this.batchSize) {
            // Keep only the most recent batchSize transitions before training.
            this.replayBuffer.buffer = this.replayBuffer.buffer.slice(-this.batchSize);
            const loss = await this.trainStep();
            console.log(`Epoch ${epoch}, Step ${step}, Loss: ${loss.toFixed(4)}`);
          }
        } catch (error) {
          console.error(`Error at epoch ${epoch}, step ${step}:`, error);
          continue;
        }
      }
      // Validation phase
      const validationReward = await this.validate();
      // Early stopping check
      if (validationReward > bestReward) {
        bestReward = validationReward;
        noImprovementCount = 0;
        await this.saveModels(`best_model`);
      } else {
        noImprovementCount++;
        if (noImprovementCount >= patience) {
          console.log(`\n\n~Early stopping at epoch ${epoch}\n\n`);
          break;
        }
      }
      const averageReward = validSteps > 0 ? totalReward / validSteps : 0;
      console.log(`Epoch ${epoch + 1}/${epochs}, Total Reward: ${totalReward.toFixed(4)}, Average Reward: ${averageReward.toFixed(4)}, Valid Steps: ${validSteps}`);
      // Save model weights periodically
      if ((epoch + 1) % 10 === 0) {
        await this.saveModels(`models_epoch_${epoch + 1}`);
      }
    }
  }
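
  // Note: trimming the buffer to the last batchSize transitions before each
  // trainStep restricts sampling to recent experience, trading replay
  // diversity for recency. Retaining a much larger buffer is the more
  // conventional DDPG setup.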
  async validate(steps = 100) {
    let totalReward = 0;
    for (let i = 0; i < steps; i++) {
      const startIdx = this.getRandomIndex();
      const state = await this.dataProcessor.getState(startIdx);
      const action = this.predict(state, false); // No exploration noise during validation
      const reward = this.calculateReward(
        action,
        this.dataProcessor.normalized[startIdx + this.barsPredicted - 1][3], // close of the horizon bar
        this.dataProcessor.normalized[startIdx][3],                          // close of the entry bar
        startIdx,
        state
      );
      if (!isNaN(reward)) {
        totalReward += reward;
      }
    }
    return totalReward / steps;
  }
  getRandomIndex() {
    // Bound the random start so both the lookback window and the forward
    // prediction horizon stay inside the normalized data.
    const min = this.dataProcessor.lookback + this.barsPredicted;
    const max = this.dataProcessor.normalized.length - 2 * min - this.barsPredicted;
    return Math.floor(Math.random() * (max - min + 1)) + min;
  }
  async trainStep() {
    const batch = this.replayBuffer.sample(this.batchSize);
    let totalLoss = 0;
    for (const experience of batch) {
      const { state, action, reward, nextState, done } = experience;
      try {
        // Target networks produce the bootstrapped action and Q-value
        const nextHiddenState = this.targetLSTM.forward(nextState).hiddenState;
        const nextAction = this.targetDDPG.actorForward(nextHiddenState);
        // TD target with clipping (see the note after this method)
        const nextQ = this.targetDDPG.criticForward(nextHiddenState, nextAction);
        const targetQ = reward + (done ? 0 : this.gamma * Math.max(-10, Math.min(10, nextQ)));
        // Update networks only if targetQ is valid
        if (!isNaN(targetQ)) {
          // Current Q-value from the online networks
          const { hiddenState } = this.lstm.forward(state);
          const currentQ = this.ddpg.criticForward(hiddenState, action);
          // TD error, clipped to stabilize updates
          const tdError = Math.max(-1, Math.min(1, targetQ - currentQ));
          // Update critic, then actor with a clipped policy gradient
          this.ddpg.updateCritic(tdError);
          const actorGradient = this.ddpg.getActorGradient(hiddenState);
          const clippedGradient = actorGradient.map(g => Math.max(-1, Math.min(1, g)));
          this.ddpg.updateActor(clippedGradient);
          // Backpropagate the TD error through the LSTM encoder
          const lstmGradient = this.lstm.backward(tdError);
          this.lstm.updateWeights(lstmGradient);
          totalLoss += Math.abs(tdError);
        }
      } catch (error) {
        console.error('Error in training step:', error);
        continue;
      }
    }
    // Soft update target networks toward the online networks
    this.updateTargetNetworks(this.tau);
    return totalLoss / batch.length;
  }
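
  // The update above follows the standard DDPG recipe:
  //   TD target:  y = r + gamma * Q'(s', mu'(s'))   (no bootstrap if terminal)
  //   TD error:   delta = y - Q(s, a)
  // The critic regresses toward y; the actor ascends the critic's gradient
  // with respect to the action. Both delta and the actor gradient are
  // clipped here, a common stabilization trick.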
  // Unclipped variant of the update performed inside trainStep (unused in this file).
  updateNetworks(state, action, targetQ) {
    // Get current Q-value
    const { hiddenState } = this.lstm.forward(state);
    const currentQ = this.ddpg.criticForward(hiddenState, action);
    // Calculate TD error
    const tdError = targetQ - currentQ;
    // Update critic
    this.ddpg.updateCritic(tdError);
    // Update actor using the policy gradient
    const actorGradient = this.ddpg.getActorGradient(hiddenState);
    this.ddpg.updateActor(actorGradient);
    // Update LSTM
    const lstmGradient = this.lstm.backward(tdError);
    this.lstm.updateWeights(lstmGradient);
  }
  updateTargetNetworks(tau) {
    // Polyak (soft) update in place: target <- (1 - tau) * target + tau * online.
    // With tau = 1.0 this reduces to a hard copy, which is how the constructor
    // initializes the target weights.
    Object.keys(this.targetDDPG.actorWeights).forEach(key => {
      const target = this.targetDDPG.actorWeights[key];
      const source = this.ddpg.actorWeights[key];
      for (let i = 0; i < target.length; i++) {
        for (let j = 0; j < target[i].length; j++) {
          target[i][j] = (1 - tau) * target[i][j] + tau * source[i][j];
        }
      }
    });
    Object.keys(this.targetDDPG.criticWeights).forEach(key => {
      const target = this.targetDDPG.criticWeights[key];
      const source = this.ddpg.criticWeights[key];
      for (let i = 0; i < target.length; i++) {
        for (let j = 0; j < target[i].length; j++) {
          target[i][j] = (1 - tau) * target[i][j] + tau * source[i][j];
        }
      }
    });
    // Note: targetLSTM is never blended here, so it keeps its initial weights;
    // softUpdate below could be applied to it as well.
  }
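
  // Soft updates keep the targets a slow exponential moving average of the
  // online weights: with tau = 0.001 the target mixes in 0.1% of the online
  // network per call, so it lags by roughly 1/tau ≈ 1000 updates.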
  softUpdate(target, source, tau) {
    // Generic Polyak update over an object's own enumerable properties,
    // handling matrices, vectors, and scalars.
    Object.keys(source).forEach(key => {
      if (Array.isArray(source[key])) {
        if (Array.isArray(source[key][0])) {
          // 2D arrays (weight matrices)
          target[key] = target[key].map((row, i) =>
            row.map((val, j) => (1 - tau) * val + tau * source[key][i][j])
          );
        } else {
          // 1D arrays (bias vectors)
          target[key] = target[key].map((val, i) =>
            (1 - tau) * val + tau * source[key][i]
          );
        }
      } else {
        // Scalar values
        target[key] = (1 - tau) * target[key] + tau * source[key];
      }
    });
  }
  async saveModels(filename) {
    const modelData = {
      lstm: this.lstm, // serialized as a plain object
      ddpg: this.ddpg.toJSON()
    };
    await Bun.write(`./saved_models/${filename}.json`, JSON.stringify(modelData));
  }

  async loadModels(filename) {
    const modelData = await Bun.file(`./saved_models/${filename}.json`).json();
    this.lstm = Object.assign(new LSTM(INPUT_SIZE, HIDDEN_SIZE), modelData.lstm);
    this.ddpg = DDPG.fromJSON(modelData.ddpg);
  }
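
  // Serialization note: the LSTM is persisted as a plain object and revived
  // with Object.assign onto a fresh instance (its methods come from the class
  // prototype), while the DDPG side round-trips through its own
  // toJSON/fromJSON pair.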
}
// Main execution
async function main() {
  const trader = new TradingSystem();

  // Training phase
  console.log("\nStarting training...");
  await trader.train(100, 100); // 100 epochs, 100 steps per epoch

  // Save final model
  await trader.saveModels('final_model');

  // Testing phase
  console.log("\nTesting model...");
  const data = await trader.loadData();
  console.log("data length ", data.open.length);
  await Bun.sleep(3000);

  // Example predictions over a slice of the data
  for (let i = 128; i < 256; i += 5) {
    const currentState = await trader.dataProcessor.getState(i);
    const prediction = trader.predict(currentState);
    const nextState = await trader.dataProcessor.getState(i + 1);
    console.log('Trading Decision: Position/Dir[', prediction.positionSize, '] SL[', prediction.stopLoss, '] TP[', prediction.takeProfit, ']');
    console.log('price change: ', nextState[nextState.length - 1] - currentState[currentState.length - 1], '\n');
  }
}

main().catch(console.error);
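
// To run (assuming a Bun install and that the ./data and ./saved_models
// paths referenced above exist):
//   bun main.js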