/* Copyright © 2017-2023 ABBYY

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
@@ -23,6 +24,7 @@ namespace NeoML {
23
24
// Interface for setting input to a neural network
24
25
class IDistributedDataset {
25
26
public:
27
+ virtual ~IDistributedDataset () {}
26
28
// This method must set batches for all of the source layers in CDnn
27
29
// Returns the current batch size (or 0, if there is no data for this thread on this run)
28
30
// This batch size affects weights balance between different threads
@@ -54,47 +56,51 @@ class NEOML_API CDistributedTraining {
54
56
CDistributedTraining ( CArchive& archive, const CArray<int >& cudaDevs,
55
57
TDistributedInitializer initializer = TDistributedInitializer::Xavier, int seed = 42 );
56
58
57
- ~CDistributedTraining ();
59
+ virtual ~CDistributedTraining ();
58
60
59
61
// Gets the number of models in disitrbuted traning
60
62
int GetModelCount () const { return cnns.Size (); }
61
63
// Sets the solver for all of the models
62
- void SetSolver ( CArchive& archive );
64
+ void SetSolver ( CArchive& );
63
65
// Sets the learning rate for all of the models
64
66
void SetLearningRate ( float rate );
65
67
// Returns the current learning rate
66
68
float GetLearningRate () const ;
69
+
67
70
// Runs the networks without backward and training
68
- void RunOnce ( IDistributedDataset& data );
71
+ void RunOnce ( IDistributedDataset& );
69
72
// Runs the networks and performs a backward pass
70
- void RunAndBackwardOnce ( IDistributedDataset& data );
73
+ void RunAndBackwardOnce ( IDistributedDataset& );
71
74
// Runs the networks, performs a backward pass and updates the trainable weights of all models
72
- void RunAndLearnOnce ( IDistributedDataset& data );
75
+ void RunAndLearnOnce ( IDistributedDataset& );
73
76
// Updates the trainable weights of all models (after RunAndBackwardOnce)
74
77
void Train ();
78
+
75
79
// Returns last loss of `layerName` for all models
76
80
// `layerName` should correspond to CLossLayer, CCtcLossLayer or CCrfLossLayer
77
- void GetLastLoss ( const CString& layerName, CArray<float >& losses );
81
+ void GetLastLoss ( const CString& layerName, CArray<float >& losses ) const ;
78
82
// Returns last blobs of `layerName` for all models
79
83
// `layerName` should correspond to CSinkLayer
80
- void GetLastBlob ( const CString& layerName, CObjectArray<CDnnBlob>& blobs );
84
+ void GetLastBlob ( const CString& layerName, CObjectArray<CDnnBlob>& blobs ) const ;
85
+
81
86
// Save trained net
82
- void Serialize ( CArchive& archive );
87
+ void Serialize ( CArchive& );
83
88
// Save the trained net with the given `index` with its solver state (optional)
84
89
// An archive with solver state can later be passed to CDnn::SerializeCheckpoint to resume training
85
- void StoreDnn ( CArchive& archive , int index, bool storeSolver );
90
+ void StoreDnn ( CArchive&, int index, bool storeSolver );
86
91
87
92
private:
88
- const bool isCpu;
89
- IThreadPool* threadPool;
90
- CArray<IMathEngine*> mathEngines;
91
- CArray<CRandom*> rands;
92
- CArray<CDnn*> cnns;
93
- CArray<int > batchSize;
94
- bool isFirstRun = true ;
95
- CString errorMessage;
96
-
97
- void initialize ( CArchive& archive, int count, TDistributedInitializer initializer, int seed );
93
+ enum class TRunType { Invalid, RunOnce, RunBackwardOnce, Train };
94
+ class CParams ;
95
+ CParams* const params = nullptr ;
96
+ IThreadPool* const threadPool = nullptr ;
97
+ CArray<IMathEngine*> mathEngines{};
98
+ CArray<CRandom*> rands{};
99
+ CArray<CDnn*> cnns{};
100
+ CArray<int > batchSize{};
101
+
102
+ void runOnce ( IDistributedDataset*, TRunType );
103
+ void initialize ( CArchive&, int count, TDistributedInitializer initializer, int seed );
98
104
};

} // namespace NeoML