
Commit 382d067

[NeoML] DnnDistributed -- remove code copy-paste
Signed-off-by: Kirill Golikov <[email protected]>
1 parent b07e513 commit 382d067

File tree: 5 files changed, +202 −212 lines


NeoML/Python/src/PyDnnDistributed.cpp

9 additions & 8 deletions

@@ -1,4 +1,5 @@
 /* Copyright © 2017-2023 ABBYY
+
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
@@ -28,7 +29,7 @@ int CPyDistributedDataset::SetInputBatch( CDnn& dnn, int thread )
 	const int batchSize = py::int_( input_data[0] );
 	py::dict inputs = py::dict( input_data[1] );
 
-	for ( std::pair<py::handle, py::handle> item : inputs ){
+	for( std::pair<py::handle, py::handle> item : inputs ) {
 		auto layerName = item.first.cast<std::string>();
 		auto input = item.second.attr( "_internal" ).cast<CPyBlob>();
 		CPtr<CSourceLayer> layer = dynamic_cast<CSourceLayer*>( dnn.GetLayer( layerName.c_str() ).Ptr() );
@@ -81,7 +82,7 @@ py::list CPyDistributedTraining::GetOutput( const std::string& layer )
 	py::list output( blobs.Size() );
 
 	CPtr<CPyMathEngineOwner> owner = new CPyMathEngineOwner( &GetDefaultCpuMathEngine(), false );
-	for( int i = 0; i < blobs.Size(); i++ ){
+	for( int i = 0; i < blobs.Size(); i++ ) {
 		CPtr<CDnnBlob> blob = CDnnBlob::CreateBlob( GetDefaultCpuMathEngine(), CT_Float, blobs[i]->GetDesc() );
 		blob->CopyFrom( blobs[i] );
 		output[i] = CPyBlob( *owner, blob );
@@ -123,35 +124,35 @@ void InitializeDistributedTraining(py::module& m)
 			CArchiveFile file( path.c_str(), CArchive::load );
 			CArchive archive( &file, CArchive::load );
 			return new CPyDistributedTraining( archive, count, getInitializer( initializerName ), seed );
-		})
+		} )
 	)
 
 	.def( py::init(
 		[]( CPyDnn& dnn, int count, const std::string& initializerName, int seed ) {
 			return new CPyDistributedTraining( dnn.Dnn(), count, getInitializer( initializerName ), seed );
-		})
+		} )
 	)
 
 	.def( py::init(
 		[]( const std::string& path, py::list cudaDevs, const std::string& initializerName, int seed ) {
 			CArchiveFile file( path.c_str(), CArchive::load );
			CArchive archive( &file, CArchive::load );
 			CArray<int> devs;
-			for( int i = 0; i < cudaDevs.size(); i++ ){
+			for( int i = 0; i < cudaDevs.size(); i++ ) {
 				devs.Add( cudaDevs[i].cast<int>() );
 			}
 			return new CPyDistributedTraining( archive, devs, getInitializer( initializerName ), seed );
-		})
+		} )
 	)
 
 	.def( py::init(
 		[]( CPyDnn& dnn, py::list cudaDevs, const std::string& initializerName, int seed ) {
 			CArray<int> devs;
-			for( int i = 0; i < cudaDevs.size(); i++ ){
+			for( int i = 0; i < cudaDevs.size(); i++ ) {
 				devs.Add( cudaDevs[i].cast<int>() );
 			}
 			return new CPyDistributedTraining( dnn.Dnn(), devs, getInitializer( initializerName ), seed );
-		})
+		} )
 	)
 
 	.def( "_run", &CPyDistributedTraining::Run )
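
As the SetInputBatch hunk above shows, a dataset implementation looks up each source layer by name, sets its blob, and returns the batch size. A minimal plain-C++ sketch of the same contract, for readers who don't go through the Python wrapper (the class name, the layer name "in", and the blob array are illustrative, not part of this commit):

// Hypothetical in-memory dataset; only IDistributedDataset and the
// SetInputBatch contract come from the diff above.
class CInMemoryDataset : public IDistributedDataset {
public:
	explicit CInMemoryDataset( const CObjectArray<CDnnBlob>& blobs )
	{
		for( int i = 0; i < blobs.Size(); ++i ) {
			batches.Add( blobs[i] ); // one batch per distributed model/thread
		}
	}

	int SetInputBatch( CDnn& dnn, int thread ) override
	{
		if( thread >= batches.Size() ) {
			return 0; // no data for this thread on this run
		}
		// Same lookup pattern as CPyDistributedDataset::SetInputBatch above
		CPtr<CSourceLayer> source = dynamic_cast<CSourceLayer*>( dnn.GetLayer( "in" ).Ptr() );
		source->SetBlob( batches[thread] );
		return batches[thread]->GetBatchWidth();
	}

private:
	CObjectArray<CDnnBlob> batches;
};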

NeoML/Python/src/PyDnnDistributed.h

8 additions & 6 deletions

@@ -1,4 +1,5 @@
 /* Copyright © 2017-2023 ABBYY
+
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
@@ -20,7 +21,7 @@ limitations under the License.
 
 class CPyDistributedDataset : public IDistributedDataset {
 public:
-	CPyDistributedDataset( const py::object& data ) : getData( data ) {};
+	CPyDistributedDataset( const py::object& data ) : getData( data ) {}
 	int SetInputBatch( CDnn& dnn, int thread ) override;
 private:
 	py::object getData;
@@ -29,13 +30,14 @@ class CPyDistributedDataset : public IDistributedDataset {
 class CPyDistributedTraining : public CDistributedTraining {
 public:
 	CPyDistributedTraining( CDnn& dnn, int count, TDistributedInitializer initializer, int seed )
-		: CDistributedTraining( dnn, count, initializer, seed ) {};
+		: CDistributedTraining( dnn, count, initializer, seed ) {}
 	CPyDistributedTraining( CArchive& archive, int count, TDistributedInitializer initializer, int seed )
-		: CDistributedTraining( archive, count, initializer, seed ) {};
+		: CDistributedTraining( archive, count, initializer, seed ) {}
 	CPyDistributedTraining( CDnn& dnn, const CArray<int>& cudaDevs, TDistributedInitializer initializer, int seed )
-		: CDistributedTraining( dnn, cudaDevs, initializer, seed ) {};
+		: CDistributedTraining( dnn, cudaDevs, initializer, seed ) {}
 	CPyDistributedTraining( CArchive& archive, const CArray<int>& cudaDevs, TDistributedInitializer initializer, int seed )
-		: CDistributedTraining( archive, cudaDevs, initializer, seed ) {};
+		: CDistributedTraining( archive, cudaDevs, initializer, seed ) {}
+
 	void Run( const py::object& data );
 	void RunAndBackward( const py::object& data );
 	void Learn( const py::object& data );
@@ -46,4 +48,4 @@ class CPyDistributedTraining : public CDistributedTraining {
 	void Save( const std::string& path );
 };
 
-void InitializeDistributedTraining(py::module& m);
+void InitializeDistributedTraining( py::module& m );
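
These wrappers forward directly to the C++ CDistributedTraining API changed in the next file. For orientation, a minimal C++-side training loop over that API might look like this sketch (the network `dnn`, the blob array `batches`, the iteration count, and the loss layer name "loss" are all hypothetical):

// Sketch only: assumes a built CDnn `dnn` with a loss layer named "loss"
// and an IDistributedDataset implementation such as CInMemoryDataset above.
CDistributedTraining distributed( dnn, /*count=*/4 ); // 4 CPU models
CInMemoryDataset dataset( batches );
for( int iteration = 0; iteration < 1000; ++iteration ) {
	distributed.RunAndBackwardOnce( dataset ); // forward + backward on every model
	distributed.Train(); // update the trainable weights of all models
	CArray<float> losses;
	distributed.GetLastLoss( "loss", losses ); // one loss value per model
}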

NeoML/include/NeoML/Dnn/DnnDistributed.h

25 additions & 19 deletions

@@ -1,4 +1,5 @@
 /* Copyright © 2017-2023 ABBYY
+
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
@@ -23,6 +24,7 @@ namespace NeoML {
 // Interface for setting input to a neural network
 class IDistributedDataset {
 public:
+	virtual ~IDistributedDataset() {}
 	// This method must set batches for all of the source layers in CDnn
 	// Returns the current batch size (or 0, if there is no data for this thread on this run)
 	// This batch size affects weights balance between different threads
@@ -54,47 +56,51 @@ class NEOML_API CDistributedTraining {
 	CDistributedTraining( CArchive& archive, const CArray<int>& cudaDevs,
 		TDistributedInitializer initializer = TDistributedInitializer::Xavier, int seed = 42 );
 
-	~CDistributedTraining();
+	virtual ~CDistributedTraining();
 
 	// Gets the number of models in disitrbuted traning
 	int GetModelCount() const { return cnns.Size(); }
 	// Sets the solver for all of the models
-	void SetSolver( CArchive& archive );
+	void SetSolver( CArchive& );
 	// Sets the learning rate for all of the models
 	void SetLearningRate( float rate );
 	// Returns the current learning rate
 	float GetLearningRate() const;
+
 	// Runs the networks without backward and training
-	void RunOnce( IDistributedDataset& data );
+	void RunOnce( IDistributedDataset& );
 	// Runs the networks and performs a backward pass
-	void RunAndBackwardOnce( IDistributedDataset& data );
+	void RunAndBackwardOnce( IDistributedDataset& );
 	// Runs the networks, performs a backward pass and updates the trainable weights of all models
-	void RunAndLearnOnce( IDistributedDataset& data );
+	void RunAndLearnOnce( IDistributedDataset& );
 	// Updates the trainable weights of all models (after RunAndBackwardOnce)
 	void Train();
+
 	// Returns last loss of `layerName` for all models
 	// `layerName` should correspond to CLossLayer, CCtcLossLayer or CCrfLossLayer
-	void GetLastLoss( const CString& layerName, CArray<float>& losses );
+	void GetLastLoss( const CString& layerName, CArray<float>& losses ) const;
 	// Returns last blobs of `layerName` for all models
 	// `layerName` should correspond to CSinkLayer
-	void GetLastBlob( const CString& layerName, CObjectArray<CDnnBlob>& blobs );
+	void GetLastBlob( const CString& layerName, CObjectArray<CDnnBlob>& blobs ) const;
+
 	// Save trained net
-	void Serialize( CArchive& archive );
+	void Serialize( CArchive& );
 	// Save the trained net with the given `index` with its solver state (optional)
 	// An archive with solver state can later be passed to CDnn::SerializeCheckpoint to resume training
-	void StoreDnn( CArchive& archive, int index, bool storeSolver );
+	void StoreDnn( CArchive&, int index, bool storeSolver );
 
 private:
-	const bool isCpu;
-	IThreadPool* threadPool;
-	CArray<IMathEngine*> mathEngines;
-	CArray<CRandom*> rands;
-	CArray<CDnn*> cnns;
-	CArray<int> batchSize;
-	bool isFirstRun = true;
-	CString errorMessage;
-
-	void initialize( CArchive& archive, int count, TDistributedInitializer initializer, int seed );
+	enum class TRunType { Invalid, RunOnce, RunBackwardOnce, Train };
+	class CParams;
+	CParams* const params = nullptr;
+	IThreadPool* const threadPool = nullptr;
+	CArray<IMathEngine*> mathEngines{};
+	CArray<CRandom*> rands{};
+	CArray<CDnn*> cnns{};
+	CArray<int> batchSize{};
+
+	void runOnce( IDistributedDataset*, TRunType );
+	void initialize( CArchive&, int count, TDistributedInitializer initializer, int seed );
 };
 
 } // namespace NeoML
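
The private section is where the commit earns its title: the per-method state (isFirstRun, errorMessage) moves behind an opaque CParams, and a single dispatcher, runOnce( IDistributedDataset*, TRunType ), replaces the duplicated run loops. The public entry points presumably reduce to thin forwarders along these lines (a sketch, not the actual DnnDistributed.cpp; the exact TRunType mapping is an assumption):

// Hypothetical forwarders; only the signatures and the TRunType values
// come from the header diff above.
void CDistributedTraining::RunOnce( IDistributedDataset& data )
{
	runOnce( &data, TRunType::RunOnce );
}

void CDistributedTraining::RunAndBackwardOnce( IDistributedDataset& data )
{
	runOnce( &data, TRunType::RunBackwardOnce );
}

void CDistributedTraining::RunAndLearnOnce( IDistributedDataset& data )
{
	runOnce( &data, TRunType::Train ); // run + backward + weight update in one pass
}

void CDistributedTraining::Train()
{
	runOnce( nullptr, TRunType::Train ); // no new data: only update the weights
}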
