@@ -169,6 +169,7 @@ enum StorageMode {
169169 defaultMajor = columnMajor
170170};
171171
172+
172173/* *
173174 * This library uses tensors to store and manipulate data on a GPU device.
174175 * A tensor has three axes: [rows (m) x columns (n) x matrices (k)].
@@ -256,13 +257,16 @@ public:
256257 *
257258 * This static function reads data from a text or binary file, creates a DTensor and uploads the data to the device.
258259 *
260+ * The data may be stored in a text file or a binary file. Binary files must have the extension .bt.
261+ *
259262 * @param path_to_file path to file as string
260263 * @param mode storage mode (default: StorageMode::defaultMajor)
261264 * @return instance of DTensor
262265 *
263266 * @throws std::invalid_argument if the file is not found
264267 */
265- static DTensor<T> parseFromTextFile (std::string path_to_file, StorageMode mode = StorageMode::defaultMajor);
268+ static DTensor<T> parseFromFile (std::string path_to_file,
269+ StorageMode mode = StorageMode::defaultMajor);
266270
267271 /* *
268272 * Constructs a DTensor object.
@@ -504,7 +508,12 @@ public:
504508 /* *
505509 * Saves the current instance of DTensor to a text or binary file
506510 *
507- * @param pathToFile
511+ * If the file extension is .bt, the data will be stored in a binary file.
512+ * Writing to and reading from a binary file is significantly faster and
513+ * the generated binary files tend to have a smaller size (about 40% of the
514+ * size of text files for data of type double and float).
515+ *
516+ * @param pathToFile path to file
508517 */
509518 void saveToFile (std::string pathToFile);
510519
@@ -595,7 +604,7 @@ struct data_t {
595604};
596605
597606template <typename T>
598- data_t <T> vectorFromFile (std::string path_to_file) {
607+ data_t <T> vectorFromTextFile (std::string path_to_file) {
599608 data_t <T> dataStruct;
600609 std::ifstream file;
601610 file.open (path_to_file, std::ios::in);
@@ -641,24 +650,70 @@ data_t<T> vectorFromFile(std::string path_to_file) {
641650}
642651
643652template <typename T>
644- DTensor<T> DTensor<T>::parseFromTextFile(std::string path_to_file,
645- StorageMode mode) {
646- auto parsedData = vectorFromFile<T>(path_to_file);
653+ data_t <T> vectorFromBinaryFile (std::string path_to_file) {
654+ data_t <T> dataStruct;
655+ /* Read from binary file */
656+ std::ifstream inFile;
657+ inFile.open (path_to_file, std::ios::binary);
658+ inFile.read (reinterpret_cast <char *>(&(dataStruct.numRows )), sizeof (uint64_t ));
659+ inFile.read (reinterpret_cast <char *>(&(dataStruct.numCols )), sizeof (uint64_t ));
660+ inFile.read (reinterpret_cast <char *>(&(dataStruct.numMats )), sizeof (uint64_t ));
661+ uint64_t numElements = dataStruct.numRows * dataStruct.numCols * dataStruct.numMats ;
662+ std::vector<T> vecDataFromFile (numElements);
663+ for (size_t i = 0 ; i < numElements; i++) {
664+ T el;
665+ inFile.read (reinterpret_cast <char *>(&el), sizeof (T));
666+ vecDataFromFile[i] = el;
667+ }
668+ inFile.close ();
669+ dataStruct.data = vecDataFromFile;
670+ return dataStruct;
671+ }
672+
673+ template <typename T>
674+ DTensor<T> DTensor<T>::parseFromFile(std::string path_to_file,
675+ StorageMode mode) {
676+ // Figure out file extension
677+ size_t pathToFileLength = path_to_file.length () ;
678+ std::string fileNameExtension = path_to_file.substr (pathToFileLength-3 );
679+ typedef data_t <T> (*PARSER)(std::string);
680+ PARSER parser = (fileNameExtension == " .bt" ) ? vectorFromBinaryFile<T> : vectorFromTextFile<T>;
681+ auto parsedData = parser (path_to_file);
647682 DTensor<T> tensorFromData (parsedData.data , parsedData.numRows , parsedData.numCols , parsedData.numMats , mode);
648683 return tensorFromData;
649684}
650685
651686template <typename T>
652687void DTensor<T>::saveToFile(std::string pathToFile) {
653- std::ofstream file (pathToFile);
654- file << numRows () << std::endl << numCols () << std::endl << numMats () << std::endl;
655- std::vector<T> myData (numEl ()); download (myData);
656- if constexpr (std::is_floating_point<T>::value) {
657- file << std::setprecision (std::numeric_limits<T>::max_digits10);
658- }
659- for (const T& el : myData) file << el << std::endl;
688+ std::vector<T> myData (numEl ());
689+ download (myData);
690+
691+ // Figure out file extension
692+ size_t pathToFileLength = pathToFile.length () ;
693+ std::string fileNameExtension = pathToFile.substr (pathToFileLength-3 );
694+ // If the extension is .bt...
695+ if (fileNameExtension == " .bt" ) {
696+ uint64_t nr = (uint64_t ) numRows (),
697+ nc = (uint64_t ) numCols (),
698+ nm = (uint64_t ) numMats ();
699+ std::ofstream outFile;
700+ outFile.open (pathToFile, std::ios::binary);
701+ outFile.write (reinterpret_cast <const char *>(&nr), sizeof (uint64_t ));
702+ outFile.write (reinterpret_cast <const char *>(&nc), sizeof (uint64_t ));
703+ outFile.write (reinterpret_cast <const char *>(&nm), sizeof (uint64_t ));
704+ for (const T &el: myData) outFile.write (reinterpret_cast <const char *>(&el), sizeof (T));
705+ outFile.close ();
706+ } else {
707+ std::ofstream file (pathToFile);
708+ file << numRows () << std::endl << numCols () << std::endl << numMats () << std::endl;
709+ if constexpr (std::is_floating_point<T>::value) {
710+ file << std::setprecision (std::numeric_limits<T>::max_digits10);
711+ }
712+ for (const T &el: myData) file << el << std::endl;
713+ }
660714}
661715
716+
662717template <typename T>
663718void DTensor<T>::reshape(size_t newNumRows, size_t newNumCols, size_t newNumMats) {
664719 if (m_numRows == newNumRows && m_numCols == newNumCols && m_numMats == newNumMats) return ;
0 commit comments