Skip to content

Commit

Permalink
[AVRO-4086][C++] Fix missing data file reader close handle on windows (
Browse files Browse the repository at this point in the history
…#3230)

* AVRO-4086: Fix missing data file reader close handle on Windows

* AVRO-4085: Fix file size extraction error on the Windows platform (#3229)

* Use Ubuntu 24.04 for the Interop tests to be able to use newer Python

Python 3.12 needs `--break-system-packages`

```
Run python3 -m pip install --break-system-packages --upgrade pip setuptools tox
  python3 -m pip install --break-system-packages --upgrade pip setuptools tox
  python3 -m pip install --break-system-packages python-snappy zstandard
  shell: /usr/bin/bash -e {0}

Usage:
  /usr/bin/python3 -m pip install [options] <requirement specifier> [package-index-options] ...
  /usr/bin/python3 -m pip install [options] -r <requirements file> [package-index-options] ...
  /usr/bin/python3 -m pip install [options] [-e] <vcs project url> ...
  /usr/bin/python3 -m pip install [options] [-e] <local project path> ...
  /usr/bin/python3 -m pip install [options] <archive url/path> ...
```

Signed-off-by: Martin Tzvetanov Grigorov <[email protected]>

* AVRO-4086: Fix missing data file reader close handle on Windows

* AVRO-4086: Simplify closing stream reader

* Fix the name of the CI runner

---------

Signed-off-by: Martin Tzvetanov Grigorov <[email protected]>
Co-authored-by: Martin Tzvetanov Grigorov <[email protected]>
Co-authored-by: Martin Grigorov <[email protected]>
  • Loading branch information
3 people authored Dec 24, 2024
1 parent 30b39e9 commit 3621ef2
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 1 deletion.
6 changes: 6 additions & 0 deletions lang/c++/impl/DataFile.cc
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ boost::iostreams::zlib_params get_zlib_params() {
ret.noheader = true;
return ret;
}

} // namespace

DataFileWriterBase::DataFileWriterBase(const char *filename, const ValidSchema &schema, size_t syncInterval,
Expand Down Expand Up @@ -442,6 +443,11 @@ void DataFileReaderBase::readDataBlock() {
}

/**
 * Closes this reader.
 *
 * Drops the owned input stream and puts the reader's bookkeeping into an
 * "exhausted" state: the end-of-file flag is raised and the pending object
 * count and block boundaries are zeroed.
 */
void DataFileReaderBase::close() {
    // Assigning nullptr to the owning pointer destroys the InputStream, exactly
    // like reset(). NOTE(review): presumably this is what lets go of the
    // underlying file handle on Windows -- confirm against the stream
    // implementation.
    stream_ = nullptr;

    // Zero the current block window and the number of objects left in it.
    blockEnd_ = 0;
    blockStart_ = 0;
    objectCount_ = 0;

    // Flag the reader as being at end-of-file.
    eof_ = true;
}

static string toString(const vector<uint8_t> &v) {
Expand Down
2 changes: 1 addition & 1 deletion lang/c++/include/avro/DataFile.hh
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ public:
*/
class AVRO_DECL DataFileReaderBase {
const std::string filename_;
const std::unique_ptr<InputStream> stream_;
std::unique_ptr<InputStream> stream_;
const DecoderPtr decoder_;
int64_t objectCount_;
bool eof_;
Expand Down
3 changes: 3 additions & 0 deletions lang/c++/test/CommonsSchemasTests.cc
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,9 @@ void testCommonSchema(const std::filesystem::path &dir_path) {
}
BOOST_CHECK(!readerNew.read(datumNew));

readerNew.close();
readerOrig.close();

std::filesystem::remove(outputDataFile);
}

Expand Down
36 changes: 36 additions & 0 deletions lang/c++/test/DataFileTests.cc
Original file line number Diff line number Diff line change
Expand Up @@ -656,6 +656,27 @@ class DataFileTest {
BOOST_CHECK_EQUAL(root->leafAt(5)->getDoc(), "extra slashes\\\\");
}
}

void testClosedReader() {
const auto isNonSeekableInputStreamError = [](const avro::Exception &e) { return e.what() == std::string("seek not supported on non-SeekableInputStream"); };

avro::DataFileReader<ComplexDouble> df(filename, writerSchema);
df.close();
ComplexDouble unused;
BOOST_CHECK(!df.read(unused)); // closed stream can't be read
BOOST_CHECK_EQUAL(df.previousSync(), 0ul); // closed stream always returns begin position
BOOST_CHECK(df.pastSync(10l)); // closed stream always point after position // closed stream always returns begin position
BOOST_CHECK_EQUAL(df.previousSync(), 0u); // closed stream always point at position 0 // closed stream always returns begin position
BOOST_CHECK_EXCEPTION(df.sync(10l), avro::Exception, isNonSeekableInputStreamError); // closed stream always returns begin position
BOOST_CHECK_EXCEPTION(df.seek(10l), avro::Exception, isNonSeekableInputStreamError); // closed stream always returns begin position
}

void testClosedWriter() {
avro::DataFileWriter<ComplexDouble> df(filename, writerSchema);
df.close();
ComplexDouble unused;
BOOST_CHECK_NO_THROW(df.write(unused)); // write has not effect on closed stream
}
};

void addReaderTests(test_suite *ts, const shared_ptr<DataFileTest> &t) {
Expand Down Expand Up @@ -1123,6 +1144,21 @@ init_unit_test_suite(int, char *[]) {
ts->add(BOOST_CLASS_TEST_CASE(&DataFileTest::testCleanup, t));
boost::unit_test::framework::master_test_suite().add(ts);
}
{
auto *ts = BOOST_TEST_SUITE("DataFile tests: test13.df");
shared_ptr<DataFileTest> t(new DataFileTest("test13.df", ischWithDoc, ischWithDoc));
ts->add(BOOST_CLASS_TEST_CASE(&DataFileTest::testWrite, t));
ts->add(BOOST_CLASS_TEST_CASE(&DataFileTest::testClosedReader, t));
ts->add(BOOST_CLASS_TEST_CASE(&DataFileTest::testCleanup, t));
boost::unit_test::framework::master_test_suite().add(ts);
}
{
auto *ts = BOOST_TEST_SUITE("DataFile tests: test14.df");
shared_ptr<DataFileTest> t(new DataFileTest("test14.df", ischWithDoc, ischWithDoc));
ts->add(BOOST_CLASS_TEST_CASE(&DataFileTest::testClosedWriter, t));
ts->add(BOOST_CLASS_TEST_CASE(&DataFileTest::testCleanup, t));
boost::unit_test::framework::master_test_suite().add(ts);
}

boost::unit_test::framework::master_test_suite().add(BOOST_TEST_CASE(&testSkipStringNullCodec));
boost::unit_test::framework::master_test_suite().add(BOOST_TEST_CASE(&testSkipStringDeflateCodec));
Expand Down

0 comments on commit 3621ef2

Please sign in to comment.