diff --git a/cpp/src/io/json/read_json.cu b/cpp/src/io/json/read_json.cu index 3ea8639641c..81ef3a51afc 100644 --- a/cpp/src/io/json/read_json.cu +++ b/cpp/src/io/json/read_json.cu @@ -210,9 +210,13 @@ table_with_metadata read_json(host_span> sources, { CUDF_FUNC_RANGE(); + // TODO remove this if-statement once legacy is removed +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" if (reader_opts.is_enabled_legacy()) { return legacy::read_json(sources, reader_opts, stream, mr); } +#pragma GCC diagnostic pop if (reader_opts.get_byte_range_offset() != 0 or reader_opts.get_byte_range_size() != 0) { CUDF_EXPECTS(reader_opts.is_enabled_lines(), diff --git a/cpp/tests/io/json_test.cpp b/cpp/tests/io/json_test.cpp index bae71d3c2a8..81cedf3d23e 100644 --- a/cpp/tests/io/json_test.cpp +++ b/cpp/tests/io/json_test.cpp @@ -169,26 +169,15 @@ struct JsonReaderTest : public cudf::test::BaseFixture {}; * @brief Enum class to be used to specify the test case of parametrized tests */ enum class json_test_t { - // Run test with the existing JSON lines reader using row-orient input data - legacy_lines_row_orient, - // Run test with the existing JSON lines reader using record-orient input data - legacy_lines_record_orient, // Run test with the nested JSON lines reader using record-orient input data json_experimental_record_orient, // Run test with the nested JSON lines reader using row-orient input data json_experimental_row_orient }; -constexpr bool is_legacy_test(json_test_t test_opt) -{ - return test_opt == json_test_t::legacy_lines_row_orient or - test_opt == json_test_t::legacy_lines_record_orient; -} - constexpr bool is_row_orient_test(json_test_t test_opt) { - return test_opt == json_test_t::legacy_lines_row_orient or - test_opt == json_test_t::json_experimental_row_orient; + return test_opt == json_test_t::json_experimental_row_orient; } /** @@ -198,17 +187,10 @@ struct JsonReaderParamTest : public cudf::test::BaseFixture, public testing::WithParamInterface {}; /** - * @brief Test fixture for parametrized JSON reader tests, testing record orient-only for legacy - * JSON lines reader and the nested reader + * @brief Test fixture for parametrized JSON reader tests with both orients */ -struct JsonReaderDualTest : public cudf::test::BaseFixture, - public testing::WithParamInterface {}; - -/** - * @brief Test fixture for parametrized JSON reader tests that only tests the new nested JSON reader - */ -struct JsonReaderNoLegacy : public cudf::test::BaseFixture, - public testing::WithParamInterface {}; +struct JsonReaderRecordTest : public cudf::test::BaseFixture, + public testing::WithParamInterface {}; /** * @brief Generates a JSON lines string that uses the record orient @@ -244,9 +226,7 @@ struct JsonFixedPointReaderTest : public JsonReaderTest {}; template struct JsonValidFixedPointReaderTest : public JsonFixedPointReaderTest { - void run_test(std::vector const& reference_strings, - numeric::scale_type scale, - bool use_legacy_parser) + void run_test(std::vector const& reference_strings, numeric::scale_type scale) { cudf::test::strings_column_wrapper const strings(reference_strings.begin(), reference_strings.end()); @@ -263,8 +243,7 @@ struct JsonValidFixedPointReaderTest : public JsonFixedPointReaderTest(), scale}}) - .lines(true) - .legacy(use_legacy_parser); + .lines(true); auto const result = cudf::io::read_json(in_opts); auto const result_view = result.tbl->view(); @@ -277,8 +256,8 @@ struct JsonValidFixedPointReaderTest : public JsonFixedPointReaderTest const& reference_strings, numeric::scale_type scale) { // Test both parsers - run_test(reference_strings, scale, false); - run_test(reference_strings, scale, true); + run_test(reference_strings, scale); + run_test(reference_strings, scale); } }; @@ -288,22 +267,13 @@ TYPED_TEST_SUITE(JsonValidFixedPointReaderTest, cudf::test::FixedPointTypes); // Parametrize qualifying JSON tests for executing both nested reader and legacy JSON lines reader INSTANTIATE_TEST_CASE_P(JsonReaderParamTest, JsonReaderParamTest, - ::testing::Values(json_test_t::legacy_lines_row_orient, - json_test_t::legacy_lines_record_orient, - json_test_t::json_experimental_record_orient, + ::testing::Values(json_test_t::json_experimental_record_orient, json_test_t::json_experimental_row_orient)); // Parametrize qualifying JSON tests for executing both nested reader and legacy JSON lines reader -INSTANTIATE_TEST_CASE_P(JsonReaderDualTest, - JsonReaderDualTest, - ::testing::Values(json_test_t::legacy_lines_record_orient, - json_test_t::json_experimental_record_orient)); - -// Parametrize qualifying JSON tests for executing nested reader only -INSTANTIATE_TEST_CASE_P(JsonReaderNoLegacy, - JsonReaderNoLegacy, - ::testing::Values(json_test_t::json_experimental_row_orient, - json_test_t::json_experimental_record_orient)); +INSTANTIATE_TEST_CASE_P(JsonReaderRecordTest, + JsonReaderRecordTest, + ::testing::Values(json_test_t::json_experimental_record_orient)); TEST_P(JsonReaderParamTest, BasicJsonLines) { @@ -316,8 +286,7 @@ TEST_P(JsonReaderParamTest, BasicJsonLines) cudf::io::json_reader_options in_options = cudf::io::json_reader_options::builder(cudf::io::source_info{data.data(), data.size()}) .dtypes(std::vector{dtype(), dtype()}) - .lines(true) - .legacy(is_legacy_test(test_opt)); + .lines(true); cudf::io::table_with_metadata result = cudf::io::read_json(in_options); EXPECT_EQ(result.tbl->num_columns(), 2); @@ -359,8 +328,7 @@ TEST_P(JsonReaderParamTest, FloatingPoint) cudf::io::json_reader_options in_options = cudf::io::json_reader_options::builder(cudf::io::source_info{filepath}) .dtypes({dtype()}) - .lines(true) - .legacy(is_legacy_test(test_opt)); + .lines(true); cudf::io::table_with_metadata result = cudf::io::read_json(in_options); @@ -384,8 +352,7 @@ TEST_P(JsonReaderParamTest, JsonLinesStrings) cudf::io::json_reader_options in_options = cudf::io::json_reader_options::builder(cudf::io::source_info{data.data(), data.size()}) .dtypes({{"2", dtype()}, {"0", dtype()}, {"1", dtype()}}) - .lines(true) - .legacy(is_legacy_test(test_opt)); + .lines(true); cudf::io::table_with_metadata result = cudf::io::read_json(in_options); @@ -454,8 +421,7 @@ TEST_P(JsonReaderParamTest, MultiColumn) dtype(), dtype(), dtype()}) - .lines(true) - .legacy(is_legacy_test(test_opt)); + .lines(true); cudf::io::table_with_metadata result = cudf::io::read_json(in_options); auto const view = result.tbl->view(); @@ -504,8 +470,7 @@ TEST_P(JsonReaderParamTest, Booleans) cudf::io::json_reader_options in_options = cudf::io::json_reader_options::builder(cudf::io::source_info{filepath}) .dtypes({dtype()}) - .lines(true) - .legacy(is_legacy_test(test_opt)); + .lines(true); cudf::io::table_with_metadata result = cudf::io::read_json(in_options); // Booleans are the same (integer) data type, but valued at 0 or 1 @@ -548,8 +513,7 @@ TEST_P(JsonReaderParamTest, Dates) cudf::io::json_reader_options::builder(cudf::io::source_info{filepath}) .dtypes({data_type{type_id::TIMESTAMP_MILLISECONDS}}) .lines(true) - .dayfirst(true) - .legacy(is_legacy_test(test_opt)); + .dayfirst(true); cudf::io::table_with_metadata result = cudf::io::read_json(in_options); auto const view = result.tbl->view(); @@ -604,8 +568,7 @@ TEST_P(JsonReaderParamTest, Durations) cudf::io::json_reader_options in_options = cudf::io::json_reader_options::builder(cudf::io::source_info{filepath}) .dtypes({data_type{type_id::DURATION_NANOSECONDS}}) - .lines(true) - .legacy(is_legacy_test(test_opt)); + .lines(true); cudf::io::table_with_metadata result = cudf::io::read_json(in_options); auto const view = result.tbl->view(); @@ -642,8 +605,7 @@ TEST_P(JsonReaderParamTest, JsonLinesDtypeInference) cudf::io::json_reader_options in_options = cudf::io::json_reader_options::builder(cudf::io::source_info{data.data(), data.size()}) - .lines(true) - .legacy(is_legacy_test(test_opt)); + .lines(true); cudf::io::table_with_metadata result = cudf::io::read_json(in_options); @@ -678,9 +640,7 @@ TEST_P(JsonReaderParamTest, JsonLinesFileInput) outfile.close(); cudf::io::json_reader_options in_options = - cudf::io::json_reader_options::builder(cudf::io::source_info{fname}) - .lines(true) - .legacy(is_legacy_test(test_opt)); + cudf::io::json_reader_options::builder(cudf::io::source_info{fname}).lines(true); cudf::io::table_with_metadata result = cudf::io::read_json(in_options); @@ -707,7 +667,6 @@ TEST_F(JsonReaderTest, JsonLinesByteRange) cudf::io::json_reader_options in_options = cudf::io::json_reader_options::builder(cudf::io::source_info{fname}) .lines(true) - .legacy(true) // Support in new reader coming in https://github.com/rapidsai/cudf/pull/12498 .byte_range_offset(11) .byte_range_size(20); @@ -722,18 +681,15 @@ TEST_F(JsonReaderTest, JsonLinesByteRange) CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->get_column(0), int64_wrapper{{3000, 4000, 5000}}); } -TEST_P(JsonReaderDualTest, JsonLinesObjects) +TEST_P(JsonReaderRecordTest, JsonLinesObjects) { - auto const test_opt = GetParam(); const std::string fname = temp_env->get_temp_dir() + "JsonLinesObjectsTest.json"; std::ofstream outfile(fname, std::ofstream::out); outfile << " {\"co\\\"l1\" : 1, \"col2\" : 2.0} \n"; outfile.close(); cudf::io::json_reader_options in_options = - cudf::io::json_reader_options::builder(cudf::io::source_info{fname}) - .lines(true) - .legacy(is_legacy_test(test_opt)); + cudf::io::json_reader_options::builder(cudf::io::source_info{fname}).lines(true); cudf::io::table_with_metadata result = cudf::io::read_json(in_options); @@ -741,7 +697,7 @@ TEST_P(JsonReaderDualTest, JsonLinesObjects) EXPECT_EQ(result.tbl->num_rows(), 1); EXPECT_EQ(result.tbl->get_column(0).type().id(), cudf::type_id::INT64); - EXPECT_EQ(result.metadata.schema_info[0].name, is_legacy_test(test_opt) ? "co\\\"l1" : "co\"l1"); + EXPECT_EQ(result.metadata.schema_info[0].name, "co\"l1"); EXPECT_EQ(result.tbl->get_column(1).type().id(), cudf::type_id::FLOAT64); EXPECT_EQ(result.metadata.schema_info[1].name, "col2"); @@ -749,14 +705,13 @@ TEST_P(JsonReaderDualTest, JsonLinesObjects) CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->get_column(1), float64_wrapper{{2.0}}); } -TEST_P(JsonReaderDualTest, JsonLinesObjectsStrings) +TEST_P(JsonReaderRecordTest, JsonLinesObjectsStrings) { auto const test_opt = GetParam(); auto test_json_objects = [test_opt](std::string const& data) { cudf::io::json_reader_options in_options = cudf::io::json_reader_options::builder(cudf::io::source_info{data.data(), data.size()}) - .lines(true) - .legacy(is_legacy_test(test_opt)); + .lines(true); cudf::io::table_with_metadata result = cudf::io::read_json(in_options); @@ -786,17 +741,15 @@ TEST_P(JsonReaderDualTest, JsonLinesObjectsStrings) "{\"col3\":\"bbb\", \"col1\":200, \"col2\":2.2}\n"); } -TEST_P(JsonReaderDualTest, JsonLinesObjectsMissingData) +TEST_P(JsonReaderRecordTest, JsonLinesObjectsMissingData) { - auto const test_opt = GetParam(); - // Note: columns will be ordered based on which fields appear first + // Note: columns will be ordered based on which fields appear first std::string const data = "{ \"col2\":1.1, \"col3\":\"aaa\"}\n" "{\"col1\":200, \"col3\":\"bbb\"}\n"; cudf::io::json_reader_options in_options = cudf::io::json_reader_options::builder(cudf::io::source_info{data.data(), data.size()}) - .lines(true) - .legacy(is_legacy_test(test_opt)); + .lines(true); cudf::io::table_with_metadata result = cudf::io::read_json(in_options); @@ -823,17 +776,15 @@ TEST_P(JsonReaderDualTest, JsonLinesObjectsMissingData) cudf::test::strings_column_wrapper({"aaa", "bbb"})); } -TEST_P(JsonReaderDualTest, JsonLinesObjectsOutOfOrder) +TEST_P(JsonReaderRecordTest, JsonLinesObjectsOutOfOrder) { - auto const test_opt = GetParam(); std::string const data = "{\"col1\":100, \"col2\":1.1, \"col3\":\"aaa\"}\n" "{\"col3\":\"bbb\", \"col1\":200, \"col2\":2.2}\n"; cudf::io::json_reader_options in_options = cudf::io::json_reader_options::builder(cudf::io::source_info{data.data(), data.size()}) - .lines(true) - .legacy(is_legacy_test(test_opt)); + .lines(true); cudf::io::table_with_metadata result = cudf::io::read_json(in_options); @@ -919,8 +870,7 @@ TEST_F(JsonReaderTest, ArrowFileSource) cudf::io::json_reader_options in_options = cudf::io::json_reader_options::builder(cudf::io::source_info{&arrow_source}) .dtypes({dtype()}) - .lines(true) - .legacy(true); // Support in new reader coming in https://github.com/rapidsai/cudf/pull/12498 + .lines(true); cudf::io::table_with_metadata result = cudf::io::read_json(in_options); @@ -952,8 +902,7 @@ TEST_P(JsonReaderParamTest, InvalidFloatingPoint) cudf::io::json_reader_options in_options = cudf::io::json_reader_options::builder(cudf::io::source_info{filepath}) .dtypes({dtype()}) - .lines(true) - .legacy(is_legacy_test(test_opt)); + .lines(true); cudf::io::table_with_metadata result = cudf::io::read_json(in_options); EXPECT_EQ(result.tbl->num_columns(), 1); @@ -972,8 +921,7 @@ TEST_P(JsonReaderParamTest, StringInference) cudf::io::json_reader_options in_options = cudf::io::json_reader_options::builder(cudf::io::source_info{data.c_str(), data.size()}) - .lines(true) - .legacy(is_legacy_test(test_opt)); + .lines(true); cudf::io::table_with_metadata result = cudf::io::read_json(in_options); EXPECT_EQ(result.tbl->num_columns(), 1); @@ -1054,9 +1002,7 @@ TEST_P(JsonReaderParamTest, ParseInRangeIntegers) outfile << line.str(); } cudf::io::json_reader_options in_options = - cudf::io::json_reader_options::builder(cudf::io::source_info{filepath}) - .lines(true) - .legacy(is_legacy_test(test_opt)); + cudf::io::json_reader_options::builder(cudf::io::source_info{filepath}).lines(true); cudf::io::table_with_metadata result = cudf::io::read_json(in_options); @@ -1158,9 +1104,7 @@ TEST_P(JsonReaderParamTest, ParseOutOfRangeIntegers) outfile << line.str(); } cudf::io::json_reader_options in_options = - cudf::io::json_reader_options::builder(cudf::io::source_info{filepath}) - .lines(true) - .legacy(is_legacy_test(test_opt)); + cudf::io::json_reader_options::builder(cudf::io::source_info{filepath}).lines(true); cudf::io::table_with_metadata result = cudf::io::read_json(in_options); @@ -1198,9 +1142,7 @@ TEST_P(JsonReaderParamTest, JsonLinesMultipleFileInputs) outfile2.close(); cudf::io::json_reader_options in_options = - cudf::io::json_reader_options::builder(cudf::io::source_info{{file1, file2}}) - .lines(true) - .legacy(is_legacy_test(test_opt)); + cudf::io::json_reader_options::builder(cudf::io::source_info{{file1, file2}}).lines(true); cudf::io::table_with_metadata result = cudf::io::read_json(in_options); @@ -1217,7 +1159,7 @@ TEST_P(JsonReaderParamTest, JsonLinesMultipleFileInputs) CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->get_column(1), float64_wrapper{{1.1, 2.2, 3.3, 4.4}}); } -TEST_P(JsonReaderNoLegacy, JsonLinesMultipleFileInputsNoNL) +TEST_P(JsonReaderParamTest, JsonLinesMultipleFileInputsNoNL) { auto const test_opt = GetParam(); // Strings for the two separate input files in row-orient that do not end with a newline @@ -1239,9 +1181,7 @@ TEST_P(JsonReaderNoLegacy, JsonLinesMultipleFileInputsNoNL) outfile2.close(); cudf::io::json_reader_options in_options = - cudf::io::json_reader_options::builder(cudf::io::source_info{{file1, file2}}) - .lines(true) - .legacy(is_legacy_test(test_opt)); + cudf::io::json_reader_options::builder(cudf::io::source_info{{file1, file2}}).lines(true); cudf::io::table_with_metadata result = cudf::io::read_json(in_options); @@ -1258,15 +1198,16 @@ TEST_P(JsonReaderNoLegacy, JsonLinesMultipleFileInputsNoNL) CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->get_column(1), float64_wrapper{{1.1, 2.2, 3.3, 4.4}}); } -TEST_F(JsonReaderTest, BadDtypeParams) +// This can be removed once the legacy option has been removed. +// The read_json only throws with legacy(true) +TEST_F(JsonReaderTest, DISABLED_BadDtypeParams) { std::string buffer = "[1,2,3,4]"; cudf::io::json_reader_options options_vec = cudf::io::json_reader_options::builder(cudf::io::source_info{buffer.c_str(), buffer.size()}) .lines(true) - .dtypes({dtype()}) - .legacy(true); + .dtypes({dtype()}); // should throw because there are four columns and only one dtype EXPECT_THROW(cudf::io::read_json(options_vec), cudf::logic_error); @@ -1274,7 +1215,6 @@ TEST_F(JsonReaderTest, BadDtypeParams) cudf::io::json_reader_options options_map = cudf::io::json_reader_options::builder(cudf::io::source_info{buffer.c_str(), buffer.size()}) .lines(true) - .legacy(true) .dtypes(std::map{{"0", dtype()}, {"1", dtype()}, {"2", dtype()}, @@ -1328,7 +1268,6 @@ TEST_F(JsonReaderTest, JsonExperimentalLines) auto const table = cudf::io::read_json(json_lines_options); // Read test data via legacy, non-nested JSON lines reader - json_lines_options.enable_legacy(true); auto const legacy_reader_table = cudf::io::read_json(json_lines_options); // Verify that the data read via non-nested JSON lines reader matches the data read via nested @@ -1433,8 +1372,7 @@ TEST_F(JsonReaderTest, ErrorStrings) cudf::io::json_reader_options const in_opts = cudf::io::json_reader_options::builder(cudf::io::source_info{buffer.c_str(), buffer.size()}) .dtypes({data_type{cudf::type_id::STRING}}) - .lines(true) - .legacy(false); + .lines(true); auto const result = cudf::io::read_json(in_opts); auto const result_view = result.tbl->view().column(0); @@ -1506,7 +1444,6 @@ TEST_F(JsonReaderTest, ExperimentalLinesNoOmissions) auto const table = cudf::io::read_json(json_lines_options); // Read test data via legacy, non-nested JSON lines reader - json_lines_options.enable_legacy(true); auto const legacy_reader_table = cudf::io::read_json(json_lines_options); // Verify that the data read via non-nested JSON lines reader matches the data read via @@ -1592,8 +1529,7 @@ TEST_P(JsonReaderParamTest, JsonDtypeSchema) cudf::io::json_reader_options in_options = cudf::io::json_reader_options::builder(cudf::io::source_info{data.data(), data.size()}) .dtypes(dtype_schema) - .lines(true) - .legacy(is_legacy_test(test_opt)); + .lines(true); cudf::io::table_with_metadata result = cudf::io::read_json(in_options); @@ -1789,8 +1725,7 @@ TEST_P(JsonReaderParamTest, JsonDtypeParsing) cudf::io::json_reader_options in_options = cudf::io::json_reader_options::builder(cudf::io::source_info{data.data(), data.size()}) .dtypes(dtype_schema) - .lines(true) - .legacy(is_legacy_test(test_opt)); + .lines(true); cudf::io::table_with_metadata result = cudf::io::read_json(in_options); @@ -1824,13 +1759,12 @@ TYPED_TEST(JsonValidFixedPointReaderTest, SingleColumnPositiveScale) TYPED_TEST(JsonFixedPointReaderTest, EmptyValues) { - auto const buffer = std::string{"{\"col0\":}"}; + auto const buffer = std::string{"{\"col0\":\"\"}"}; cudf::io::json_reader_options const in_opts = cudf::io::json_reader_options::builder(cudf::io::source_info{buffer.c_str(), buffer.size()}) .dtypes({data_type{type_to_id(), 0}}) - .lines(true) - .legacy(true); // Legacy behavior; not aligned with JSON specs + .lines(true); auto const result = cudf::io::read_json(in_opts); auto const result_view = result.tbl->view(); @@ -1838,7 +1772,7 @@ TYPED_TEST(JsonFixedPointReaderTest, EmptyValues) ASSERT_EQ(result_view.num_columns(), 1); EXPECT_EQ(result_view.num_rows(), 1); EXPECT_EQ(result.metadata.schema_info[0].name, "col0"); - EXPECT_EQ(result_view.column(0).null_count(), 1); + EXPECT_EQ(result_view.column(0).null_count(), 0); } TEST_F(JsonReaderTest, UnsupportedMultipleFileInputs) diff --git a/cpp/tests/streams/io/json_test.cpp b/cpp/tests/streams/io/json_test.cpp index 21da19a5a38..f98e685ed0c 100644 --- a/cpp/tests/streams/io/json_test.cpp +++ b/cpp/tests/streams/io/json_test.cpp @@ -37,8 +37,7 @@ TEST_F(JSONTest, JSONreader) cudf::io::json_reader_options::builder(cudf::io::source_info{data.data(), data.size()}) .dtypes(std::vector{cudf::data_type{cudf::type_id::INT32}, cudf::data_type{cudf::type_id::FLOAT64}}) - .lines(true) - .legacy(true); + .lines(true); cudf::io::table_with_metadata result = cudf::io::read_json(in_options, cudf::test::get_default_stream()); }