Optimize binary get_number implementation by reading multiple bytes at once (#4391)

* multibyte binary reader

* wide_string_input_adapter fallback to get_character

Update input_adapters.hpp

* Update json.hpp

* Add from msgpack test

* Test for broken msgpack with stream, address some warnings

* Treat reading a binary number from wchar input as an error, address warnings

* Do not cast float to int, because it violates the strict aliasing rule
This commit is contained in:
Tianyi Chen
2024-11-29 00:19:58 -08:00
committed by GitHub
parent e41905fcb0
commit 935c6eeb5a
5 changed files with 258 additions and 42 deletions

View File

@@ -140,6 +140,46 @@ BENCHMARK_CAPTURE(ToCbor, signed_ints, TEST_DATA_DIRECTORY "/regression/si
BENCHMARK_CAPTURE(ToCbor, unsigned_ints, TEST_DATA_DIRECTORY "/regression/unsigned_ints.json");
BENCHMARK_CAPTURE(ToCbor, small_signed_ints, TEST_DATA_DIRECTORY "/regression/small_signed_ints.json");
//////////////////////////////////////////////////////////////////////////////
// Parse Msgpack
//////////////////////////////////////////////////////////////////////////////
// Benchmark: decode a MessagePack document through a C FILE* handle.
//
// The JSON file named by `filename` is parsed once and re-encoded as MessagePack
// into the scratch file "test.msgpack". Each iteration then times only the
// json::from_msgpack call on a freshly opened handle to that scratch file.
//
// state:    Google Benchmark state that drives the iteration loop
// filename: path of the JSON input that is converted to MessagePack
static void FromMsgpack(benchmark::State& state, const char* filename)
{
    std::ifstream f(filename);
    std::string str((std::istreambuf_iterator<char>(f)), std::istreambuf_iterator<char>());
    const auto bytes = json::to_msgpack(json::parse(str));

    // Binary mode is required: a text-mode stream may rewrite 0x0A bytes on
    // platforms that translate newlines, and the file is read back with "rb".
    std::ofstream o("test.msgpack", std::ios::binary);
    o.write(reinterpret_cast<const char*>(bytes.data()), static_cast<std::streamsize>(bytes.size()));
    o.flush();
    o.close();

    for (auto _ : state)
    {
        state.PauseTiming();
        // heap-allocated so that the result can be destroyed while timing is paused
        auto* j = new json();
        auto* file = fopen("test.msgpack", "rb");
        if (file == nullptr)
        {
            // never hand a null FILE* to the parser; report and leave the loop
            delete j;
            state.SkipWithError("could not open test.msgpack for reading");
            break;
        }
        state.ResumeTiming();

        *j = json::from_msgpack(file);

        state.PauseTiming();
        fclose(file);
        delete j;
        state.ResumeTiming();
    }

    state.SetBytesProcessed(state.iterations() * bytes.size());
}
// Register FromMsgpack once per input document. The second macro argument labels
// the run and the third is the JSON file handed to FromMsgpack as `filename`.
BENCHMARK_CAPTURE(FromMsgpack, jeopardy, TEST_DATA_DIRECTORY "/jeopardy/jeopardy.json");
BENCHMARK_CAPTURE(FromMsgpack, canada, TEST_DATA_DIRECTORY "/nativejson-benchmark/canada.json");
BENCHMARK_CAPTURE(FromMsgpack, citm_catalog, TEST_DATA_DIRECTORY "/nativejson-benchmark/citm_catalog.json");
BENCHMARK_CAPTURE(FromMsgpack, twitter, TEST_DATA_DIRECTORY "/nativejson-benchmark/twitter.json");
BENCHMARK_CAPTURE(FromMsgpack, floats, TEST_DATA_DIRECTORY "/regression/floats.json");
BENCHMARK_CAPTURE(FromMsgpack, signed_ints, TEST_DATA_DIRECTORY "/regression/signed_ints.json");
BENCHMARK_CAPTURE(FromMsgpack, unsigned_ints, TEST_DATA_DIRECTORY "/regression/unsigned_ints.json");
BENCHMARK_CAPTURE(FromMsgpack, small_signed_ints, TEST_DATA_DIRECTORY "/regression/small_signed_ints.json");
//////////////////////////////////////////////////////////////////////////////
// serialize binary CBOR
//////////////////////////////////////////////////////////////////////////////

View File

@@ -1508,6 +1508,22 @@ TEST_CASE("MessagePack")
CHECK(json::from_msgpack(std::vector<uint8_t>({0xc4}), true, false).is_discarded());
}
SECTION("unexpected end inside int with stream")
{
    // Four bytes in total: the 0xd2 header followed by only three more bytes.
    // Reading the number from a stream must fail with "unexpected end of input"
    // reported at byte 5.
    const std::string data("\xd2\x12\x34\x56", 4);
    json _;
    CHECK_THROWS_WITH_AS(_ = json::from_msgpack(std::istringstream(data, std::ios::binary)),
                         "[json.exception.parse_error.110] parse error at byte 5: syntax error while parsing MessagePack number: unexpected end of input", json::parse_error&);
}
SECTION("misuse wchar for binary")
{
    // A wide string holding the single code point U+0280. Feeding it to the
    // binary reader is meant to reach get_elements in wide_string_input_adapter
    // (for code coverage) and be rejected with parse_error.112.
    // NOTE(review): the earlier comment said this yields 0xd2 after UTF-8 conversion;
    // U+0280 is 0xCA 0x80 in UTF-8, so the leading byte looks like 0xCA - confirm.
    const std::u32string data(1, static_cast<char32_t>(0x0280));
    json _;
    CHECK_THROWS_WITH_AS(_ = json::from_msgpack(data),
                         "[json.exception.parse_error.112] parse error at byte 1: wide string type cannot be interpreted as binary data", json::parse_error&);
}
SECTION("unsupported bytes")
{
SECTION("concrete examples")