Horizon
binary_reader.hpp
1 #pragma once
2 
3 #include <algorithm> // generate_n
4 #include <array> // array
5 #include <cmath> // ldexp
6 #include <cstddef> // size_t
7 #include <cstdint> // uint8_t, uint16_t, uint32_t, uint64_t
8 #include <cstdio> // snprintf
9 #include <cstring> // memcpy
10 #include <iterator> // back_inserter
11 #include <limits> // numeric_limits
12 #include <string> // char_traits, string
13 #include <utility> // make_pair, move
14 
15 #include <nlohmann/detail/exceptions.hpp>
16 #include <nlohmann/detail/input/input_adapters.hpp>
17 #include <nlohmann/detail/input/json_sax.hpp>
18 #include <nlohmann/detail/input/lexer.hpp>
19 #include <nlohmann/detail/macro_scope.hpp>
20 #include <nlohmann/detail/meta/is_sax.hpp>
21 #include <nlohmann/detail/value_t.hpp>
22 
23 namespace nlohmann
24 {
25 namespace detail
26 {
27 
/// selects how the CBOR parser treats tagged items; the value is consumed by
/// the tag_handler switch in binary_reader::parse_cbor_internal
enum class cbor_tag_handler_t
{
    error,  ///< default passed by binary_reader::sax_parse
    ignore
};
34 
/*!
@brief determine the byte order of the running system

@param[in] num  probe value whose first byte in memory is inspected
                (the default of 1 makes the result a little-endian test)

@return true if and only if the lowest-addressed byte of @a num equals 1
*/
static inline bool little_endianess(int num = 1) noexcept
{
    // look at the lowest-addressed byte of the probe value
    const char* const first_byte = reinterpret_cast<const char*>(&num);
    return first_byte[0] == 1;
}
46 
47 
49 // binary reader //
51 
55 template<typename BasicJsonType, typename InputAdapterType, typename SAX = json_sax_dom_parser<BasicJsonType>>
57 {
58  using number_integer_t = typename BasicJsonType::number_integer_t;
59  using number_unsigned_t = typename BasicJsonType::number_unsigned_t;
60  using number_float_t = typename BasicJsonType::number_float_t;
61  using string_t = typename BasicJsonType::string_t;
62  using binary_t = typename BasicJsonType::binary_t;
63  using json_sax_t = SAX;
64  using char_type = typename InputAdapterType::char_type;
65  using char_int_type = typename std::char_traits<char_type>::int_type;
66 
67  public:
73  explicit binary_reader(InputAdapterType&& adapter) : ia(std::move(adapter))
74  {
76  }
77 
78  // make class move-only
79  binary_reader(const binary_reader&) = delete;
80  binary_reader(binary_reader&&) = default;
81  binary_reader& operator=(const binary_reader&) = delete;
82  binary_reader& operator=(binary_reader&&) = default;
83  ~binary_reader() = default;
84 
93  JSON_HEDLEY_NON_NULL(3)
94  bool sax_parse(const input_format_t format,
95  json_sax_t* sax_,
96  const bool strict = true,
97  const cbor_tag_handler_t tag_handler = cbor_tag_handler_t::error)
98  {
99  sax = sax_;
100  bool result = false;
101 
102  switch (format)
103  {
104  case input_format_t::bson:
105  result = parse_bson_internal();
106  break;
107 
108  case input_format_t::cbor:
109  result = parse_cbor_internal(true, tag_handler);
110  break;
111 
112  case input_format_t::msgpack:
113  result = parse_msgpack_internal();
114  break;
115 
116  case input_format_t::ubjson:
117  result = parse_ubjson_internal();
118  break;
119 
120  default: // LCOV_EXCL_LINE
121  JSON_ASSERT(false); // LCOV_EXCL_LINE
122  }
123 
124  // strict mode: next byte must be EOF
125  if (result && strict)
126  {
127  if (format == input_format_t::ubjson)
128  {
129  get_ignore_noop();
130  }
131  else
132  {
133  get();
134  }
135 
136  if (JSON_HEDLEY_UNLIKELY(current != std::char_traits<char_type>::eof()))
137  {
138  return sax->parse_error(chars_read, get_token_string(),
139  parse_error::create(110, chars_read, exception_message(format, "expected end of input; last byte: 0x" + get_token_string(), "value")));
140  }
141  }
142 
143  return result;
144  }
145 
146  private:
148  // BSON //
150 
155  bool parse_bson_internal()
156  {
157  std::int32_t document_size{};
158  get_number<std::int32_t, true>(input_format_t::bson, document_size);
159 
160  if (JSON_HEDLEY_UNLIKELY(!sax->start_object(std::size_t(-1))))
161  {
162  return false;
163  }
164 
165  if (JSON_HEDLEY_UNLIKELY(!parse_bson_element_list(/*is_array*/false)))
166  {
167  return false;
168  }
169 
170  return sax->end_object();
171  }
172 
180  bool get_bson_cstr(string_t& result)
181  {
182  auto out = std::back_inserter(result);
183  while (true)
184  {
185  get();
186  if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::bson, "cstring")))
187  {
188  return false;
189  }
190  if (current == 0x00)
191  {
192  return true;
193  }
194  *out++ = static_cast<typename string_t::value_type>(current);
195  }
196  }
197 
209  template<typename NumberType>
210  bool get_bson_string(const NumberType len, string_t& result)
211  {
212  if (JSON_HEDLEY_UNLIKELY(len < 1))
213  {
214  auto last_token = get_token_string();
215  return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::bson, "string length must be at least 1, is " + std::to_string(len), "string")));
216  }
217 
218  return get_string(input_format_t::bson, len - static_cast<NumberType>(1), result) && get() != std::char_traits<char_type>::eof();
219  }
220 
230  template<typename NumberType>
231  bool get_bson_binary(const NumberType len, binary_t& result)
232  {
233  if (JSON_HEDLEY_UNLIKELY(len < 0))
234  {
235  auto last_token = get_token_string();
236  return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::bson, "byte array length cannot be negative, is " + std::to_string(len), "binary")));
237  }
238 
239  // All BSON binary values have a subtype
240  std::uint8_t subtype{};
241  get_number<std::uint8_t>(input_format_t::bson, subtype);
242  result.set_subtype(subtype);
243 
244  return get_binary(input_format_t::bson, len, result);
245  }
246 
257  bool parse_bson_element_internal(const char_int_type element_type,
258  const std::size_t element_type_parse_position)
259  {
260  switch (element_type)
261  {
262  case 0x01: // double
263  {
264  double number{};
265  return get_number<double, true>(input_format_t::bson, number) && sax->number_float(static_cast<number_float_t>(number), "");
266  }
267 
268  case 0x02: // string
269  {
270  std::int32_t len{};
271  string_t value;
272  return get_number<std::int32_t, true>(input_format_t::bson, len) && get_bson_string(len, value) && sax->string(value);
273  }
274 
275  case 0x03: // object
276  {
277  return parse_bson_internal();
278  }
279 
280  case 0x04: // array
281  {
282  return parse_bson_array();
283  }
284 
285  case 0x05: // binary
286  {
287  std::int32_t len{};
288  binary_t value;
289  return get_number<std::int32_t, true>(input_format_t::bson, len) && get_bson_binary(len, value) && sax->binary(value);
290  }
291 
292  case 0x08: // boolean
293  {
294  return sax->boolean(get() != 0);
295  }
296 
297  case 0x0A: // null
298  {
299  return sax->null();
300  }
301 
302  case 0x10: // int32
303  {
305  return get_number<std::int32_t, true>(input_format_t::bson, value) && sax->number_integer(value);
306  }
307 
308  case 0x12: // int64
309  {
311  return get_number<std::int64_t, true>(input_format_t::bson, value) && sax->number_integer(value);
312  }
313 
314  default: // anything else not supported (yet)
315  {
316  std::array<char, 3> cr{{}};
317  (std::snprintf)(cr.data(), cr.size(), "%.2hhX", static_cast<unsigned char>(element_type));
318  return sax->parse_error(element_type_parse_position, std::string(cr.data()), parse_error::create(114, element_type_parse_position, "Unsupported BSON record type 0x" + std::string(cr.data())));
319  }
320  }
321  }
322 
335  bool parse_bson_element_list(const bool is_array)
336  {
337  string_t key;
338 
339  while (auto element_type = get())
340  {
341  if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::bson, "element list")))
342  {
343  return false;
344  }
345 
346  const std::size_t element_type_parse_position = chars_read;
347  if (JSON_HEDLEY_UNLIKELY(!get_bson_cstr(key)))
348  {
349  return false;
350  }
351 
352  if (!is_array && !sax->key(key))
353  {
354  return false;
355  }
356 
357  if (JSON_HEDLEY_UNLIKELY(!parse_bson_element_internal(element_type, element_type_parse_position)))
358  {
359  return false;
360  }
361 
362  // get_bson_cstr only appends
363  key.clear();
364  }
365 
366  return true;
367  }
368 
373  bool parse_bson_array()
374  {
375  std::int32_t document_size{};
376  get_number<std::int32_t, true>(input_format_t::bson, document_size);
377 
378  if (JSON_HEDLEY_UNLIKELY(!sax->start_array(std::size_t(-1))))
379  {
380  return false;
381  }
382 
383  if (JSON_HEDLEY_UNLIKELY(!parse_bson_element_list(/*is_array*/true)))
384  {
385  return false;
386  }
387 
388  return sax->end_array();
389  }
390 
392  // CBOR //
394 
403  bool parse_cbor_internal(const bool get_char,
404  const cbor_tag_handler_t tag_handler)
405  {
406  switch (get_char ? get() : current)
407  {
408  // EOF
409  case std::char_traits<char_type>::eof():
410  return unexpect_eof(input_format_t::cbor, "value");
411 
412  // Integer 0x00..0x17 (0..23)
413  case 0x00:
414  case 0x01:
415  case 0x02:
416  case 0x03:
417  case 0x04:
418  case 0x05:
419  case 0x06:
420  case 0x07:
421  case 0x08:
422  case 0x09:
423  case 0x0A:
424  case 0x0B:
425  case 0x0C:
426  case 0x0D:
427  case 0x0E:
428  case 0x0F:
429  case 0x10:
430  case 0x11:
431  case 0x12:
432  case 0x13:
433  case 0x14:
434  case 0x15:
435  case 0x16:
436  case 0x17:
437  return sax->number_unsigned(static_cast<number_unsigned_t>(current));
438 
439  case 0x18: // Unsigned integer (one-byte uint8_t follows)
440  {
441  std::uint8_t number{};
442  return get_number(input_format_t::cbor, number) && sax->number_unsigned(number);
443  }
444 
445  case 0x19: // Unsigned integer (two-byte uint16_t follows)
446  {
447  std::uint16_t number{};
448  return get_number(input_format_t::cbor, number) && sax->number_unsigned(number);
449  }
450 
451  case 0x1A: // Unsigned integer (four-byte uint32_t follows)
452  {
453  std::uint32_t number{};
454  return get_number(input_format_t::cbor, number) && sax->number_unsigned(number);
455  }
456 
457  case 0x1B: // Unsigned integer (eight-byte uint64_t follows)
458  {
459  std::uint64_t number{};
460  return get_number(input_format_t::cbor, number) && sax->number_unsigned(number);
461  }
462 
463  // Negative integer -1-0x00..-1-0x17 (-1..-24)
464  case 0x20:
465  case 0x21:
466  case 0x22:
467  case 0x23:
468  case 0x24:
469  case 0x25:
470  case 0x26:
471  case 0x27:
472  case 0x28:
473  case 0x29:
474  case 0x2A:
475  case 0x2B:
476  case 0x2C:
477  case 0x2D:
478  case 0x2E:
479  case 0x2F:
480  case 0x30:
481  case 0x31:
482  case 0x32:
483  case 0x33:
484  case 0x34:
485  case 0x35:
486  case 0x36:
487  case 0x37:
488  return sax->number_integer(static_cast<std::int8_t>(0x20 - 1 - current));
489 
490  case 0x38: // Negative integer (one-byte uint8_t follows)
491  {
492  std::uint8_t number{};
493  return get_number(input_format_t::cbor, number) && sax->number_integer(static_cast<number_integer_t>(-1) - number);
494  }
495 
496  case 0x39: // Negative integer -1-n (two-byte uint16_t follows)
497  {
498  std::uint16_t number{};
499  return get_number(input_format_t::cbor, number) && sax->number_integer(static_cast<number_integer_t>(-1) - number);
500  }
501 
502  case 0x3A: // Negative integer -1-n (four-byte uint32_t follows)
503  {
504  std::uint32_t number{};
505  return get_number(input_format_t::cbor, number) && sax->number_integer(static_cast<number_integer_t>(-1) - number);
506  }
507 
508  case 0x3B: // Negative integer -1-n (eight-byte uint64_t follows)
509  {
510  std::uint64_t number{};
511  return get_number(input_format_t::cbor, number) && sax->number_integer(static_cast<number_integer_t>(-1)
512  - static_cast<number_integer_t>(number));
513  }
514 
515  // Binary data (0x00..0x17 bytes follow)
516  case 0x40:
517  case 0x41:
518  case 0x42:
519  case 0x43:
520  case 0x44:
521  case 0x45:
522  case 0x46:
523  case 0x47:
524  case 0x48:
525  case 0x49:
526  case 0x4A:
527  case 0x4B:
528  case 0x4C:
529  case 0x4D:
530  case 0x4E:
531  case 0x4F:
532  case 0x50:
533  case 0x51:
534  case 0x52:
535  case 0x53:
536  case 0x54:
537  case 0x55:
538  case 0x56:
539  case 0x57:
540  case 0x58: // Binary data (one-byte uint8_t for n follows)
541  case 0x59: // Binary data (two-byte uint16_t for n follow)
542  case 0x5A: // Binary data (four-byte uint32_t for n follow)
543  case 0x5B: // Binary data (eight-byte uint64_t for n follow)
544  case 0x5F: // Binary data (indefinite length)
545  {
546  binary_t b;
547  return get_cbor_binary(b) && sax->binary(b);
548  }
549 
550  // UTF-8 string (0x00..0x17 bytes follow)
551  case 0x60:
552  case 0x61:
553  case 0x62:
554  case 0x63:
555  case 0x64:
556  case 0x65:
557  case 0x66:
558  case 0x67:
559  case 0x68:
560  case 0x69:
561  case 0x6A:
562  case 0x6B:
563  case 0x6C:
564  case 0x6D:
565  case 0x6E:
566  case 0x6F:
567  case 0x70:
568  case 0x71:
569  case 0x72:
570  case 0x73:
571  case 0x74:
572  case 0x75:
573  case 0x76:
574  case 0x77:
575  case 0x78: // UTF-8 string (one-byte uint8_t for n follows)
576  case 0x79: // UTF-8 string (two-byte uint16_t for n follow)
577  case 0x7A: // UTF-8 string (four-byte uint32_t for n follow)
578  case 0x7B: // UTF-8 string (eight-byte uint64_t for n follow)
579  case 0x7F: // UTF-8 string (indefinite length)
580  {
581  string_t s;
582  return get_cbor_string(s) && sax->string(s);
583  }
584 
585  // array (0x00..0x17 data items follow)
586  case 0x80:
587  case 0x81:
588  case 0x82:
589  case 0x83:
590  case 0x84:
591  case 0x85:
592  case 0x86:
593  case 0x87:
594  case 0x88:
595  case 0x89:
596  case 0x8A:
597  case 0x8B:
598  case 0x8C:
599  case 0x8D:
600  case 0x8E:
601  case 0x8F:
602  case 0x90:
603  case 0x91:
604  case 0x92:
605  case 0x93:
606  case 0x94:
607  case 0x95:
608  case 0x96:
609  case 0x97:
610  return get_cbor_array(static_cast<std::size_t>(static_cast<unsigned int>(current) & 0x1Fu), tag_handler);
611 
612  case 0x98: // array (one-byte uint8_t for n follows)
613  {
614  std::uint8_t len{};
615  return get_number(input_format_t::cbor, len) && get_cbor_array(static_cast<std::size_t>(len), tag_handler);
616  }
617 
618  case 0x99: // array (two-byte uint16_t for n follow)
619  {
620  std::uint16_t len{};
621  return get_number(input_format_t::cbor, len) && get_cbor_array(static_cast<std::size_t>(len), tag_handler);
622  }
623 
624  case 0x9A: // array (four-byte uint32_t for n follow)
625  {
626  std::uint32_t len{};
627  return get_number(input_format_t::cbor, len) && get_cbor_array(static_cast<std::size_t>(len), tag_handler);
628  }
629 
630  case 0x9B: // array (eight-byte uint64_t for n follow)
631  {
632  std::uint64_t len{};
633  return get_number(input_format_t::cbor, len) && get_cbor_array(static_cast<std::size_t>(len), tag_handler);
634  }
635 
636  case 0x9F: // array (indefinite length)
637  return get_cbor_array(std::size_t(-1), tag_handler);
638 
639  // map (0x00..0x17 pairs of data items follow)
640  case 0xA0:
641  case 0xA1:
642  case 0xA2:
643  case 0xA3:
644  case 0xA4:
645  case 0xA5:
646  case 0xA6:
647  case 0xA7:
648  case 0xA8:
649  case 0xA9:
650  case 0xAA:
651  case 0xAB:
652  case 0xAC:
653  case 0xAD:
654  case 0xAE:
655  case 0xAF:
656  case 0xB0:
657  case 0xB1:
658  case 0xB2:
659  case 0xB3:
660  case 0xB4:
661  case 0xB5:
662  case 0xB6:
663  case 0xB7:
664  return get_cbor_object(static_cast<std::size_t>(static_cast<unsigned int>(current) & 0x1Fu), tag_handler);
665 
666  case 0xB8: // map (one-byte uint8_t for n follows)
667  {
668  std::uint8_t len{};
669  return get_number(input_format_t::cbor, len) && get_cbor_object(static_cast<std::size_t>(len), tag_handler);
670  }
671 
672  case 0xB9: // map (two-byte uint16_t for n follow)
673  {
674  std::uint16_t len{};
675  return get_number(input_format_t::cbor, len) && get_cbor_object(static_cast<std::size_t>(len), tag_handler);
676  }
677 
678  case 0xBA: // map (four-byte uint32_t for n follow)
679  {
680  std::uint32_t len{};
681  return get_number(input_format_t::cbor, len) && get_cbor_object(static_cast<std::size_t>(len), tag_handler);
682  }
683 
684  case 0xBB: // map (eight-byte uint64_t for n follow)
685  {
686  std::uint64_t len{};
687  return get_number(input_format_t::cbor, len) && get_cbor_object(static_cast<std::size_t>(len), tag_handler);
688  }
689 
690  case 0xBF: // map (indefinite length)
691  return get_cbor_object(std::size_t(-1), tag_handler);
692 
693  case 0xC6: // tagged item
694  case 0xC7:
695  case 0xC8:
696  case 0xC9:
697  case 0xCA:
698  case 0xCB:
699  case 0xCC:
700  case 0xCD:
701  case 0xCE:
702  case 0xCF:
703  case 0xD0:
704  case 0xD1:
705  case 0xD2:
706  case 0xD3:
707  case 0xD4:
708  case 0xD8: // tagged item (1 bytes follow)
709  case 0xD9: // tagged item (2 bytes follow)
710  case 0xDA: // tagged item (4 bytes follow)
711  case 0xDB: // tagged item (8 bytes follow)
712  {
713  switch (tag_handler)
714  {
716  {
717  auto last_token = get_token_string();
718  return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::cbor, "invalid byte: 0x" + last_token, "value")));
719  }
720 
722  {
723  switch (current)
724  {
725  case 0xD8:
726  {
727  std::uint8_t len{};
728  get_number(input_format_t::cbor, len);
729  break;
730  }
731  case 0xD9:
732  {
733  std::uint16_t len{};
734  get_number(input_format_t::cbor, len);
735  break;
736  }
737  case 0xDA:
738  {
739  std::uint32_t len{};
740  get_number(input_format_t::cbor, len);
741  break;
742  }
743  case 0xDB:
744  {
745  std::uint64_t len{};
746  get_number(input_format_t::cbor, len);
747  break;
748  }
749  default:
750  break;
751  }
752  return parse_cbor_internal(true, tag_handler);
753  }
754 
755  default: // LCOV_EXCL_LINE
756  JSON_ASSERT(false); // LCOV_EXCL_LINE
757  }
758  }
759 
760  case 0xF4: // false
761  return sax->boolean(false);
762 
763  case 0xF5: // true
764  return sax->boolean(true);
765 
766  case 0xF6: // null
767  return sax->null();
768 
769  case 0xF9: // Half-Precision Float (two-byte IEEE 754)
770  {
771  const auto byte1_raw = get();
772  if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::cbor, "number")))
773  {
774  return false;
775  }
776  const auto byte2_raw = get();
777  if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::cbor, "number")))
778  {
779  return false;
780  }
781 
782  const auto byte1 = static_cast<unsigned char>(byte1_raw);
783  const auto byte2 = static_cast<unsigned char>(byte2_raw);
784 
785  // code from RFC 7049, Appendix D, Figure 3:
786  // As half-precision floating-point numbers were only added
787  // to IEEE 754 in 2008, today's programming platforms often
788  // still only have limited support for them. It is very
789  // easy to include at least decoding support for them even
790  // without such support. An example of a small decoder for
791  // half-precision floating-point numbers in the C language
792  // is shown in Fig. 3.
793  const auto half = static_cast<unsigned int>((byte1 << 8u) + byte2);
794  const double val = [&half]
795  {
796  const int exp = (half >> 10u) & 0x1Fu;
797  const unsigned int mant = half & 0x3FFu;
798  JSON_ASSERT(0 <= exp&& exp <= 32);
799  JSON_ASSERT(mant <= 1024);
800  switch (exp)
801  {
802  case 0:
803  return std::ldexp(mant, -24);
804  case 31:
805  return (mant == 0)
806  ? std::numeric_limits<double>::infinity()
807  : std::numeric_limits<double>::quiet_NaN();
808  default:
809  return std::ldexp(mant + 1024, exp - 25);
810  }
811  }();
812  return sax->number_float((half & 0x8000u) != 0
813  ? static_cast<number_float_t>(-val)
814  : static_cast<number_float_t>(val), "");
815  }
816 
817  case 0xFA: // Single-Precision Float (four-byte IEEE 754)
818  {
819  float number{};
820  return get_number(input_format_t::cbor, number) && sax->number_float(static_cast<number_float_t>(number), "");
821  }
822 
823  case 0xFB: // Double-Precision Float (eight-byte IEEE 754)
824  {
825  double number{};
826  return get_number(input_format_t::cbor, number) && sax->number_float(static_cast<number_float_t>(number), "");
827  }
828 
829  default: // anything else (0xFF is handled inside the other types)
830  {
831  auto last_token = get_token_string();
832  return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::cbor, "invalid byte: 0x" + last_token, "value")));
833  }
834  }
835  }
836 
848  bool get_cbor_string(string_t& result)
849  {
850  if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::cbor, "string")))
851  {
852  return false;
853  }
854 
855  switch (current)
856  {
857  // UTF-8 string (0x00..0x17 bytes follow)
858  case 0x60:
859  case 0x61:
860  case 0x62:
861  case 0x63:
862  case 0x64:
863  case 0x65:
864  case 0x66:
865  case 0x67:
866  case 0x68:
867  case 0x69:
868  case 0x6A:
869  case 0x6B:
870  case 0x6C:
871  case 0x6D:
872  case 0x6E:
873  case 0x6F:
874  case 0x70:
875  case 0x71:
876  case 0x72:
877  case 0x73:
878  case 0x74:
879  case 0x75:
880  case 0x76:
881  case 0x77:
882  {
883  return get_string(input_format_t::cbor, static_cast<unsigned int>(current) & 0x1Fu, result);
884  }
885 
886  case 0x78: // UTF-8 string (one-byte uint8_t for n follows)
887  {
888  std::uint8_t len{};
889  return get_number(input_format_t::cbor, len) && get_string(input_format_t::cbor, len, result);
890  }
891 
892  case 0x79: // UTF-8 string (two-byte uint16_t for n follow)
893  {
894  std::uint16_t len{};
895  return get_number(input_format_t::cbor, len) && get_string(input_format_t::cbor, len, result);
896  }
897 
898  case 0x7A: // UTF-8 string (four-byte uint32_t for n follow)
899  {
900  std::uint32_t len{};
901  return get_number(input_format_t::cbor, len) && get_string(input_format_t::cbor, len, result);
902  }
903 
904  case 0x7B: // UTF-8 string (eight-byte uint64_t for n follow)
905  {
906  std::uint64_t len{};
907  return get_number(input_format_t::cbor, len) && get_string(input_format_t::cbor, len, result);
908  }
909 
910  case 0x7F: // UTF-8 string (indefinite length)
911  {
912  while (get() != 0xFF)
913  {
914  string_t chunk;
915  if (!get_cbor_string(chunk))
916  {
917  return false;
918  }
919  result.append(chunk);
920  }
921  return true;
922  }
923 
924  default:
925  {
926  auto last_token = get_token_string();
927  return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::cbor, "expected length specification (0x60-0x7B) or indefinite string type (0x7F); last byte: 0x" + last_token, "string")));
928  }
929  }
930  }
931 
943  bool get_cbor_binary(binary_t& result)
944  {
945  if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::cbor, "binary")))
946  {
947  return false;
948  }
949 
950  switch (current)
951  {
952  // Binary data (0x00..0x17 bytes follow)
953  case 0x40:
954  case 0x41:
955  case 0x42:
956  case 0x43:
957  case 0x44:
958  case 0x45:
959  case 0x46:
960  case 0x47:
961  case 0x48:
962  case 0x49:
963  case 0x4A:
964  case 0x4B:
965  case 0x4C:
966  case 0x4D:
967  case 0x4E:
968  case 0x4F:
969  case 0x50:
970  case 0x51:
971  case 0x52:
972  case 0x53:
973  case 0x54:
974  case 0x55:
975  case 0x56:
976  case 0x57:
977  {
978  return get_binary(input_format_t::cbor, static_cast<unsigned int>(current) & 0x1Fu, result);
979  }
980 
981  case 0x58: // Binary data (one-byte uint8_t for n follows)
982  {
983  std::uint8_t len{};
984  return get_number(input_format_t::cbor, len) &&
985  get_binary(input_format_t::cbor, len, result);
986  }
987 
988  case 0x59: // Binary data (two-byte uint16_t for n follow)
989  {
990  std::uint16_t len{};
991  return get_number(input_format_t::cbor, len) &&
992  get_binary(input_format_t::cbor, len, result);
993  }
994 
995  case 0x5A: // Binary data (four-byte uint32_t for n follow)
996  {
997  std::uint32_t len{};
998  return get_number(input_format_t::cbor, len) &&
999  get_binary(input_format_t::cbor, len, result);
1000  }
1001 
1002  case 0x5B: // Binary data (eight-byte uint64_t for n follow)
1003  {
1004  std::uint64_t len{};
1005  return get_number(input_format_t::cbor, len) &&
1006  get_binary(input_format_t::cbor, len, result);
1007  }
1008 
1009  case 0x5F: // Binary data (indefinite length)
1010  {
1011  while (get() != 0xFF)
1012  {
1013  binary_t chunk;
1014  if (!get_cbor_binary(chunk))
1015  {
1016  return false;
1017  }
1018  result.insert(result.end(), chunk.begin(), chunk.end());
1019  }
1020  return true;
1021  }
1022 
1023  default:
1024  {
1025  auto last_token = get_token_string();
1026  return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::cbor, "expected length specification (0x40-0x5B) or indefinite binary array type (0x5F); last byte: 0x" + last_token, "binary")));
1027  }
1028  }
1029  }
1030 
1037  bool get_cbor_array(const std::size_t len,
1038  const cbor_tag_handler_t tag_handler)
1039  {
1040  if (JSON_HEDLEY_UNLIKELY(!sax->start_array(len)))
1041  {
1042  return false;
1043  }
1044 
1045  if (len != std::size_t(-1))
1046  {
1047  for (std::size_t i = 0; i < len; ++i)
1048  {
1049  if (JSON_HEDLEY_UNLIKELY(!parse_cbor_internal(true, tag_handler)))
1050  {
1051  return false;
1052  }
1053  }
1054  }
1055  else
1056  {
1057  while (get() != 0xFF)
1058  {
1059  if (JSON_HEDLEY_UNLIKELY(!parse_cbor_internal(false, tag_handler)))
1060  {
1061  return false;
1062  }
1063  }
1064  }
1065 
1066  return sax->end_array();
1067  }
1068 
1075  bool get_cbor_object(const std::size_t len,
1076  const cbor_tag_handler_t tag_handler)
1077  {
1078  if (JSON_HEDLEY_UNLIKELY(!sax->start_object(len)))
1079  {
1080  return false;
1081  }
1082 
1083  string_t key;
1084  if (len != std::size_t(-1))
1085  {
1086  for (std::size_t i = 0; i < len; ++i)
1087  {
1088  get();
1089  if (JSON_HEDLEY_UNLIKELY(!get_cbor_string(key) || !sax->key(key)))
1090  {
1091  return false;
1092  }
1093 
1094  if (JSON_HEDLEY_UNLIKELY(!parse_cbor_internal(true, tag_handler)))
1095  {
1096  return false;
1097  }
1098  key.clear();
1099  }
1100  }
1101  else
1102  {
1103  while (get() != 0xFF)
1104  {
1105  if (JSON_HEDLEY_UNLIKELY(!get_cbor_string(key) || !sax->key(key)))
1106  {
1107  return false;
1108  }
1109 
1110  if (JSON_HEDLEY_UNLIKELY(!parse_cbor_internal(true, tag_handler)))
1111  {
1112  return false;
1113  }
1114  key.clear();
1115  }
1116  }
1117 
1118  return sax->end_object();
1119  }
1120 
1122  // MsgPack //
1124 
1128  bool parse_msgpack_internal()
1129  {
1130  switch (get())
1131  {
1132  // EOF
1133  case std::char_traits<char_type>::eof():
1134  return unexpect_eof(input_format_t::msgpack, "value");
1135 
1136  // positive fixint
1137  case 0x00:
1138  case 0x01:
1139  case 0x02:
1140  case 0x03:
1141  case 0x04:
1142  case 0x05:
1143  case 0x06:
1144  case 0x07:
1145  case 0x08:
1146  case 0x09:
1147  case 0x0A:
1148  case 0x0B:
1149  case 0x0C:
1150  case 0x0D:
1151  case 0x0E:
1152  case 0x0F:
1153  case 0x10:
1154  case 0x11:
1155  case 0x12:
1156  case 0x13:
1157  case 0x14:
1158  case 0x15:
1159  case 0x16:
1160  case 0x17:
1161  case 0x18:
1162  case 0x19:
1163  case 0x1A:
1164  case 0x1B:
1165  case 0x1C:
1166  case 0x1D:
1167  case 0x1E:
1168  case 0x1F:
1169  case 0x20:
1170  case 0x21:
1171  case 0x22:
1172  case 0x23:
1173  case 0x24:
1174  case 0x25:
1175  case 0x26:
1176  case 0x27:
1177  case 0x28:
1178  case 0x29:
1179  case 0x2A:
1180  case 0x2B:
1181  case 0x2C:
1182  case 0x2D:
1183  case 0x2E:
1184  case 0x2F:
1185  case 0x30:
1186  case 0x31:
1187  case 0x32:
1188  case 0x33:
1189  case 0x34:
1190  case 0x35:
1191  case 0x36:
1192  case 0x37:
1193  case 0x38:
1194  case 0x39:
1195  case 0x3A:
1196  case 0x3B:
1197  case 0x3C:
1198  case 0x3D:
1199  case 0x3E:
1200  case 0x3F:
1201  case 0x40:
1202  case 0x41:
1203  case 0x42:
1204  case 0x43:
1205  case 0x44:
1206  case 0x45:
1207  case 0x46:
1208  case 0x47:
1209  case 0x48:
1210  case 0x49:
1211  case 0x4A:
1212  case 0x4B:
1213  case 0x4C:
1214  case 0x4D:
1215  case 0x4E:
1216  case 0x4F:
1217  case 0x50:
1218  case 0x51:
1219  case 0x52:
1220  case 0x53:
1221  case 0x54:
1222  case 0x55:
1223  case 0x56:
1224  case 0x57:
1225  case 0x58:
1226  case 0x59:
1227  case 0x5A:
1228  case 0x5B:
1229  case 0x5C:
1230  case 0x5D:
1231  case 0x5E:
1232  case 0x5F:
1233  case 0x60:
1234  case 0x61:
1235  case 0x62:
1236  case 0x63:
1237  case 0x64:
1238  case 0x65:
1239  case 0x66:
1240  case 0x67:
1241  case 0x68:
1242  case 0x69:
1243  case 0x6A:
1244  case 0x6B:
1245  case 0x6C:
1246  case 0x6D:
1247  case 0x6E:
1248  case 0x6F:
1249  case 0x70:
1250  case 0x71:
1251  case 0x72:
1252  case 0x73:
1253  case 0x74:
1254  case 0x75:
1255  case 0x76:
1256  case 0x77:
1257  case 0x78:
1258  case 0x79:
1259  case 0x7A:
1260  case 0x7B:
1261  case 0x7C:
1262  case 0x7D:
1263  case 0x7E:
1264  case 0x7F:
1265  return sax->number_unsigned(static_cast<number_unsigned_t>(current));
1266 
1267  // fixmap
1268  case 0x80:
1269  case 0x81:
1270  case 0x82:
1271  case 0x83:
1272  case 0x84:
1273  case 0x85:
1274  case 0x86:
1275  case 0x87:
1276  case 0x88:
1277  case 0x89:
1278  case 0x8A:
1279  case 0x8B:
1280  case 0x8C:
1281  case 0x8D:
1282  case 0x8E:
1283  case 0x8F:
1284  return get_msgpack_object(static_cast<std::size_t>(static_cast<unsigned int>(current) & 0x0Fu));
1285 
1286  // fixarray
1287  case 0x90:
1288  case 0x91:
1289  case 0x92:
1290  case 0x93:
1291  case 0x94:
1292  case 0x95:
1293  case 0x96:
1294  case 0x97:
1295  case 0x98:
1296  case 0x99:
1297  case 0x9A:
1298  case 0x9B:
1299  case 0x9C:
1300  case 0x9D:
1301  case 0x9E:
1302  case 0x9F:
1303  return get_msgpack_array(static_cast<std::size_t>(static_cast<unsigned int>(current) & 0x0Fu));
1304 
1305  // fixstr
1306  case 0xA0:
1307  case 0xA1:
1308  case 0xA2:
1309  case 0xA3:
1310  case 0xA4:
1311  case 0xA5:
1312  case 0xA6:
1313  case 0xA7:
1314  case 0xA8:
1315  case 0xA9:
1316  case 0xAA:
1317  case 0xAB:
1318  case 0xAC:
1319  case 0xAD:
1320  case 0xAE:
1321  case 0xAF:
1322  case 0xB0:
1323  case 0xB1:
1324  case 0xB2:
1325  case 0xB3:
1326  case 0xB4:
1327  case 0xB5:
1328  case 0xB6:
1329  case 0xB7:
1330  case 0xB8:
1331  case 0xB9:
1332  case 0xBA:
1333  case 0xBB:
1334  case 0xBC:
1335  case 0xBD:
1336  case 0xBE:
1337  case 0xBF:
1338  case 0xD9: // str 8
1339  case 0xDA: // str 16
1340  case 0xDB: // str 32
1341  {
1342  string_t s;
1343  return get_msgpack_string(s) && sax->string(s);
1344  }
1345 
1346  case 0xC0: // nil
1347  return sax->null();
1348 
1349  case 0xC2: // false
1350  return sax->boolean(false);
1351 
1352  case 0xC3: // true
1353  return sax->boolean(true);
1354 
1355  case 0xC4: // bin 8
1356  case 0xC5: // bin 16
1357  case 0xC6: // bin 32
1358  case 0xC7: // ext 8
1359  case 0xC8: // ext 16
1360  case 0xC9: // ext 32
1361  case 0xD4: // fixext 1
1362  case 0xD5: // fixext 2
1363  case 0xD6: // fixext 4
1364  case 0xD7: // fixext 8
1365  case 0xD8: // fixext 16
1366  {
1367  binary_t b;
1368  return get_msgpack_binary(b) && sax->binary(b);
1369  }
1370 
1371  case 0xCA: // float 32
1372  {
1373  float number{};
1374  return get_number(input_format_t::msgpack, number) && sax->number_float(static_cast<number_float_t>(number), "");
1375  }
1376 
1377  case 0xCB: // float 64
1378  {
1379  double number{};
1380  return get_number(input_format_t::msgpack, number) && sax->number_float(static_cast<number_float_t>(number), "");
1381  }
1382 
1383  case 0xCC: // uint 8
1384  {
1385  std::uint8_t number{};
1386  return get_number(input_format_t::msgpack, number) && sax->number_unsigned(number);
1387  }
1388 
1389  case 0xCD: // uint 16
1390  {
1391  std::uint16_t number{};
1392  return get_number(input_format_t::msgpack, number) && sax->number_unsigned(number);
1393  }
1394 
1395  case 0xCE: // uint 32
1396  {
1397  std::uint32_t number{};
1398  return get_number(input_format_t::msgpack, number) && sax->number_unsigned(number);
1399  }
1400 
1401  case 0xCF: // uint 64
1402  {
1403  std::uint64_t number{};
1404  return get_number(input_format_t::msgpack, number) && sax->number_unsigned(number);
1405  }
1406 
1407  case 0xD0: // int 8
1408  {
1409  std::int8_t number{};
1410  return get_number(input_format_t::msgpack, number) && sax->number_integer(number);
1411  }
1412 
1413  case 0xD1: // int 16
1414  {
1415  std::int16_t number{};
1416  return get_number(input_format_t::msgpack, number) && sax->number_integer(number);
1417  }
1418 
1419  case 0xD2: // int 32
1420  {
1421  std::int32_t number{};
1422  return get_number(input_format_t::msgpack, number) && sax->number_integer(number);
1423  }
1424 
1425  case 0xD3: // int 64
1426  {
1427  std::int64_t number{};
1428  return get_number(input_format_t::msgpack, number) && sax->number_integer(number);
1429  }
1430 
1431  case 0xDC: // array 16
1432  {
1433  std::uint16_t len{};
1434  return get_number(input_format_t::msgpack, len) && get_msgpack_array(static_cast<std::size_t>(len));
1435  }
1436 
1437  case 0xDD: // array 32
1438  {
1439  std::uint32_t len{};
1440  return get_number(input_format_t::msgpack, len) && get_msgpack_array(static_cast<std::size_t>(len));
1441  }
1442 
1443  case 0xDE: // map 16
1444  {
1445  std::uint16_t len{};
1446  return get_number(input_format_t::msgpack, len) && get_msgpack_object(static_cast<std::size_t>(len));
1447  }
1448 
1449  case 0xDF: // map 32
1450  {
1451  std::uint32_t len{};
1452  return get_number(input_format_t::msgpack, len) && get_msgpack_object(static_cast<std::size_t>(len));
1453  }
1454 
1455  // negative fixint
1456  case 0xE0:
1457  case 0xE1:
1458  case 0xE2:
1459  case 0xE3:
1460  case 0xE4:
1461  case 0xE5:
1462  case 0xE6:
1463  case 0xE7:
1464  case 0xE8:
1465  case 0xE9:
1466  case 0xEA:
1467  case 0xEB:
1468  case 0xEC:
1469  case 0xED:
1470  case 0xEE:
1471  case 0xEF:
1472  case 0xF0:
1473  case 0xF1:
1474  case 0xF2:
1475  case 0xF3:
1476  case 0xF4:
1477  case 0xF5:
1478  case 0xF6:
1479  case 0xF7:
1480  case 0xF8:
1481  case 0xF9:
1482  case 0xFA:
1483  case 0xFB:
1484  case 0xFC:
1485  case 0xFD:
1486  case 0xFE:
1487  case 0xFF:
1488  return sax->number_integer(static_cast<std::int8_t>(current));
1489 
1490  default: // anything else
1491  {
1492  auto last_token = get_token_string();
1493  return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::msgpack, "invalid byte: 0x" + last_token, "value")));
1494  }
1495  }
1496  }
1497 
1508  bool get_msgpack_string(string_t& result)
1509  {
1510  if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::msgpack, "string")))
1511  {
1512  return false;
1513  }
1514 
1515  switch (current)
1516  {
1517  // fixstr
1518  case 0xA0:
1519  case 0xA1:
1520  case 0xA2:
1521  case 0xA3:
1522  case 0xA4:
1523  case 0xA5:
1524  case 0xA6:
1525  case 0xA7:
1526  case 0xA8:
1527  case 0xA9:
1528  case 0xAA:
1529  case 0xAB:
1530  case 0xAC:
1531  case 0xAD:
1532  case 0xAE:
1533  case 0xAF:
1534  case 0xB0:
1535  case 0xB1:
1536  case 0xB2:
1537  case 0xB3:
1538  case 0xB4:
1539  case 0xB5:
1540  case 0xB6:
1541  case 0xB7:
1542  case 0xB8:
1543  case 0xB9:
1544  case 0xBA:
1545  case 0xBB:
1546  case 0xBC:
1547  case 0xBD:
1548  case 0xBE:
1549  case 0xBF:
1550  {
1551  return get_string(input_format_t::msgpack, static_cast<unsigned int>(current) & 0x1Fu, result);
1552  }
1553 
1554  case 0xD9: // str 8
1555  {
1556  std::uint8_t len{};
1557  return get_number(input_format_t::msgpack, len) && get_string(input_format_t::msgpack, len, result);
1558  }
1559 
1560  case 0xDA: // str 16
1561  {
1562  std::uint16_t len{};
1563  return get_number(input_format_t::msgpack, len) && get_string(input_format_t::msgpack, len, result);
1564  }
1565 
1566  case 0xDB: // str 32
1567  {
1568  std::uint32_t len{};
1569  return get_number(input_format_t::msgpack, len) && get_string(input_format_t::msgpack, len, result);
1570  }
1571 
1572  default:
1573  {
1574  auto last_token = get_token_string();
1575  return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::msgpack, "expected length specification (0xA0-0xBF, 0xD9-0xDB); last byte: 0x" + last_token, "string")));
1576  }
1577  }
1578  }
1579 
1590  bool get_msgpack_binary(binary_t& result)
1591  {
1592  // helper function to set the subtype
1593  auto assign_and_return_true = [&result](std::int8_t subtype)
1594  {
1595  result.set_subtype(static_cast<std::uint8_t>(subtype));
1596  return true;
1597  };
1598 
1599  switch (current)
1600  {
1601  case 0xC4: // bin 8
1602  {
1603  std::uint8_t len{};
1604  return get_number(input_format_t::msgpack, len) &&
1605  get_binary(input_format_t::msgpack, len, result);
1606  }
1607 
1608  case 0xC5: // bin 16
1609  {
1610  std::uint16_t len{};
1611  return get_number(input_format_t::msgpack, len) &&
1612  get_binary(input_format_t::msgpack, len, result);
1613  }
1614 
1615  case 0xC6: // bin 32
1616  {
1617  std::uint32_t len{};
1618  return get_number(input_format_t::msgpack, len) &&
1619  get_binary(input_format_t::msgpack, len, result);
1620  }
1621 
1622  case 0xC7: // ext 8
1623  {
1624  std::uint8_t len{};
1625  std::int8_t subtype{};
1626  return get_number(input_format_t::msgpack, len) &&
1627  get_number(input_format_t::msgpack, subtype) &&
1628  get_binary(input_format_t::msgpack, len, result) &&
1629  assign_and_return_true(subtype);
1630  }
1631 
1632  case 0xC8: // ext 16
1633  {
1634  std::uint16_t len{};
1635  std::int8_t subtype{};
1636  return get_number(input_format_t::msgpack, len) &&
1637  get_number(input_format_t::msgpack, subtype) &&
1638  get_binary(input_format_t::msgpack, len, result) &&
1639  assign_and_return_true(subtype);
1640  }
1641 
1642  case 0xC9: // ext 32
1643  {
1644  std::uint32_t len{};
1645  std::int8_t subtype{};
1646  return get_number(input_format_t::msgpack, len) &&
1647  get_number(input_format_t::msgpack, subtype) &&
1648  get_binary(input_format_t::msgpack, len, result) &&
1649  assign_and_return_true(subtype);
1650  }
1651 
1652  case 0xD4: // fixext 1
1653  {
1654  std::int8_t subtype{};
1655  return get_number(input_format_t::msgpack, subtype) &&
1656  get_binary(input_format_t::msgpack, 1, result) &&
1657  assign_and_return_true(subtype);
1658  }
1659 
1660  case 0xD5: // fixext 2
1661  {
1662  std::int8_t subtype{};
1663  return get_number(input_format_t::msgpack, subtype) &&
1664  get_binary(input_format_t::msgpack, 2, result) &&
1665  assign_and_return_true(subtype);
1666  }
1667 
1668  case 0xD6: // fixext 4
1669  {
1670  std::int8_t subtype{};
1671  return get_number(input_format_t::msgpack, subtype) &&
1672  get_binary(input_format_t::msgpack, 4, result) &&
1673  assign_and_return_true(subtype);
1674  }
1675 
1676  case 0xD7: // fixext 8
1677  {
1678  std::int8_t subtype{};
1679  return get_number(input_format_t::msgpack, subtype) &&
1680  get_binary(input_format_t::msgpack, 8, result) &&
1681  assign_and_return_true(subtype);
1682  }
1683 
1684  case 0xD8: // fixext 16
1685  {
1686  std::int8_t subtype{};
1687  return get_number(input_format_t::msgpack, subtype) &&
1688  get_binary(input_format_t::msgpack, 16, result) &&
1689  assign_and_return_true(subtype);
1690  }
1691 
1692  default: // LCOV_EXCL_LINE
1693  return false; // LCOV_EXCL_LINE
1694  }
1695  }
1696 
1701  bool get_msgpack_array(const std::size_t len)
1702  {
1703  if (JSON_HEDLEY_UNLIKELY(!sax->start_array(len)))
1704  {
1705  return false;
1706  }
1707 
1708  for (std::size_t i = 0; i < len; ++i)
1709  {
1710  if (JSON_HEDLEY_UNLIKELY(!parse_msgpack_internal()))
1711  {
1712  return false;
1713  }
1714  }
1715 
1716  return sax->end_array();
1717  }
1718 
1723  bool get_msgpack_object(const std::size_t len)
1724  {
1725  if (JSON_HEDLEY_UNLIKELY(!sax->start_object(len)))
1726  {
1727  return false;
1728  }
1729 
1730  string_t key;
1731  for (std::size_t i = 0; i < len; ++i)
1732  {
1733  get();
1734  if (JSON_HEDLEY_UNLIKELY(!get_msgpack_string(key) || !sax->key(key)))
1735  {
1736  return false;
1737  }
1738 
1739  if (JSON_HEDLEY_UNLIKELY(!parse_msgpack_internal()))
1740  {
1741  return false;
1742  }
1743  key.clear();
1744  }
1745 
1746  return sax->end_object();
1747  }
1748 
1750  // UBJSON //
1752 
1760  bool parse_ubjson_internal(const bool get_char = true)
1761  {
1762  return get_ubjson_value(get_char ? get_ignore_noop() : current);
1763  }
1764 
1779  bool get_ubjson_string(string_t& result, const bool get_char = true)
1780  {
1781  if (get_char)
1782  {
1783  get(); // TODO(niels): may we ignore N here?
1784  }
1785 
1786  if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::ubjson, "value")))
1787  {
1788  return false;
1789  }
1790 
1791  switch (current)
1792  {
1793  case 'U':
1794  {
1795  std::uint8_t len{};
1796  return get_number(input_format_t::ubjson, len) && get_string(input_format_t::ubjson, len, result);
1797  }
1798 
1799  case 'i':
1800  {
1801  std::int8_t len{};
1802  return get_number(input_format_t::ubjson, len) && get_string(input_format_t::ubjson, len, result);
1803  }
1804 
1805  case 'I':
1806  {
1807  std::int16_t len{};
1808  return get_number(input_format_t::ubjson, len) && get_string(input_format_t::ubjson, len, result);
1809  }
1810 
1811  case 'l':
1812  {
1813  std::int32_t len{};
1814  return get_number(input_format_t::ubjson, len) && get_string(input_format_t::ubjson, len, result);
1815  }
1816 
1817  case 'L':
1818  {
1819  std::int64_t len{};
1820  return get_number(input_format_t::ubjson, len) && get_string(input_format_t::ubjson, len, result);
1821  }
1822 
1823  default:
1824  auto last_token = get_token_string();
1825  return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::ubjson, "expected length type specification (U, i, I, l, L); last byte: 0x" + last_token, "string")));
1826  }
1827  }
1828 
1833  bool get_ubjson_size_value(std::size_t& result)
1834  {
1835  switch (get_ignore_noop())
1836  {
1837  case 'U':
1838  {
1839  std::uint8_t number{};
1840  if (JSON_HEDLEY_UNLIKELY(!get_number(input_format_t::ubjson, number)))
1841  {
1842  return false;
1843  }
1844  result = static_cast<std::size_t>(number);
1845  return true;
1846  }
1847 
1848  case 'i':
1849  {
1850  std::int8_t number{};
1851  if (JSON_HEDLEY_UNLIKELY(!get_number(input_format_t::ubjson, number)))
1852  {
1853  return false;
1854  }
1855  result = static_cast<std::size_t>(number);
1856  return true;
1857  }
1858 
1859  case 'I':
1860  {
1861  std::int16_t number{};
1862  if (JSON_HEDLEY_UNLIKELY(!get_number(input_format_t::ubjson, number)))
1863  {
1864  return false;
1865  }
1866  result = static_cast<std::size_t>(number);
1867  return true;
1868  }
1869 
1870  case 'l':
1871  {
1872  std::int32_t number{};
1873  if (JSON_HEDLEY_UNLIKELY(!get_number(input_format_t::ubjson, number)))
1874  {
1875  return false;
1876  }
1877  result = static_cast<std::size_t>(number);
1878  return true;
1879  }
1880 
1881  case 'L':
1882  {
1883  std::int64_t number{};
1884  if (JSON_HEDLEY_UNLIKELY(!get_number(input_format_t::ubjson, number)))
1885  {
1886  return false;
1887  }
1888  result = static_cast<std::size_t>(number);
1889  return true;
1890  }
1891 
1892  default:
1893  {
1894  auto last_token = get_token_string();
1895  return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::ubjson, "expected length type specification (U, i, I, l, L) after '#'; last byte: 0x" + last_token, "size")));
1896  }
1897  }
1898  }
1899 
1910  bool get_ubjson_size_type(std::pair<std::size_t, char_int_type>& result)
1911  {
1912  result.first = string_t::npos; // size
1913  result.second = 0; // type
1914 
1915  get_ignore_noop();
1916 
1917  if (current == '$')
1918  {
1919  result.second = get(); // must not ignore 'N', because 'N' maybe the type
1920  if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::ubjson, "type")))
1921  {
1922  return false;
1923  }
1924 
1925  get_ignore_noop();
1926  if (JSON_HEDLEY_UNLIKELY(current != '#'))
1927  {
1928  if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::ubjson, "value")))
1929  {
1930  return false;
1931  }
1932  auto last_token = get_token_string();
1933  return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::ubjson, "expected '#' after type information; last byte: 0x" + last_token, "size")));
1934  }
1935 
1936  return get_ubjson_size_value(result.first);
1937  }
1938 
1939  if (current == '#')
1940  {
1941  return get_ubjson_size_value(result.first);
1942  }
1943 
1944  return true;
1945  }
1946 
1951  bool get_ubjson_value(const char_int_type prefix)
1952  {
1953  switch (prefix)
1954  {
1955  case std::char_traits<char_type>::eof(): // EOF
1956  return unexpect_eof(input_format_t::ubjson, "value");
1957 
1958  case 'T': // true
1959  return sax->boolean(true);
1960  case 'F': // false
1961  return sax->boolean(false);
1962 
1963  case 'Z': // null
1964  return sax->null();
1965 
1966  case 'U':
1967  {
1968  std::uint8_t number{};
1969  return get_number(input_format_t::ubjson, number) && sax->number_unsigned(number);
1970  }
1971 
1972  case 'i':
1973  {
1974  std::int8_t number{};
1975  return get_number(input_format_t::ubjson, number) && sax->number_integer(number);
1976  }
1977 
1978  case 'I':
1979  {
1980  std::int16_t number{};
1981  return get_number(input_format_t::ubjson, number) && sax->number_integer(number);
1982  }
1983 
1984  case 'l':
1985  {
1986  std::int32_t number{};
1987  return get_number(input_format_t::ubjson, number) && sax->number_integer(number);
1988  }
1989 
1990  case 'L':
1991  {
1992  std::int64_t number{};
1993  return get_number(input_format_t::ubjson, number) && sax->number_integer(number);
1994  }
1995 
1996  case 'd':
1997  {
1998  float number{};
1999  return get_number(input_format_t::ubjson, number) && sax->number_float(static_cast<number_float_t>(number), "");
2000  }
2001 
2002  case 'D':
2003  {
2004  double number{};
2005  return get_number(input_format_t::ubjson, number) && sax->number_float(static_cast<number_float_t>(number), "");
2006  }
2007 
2008  case 'H':
2009  {
2010  return get_ubjson_high_precision_number();
2011  }
2012 
2013  case 'C': // char
2014  {
2015  get();
2016  if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::ubjson, "char")))
2017  {
2018  return false;
2019  }
2020  if (JSON_HEDLEY_UNLIKELY(current > 127))
2021  {
2022  auto last_token = get_token_string();
2023  return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::ubjson, "byte after 'C' must be in range 0x00..0x7F; last byte: 0x" + last_token, "char")));
2024  }
2025  string_t s(1, static_cast<typename string_t::value_type>(current));
2026  return sax->string(s);
2027  }
2028 
2029  case 'S': // string
2030  {
2031  string_t s;
2032  return get_ubjson_string(s) && sax->string(s);
2033  }
2034 
2035  case '[': // array
2036  return get_ubjson_array();
2037 
2038  case '{': // object
2039  return get_ubjson_object();
2040 
2041  default: // anything else
2042  {
2043  auto last_token = get_token_string();
2044  return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::ubjson, "invalid byte: 0x" + last_token, "value")));
2045  }
2046  }
2047  }
2048 
2052  bool get_ubjson_array()
2053  {
2054  std::pair<std::size_t, char_int_type> size_and_type;
2055  if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_type(size_and_type)))
2056  {
2057  return false;
2058  }
2059 
2060  if (size_and_type.first != string_t::npos)
2061  {
2062  if (JSON_HEDLEY_UNLIKELY(!sax->start_array(size_and_type.first)))
2063  {
2064  return false;
2065  }
2066 
2067  if (size_and_type.second != 0)
2068  {
2069  if (size_and_type.second != 'N')
2070  {
2071  for (std::size_t i = 0; i < size_and_type.first; ++i)
2072  {
2073  if (JSON_HEDLEY_UNLIKELY(!get_ubjson_value(size_and_type.second)))
2074  {
2075  return false;
2076  }
2077  }
2078  }
2079  }
2080  else
2081  {
2082  for (std::size_t i = 0; i < size_and_type.first; ++i)
2083  {
2084  if (JSON_HEDLEY_UNLIKELY(!parse_ubjson_internal()))
2085  {
2086  return false;
2087  }
2088  }
2089  }
2090  }
2091  else
2092  {
2093  if (JSON_HEDLEY_UNLIKELY(!sax->start_array(std::size_t(-1))))
2094  {
2095  return false;
2096  }
2097 
2098  while (current != ']')
2099  {
2100  if (JSON_HEDLEY_UNLIKELY(!parse_ubjson_internal(false)))
2101  {
2102  return false;
2103  }
2104  get_ignore_noop();
2105  }
2106  }
2107 
2108  return sax->end_array();
2109  }
2110 
2114  bool get_ubjson_object()
2115  {
2116  std::pair<std::size_t, char_int_type> size_and_type;
2117  if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_type(size_and_type)))
2118  {
2119  return false;
2120  }
2121 
2122  string_t key;
2123  if (size_and_type.first != string_t::npos)
2124  {
2125  if (JSON_HEDLEY_UNLIKELY(!sax->start_object(size_and_type.first)))
2126  {
2127  return false;
2128  }
2129 
2130  if (size_and_type.second != 0)
2131  {
2132  for (std::size_t i = 0; i < size_and_type.first; ++i)
2133  {
2134  if (JSON_HEDLEY_UNLIKELY(!get_ubjson_string(key) || !sax->key(key)))
2135  {
2136  return false;
2137  }
2138  if (JSON_HEDLEY_UNLIKELY(!get_ubjson_value(size_and_type.second)))
2139  {
2140  return false;
2141  }
2142  key.clear();
2143  }
2144  }
2145  else
2146  {
2147  for (std::size_t i = 0; i < size_and_type.first; ++i)
2148  {
2149  if (JSON_HEDLEY_UNLIKELY(!get_ubjson_string(key) || !sax->key(key)))
2150  {
2151  return false;
2152  }
2153  if (JSON_HEDLEY_UNLIKELY(!parse_ubjson_internal()))
2154  {
2155  return false;
2156  }
2157  key.clear();
2158  }
2159  }
2160  }
2161  else
2162  {
2163  if (JSON_HEDLEY_UNLIKELY(!sax->start_object(std::size_t(-1))))
2164  {
2165  return false;
2166  }
2167 
2168  while (current != '}')
2169  {
2170  if (JSON_HEDLEY_UNLIKELY(!get_ubjson_string(key, false) || !sax->key(key)))
2171  {
2172  return false;
2173  }
2174  if (JSON_HEDLEY_UNLIKELY(!parse_ubjson_internal()))
2175  {
2176  return false;
2177  }
2178  get_ignore_noop();
2179  key.clear();
2180  }
2181  }
2182 
2183  return sax->end_object();
2184  }
2185 
2186  // Note, no reader for UBJSON binary types is implemented because they do
2187  // not exist
2188 
2189  bool get_ubjson_high_precision_number()
2190  {
2191  // get size of following number string
2192  std::size_t size{};
2193  auto res = get_ubjson_size_value(size);
2194  if (JSON_HEDLEY_UNLIKELY(!res))
2195  {
2196  return res;
2197  }
2198 
2199  // get number string
2200  std::vector<char> number_vector;
2201  for (std::size_t i = 0; i < size; ++i)
2202  {
2203  get();
2204  if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::ubjson, "number")))
2205  {
2206  return false;
2207  }
2208  number_vector.push_back(static_cast<char>(current));
2209  }
2210 
2211  // parse number string
2212  auto number_ia = detail::input_adapter(std::forward<decltype(number_vector)>(number_vector));
2213  auto number_lexer = detail::lexer<BasicJsonType, decltype(number_ia)>(std::move(number_ia), false);
2214  const auto result_number = number_lexer.scan();
2215  const auto number_string = number_lexer.get_token_string();
2216  const auto result_remainder = number_lexer.scan();
2217 
2218  using token_type = typename detail::lexer_base<BasicJsonType>::token_type;
2219 
2220  if (JSON_HEDLEY_UNLIKELY(result_remainder != token_type::end_of_input))
2221  {
2222  return sax->parse_error(chars_read, number_string, parse_error::create(115, chars_read, exception_message(input_format_t::ubjson, "invalid number text: " + number_lexer.get_token_string(), "high-precision number")));
2223  }
2224 
2225  switch (result_number)
2226  {
2227  case token_type::value_integer:
2228  return sax->number_integer(number_lexer.get_number_integer());
2229  case token_type::value_unsigned:
2230  return sax->number_unsigned(number_lexer.get_number_unsigned());
2231  case token_type::value_float:
2232  return sax->number_float(number_lexer.get_number_float(), std::move(number_string));
2233  default:
2234  return sax->parse_error(chars_read, number_string, parse_error::create(115, chars_read, exception_message(input_format_t::ubjson, "invalid number text: " + number_lexer.get_token_string(), "high-precision number")));
2235  }
2236  }
2237 
2239  // Utility functions //
2241 
2251  char_int_type get()
2252  {
2253  ++chars_read;
2254  return current = ia.get_character();
2255  }
2256 
2260  char_int_type get_ignore_noop()
2261  {
2262  do
2263  {
2264  get();
2265  }
2266  while (current == 'N');
2267 
2268  return current;
2269  }
2270 
2271  /*
2272  @brief read a number from the input
2273 
2274  @tparam NumberType the type of the number
2275  @param[in] format the current format (for diagnostics)
2276  @param[out] result number of type @a NumberType
2277 
2278  @return whether conversion completed
2279 
2280  @note This function needs to respect the system's endianess, because
2281  bytes in CBOR, MessagePack, and UBJSON are stored in network order
2282  (big endian) and therefore need reordering on little endian systems.
2283  */
2284  template<typename NumberType, bool InputIsLittleEndian = false>
2285  bool get_number(const input_format_t format, NumberType& result)
2286  {
2287  // step 1: read input into array with system's byte order
2288  std::array<std::uint8_t, sizeof(NumberType)> vec;
2289  for (std::size_t i = 0; i < sizeof(NumberType); ++i)
2290  {
2291  get();
2292  if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(format, "number")))
2293  {
2294  return false;
2295  }
2296 
2297  // reverse byte order prior to conversion if necessary
2298  if (is_little_endian != InputIsLittleEndian)
2299  {
2300  vec[sizeof(NumberType) - i - 1] = static_cast<std::uint8_t>(current);
2301  }
2302  else
2303  {
2304  vec[i] = static_cast<std::uint8_t>(current); // LCOV_EXCL_LINE
2305  }
2306  }
2307 
2308  // step 2: convert array into number of type T and return
2309  std::memcpy(&result, vec.data(), sizeof(NumberType));
2310  return true;
2311  }
2312 
2327  template<typename NumberType>
2328  bool get_string(const input_format_t format,
2329  const NumberType len,
2330  string_t& result)
2331  {
2332  bool success = true;
2333  for (NumberType i = 0; i < len; i++)
2334  {
2335  get();
2336  if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(format, "string")))
2337  {
2338  success = false;
2339  break;
2340  }
2341  result.push_back(static_cast<typename string_t::value_type>(current));
2342  };
2343  return success;
2344  }
2345 
2360  template<typename NumberType>
2361  bool get_binary(const input_format_t format,
2362  const NumberType len,
2363  binary_t& result)
2364  {
2365  bool success = true;
2366  for (NumberType i = 0; i < len; i++)
2367  {
2368  get();
2369  if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(format, "binary")))
2370  {
2371  success = false;
2372  break;
2373  }
2374  result.push_back(static_cast<std::uint8_t>(current));
2375  }
2376  return success;
2377  }
2378 
2384  JSON_HEDLEY_NON_NULL(3)
2385  bool unexpect_eof(const input_format_t format, const char* context) const
2386  {
2387  if (JSON_HEDLEY_UNLIKELY(current == std::char_traits<char_type>::eof()))
2388  {
2389  return sax->parse_error(chars_read, "<end of file>",
2390  parse_error::create(110, chars_read, exception_message(format, "unexpected end of input", context)));
2391  }
2392  return true;
2393  }
2394 
2398  std::string get_token_string() const
2399  {
2400  std::array<char, 3> cr{{}};
2401  (std::snprintf)(cr.data(), cr.size(), "%.2hhX", static_cast<unsigned char>(current));
2402  return std::string{cr.data()};
2403  }
2404 
2411  std::string exception_message(const input_format_t format,
2412  const std::string& detail,
2413  const std::string& context) const
2414  {
2415  std::string error_msg = "syntax error while parsing ";
2416 
2417  switch (format)
2418  {
2419  case input_format_t::cbor:
2420  error_msg += "CBOR";
2421  break;
2422 
2423  case input_format_t::msgpack:
2424  error_msg += "MessagePack";
2425  break;
2426 
2427  case input_format_t::ubjson:
2428  error_msg += "UBJSON";
2429  break;
2430 
2431  case input_format_t::bson:
2432  error_msg += "BSON";
2433  break;
2434 
2435  default: // LCOV_EXCL_LINE
2436  JSON_ASSERT(false); // LCOV_EXCL_LINE
2437  }
2438 
2439  return error_msg + " " + context + ": " + detail;
2440  }
2441 
2442  private:
 /// input adapter the characters are read from (see get())
2444  InputAdapterType ia;
2445 
 /// the character most recently read by get(); EOF until the first read
2447  char_int_type current = std::char_traits<char_type>::eof();
2448 
 /// the number of characters read so far; incremented by every get()
2450  std::size_t chars_read = 0;
2451 
 /// result of little_endianess(); get_number() uses it to decide whether to reverse bytes
2453  const bool is_little_endian = little_endianess();
2454 
 /// the SAX handler that receives all parse events and errors; null until set
2456  json_sax_t* sax = nullptr;
2457 };
2458 } // namespace detail
2459 } // namespace nlohmann
deserialization of CBOR, MessagePack, and UBJSON values
Definition: binary_reader.hpp:57
bool sax_parse(const input_format_t format, json_sax_t *sax_, const bool strict=true, const cbor_tag_handler_t tag_handler=cbor_tag_handler_t::error)
Definition: binary_reader.hpp:94
binary_reader(InputAdapterType &&adapter)
create a binary reader
Definition: binary_reader.hpp:73
token_type
token types for the parser
Definition: lexer.hpp:31
static parse_error create(int id_, const position_t &pos, const std::string &what_arg)
create a parse error exception
Definition: exceptions.hpp:130
zip_uint64_t uint64_t
zip_uint64_t typedef.
Definition: zip.hpp:108
zip_int64_t int64_t
zip_int64_t typedef.
Definition: zip.hpp:103
zip_uint32_t uint32_t
zip_uint32_t typedef.
Definition: zip.hpp:98
zip_int32_t int32_t
zip_int32_t typedef.
Definition: zip.hpp:93
zip_uint8_t uint8_t
zip_uint8_t typedef.
Definition: zip.hpp:78
zip_uint16_t uint16_t
zip_uint16_t typedef.
Definition: zip.hpp:88
zip_int16_t int16_t
zip_int16_t typedef.
Definition: zip.hpp:83
zip_int8_t int8_t
zip_int8_t typedef.
Definition: zip.hpp:73
@ value
the parser finished reading a JSON value
@ key
the parser read a key of a value in an object
cbor_tag_handler_t
how to treat CBOR tags
Definition: binary_reader.hpp:30
@ error
throw a parse_error exception in case of a tag
@ strict
throw a type_error exception in case of invalid UTF-8
input_format_t
the supported input formats
Definition: input_adapters.hpp:23
namespace for Niels Lohmann
Definition: adl_serializer.hpp:9