protozero
Minimalistic protocol buffer decoder and encoder in C++.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Macros Pages
pbf_reader.hpp
Go to the documentation of this file.
1 #ifndef PROTOZERO_PBF_READER_HPP
2 #define PROTOZERO_PBF_READER_HPP
3 
4 /*****************************************************************************
5 
6 protozero - Minimalistic protocol buffer decoder and encoder in C++.
7 
8 This file is from https://github.com/mapbox/protozero where you can find more
9 documentation.
10 
11 *****************************************************************************/
12 
19 #include <cstddef>
20 #include <cstdint>
21 #include <string>
22 #include <utility>
23 
24 #include <protozero/config.hpp>
25 #include <protozero/exception.hpp>
26 #include <protozero/iterators.hpp>
27 #include <protozero/types.hpp>
28 #include <protozero/varint.hpp>
29 
30 #if PROTOZERO_BYTE_ORDER != PROTOZERO_LITTLE_ENDIAN
31 # include <protozero/byteswap.hpp>
32 #endif
33 
34 namespace protozero {
35 
60 class pbf_reader {
61 
62  // A pointer to the next unread data.
63  const char* m_data = nullptr;
64 
65  // A pointer to one past the end of data.
66  const char* m_end = nullptr;
67 
68  // The wire type of the current field.
69  pbf_wire_type m_wire_type = pbf_wire_type::unknown;
70 
71  // The tag of the current field.
72  pbf_tag_type m_tag = 0;
73 
74  template <typename T>
75  T get_fixed() {
76  T result;
77  skip_bytes(sizeof(T));
78  std::memcpy(&result, m_data - sizeof(T), sizeof(T));
79 #if PROTOZERO_BYTE_ORDER != PROTOZERO_LITTLE_ENDIAN
80  detail::byteswap_inplace(&result);
81 #endif
82  return result;
83  }
84 
85  template <typename T>
87  protozero_assert(tag() != 0 && "call next() before accessing field value");
88  const auto len = get_len_and_skip();
89  protozero_assert(len % sizeof(T) == 0);
91  const_fixed_iterator<T>(m_data, m_data)};
92  }
93 
94  template <typename T>
95  T get_varint() {
96  return static_cast<T>(decode_varint(&m_data, m_end));
97  }
98 
99  template <typename T>
100  T get_svarint() {
101  protozero_assert((has_wire_type(pbf_wire_type::varint) || has_wire_type(pbf_wire_type::length_delimited)) && "not a varint");
102  return static_cast<T>(decode_zigzag64(decode_varint(&m_data, m_end)));
103  }
104 
105  pbf_length_type get_length() {
106  return get_varint<pbf_length_type>();
107  }
108 
109  void skip_bytes(pbf_length_type len) {
110  if (m_data + len > m_end) {
111  throw end_of_buffer_exception();
112  }
113  m_data += len;
114 
115  // In debug builds reset the tag to zero so that we can detect (some)
116  // wrong code.
117 #ifndef NDEBUG
118  m_tag = 0;
119 #endif
120  }
121 
122  pbf_length_type get_len_and_skip() {
123  const auto len = get_length();
124  skip_bytes(len);
125  return len;
126  }
127 
128  template <typename T>
129  iterator_range<T> get_packed() {
130  protozero_assert(tag() != 0 && "call next() before accessing field value");
131  const auto len = get_len_and_skip();
132  return iterator_range<T>{T{m_data - len, m_data},
133  T{m_data, m_data}};
134  }
135 
136 public:
137 
148  explicit pbf_reader(const data_view& view) noexcept
149  : m_data(view.data()),
150  m_end(view.data() + view.size()),
151  m_wire_type(pbf_wire_type::unknown),
152  m_tag(0) {
153  }
154 
165  pbf_reader(const char* data, std::size_t size) noexcept
166  : m_data(data),
167  m_end(data + size),
168  m_wire_type(pbf_wire_type::unknown),
169  m_tag(0) {
170  }
171 
182  pbf_reader(const std::pair<const char*, std::size_t>& data) noexcept
183  : m_data(data.first),
184  m_end(data.first + data.second),
185  m_wire_type(pbf_wire_type::unknown),
186  m_tag(0) {
187  }
188 
199  pbf_reader(const std::string& data) noexcept
200  : m_data(data.data()),
201  m_end(data.data() + data.size()),
202  m_wire_type(pbf_wire_type::unknown),
203  m_tag(0) {
204  }
205 
210  pbf_reader() noexcept = default;
211 
213  pbf_reader(const pbf_reader&) noexcept = default;
214 
216  pbf_reader(pbf_reader&&) noexcept = default;
217 
219  pbf_reader& operator=(const pbf_reader& other) noexcept = default;
220 
222  pbf_reader& operator=(pbf_reader&& other) noexcept = default;
223 
224  ~pbf_reader() = default;
225 
231  void swap(pbf_reader& other) noexcept {
232  using std::swap;
233  swap(m_data, other.m_data);
234  swap(m_end, other.m_end);
235  swap(m_wire_type, other.m_wire_type);
236  swap(m_tag, other.m_tag);
237  }
238 
244  operator bool() const noexcept {
245  return m_data < m_end;
246  }
247 
257  std::size_t length() const noexcept {
258  return std::size_t(m_end - m_data);
259  }
260 
276  bool next() {
277  if (m_data == m_end) {
278  return false;
279  }
280 
281  const auto value = get_varint<uint32_t>();
282  m_tag = pbf_tag_type(value >> 3);
283 
284  // tags 0 and 19000 to 19999 are not allowed as per
285  // https://developers.google.com/protocol-buffers/docs/proto
286  protozero_assert(((m_tag > 0 && m_tag < 19000) ||
287  (m_tag > 19999 && m_tag <= ((1 << 29) - 1))) && "tag out of range");
288 
289  m_wire_type = pbf_wire_type(value & 0x07);
290  switch (m_wire_type) {
291  case pbf_wire_type::varint:
292  case pbf_wire_type::fixed64:
293  case pbf_wire_type::length_delimited:
294  case pbf_wire_type::fixed32:
295  break;
296  default:
298  }
299 
300  return true;
301  }
302 
328  bool next(pbf_tag_type next_tag) {
329  while (next()) {
330  if (m_tag == next_tag) {
331  return true;
332  } else {
333  skip();
334  }
335  }
336  return false;
337  }
338 
348  pbf_tag_type tag() const noexcept {
349  return m_tag;
350  }
351 
367  pbf_wire_type wire_type() const noexcept {
368  return m_wire_type;
369  }
370 
377  bool has_wire_type(pbf_wire_type type) const noexcept {
378  return wire_type() == type;
379  }
380 
387  void skip() {
388  protozero_assert(tag() != 0 && "call next() before calling skip()");
389  switch (wire_type()) {
390  case pbf_wire_type::varint:
391  skip_varint(&m_data, m_end);
392  break;
393  case pbf_wire_type::fixed64:
394  skip_bytes(8);
395  break;
396  case pbf_wire_type::length_delimited:
397  skip_bytes(get_length());
398  break;
399  case pbf_wire_type::fixed32:
400  skip_bytes(4);
401  break;
402  default:
403  protozero_assert(false && "can not be here because next() should have thrown already");
404  }
405  }
406 
408 
419  bool get_bool() {
420  protozero_assert(tag() != 0 && "call next() before accessing field value");
421  protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint");
422  protozero_assert((*m_data & 0x80) == 0 && "not a 1 byte varint");
423  skip_bytes(1);
424  return m_data[-1] != 0; // -1 okay because we incremented m_data the line before
425  }
426 
434  int32_t get_enum() {
435  protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint");
436  return get_varint<int32_t>();
437  }
438 
446  int32_t get_int32() {
447  protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint");
448  return get_varint<int32_t>();
449  }
450 
458  int32_t get_sint32() {
459  protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint");
460  return get_svarint<int32_t>();
461  }
462 
470  uint32_t get_uint32() {
471  protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint");
472  return get_varint<uint32_t>();
473  }
474 
482  int64_t get_int64() {
483  protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint");
484  return get_varint<int64_t>();
485  }
486 
494  int64_t get_sint64() {
495  protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint");
496  return get_svarint<int64_t>();
497  }
498 
506  uint64_t get_uint64() {
507  protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint");
508  return get_varint<uint64_t>();
509  }
510 
518  uint32_t get_fixed32() {
519  protozero_assert(tag() != 0 && "call next() before accessing field value");
520  protozero_assert(has_wire_type(pbf_wire_type::fixed32) && "not a 32-bit fixed");
521  return get_fixed<uint32_t>();
522  }
523 
531  int32_t get_sfixed32() {
532  protozero_assert(tag() != 0 && "call next() before accessing field value");
533  protozero_assert(has_wire_type(pbf_wire_type::fixed32) && "not a 32-bit fixed");
534  return get_fixed<int32_t>();
535  }
536 
544  uint64_t get_fixed64() {
545  protozero_assert(tag() != 0 && "call next() before accessing field value");
546  protozero_assert(has_wire_type(pbf_wire_type::fixed64) && "not a 64-bit fixed");
547  return get_fixed<uint64_t>();
548  }
549 
557  int64_t get_sfixed64() {
558  protozero_assert(tag() != 0 && "call next() before accessing field value");
559  protozero_assert(has_wire_type(pbf_wire_type::fixed64) && "not a 64-bit fixed");
560  return get_fixed<int64_t>();
561  }
562 
570  float get_float() {
571  protozero_assert(tag() != 0 && "call next() before accessing field value");
572  protozero_assert(has_wire_type(pbf_wire_type::fixed32) && "not a 32-bit fixed");
573  return get_fixed<float>();
574  }
575 
583  double get_double() {
584  protozero_assert(tag() != 0 && "call next() before accessing field value");
585  protozero_assert(has_wire_type(pbf_wire_type::fixed64) && "not a 64-bit fixed");
586  return get_fixed<double>();
587  }
588 
599  protozero_assert(tag() != 0 && "call next() before accessing field value");
600  protozero_assert(has_wire_type(pbf_wire_type::length_delimited) && "not of type string, bytes or message");
601  const auto len = get_len_and_skip();
602  return data_view{m_data-len, len};
603  }
604 
605 #ifndef PROTOZERO_STRICT_API
606 
614  std::pair<const char*, pbf_length_type> get_data() {
615  protozero_assert(tag() != 0 && "call next() before accessing field value");
616  protozero_assert(has_wire_type(pbf_wire_type::length_delimited) && "not of type string, bytes or message");
617  const auto len = get_len_and_skip();
618  return std::make_pair(m_data-len, len);
619  }
620 #endif
621 
629  std::string get_bytes() {
630  return std::string(get_view());
631  }
632 
640  std::string get_string() {
641  return std::string(get_view());
642  }
643 
652  return pbf_reader(get_view());
653  }
654 
656 
659 
662 
665 
668 
671 
674 
677 
680 
682 
696  return get_packed<pbf_reader::const_bool_iterator>();
697  }
698 
709  return get_packed<pbf_reader::const_enum_iterator>();
710  }
711 
722  return get_packed<pbf_reader::const_int32_iterator>();
723  }
724 
735  return get_packed<pbf_reader::const_sint32_iterator>();
736  }
737 
748  return get_packed<pbf_reader::const_uint32_iterator>();
749  }
750 
761  return get_packed<pbf_reader::const_int64_iterator>();
762  }
763 
774  return get_packed<pbf_reader::const_sint64_iterator>();
775  }
776 
787  return get_packed<pbf_reader::const_uint64_iterator>();
788  }
789 
799  auto get_packed_fixed32() -> decltype(packed_fixed<uint32_t>()) {
800  return packed_fixed<uint32_t>();
801  }
802 
812  auto get_packed_sfixed32() -> decltype(packed_fixed<int32_t>()) {
813  return packed_fixed<int32_t>();
814  }
815 
825  auto get_packed_fixed64() -> decltype(packed_fixed<uint64_t>()) {
826  return packed_fixed<uint64_t>();
827  }
828 
838  auto get_packed_sfixed64() -> decltype(packed_fixed<int64_t>()) {
839  return packed_fixed<int64_t>();
840  }
841 
851  auto get_packed_float() -> decltype(packed_fixed<float>()) {
852  return packed_fixed<float>();
853  }
854 
864  auto get_packed_double() -> decltype(packed_fixed<double>()) {
865  return packed_fixed<double>();
866  }
867 
869 
870 }; // class pbf_reader
871 
878 inline void swap(pbf_reader& lhs, pbf_reader& rhs) noexcept {
879  lhs.swap(rhs);
880 }
881 
882 } // end namespace protozero
883 
884 #endif // PROTOZERO_PBF_READER_HPP
pbf_reader(const data_view &view) noexcept
Definition: pbf_reader.hpp:148
int64_t get_sfixed64()
Definition: pbf_reader.hpp:557
uint32_t get_uint32()
Definition: pbf_reader.hpp:470
uint64_t get_fixed64()
Definition: pbf_reader.hpp:544
int32_t get_sfixed32()
Definition: pbf_reader.hpp:531
Definition: exception.hpp:48
uint64_t get_uint64()
Definition: pbf_reader.hpp:506
auto get_packed_double() -> decltype(packed_fixed< double >())
Definition: pbf_reader.hpp:864
Definition: iterators.hpp:146
int32_t get_int32()
Definition: pbf_reader.hpp:446
pbf_reader(const std::string &data) noexcept
Definition: pbf_reader.hpp:199
iterator_range< pbf_reader::const_sint64_iterator > get_packed_sint64()
Definition: pbf_reader.hpp:773
auto get_packed_float() -> decltype(packed_fixed< float >())
Definition: pbf_reader.hpp:851
auto get_packed_fixed32() -> decltype(packed_fixed< uint32_t >())
Definition: pbf_reader.hpp:799
void swap(pbf_reader &lhs, pbf_reader &rhs) noexcept
Definition: pbf_reader.hpp:878
Contains macro checks for different configurations.
Contains the declaration of low-level types used in the pbf format.
bool has_wire_type(pbf_wire_type type) const noexcept
Definition: pbf_reader.hpp:377
std::size_t length() const noexcept
Definition: pbf_reader.hpp:257
void skip()
Definition: pbf_reader.hpp:387
iterator_range< pbf_reader::const_int32_iterator > get_packed_int32()
Definition: pbf_reader.hpp:721
pbf_reader get_message()
Definition: pbf_reader.hpp:651
void swap(iterator_range< T > &lhs, iterator_range< T > &rhs) noexcept
Definition: iterators.hpp:137
pbf_reader() noexcept=default
iterator_range< pbf_reader::const_int64_iterator > get_packed_int64()
Definition: pbf_reader.hpp:760
void skip_varint(const char **data, const char *end)
Definition: varint.hpp:112
auto get_packed_sfixed64() -> decltype(packed_fixed< int64_t >())
Definition: pbf_reader.hpp:838
Contains the iterators for access to packed repeated fields.
auto get_packed_sfixed32() -> decltype(packed_fixed< int32_t >())
Definition: pbf_reader.hpp:812
pbf_wire_type
Definition: types.hpp:39
iterator_range< pbf_reader::const_uint64_iterator > get_packed_uint64()
Definition: pbf_reader.hpp:786
iterator_range< pbf_reader::const_sint32_iterator > get_packed_sint32()
Definition: pbf_reader.hpp:734
pbf_wire_type wire_type() const noexcept
Definition: pbf_reader.hpp:367
void swap(pbf_reader &other) noexcept
Definition: pbf_reader.hpp:231
int64_t get_sint64()
Definition: pbf_reader.hpp:494
bool next(pbf_tag_type next_tag)
Definition: pbf_reader.hpp:328
std::pair< const char *, pbf_length_type > get_data()
Definition: pbf_reader.hpp:614
auto get_packed_fixed64() -> decltype(packed_fixed< uint64_t >())
Definition: pbf_reader.hpp:825
iterator_range< pbf_reader::const_bool_iterator > get_packed_bool()
Definition: pbf_reader.hpp:695
Contains functions to swap bytes in values (for different endianness).
int32_t get_sint32()
Definition: pbf_reader.hpp:458
std::string get_bytes()
Definition: pbf_reader.hpp:629
double get_double()
Definition: pbf_reader.hpp:583
bool get_bool()
Definition: pbf_reader.hpp:419
std::string get_string()
Definition: pbf_reader.hpp:640
uint32_t pbf_length_type
Definition: types.hpp:51
Contains the exceptions used in the protozero library.
pbf_reader(const std::pair< const char *, std::size_t > &data) noexcept
Definition: pbf_reader.hpp:182
data_view get_view()
Definition: pbf_reader.hpp:598
pbf_reader(const char *data, std::size_t size) noexcept
Definition: pbf_reader.hpp:165
uint32_t pbf_tag_type
Definition: types.hpp:32
uint32_t get_fixed32()
Definition: pbf_reader.hpp:518
iterator_range< pbf_reader::const_enum_iterator > get_packed_enum()
Definition: pbf_reader.hpp:708
Definition: types.hpp:63
pbf_tag_type tag() const noexcept
Definition: pbf_reader.hpp:348
Definition: iterators.hpp:282
Definition: iterators.hpp:215
int32_t get_enum()
Definition: pbf_reader.hpp:434
Definition: pbf_reader.hpp:60
Definition: iterators.hpp:38
float get_float()
Definition: pbf_reader.hpp:570
Definition: exception.hpp:61
Contains low-level varint and zigzag encoding and decoding functions.
uint64_t decode_varint(const char **data, const char *end)
Definition: varint.hpp:89
bool next()
Definition: pbf_reader.hpp:276
int64_t get_int64()
Definition: pbf_reader.hpp:482
iterator_range< pbf_reader::const_uint32_iterator > get_packed_uint32()
Definition: pbf_reader.hpp:747
int64_t decode_zigzag64(uint64_t value) noexcept
Definition: varint.hpp:181
All parts of the protozero header-only library are in this namespace.
Definition: byteswap.hpp:24