Horizon
parser.hpp
1 #pragma once
2 
3 #include <cmath> // isfinite
4 #include <cstdint> // uint8_t
5 #include <functional> // function
6 #include <string> // string
7 #include <utility> // move
8 #include <vector> // vector
9 
10 #include <nlohmann/detail/exceptions.hpp>
11 #include <nlohmann/detail/input/input_adapters.hpp>
12 #include <nlohmann/detail/input/json_sax.hpp>
13 #include <nlohmann/detail/input/lexer.hpp>
14 #include <nlohmann/detail/macro_scope.hpp>
15 #include <nlohmann/detail/meta/is_sax.hpp>
16 #include <nlohmann/detail/value_t.hpp>
17 
18 namespace nlohmann
19 {
20 namespace detail
21 {
23 // parser //
25 
/// events reported to a parser_callback_t while a JSON text is being parsed
///
/// The enumerators are listed in start/end pairs; their numeric values follow
/// this declaration order (object_start == 0 ... value == 5).
enum class parse_event_t : std::uint8_t
{
    /// the parser read `{` and started to process a JSON object
    object_start,
    /// the parser read `}` and finished processing a JSON object
    object_end,
    /// the parser read `[` and started to process a JSON array
    array_start,
    /// the parser read `]` and finished processing a JSON array
    array_end,
    /// the parser read a key of a value in an object
    key,
    /// the parser finished reading a JSON value
    value
};
41 
42 template<typename BasicJsonType>
43 using parser_callback_t =
44  std::function<bool(int depth, parse_event_t event, BasicJsonType& parsed)>;
45 
51 template<typename BasicJsonType, typename InputAdapterType>
52 class parser
53 {
54  using number_integer_t = typename BasicJsonType::number_integer_t;
55  using number_unsigned_t = typename BasicJsonType::number_unsigned_t;
56  using number_float_t = typename BasicJsonType::number_float_t;
57  using string_t = typename BasicJsonType::string_t;
59  using token_type = typename lexer_t::token_type;
60 
61  public:
63  explicit parser(InputAdapterType&& adapter,
64  const parser_callback_t<BasicJsonType> cb = nullptr,
65  const bool allow_exceptions_ = true,
66  const bool skip_comments = false)
67  : callback(cb)
68  , m_lexer(std::move(adapter), skip_comments)
69  , allow_exceptions(allow_exceptions_)
70  {
71  // read first token
72  get_token();
73  }
74 
85  void parse(const bool strict, BasicJsonType& result)
86  {
87  if (callback)
88  {
89  json_sax_dom_callback_parser<BasicJsonType> sdp(result, callback, allow_exceptions);
90  sax_parse_internal(&sdp);
91  result.assert_invariant();
92 
93  // in strict mode, input must be completely read
94  if (strict && (get_token() != token_type::end_of_input))
95  {
96  sdp.parse_error(m_lexer.get_position(),
97  m_lexer.get_token_string(),
98  parse_error::create(101, m_lexer.get_position(),
99  exception_message(token_type::end_of_input, "value")));
100  }
101 
102  // in case of an error, return discarded value
103  if (sdp.is_errored())
104  {
105  result = value_t::discarded;
106  return;
107  }
108 
109  // set top-level value to null if it was discarded by the callback
110  // function
111  if (result.is_discarded())
112  {
113  result = nullptr;
114  }
115  }
116  else
117  {
118  json_sax_dom_parser<BasicJsonType> sdp(result, allow_exceptions);
119  sax_parse_internal(&sdp);
120  result.assert_invariant();
121 
122  // in strict mode, input must be completely read
123  if (strict && (get_token() != token_type::end_of_input))
124  {
125  sdp.parse_error(m_lexer.get_position(),
126  m_lexer.get_token_string(),
127  parse_error::create(101, m_lexer.get_position(),
128  exception_message(token_type::end_of_input, "value")));
129  }
130 
131  // in case of an error, return discarded value
132  if (sdp.is_errored())
133  {
134  result = value_t::discarded;
135  return;
136  }
137  }
138  }
139 
146  bool accept(const bool strict = true)
147  {
149  return sax_parse(&sax_acceptor, strict);
150  }
151 
152  template<typename SAX>
153  JSON_HEDLEY_NON_NULL(2)
154  bool sax_parse(SAX* sax, const bool strict = true)
155  {
157  const bool result = sax_parse_internal(sax);
158 
159  // strict mode: next byte must be EOF
160  if (result && strict && (get_token() != token_type::end_of_input))
161  {
162  return sax->parse_error(m_lexer.get_position(),
163  m_lexer.get_token_string(),
164  parse_error::create(101, m_lexer.get_position(),
165  exception_message(token_type::end_of_input, "value")));
166  }
167 
168  return result;
169  }
170 
171  private:
172  template<typename SAX>
173  JSON_HEDLEY_NON_NULL(2)
174  bool sax_parse_internal(SAX* sax)
175  {
176  // stack to remember the hierarchy of structured values we are parsing
177  // true = array; false = object
178  std::vector<bool> states;
179  // value to avoid a goto (see comment where set to true)
180  bool skip_to_state_evaluation = false;
181 
182  while (true)
183  {
184  if (!skip_to_state_evaluation)
185  {
186  // invariant: get_token() was called before each iteration
187  switch (last_token)
188  {
189  case token_type::begin_object:
190  {
191  if (JSON_HEDLEY_UNLIKELY(!sax->start_object(std::size_t(-1))))
192  {
193  return false;
194  }
195 
196  // closing } -> we are done
197  if (get_token() == token_type::end_object)
198  {
199  if (JSON_HEDLEY_UNLIKELY(!sax->end_object()))
200  {
201  return false;
202  }
203  break;
204  }
205 
206  // parse key
207  if (JSON_HEDLEY_UNLIKELY(last_token != token_type::value_string))
208  {
209  return sax->parse_error(m_lexer.get_position(),
210  m_lexer.get_token_string(),
211  parse_error::create(101, m_lexer.get_position(),
212  exception_message(token_type::value_string, "object key")));
213  }
214  if (JSON_HEDLEY_UNLIKELY(!sax->key(m_lexer.get_string())))
215  {
216  return false;
217  }
218 
219  // parse separator (:)
220  if (JSON_HEDLEY_UNLIKELY(get_token() != token_type::name_separator))
221  {
222  return sax->parse_error(m_lexer.get_position(),
223  m_lexer.get_token_string(),
224  parse_error::create(101, m_lexer.get_position(),
225  exception_message(token_type::name_separator, "object separator")));
226  }
227 
228  // remember we are now inside an object
229  states.push_back(false);
230 
231  // parse values
232  get_token();
233  continue;
234  }
235 
236  case token_type::begin_array:
237  {
238  if (JSON_HEDLEY_UNLIKELY(!sax->start_array(std::size_t(-1))))
239  {
240  return false;
241  }
242 
243  // closing ] -> we are done
244  if (get_token() == token_type::end_array)
245  {
246  if (JSON_HEDLEY_UNLIKELY(!sax->end_array()))
247  {
248  return false;
249  }
250  break;
251  }
252 
253  // remember we are now inside an array
254  states.push_back(true);
255 
256  // parse values (no need to call get_token)
257  continue;
258  }
259 
260  case token_type::value_float:
261  {
262  const auto res = m_lexer.get_number_float();
263 
264  if (JSON_HEDLEY_UNLIKELY(!std::isfinite(res)))
265  {
266  return sax->parse_error(m_lexer.get_position(),
267  m_lexer.get_token_string(),
268  out_of_range::create(406, "number overflow parsing '" + m_lexer.get_token_string() + "'"));
269  }
270 
271  if (JSON_HEDLEY_UNLIKELY(!sax->number_float(res, m_lexer.get_string())))
272  {
273  return false;
274  }
275 
276  break;
277  }
278 
279  case token_type::literal_false:
280  {
281  if (JSON_HEDLEY_UNLIKELY(!sax->boolean(false)))
282  {
283  return false;
284  }
285  break;
286  }
287 
288  case token_type::literal_null:
289  {
290  if (JSON_HEDLEY_UNLIKELY(!sax->null()))
291  {
292  return false;
293  }
294  break;
295  }
296 
297  case token_type::literal_true:
298  {
299  if (JSON_HEDLEY_UNLIKELY(!sax->boolean(true)))
300  {
301  return false;
302  }
303  break;
304  }
305 
306  case token_type::value_integer:
307  {
308  if (JSON_HEDLEY_UNLIKELY(!sax->number_integer(m_lexer.get_number_integer())))
309  {
310  return false;
311  }
312  break;
313  }
314 
315  case token_type::value_string:
316  {
317  if (JSON_HEDLEY_UNLIKELY(!sax->string(m_lexer.get_string())))
318  {
319  return false;
320  }
321  break;
322  }
323 
324  case token_type::value_unsigned:
325  {
326  if (JSON_HEDLEY_UNLIKELY(!sax->number_unsigned(m_lexer.get_number_unsigned())))
327  {
328  return false;
329  }
330  break;
331  }
332 
333  case token_type::parse_error:
334  {
335  // using "uninitialized" to avoid "expected" message
336  return sax->parse_error(m_lexer.get_position(),
337  m_lexer.get_token_string(),
338  parse_error::create(101, m_lexer.get_position(),
339  exception_message(token_type::uninitialized, "value")));
340  }
341 
342  default: // the last token was unexpected
343  {
344  return sax->parse_error(m_lexer.get_position(),
345  m_lexer.get_token_string(),
346  parse_error::create(101, m_lexer.get_position(),
347  exception_message(token_type::literal_or_value, "value")));
348  }
349  }
350  }
351  else
352  {
353  skip_to_state_evaluation = false;
354  }
355 
356  // we reached this line after we successfully parsed a value
357  if (states.empty())
358  {
359  // empty stack: we reached the end of the hierarchy: done
360  return true;
361  }
362 
363  if (states.back()) // array
364  {
365  // comma -> next value
366  if (get_token() == token_type::value_separator)
367  {
368  // parse a new value
369  get_token();
370  continue;
371  }
372 
373  // closing ]
374  if (JSON_HEDLEY_LIKELY(last_token == token_type::end_array))
375  {
376  if (JSON_HEDLEY_UNLIKELY(!sax->end_array()))
377  {
378  return false;
379  }
380 
381  // We are done with this array. Before we can parse a
382  // new value, we need to evaluate the new state first.
383  // By setting skip_to_state_evaluation to false, we
384  // are effectively jumping to the beginning of this if.
385  JSON_ASSERT(!states.empty());
386  states.pop_back();
387  skip_to_state_evaluation = true;
388  continue;
389  }
390 
391  return sax->parse_error(m_lexer.get_position(),
392  m_lexer.get_token_string(),
393  parse_error::create(101, m_lexer.get_position(),
394  exception_message(token_type::end_array, "array")));
395  }
396  else // object
397  {
398  // comma -> next value
399  if (get_token() == token_type::value_separator)
400  {
401  // parse key
402  if (JSON_HEDLEY_UNLIKELY(get_token() != token_type::value_string))
403  {
404  return sax->parse_error(m_lexer.get_position(),
405  m_lexer.get_token_string(),
406  parse_error::create(101, m_lexer.get_position(),
407  exception_message(token_type::value_string, "object key")));
408  }
409 
410  if (JSON_HEDLEY_UNLIKELY(!sax->key(m_lexer.get_string())))
411  {
412  return false;
413  }
414 
415  // parse separator (:)
416  if (JSON_HEDLEY_UNLIKELY(get_token() != token_type::name_separator))
417  {
418  return sax->parse_error(m_lexer.get_position(),
419  m_lexer.get_token_string(),
420  parse_error::create(101, m_lexer.get_position(),
421  exception_message(token_type::name_separator, "object separator")));
422  }
423 
424  // parse values
425  get_token();
426  continue;
427  }
428 
429  // closing }
430  if (JSON_HEDLEY_LIKELY(last_token == token_type::end_object))
431  {
432  if (JSON_HEDLEY_UNLIKELY(!sax->end_object()))
433  {
434  return false;
435  }
436 
437  // We are done with this object. Before we can parse a
438  // new value, we need to evaluate the new state first.
439  // By setting skip_to_state_evaluation to false, we
440  // are effectively jumping to the beginning of this if.
441  JSON_ASSERT(!states.empty());
442  states.pop_back();
443  skip_to_state_evaluation = true;
444  continue;
445  }
446 
447  return sax->parse_error(m_lexer.get_position(),
448  m_lexer.get_token_string(),
449  parse_error::create(101, m_lexer.get_position(),
450  exception_message(token_type::end_object, "object")));
451  }
452  }
453  }
454 
456  token_type get_token()
457  {
458  return last_token = m_lexer.scan();
459  }
460 
461  std::string exception_message(const token_type expected, const std::string& context)
462  {
463  std::string error_msg = "syntax error ";
464 
465  if (!context.empty())
466  {
467  error_msg += "while parsing " + context + " ";
468  }
469 
470  error_msg += "- ";
471 
472  if (last_token == token_type::parse_error)
473  {
474  error_msg += std::string(m_lexer.get_error_message()) + "; last read: '" +
475  m_lexer.get_token_string() + "'";
476  }
477  else
478  {
479  error_msg += "unexpected " + std::string(lexer_t::token_type_name(last_token));
480  }
481 
482  if (expected != token_type::uninitialized)
483  {
484  error_msg += "; expected " + std::string(lexer_t::token_type_name(expected));
485  }
486 
487  return error_msg;
488  }
489 
490  private:
492  const parser_callback_t<BasicJsonType> callback = nullptr;
494  token_type last_token = token_type::uninitialized;
496  lexer_t m_lexer;
498  const bool allow_exceptions = true;
499 };
500 } // namespace detail
501 } // namespace nlohmann
Definition: json_sax.hpp:620
SAX implementation to create a JSON value from SAX events.
Definition: json_sax.hpp:150
JSON_HEDLEY_RETURNS_NON_NULL static JSON_HEDLEY_CONST const char * token_type_name(const token_type t) noexcept
return name of values of type token_type (only used for errors)
Definition: lexer.hpp:54
lexical analysis
Definition: lexer.hpp:104
string_t & get_string()
return current string value (implicitly resets the token; useful only once)
Definition: lexer.hpp:1422
constexpr position_t get_position() const noexcept
return position of last read token
Definition: lexer.hpp:1432
constexpr number_integer_t get_number_integer() const noexcept
return integer value
Definition: lexer.hpp:1404
constexpr JSON_HEDLEY_RETURNS_NON_NULL const char * get_error_message() const noexcept
return syntax error message
Definition: lexer.hpp:1465
constexpr number_unsigned_t get_number_unsigned() const noexcept
return unsigned integer value
Definition: lexer.hpp:1410
constexpr number_float_t get_number_float() const noexcept
return floating-point value
Definition: lexer.hpp:1416
std::string get_token_string() const
return the last read token (for errors only).
Definition: lexer.hpp:1440
static parse_error create(int id_, const position_t &pos, const std::string &what_arg)
create a parse error exception
Definition: exceptions.hpp:130
syntax analysis
Definition: parser.hpp:53
parser(InputAdapterType &&adapter, const parser_callback_t< BasicJsonType > cb=nullptr, const bool allow_exceptions_=true, const bool skip_comments=false)
a parser reading from an input adapter
Definition: parser.hpp:63
bool accept(const bool strict=true)
public accept interface
Definition: parser.hpp:146
void parse(const bool strict, BasicJsonType &result)
public parser interface
Definition: parser.hpp:85
zip_uint8_t uint8_t
zip_uint8_t typedef.
Definition: zip.hpp:78
@ discarded
discarded by the parser callback function
parse_event_t
Definition: parser.hpp:27
@ value
the parser finished reading a JSON value
@ key
the parser read a key of a value in an object
@ array_end
the parser read ] and finished processing a JSON array
@ array_start
the parser read [ and started to process a JSON array
@ object_start
the parser read { and started to process a JSON object
@ object_end
the parser read } and finished processing a JSON object
@ strict
throw a type_error exception in case of invalid UTF-8
namespace for Niels Lohmann
Definition: adl_serializer.hpp:9