ALib C++ Framework
by
Library Version: 2605 R0
Documentation generated by doxygen
Loading...
Searching...
No Matches
parser_impl.hpp
Go to the documentation of this file.
1//==================================================================================================
2/// \file
3/// This header-file is part of module \alib_expressions of the \aliblong.
4///
5/// Copyright 2013-2026 A-Worx GmbH, Germany.
6/// Published under #"mainpage_license".
7//==================================================================================================
8ALIB_EXPORT namespace alib { namespace expressions { namespace detail {
9
10//==================================================================================================
11/// Implementation of the default parser of module \alib_expressions_nl.
12///
13/// This internal class is not too well documented. Nevertheless, it is designed so that its
14/// behavior can be tweaked slightly and, in case of need, a custom parser class can be derived
15/// and used. For doing so, please consult the source code of this class.
16/// A custom parser might be set to protected field #"Compiler::parser;*" by a derived
17/// compiler type before compiling a first expression.
18///
19/// It is possible to define scannable custom unary and binary operators. Definitions of binary
20/// operators include a "precedence value" that allows aligning them with the built-in types.
21/// Also, built-in operators can be \em removed if wanted.
22//==================================================================================================
23class ParserImpl : public Parser {
24 protected:
25 /// Types of tokens.
26 enum class Tokens : char {
27 EOT = '\0', ///< End of tokens. (No next token available.)
28 SymbolicOp = 'O' , ///< A symbolic operator. Can be unary or binary.
29 AlphaUnOp = 'U' , ///< An alphabetic unary operator.
30 AlphaBinOp = 'B' , ///< An alphabetic binary operator.
31
32 LitString = 'S' , ///< a string literal.
33 LitInteger = 'I' , ///< an integer literal.
34 LitFloat = 'F' , ///< a float literal.
35
36 Identifier = 'A' , ///< An identifier.
37
38 BraceOpen = '(' , ///< An opening brace.
39 BraceClose = ')' , ///< A closing brace.
40 Comma = ',' , ///< A comma.
41
42 SubscriptOpen = '[' , ///< An opening subscript brace.
43 SubscriptClose = ']' , ///< A closing subscript brace.
44 };
45
46 /// Memory for temporary allocations, like AST objects or literal strings with converted
47 /// escape sequences. Provided by the compiler with the method #".Parse".
49
50 /// The actual token type.
52
53 /// The literal hint of the actual token (if applicable).
55
56 /// Integer value of token (if applicable).
58
59 /// Float value of token (if applicable).
60 double tokFloat;
61
62 /// String value of token (if applicable).
64
65 /// The position of the token in #".expression".
67
68 /// The compiler that this parser works for.
70
71 /// Used for scanning literals. Provided to this class with each parse request.
73
74
75 /// The given expression to parse.
77
78 /// The rest of #".expression".
80
81 /// Lists single characters that get directly converted into tokens of corresponding type
82 /// when found in the expression string. Tokens are <c>"()[],"</c>.
84
85 /// Lists single characters that got found in operator strings which have been registered
86 /// with
87 /// #"Compiler::AddUnaryOperator;*" and
88 /// #"Compiler::AddBinaryOperator;*".
89 ///
90 /// Used by the internal token scanner (lexer) to detect operator characters when found in
91 /// the expression string. By default, this will become something like
92 /// <c>"=+-*%/?:~!|&^!<>/%"</c>.
94
95 /// Hash set of unary operators. The key of the table is the operator string, which usually
96 /// consists of one character, like <c>'-'</c> or <c>'!'</c>.
97 ///
98 /// This table is filled in the constructor of the class with the values stored in
99 /// #"Compiler::UnaryOperators;*" and used for testing of existence.
101 String,
102 alib::hash_string_ignore_case <character>,
103 alib::equal_to_string_ignore_case<character> > unaryOperators;
104
105 /// Hash set of binary operators. The key of the table is the operator string, which usually
106 /// consists of one to three characters, like <c>'+'</c> or <c>'<<='</c>.
107 ///
108 /// This table is filled in the constructor of the class with the values stored in
109 /// #"Compiler::BinaryOperators;*" and used for testing of existence.
111 String,
112 alib::hash_string_ignore_case <character>,
113 alib::equal_to_string_ignore_case<character> > binaryOperators;
114
115 /// List of ASTs currently created in recursion.
116 /// \note
117 /// This vector is created in the monotonic allocator and is never even deleted, as all
118 /// inserted #"%AST" elements exclusively allocate from the same temporary allocator.
120
121 //################################################################################################
122 // Constructor/destructor, interface
123 //################################################################################################
124 public:
125 /// Constructor.
126 /// @param compiler The compiler that this parser works for.
127 /// @param allocator A monotonic allocator for permanent allocations.
129
130 /// Virtual destructor.
131 virtual ~ParserImpl() override {}
132
133 /// Parses the given expression string.
134 /// \note
135 /// The return value is hidden by using <c>void*</c>. This is to avoid flooding the code
136 /// entities that use module \alib_expressions_nl with \c boost header include files.
137 ///
138 /// @param exprString The string to parse.
139 /// @param nf Used to scan number literals.
140 /// @return The abstract syntax tree representing the expression.
141 ALIB_DLL virtual
142 detail::AST* Parse( const String& exprString, NumberFormat* nf ) override;
143
144
145 protected:
146 //################################################################################################
147 // Lexer
148 //################################################################################################
149 /// This is the "scanner" or "lexer" method.
150 void NextToken();
151
152
153 //################################################################################################
154 // Parser
155 //################################################################################################
156
157 /// Tests if the actual token represents a known unary operator.
158 /// @return Returns the unary operator symbol, respectively a \e nulled string on failure.
161
162 /// Tests if the actual token represents a known binary operator.
163 /// @return Returns the binary operator symbol, respectively a \e nulled string on failure.
166
167 /// Internal method that optionally parses a conditional operator (<c>Q ? T : F</c> )
168 ///
169 /// @return The abstract syntax tree node parsed.
171
172 /// Internal method that optionally parses a binary operator and levels (recursively)
173 /// trees of such according to operator precedence and brackets given.
174 /// @return The abstract syntax tree node parsed.
175 AST* parseBinary();
176
177 /// Parses unary ops, literals, identifiers, functions and expressions surrounded by
178 /// brackets.
179 /// @return The abstract syntax tree node parsed.
180 AST* parseSimple();
181
182 /// Invoked after an identifier or function was parsed. Tests for subscript
183 /// operator, otherwise returns the given ast as is.
184 /// @param function The identifier or function parsed.
185 /// @return Either the given node or a node of type #"detail::ASTBinaryOp"
186 /// with \c lhs set to \p{function}, \c rhs to the parsed subscript arg and
187 /// operator set to <c>'[]'</c>.
188 AST* parseSubscript( AST* function );
189
190
191
192 /// Simple shortcut popping and returning last ast from the current list.
193 /// @return Popped AST object.
194 AST* pop() {
195 AST* ast= ASTs->back();
196 ASTs->pop_back();
197 return ast;
198 }
199
200 /// Simple shortcut pushing an ast to current list and returning it.
201 /// @param ast The AST node to push.
202 /// @return The given AST object.
203 AST* push( AST* ast ) { ASTs->emplace_back(ast); return ast; }
204
205 /// Simple shortcut to the topmost AST.
206 /// @return The topmost AST object.
207 AST* top() { return ASTs->back(); }
208
209 /// Simple shortcut replacing the topmost ast.
210 /// @param ast The new AST node to replace the existing one with.
211 /// @return The given object.
212 AST* replace( AST* ast ) { ASTs->back()= ast; return ast; }
213}; // class ParserImpl
214
215
216}}} // namespace [alib::expressions::detail]
#define ALIB_DLL
#define ALIB_EXPORT
Tokens token
The actual token type.
HashSet< MonoAllocator, String, alib::hash_string_ignore_case< character >, alib::equal_to_string_ignore_case< character > > unaryOperators
Compiler & compiler
The compiler that this parser works for.
ASTLiteral::NFHint tokLiteralHint
The literal hint of the actual token (if applicable).
double tokFloat
Float value of token (if applicable).
virtual detail::AST * Parse(const String &exprString, NumberFormat *nf) override
String expression
The given expression to parse.
integer tokPosition
The position of the token in #".expression".
String tokString
String value of token (if applicable).
integer tokInteger
Integer value of token (if applicable).
ParserImpl(Compiler &compiler, MonoAllocator &allocator)
virtual ~ParserImpl() override
Virtual destructor.
@ SubscriptClose
A closing subscript brace.
@ EOT
End of tokens. (No next token available.)
@ AlphaBinOp
An alphabetic binary operator.
@ SymbolicOp
A symbolic operator. Can be unary or binary.
@ AlphaUnOp
An alphabetic unary operator.
@ SubscriptOpen
An opening subscript brace.
HashSet< MonoAllocator, String, alib::hash_string_ignore_case< character >, alib::equal_to_string_ignore_case< character > > binaryOperators
void NextToken()
This is the "scanner" or "lexer" method.
Substring scanner
The rest of #".expression".
NumberFormat * numberFormat
Used for scanning literals. Provided to this class with each parse request.
Definition alox.cpp:14
monomem::TMonoAllocator< lang::HeapAllocator > MonoAllocator
strings::TNumberFormat< character > NumberFormat
Type alias in namespace #"%alib".
containers::HashSet< TAllocator, T, THash, TEqual, THashCaching, TRecycling > HashSet
Type alias in namespace #"%alib". See type definition #"alib::containers::HashSet".
lang::integer integer
Type alias in namespace #"%alib".
Definition integers.hpp:149
strings::TString< character > String
Type alias in namespace #"%alib".
Definition string.hpp:2165
strings::TSubstring< character > Substring
Type alias in namespace #"%alib".
lang::TBitSet< int, TEnd, TBegin > BitSet
Type alias in namespace #"%alib".
Definition bitset.hpp:813
std::vector< T, StdMA< T > > StdVectorMA
Type alias in namespace #"%alib".
This detail class constitutes an abstract base class for expression parsers.
Definition parser.hpp:14