forked from TUBITAK-TUTEL/verible
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathverilog_lexical_context.h
369 lines (296 loc) · 12.1 KB
/
verilog_lexical_context.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
// Copyright 2017-2020 The Verible Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef VERIBLE_VERILOG_PARSER_VERILOG_LEXICAL_CONTEXT_H_
#define VERIBLE_VERILOG_PARSER_VERILOG_LEXICAL_CONTEXT_H_
#include <iosfwd>
#include <iterator>
#include <stack>
#include <vector>
#include "common/text/token_info.h"
#include "common/text/token_stream_view.h"
#include "common/util/with_reason.h"
namespace verilog {
// TODO(fangism): move all of these _classes into an internal namespace.
// Small state machine that follows keywords which may be trailed by an
// optional label, and reports whether a statement or item could begin at the
// current position in the token stream.
class _KeywordLabelStateMachine {
 public:
  // Advances the machine using the enum of the next (look-ahead) token.
  // Defined out-of-line.
  void UpdateState(int token_enum);

  // Tells whether a statement or item is allowed to begin in the current
  // state.
  bool ItemMayStart() const {
    switch (state_) {
      case kItemStart:
      case kGotLabelableKeyword:
        return true;
      default:
        return false;
    }
  }

 private:
  // NOTE: enumerator order is kept as-is; only the comments are new.
  enum State {
    kItemStart,            // An item could begin here.
    kItemMiddle,           // Already past the beginning of an item.
    kGotLabelableKeyword,  // Just saw a keyword that is able to take a label.
    // NOTE(review): presumably a ':' followed a labelable keyword and the
    // label itself is expected next -- confirm against UpdateState().
    kGotColonExpectingLabel,
  };

  // A freshly constructed machine sits at the start of an item.
  State state_{kItemStart};
};
// State machine that follows the grammar's constraint_block and
// constraint_set productions.  It is considered active for as long as its
// internal stack of states is non-empty.
class _ConstraintBlockStateMachine {
 public:
  _ConstraintBlockStateMachine() = default;

  // True while at least one constraint block/set level is being tracked.
  bool IsActive() const {
    const bool idle = states_.empty();
    return !idle;
  }

  // Advances the machine using the enum of the next (look-ahead) token.
  // Defined out-of-line.
  void UpdateState(int token_enum);

  // Yields the disambiguated enum for the '->' token.
  int InterpretToken(int token_enum) const;

  // Writes a representation of this machine to the stream (debugging aid).
  std::ostream& Dump(std::ostream& os) const;

 private:
  void DeferInvalidToken(int token_enum);

  // States mirror the grammar rules for constraint_block_item and
  // constraint_expression.
  enum State {
    // Home state: positioned at the start of a list item.
    kBeginningOfBlockItemOrExpression,

    // Skip everything up to the next ';'.  Used for:
    //   "soft ...;"
    //   "unique { ... };"
    //   "disable soft ...;"
    //   "solve ... before ...;"  (from constraint_block_item)
    kIgnoreUntilSemicolon,

    // constraint_expression
    //   : expression_or_dist ;
    //   | expression -> constraint_set
    kExpectingExpressionOrImplication,

    kGotIf,       // if ...
    kGotForeach,  // foreach ...

    // constraint_set
    //   : constraint_expression
    //   | '{' { constraint_expression , }** '}'
    //
    // Final nonterminal of: if-clause, else-clause, foreach-body, and the
    // right-hand side of "expression -> constraint_set"
    // (constraint-implication).
    kExpectingConstraintSet,

    kInParenExpression,  // balance until ')'
    kInBraceExpression,  // balance until '}'
  };

  // Constraint sets nest, hence a stack: one entry per level of constraint
  // block or constraint set, each of which is enclosed in { }.
  std::stack<State> states_;
};
inline std::ostream& operator<<(std::ostream& os,
const _ConstraintBlockStateMachine& s) {
return s.Dump(os);
}
// State machine used while scanning a randomize call.  It is active whenever
// its state is anything other than kNone.
class _RandomizeCallStateMachine {
 public:
  // True once the machine has left the idle (kNone) state.
  bool IsActive() const { return !(state_ == kNone); }

  // Advances the machine using the enum of the next (look-ahead) token.
  // Defined out-of-line.
  void UpdateState(int token_enum);

  // NOTE(review): presumably returns the re-interpreted enum for a token in
  // the current state; defined out-of-line -- confirm there.
  int InterpretToken(int token_enum) const;

 private:
  // NOTE: enumerator order is kept as-is; only the comments are new.
  enum State {
    kNone,  // Not inside a randomize call.
    kGotRandomizeKeyword,
    kOpenedVariableList,
    kClosedVariableList,
    kGotWithKeyword,
    kInsideWithIdentifierList,
    kExpectConstraintBlock,
    kInsideConstraintBlock,
  };

  // TODO(fangism): do we need a stack? can randomize appear inside a
  // randomize_call?
  State state_{kNone};

  // Nested state machine for constraint blocks.
  _ConstraintBlockStateMachine constraint_block_tracker_;
};
// State machine used while scanning (non-extern) constraint declarations.
// It is active whenever its state is anything other than kNone.
class _ConstraintDeclarationStateMachine {
 public:
  // True once the machine has left the idle (kNone) state.
  bool IsActive() const { return !(state_ == kNone); }

  // Advances the machine using the enum of the next (look-ahead) token.
  // Defined out-of-line.
  void UpdateState(int token_enum);

  // NOTE(review): presumably returns the re-interpreted enum for a token in
  // the current state; defined out-of-line -- confirm there.
  int InterpretToken(int token_enum) const;

 private:
  // NOTE: enumerator order is kept as-is; only the comments are new.
  enum State {
    kNone,  // Idle.
    kGotConstraintKeyword,
    kGotConstraintIdentifier,
    // TODO(fangism): handle out-of-line definitions: constraint foo::bar ...
    kInsideConstraintBlock,
  };

  State state_{kNone};

  // Nested state machine for constraint blocks.
  _ConstraintBlockStateMachine constraint_block_tracker_;
};
// Tracks semicolons that appear in a range delimited by a pair of (keyword)
// tokens.  This helps to disambiguate grammatical constructs that can
// conflict because an earlier list is optional; the definition bodies of
// property_declaration and sequence_declaration are examples.
// As an extra twist, both of those declarations accept an optional ';'
// immediately before the terminating keyword, and that one should *not* be
// counted as the 'last' semicolon.
class _LastSemicolonStateMachine {
 public:
  // trigger:     token enum that activates this machine.
  // stop:        token enum that de-activates this machine.
  // replacement: token enum that should replace the last ';'.
  _LastSemicolonStateMachine(int trigger, int stop, int replacement)
      : trigger_token_enum_{trigger},
        finish_token_enum_{stop},
        semicolon_replacement_{replacement} {}

  // Defined out-of-line.  Takes a mutable token pointer.
  void UpdateState(verible::TokenInfo* token);

 protected:
  enum State {
    kNone,
    kActive,  // in between the two keywords
  };

  // Token enum that switches this state machine on.
  const int trigger_token_enum_;

  // Token enum that switches this state machine off.
  const int finish_token_enum_;

  // Token enum that is to take the place of the last ';'.
  const int semicolon_replacement_;

  State state_{kNone};

  // Remembers the most recent semicolons; upon de-activation the last
  // semicolon will be replaced.  Technically only a two-slot queue is
  // needed, but a CircularBuffer is overkill.
  std::stack<verible::TokenInfo*> semicolons_;

  // One token of look-back.
  verible::TokenInfo* previous_token_{nullptr};
};
// A structure for tracking context needed to disambiguate tokens.
// The main input is a token stream coming from a lexer, and the main consumer
// is a parser that accepts a token stream.
// The vast majority of tokens should pass through unchanged.
// The ones that are changed are those that require context-based
// disambiguation.
// This should be designed in a manner that is forgiving of invalid inputs,
// i.e. improperly balanced code should never cause fatal errors.
// This class should maintain just enough state to correctly
// transform token enums on *valid* lexical streams.
//
// Design philosophy: This class itself is a state machine while employing
// smaller, simpler, concurrent state machines.
// The constituent state machines also scan the input token stream and
// update their states accordingly.
// The smaller state machines will be inactive most of the time, and activated
// on certain keywords in certain states.
class LexicalContext {
public:
LexicalContext();
~LexicalContext() = default;
// Not copy-able.
LexicalContext(const LexicalContext&) = delete;
LexicalContext& operator=(const LexicalContext&) = delete;
// Re-writes some token enums in-place using context-sensitivity.
// This function must re-tag tokens enumerated (_TK_*), see verilog.y and
// verilog.lex for all such enumerations.
// This function must accept both valid and invalid inputs, but is only
// required to operate correctly on valid inputs.
// Postcondition: tokens_view's tokens must not be tagged with (_TK_*)
// enumerations.
void TransformVerilogSymbols(
const verible::TokenStreamReferenceView& tokens_view) {
// TODO(fangism): Using a stream interface would further decouple the input
// iteration from output iteration.
for (auto iter : tokens_view) {
_AdvanceToken(&*iter);
}
}
protected: // Allow direct testing of some methods.
// Reads a single token, and may alter it depending on internal state.
void _AdvanceToken(verible::TokenInfo*);
// Changes the enum of a token where disambiguation is needed.
int _InterpretToken(int token_enum) const;
// Changes the enum of a token (in-place) without changing internal state.
void _MutateToken(verible::TokenInfo* token) const {
token->set_token_enum(_InterpretToken(token->token_enum()));
}
// Updates the internally tracked state without touching the token.
void _UpdateState(const verible::TokenInfo& token);
// State functions:
bool ExpectingStatement() const;
verible::WithReason<bool> ExpectingBodyItemStart() const;
bool InFlowControlHeader() const;
bool InModuleDeclarationHeader() const {
return in_module_declaration_ && !in_module_body_;
}
bool InFunctionDeclarationHeader() const {
return in_function_declaration_ && !in_function_body_;
}
bool InTaskDeclarationHeader() const {
return in_task_declaration_ && !in_task_body_;
}
bool InAnyDeclaration() const;
bool InAnyDeclarationHeader() const;
bool InStatementContext() const {
return in_function_body_ || in_task_body_ ||
in_initial_always_final_construct_;
}
const verible::TokenInfo* previous_token_ = nullptr;
// Non-nestable states can be represented without a stack.
// Do not bother trying to accommodate malformed input token sequences.
bool in_module_declaration_ = false;
bool in_module_body_ = false;
bool in_initial_always_final_construct_ = false;
bool in_function_declaration_ = false;
bool in_function_body_ = false;
bool in_task_declaration_ = false;
bool in_task_body_ = false;
// TODO(fangism): class_declaration, interface_declaration, udp_declaration...
// Extern declarations cannot be nested, so a single bool suffices.
bool in_extern_declaration_ = false;
bool previous_token_finished_header_ = true;
// Nestable states need to be tracked with a stack.
// Tracks if, for, case blocks.
struct FlowControlState {
const verible::TokenInfo* start;
// When this is false, the state is still in the header, which is:
// if (...)
// for (...)
// case (...) (including other case-variants)
bool in_body = false; // starts in header state
explicit FlowControlState(const verible::TokenInfo* token) : start(token) {}
};
std::vector<FlowControlState> flow_control_stack_;
// Tracks optional labels after certain keywords.
_KeywordLabelStateMachine keyword_label_tracker_;
// Tracks parsing state inside randomize_call.
_RandomizeCallStateMachine randomize_call_tracker_;
// Tracks parsing state inside randomize_call.
_ConstraintDeclarationStateMachine constraint_declaration_tracker_;
// Tracks last semicolon in property_declarations so that it can be
// re-enumerated to help disambiguate.
_LastSemicolonStateMachine property_declaration_tracker_;
// Tracks last semicolon in sequence_declarations so that it can be
// re-enumerated to help disambiguate.
_LastSemicolonStateMachine sequence_declaration_tracker_;
// Tracks begin-end paired sequence blocks in all contexts (generate blocks,
// function/task statements, flow-control constructs...).
// Every 'begin' token will be pushed onto this stack.
// Every 'end' token will pop the stack (safely).
// Accepts invalid input, which does not guarantee begin-end balancing.
// Does not care about optional labels after these keywords.
//
// e.g.
// ... // stack initially empty
// begin // pushes onto this stack
// ...
// begin // pushes onto this stack
// ...
// end // pops off of this stack
// ...
// end // pops off of this stack
//
std::vector<const verible::TokenInfo*> block_stack_;
// Tracks open-close paired tokens like parentheses and brackets and braces.
std::vector<const verible::TokenInfo*> balance_stack_;
};
} // namespace verilog
#endif // VERIBLE_VERILOG_PARSER_VERILOG_LEXICAL_CONTEXT_H_