Teuchos - Trilinos Tools Package Version of the Day
Loading...
Searching...
No Matches
Teuchos_make_lalr1_parser.cpp
1#include "Teuchos_make_lalr1_parser.hpp"
2
3#include <map>
4#include <iostream>
5#include <cstdlib>
6#include <queue>
7#include <algorithm>
8#include <fstream>
9
10#include "Teuchos_vector.hpp"
11#include "Teuchos_Graph.hpp"
12#include "Teuchos_stack.hpp"
13#include "Teuchos_set.hpp"
14
15namespace Teuchos {
16
17/* The LALR(1) parser construction implemented here is based on
18 David Pager's work:
19
20 Pager, David.
21 "The lane-tracing algorithm for constructing LR (k) parsers
22 and ways of enhancing its efficiency."
23 Information Sciences 12.1 (1977): 19-42.
24
25 The identifiers used in this code are consistent with the terminology
26 in that paper, except where we bring in FIRST set terminology, which
27 Pager doesn't go into detail about. */
28
29Config::Config(int p, int d):
30 production(p),
31 dot(d)
32{
33}
34
35StateConfig::StateConfig(int s, int cis):
36 state(s),
37 config_in_state(cis)
38{
39}
40
41void swap(StateInProgress& a, StateInProgress& b) {
42 using std::swap;
43 swap(a.configs, b.configs);
44 swap(a.actions, b.actions);
45}
46
47// expand the grammar productions into marked productions
48static Configs make_configs(Grammar const& g) {
49 Configs configs;
50 for (int i = 0; i < Teuchos::size(g.productions); ++i) {
51 const Grammar::Production& production = at(g.productions, i);
52 for (int j = 0; j <= Teuchos::size(production.rhs); ++j) {
53 configs.push_back(Config(i,j));
54 }
55 }
56 return configs;
57}
58
59static Graph get_left_hand_sides_to_start_configs(
60 Configs const& cs, Grammar const& grammar) {
61 Graph lhs2sc = make_graph_with_nnodes(grammar.nsymbols);
62 for (int c_i = 0; c_i < Teuchos::size(cs); ++c_i) {
63 const Config& c = at(cs, c_i);
64 if (c.dot > 0) continue;
65 int p_i = c.production;
66 const Grammar::Production& p = at(grammar.productions, p_i);
67 add_edge(lhs2sc, p.lhs, c_i);
68 }
69 return lhs2sc;
70}
71
72struct StateCompare {
73 typedef StateInProgress const* Value;
74 bool operator()(Value const& a, Value const& b) const {
75 return a->configs < b->configs;
76 }
77};
78
79typedef std::map<StateInProgress const*, int, StateCompare> StatePtr2StateIndex;
80
81static void close(StateInProgress& state,
82 Configs const& cs, Grammar const& grammar,
83 Graph const& lhs2sc) {
84 std::queue<int> config_q;
85 std::set<int> config_set;
86 for (std::vector<int>::const_iterator it = state.configs.begin();
87 it != state.configs.end(); ++it) {
88 int config_i = *it;
89 config_q.push(config_i);
90 TEUCHOS_ASSERT(!config_set.count(config_i));
91 config_set.insert(config_i);
92 }
93 while (!config_q.empty()) {
94 int config_i = config_q.front(); config_q.pop();
95 const Config& config = at(cs, config_i);
96 int prod_i = config.production;
97 const Grammar::Production& prod = at(grammar.productions, prod_i);
98 if (config.dot == Teuchos::size(prod.rhs)) continue;
99 int symbol_after_dot = at(prod.rhs, config.dot);
100 if (is_terminal(grammar, symbol_after_dot)) continue;
101 const NodeEdges& edges = get_edges(lhs2sc, symbol_after_dot);
102 for (NodeEdges::const_iterator it = edges.begin();
103 it != edges.end(); ++it) {
104 int sc = *it;
105 if (!config_set.count(sc)) {
106 config_set.insert(sc);
107 config_q.push(sc);
108 }
109 }
110 }
111 state.configs.assign(config_set.begin(), config_set.end());
112}
113
114static void add_back(StatesInProgress& sips, StateInProgress& sip) {
115 using std::swap;
116 StateInProgressPtr ptr(new StateInProgress());
117 swap(*ptr, sip);
118 sips.push_back(ptr);
119}
120
121static void add_reduction_actions(StatesInProgress& states,
122 Configs const& cs, Grammar const& grammar) {
123 for (StatesInProgress::iterator it = states.begin();
124 it != states.end(); ++it) {
125 StateInProgressPtr& state_uptr = *it;
126 StateInProgress& state = *state_uptr;
127 for (std::vector<int>::const_iterator it2 = state.configs.begin();
128 it2 != state.configs.end(); ++it2) {
129 int config_i = *it2;
130 const Config& config = at(cs, config_i);
131 int prod_i = config.production;
132 const Grammar::Production& prod = at(grammar.productions, prod_i);
133 if (config.dot != Teuchos::size(prod.rhs)) continue;
134 ActionInProgress reduction;
135 reduction.action.kind = ACTION_REDUCE;
136 reduction.action.production = config.production;
137 state.actions.push_back(reduction);
138 }
139 }
140}
141
142static void set_lr0_contexts(
143 StatesInProgress& states,
144 Grammar const& grammar) {
145 for (StatesInProgress::iterator it = states.begin();
146 it != states.end(); ++it) {
147 StateInProgressPtr& state_uptr = *it;
148 StateInProgress& state = *state_uptr;
149 for (StateInProgress::Actions::iterator it2 = state.actions.begin();
150 it2 != state.actions.end(); ++it2) {
151 ActionInProgress& action = *it2;
152 if (action.action.kind != ACTION_REDUCE) continue;
153 if (action.action.production == get_accept_production(grammar)) {
154 action.context.insert(get_end_terminal(grammar));
155 } else {
156 for (int terminal = 0; terminal < grammar.nterminals; ++terminal) {
157 action.context.insert(terminal);
158 }
159 }
160 }
161 }
162}
163
164static StatesInProgress make_lr0_parser(Configs const& cs, Grammar const& grammar,
165 Graph const& lhs2sc) {
166 StatesInProgress states;
167 StatePtr2StateIndex state_ptrs2idxs;
168 std::queue<int> state_q;
169 { /* start state */
170 StateInProgress start_state;
171 int accept_nt = get_accept_nonterminal(grammar);
172 /* there should only be one start configuration for the accept symbol */
173 int start_accept_config = get_edges(lhs2sc, accept_nt).front();
174 start_state.configs.push_back(start_accept_config);
175 close(start_state, cs, grammar, lhs2sc);
176 int start_state_i = Teuchos::size(states);
177 state_q.push(start_state_i);
178 add_back(states, start_state);
179 state_ptrs2idxs[states.back().get()] = start_state_i;
180 }
181 while (!state_q.empty()) {
182 int state_i = state_q.front(); state_q.pop();
183 StateInProgress& state = *at(states, state_i);
184 std::set<int> transition_symbols;
185 for (std::vector<int>::const_iterator it = state.configs.begin();
186 it != state.configs.end(); ++it) {
187 int config_i = *it;
188 const Config& config = at(cs, config_i);
189 int prod_i = config.production;
190 const Grammar::Production& prod = at(grammar.productions, prod_i);
191 if (config.dot == Teuchos::size(prod.rhs)) continue;
192 int symbol_after_dot = at(prod.rhs, config.dot);
193 transition_symbols.insert(symbol_after_dot);
194 }
195 for (std::set<int>::const_iterator it = transition_symbols.begin();
196 it != transition_symbols.end(); ++it) {
197 int transition_symbol = *it;
198 StateInProgress next_state;
199 for (std::vector<int>::const_iterator it2 = state.configs.begin();
200 it2 != state.configs.end(); ++it2) {
201 int config_i = *it2;
202 const Config& config = at(cs, config_i);
203 int prod_i = config.production;
204 const Grammar::Production& prod = at(grammar.productions, prod_i);
205 if (config.dot == Teuchos::size(prod.rhs)) continue;
206 int symbol_after_dot = at(prod.rhs, config.dot);
207 if (symbol_after_dot != transition_symbol) continue;
208 /* transition successor should just be the next index */
209 int next_config_i = config_i + 1;
210 next_state.configs.push_back(next_config_i);
211 }
212 close(next_state, cs, grammar, lhs2sc);
213 StatePtr2StateIndex::iterator it2 = state_ptrs2idxs.find(&next_state);
214 int next_state_i;
215 if (it2 == state_ptrs2idxs.end()) {
216 next_state_i = Teuchos::size(states);
217 state_q.push(next_state_i);
218 add_back(states, next_state);
219 state_ptrs2idxs[states.back().get()] = next_state_i;
220 } else {
221 next_state_i = it2->second;
222 }
223 ActionInProgress transition;
224 transition.action.kind = ACTION_SHIFT;
225 transition.action.next_state = next_state_i;
226 transition.context.insert(transition_symbol);
227 state.actions.push_back(transition);
228 }
229 }
230 add_reduction_actions(states, cs, grammar);
231 set_lr0_contexts(states, grammar);
232 return states;
233}
234
235static Graph get_productions_by_lhs(Grammar const& grammar) {
236 int nsymbols = grammar.nsymbols;
237 Graph lhs2prods = make_graph_with_nnodes(nsymbols);
238 for (int prod_i = 0; prod_i < Teuchos::size(grammar.productions); ++prod_i) {
239 const Grammar::Production& prod = at(grammar.productions, prod_i);
240 add_edge(lhs2prods, prod.lhs, prod_i);
241 }
242 return lhs2prods;
243}
244
245/* compute a graph where symbols are graph nodes, and
246 there exists an edge (A, B) if B appears in the RHS of
247 any production in which A is the LHS */
248static Graph get_symbol_graph(Grammar const& grammar, Graph const& lhs2prods) {
249 int nsymbols = grammar.nsymbols;
250 Graph out = make_graph_with_nnodes(nsymbols);
251 for (int lhs = 0; lhs < nsymbols; ++lhs) {
252 std::set<int> dependees;
253 for (int i = 0; i < count_edges(lhs2prods, lhs); ++i) {
254 int prod_i = at(lhs2prods, lhs, i);
255 const Grammar::Production& prod = at(grammar.productions, prod_i);
256 for (int j = 0; j < Teuchos::size(prod.rhs); ++j) {
257 int rhs_symb = at(prod.rhs, j);
258 dependees.insert(rhs_symb);
259 }
260 }
261 at(out, lhs).assign(dependees.begin(), dependees.end());
262 }
263 return out;
264}
265
266/* the "FIRST" set, i.e. the set of 1-heads of non-null terminal descendants of
267 some string.
268 As suggested by Westley Weimer here:
269 https://www.cs.virginia.edu/~weimer/2008-415/reading/FirstFollowLL.pdf
270 we will also use the FIRST set for determining whether the string has
271 a null terminal descendant, indicated by the prescence of a special
272 FIRST_NULL symbol in the FIRST set */
273enum { FIRST_NULL = -425 };
274typedef std::set<int> FirstSet;
275
276static void print_set(std::set<int> const& set, Grammar const& grammar) {
277 std::cerr << "{";
278 for (std::set<int>::const_iterator it = set.begin(); it != set.end(); ++it) {
279 if (it != set.begin()) std::cerr << ", ";
280 int symb = *it;
281 if (symb == FIRST_NULL) std::cerr << "null";
282 else {
283 const std::string& symb_name = at(grammar.symbol_names, symb);
284 if (symb_name == ",") std::cerr << "','";
285 else std::cerr << symb_name;
286 }
287 }
288 std::cerr << "}";
289}
290
291static FirstSet get_first_set_of_string(std::vector<int> const& string,
292 std::vector<FirstSet> const& first_sets) {
293 FirstSet out;
294 /* walk the string, stop when any symbol is found that doesn't
295 have a null terminal descendant */
296 int i;
297 for (i = 0; i < Teuchos::size(string); ++i) {
298 int symbol = at(string, i);
299 bool has_null = false;
300 const FirstSet& first_set = at(first_sets, symbol);
301 for (FirstSet::const_iterator it = first_set.begin();
302 it != first_set.end(); ++it) {
303 int first_symbol = *it;
304 if (first_symbol == FIRST_NULL) has_null = true;
305 else out.insert(first_symbol);
306 }
307 if (!has_null) break;
308 }
309 if (i == Teuchos::size(string)) out.insert(FIRST_NULL);
310 return out;
311}
312
313struct Event {
314 int added_symbol;
315 int dependee;
316 Event(int as, int d):
317 added_symbol(as),
318 dependee(d)
319 {}
320};
321
322/* figure out the FIRST sets for each non-terminal in the grammar.
323 I couldn't find a super-efficient way to do this, so here is a
324 free-for-all event-driven implementation. */
325static std::vector<FirstSet> compute_first_sets(Grammar const& grammar,
326 bool verbose) {
327 if (verbose) std::cerr << "computing FIRST sets...\n";
328 std::queue<Event> event_q;
329 int nsymbols = grammar.nsymbols;
330 std::vector<FirstSet> first_sets = make_vector<FirstSet>(nsymbols);
331 Graph lhs2prods = get_productions_by_lhs(grammar);
332 for (int symbol = 0; symbol < nsymbols; ++symbol) {
333 if (is_terminal(grammar, symbol)) {
334 event_q.push(Event(symbol, symbol));
335 } else {
336 for (int i = 0; i < count_edges(lhs2prods, symbol); ++i) {
337 int prod_i = at(lhs2prods, symbol, i);
338 const Grammar::Production& prod = at(grammar.productions, prod_i);
339 if (prod.rhs.empty()) {
340 event_q.push(Event(FIRST_NULL, symbol));
341 break;
342 }
343 }
344 }
345 }
346 Graph dependers2dependees = get_symbol_graph(grammar, lhs2prods);
347 Graph dependees2dependers = make_transpose(dependers2dependees);
348 while (!event_q.empty()) {
349 Event event = event_q.front(); event_q.pop();
350 int added_symb = event.added_symbol;
351 int dependee = event.dependee;
352 FirstSet& dependee_firsts = at(first_sets, dependee);
353 /* hopefully we don't get too many duplicate events piled up... */
354 if (dependee_firsts.count(added_symb)) continue;
355 dependee_firsts.insert(added_symb);
356 for (int i = 0; i < count_edges(dependees2dependers, dependee); ++i) {
357 int depender = at(dependees2dependers, dependee, i);
358 TEUCHOS_ASSERT(is_nonterminal(grammar, depender));
359 const FirstSet& depender_firsts = at(first_sets, depender);
360 for (int j = 0; j < count_edges(lhs2prods, depender); ++j) {
361 int prod_i = at(lhs2prods, depender, j);
362 const Grammar::Production& prod = at(grammar.productions, prod_i);
363 FirstSet rhs_first_set = get_first_set_of_string(prod.rhs, first_sets);
364 for (FirstSet::iterator it = rhs_first_set.begin();
365 it != rhs_first_set.end(); ++it) {
366 int rhs_first_symb = *it;
367 if (!depender_firsts.count(rhs_first_symb)) {
368 event_q.push(Event(rhs_first_symb, depender));
369 }
370 }
371 }
372 }
373 }
374 if (verbose) {
375 for (int symb = 0; symb < nsymbols; ++symb) {
376 const std::string& symb_name = at(grammar.symbol_names, symb);
377 std::cerr << "FIRST(" << symb_name << ") = {";
378 const FirstSet& c = at(first_sets, symb);
379 for (FirstSet::const_iterator it = c.begin(); it != c.end(); ++it) {
380 if (it != c.begin()) std::cerr << ", ";
381 int first_symb = *it;
382 if (first_symb == FIRST_NULL) {
383 std::cerr << "null";
384 } else {
385 const std::string& first_name = at(grammar.symbol_names, first_symb);
386 std::cerr << first_name;
387 }
388 }
389 std::cerr << "}\n";
390 }
391 std::cerr << '\n';
392 }
393 return first_sets;
394}
395
396StateConfigs form_state_configs(StatesInProgress const& states) {
397 StateConfigs out;
398 for (int i = 0; i < Teuchos::size(states); ++i) {
399 StateInProgress& state = *at(states, i);
400 for (int j = 0; j < Teuchos::size(state.configs); ++j) {
401 out.push_back(StateConfig(i, j));
402 }
403 }
404 return out;
405}
406
407Graph form_states_to_state_configs(StateConfigs const& scs,
408 StatesInProgress const& states) {
409 Graph out = make_graph_with_nnodes(Teuchos::size(states));
410 for (int i = 0; i < Teuchos::size(scs); ++i) {
411 const StateConfig& sc = at(scs, i);
412 at(out, sc.state).push_back(i);
413 }
414 return out;
415}
416
417static std::string escape_dot(std::string const& s) {
418 std::string out;
419 for (std::size_t i = 0; i < s.size(); ++i) {
420 char c = s[i];
421 if (c == '\\' || c == '|' || c == '\"' || c == '<' || c == '>' || c == '{' || c == '}') {
422 out.push_back('\\');
423 out.push_back(c);
424 } else if (c == '.') {
425 out = "'";
426 out += s;
427 out += "'";
428 return out;
429 } else {
430 out.push_back(c);
431 }
432 }
433 return out;
434}
435
436void print_graphviz(
437 std::string const& filepath,
438 ParserInProgress const& pip,
439 bool /* verbose */,
440 std::ostream& os
441 ) {
442 const StatesInProgress& sips = pip.states;
443 const Configs& cs = pip.configs;
444 GrammarPtr grammar = pip.grammar;
445 const Graph& states2scs = pip.states2state_configs;
446 os << "writing GraphViz file \"" << filepath << "\"\n";
447 os << "process with:\n";
448 os << " dot -Tpdf -o \"" << filepath << ".pdf\" \"" << filepath << "\"\n";
449 std::ofstream file(filepath.c_str());
450 TEUCHOS_ASSERT(file.is_open());
451 file << "digraph {\n";
452 file << "graph [\n";
453 file << "rankdir = \"LR\"\n";
454 file << "]\n";
455 for (int s_i = 0; s_i < Teuchos::size(sips); ++s_i) {
456 const StateInProgress& state = *at(sips, s_i);
457 file << s_i << " [\n";
458 file << "label = \"";
459 file << "State " << s_i << "\\l";
460 for (int cis_i = 0; cis_i < Teuchos::size(state.configs); ++cis_i) {
461 int c_i = at(state.configs, cis_i);
462 const Config& config = at(cs, c_i);
463 const Grammar::Production& prod = at(grammar->productions, config.production);
464 int sc_i = at(states2scs, s_i, cis_i);
465 file << sc_i << ": ";
466 const std::string& lhs_name = at(grammar->symbol_names, prod.lhs);
467 file << escape_dot(lhs_name) << " ::= ";
468 for (int rhs_i = 0; rhs_i <= Teuchos::size(prod.rhs); ++rhs_i) {
469 if (rhs_i == config.dot) file << " .";
470 if (rhs_i < Teuchos::size(prod.rhs)) {
471 int rhs_symb = at(prod.rhs, rhs_i);
472 const std::string& rhs_symb_name = at(grammar->symbol_names, rhs_symb);
473 file << " " << escape_dot(rhs_symb_name);
474 }
475 }
476 if (config.dot == Teuchos::size(prod.rhs)) {
477 file << ", \\{";
478 bool found = false;
479 for (int a_i = 0; a_i < Teuchos::size(state.actions); ++a_i) {
480 const ActionInProgress& action = at(state.actions, a_i);
481 if (action.action.kind == ACTION_REDUCE &&
482 action.action.production == config.production) {
483 found = true;
484 const Context& ac = action.context;
485 for (Context::const_iterator it = ac.begin(); it != ac.end(); ++it) {
486 if (it != ac.begin()) file << ", ";
487 int symb = *it;
488 const std::string& symb_name = at(grammar->symbol_names, symb);
489 file << escape_dot(symb_name);
490 }
491 }
492 }
493 TEUCHOS_TEST_FOR_EXCEPTION(!found, std::logic_error,
494 "BUG: missing reduce action in state " << s_i << " !!!\n");
495 file << "\\}";
496 }
497 file << "\\l";
498 }
499 file << "\"\n";
500 file << "shape = \"record\"\n";
501 file << "]\n";
502 for (int a_i = 0; a_i < Teuchos::size(state.actions); ++a_i) {
503 const ActionInProgress& action = at(state.actions, a_i);
504 if (action.action.kind == ACTION_SHIFT) {
505 int symb = *(action.context.begin());
506 const std::string& symb_name = at(grammar->symbol_names, symb);
507 int next = action.action.next_state;
508 file << s_i << " -> " << next << " [\n";
509 file << "label = \"" << escape_dot(symb_name) << "\"\n";
510 file << "]\n";
511 }
512 }
513 }
514 file << "}\n";
515}
516
517static Graph make_immediate_predecessor_graph(
518 StateConfigs const& scs,
519 StatesInProgress const& states,
520 Graph const& states2scs,
521 Configs const& cs,
522 GrammarPtr grammar) {
523 Graph out = make_graph_with_nnodes(Teuchos::size(scs));
524 for (int s_i = 0; s_i < Teuchos::size(states); ++s_i) {
525 const StateInProgress& state = *at(states, s_i);
526 for (int cis_i = 0; cis_i < Teuchos::size(state.configs); ++cis_i) {
527 int config_i = at(state.configs, cis_i);
528 const Config& config = at(cs, config_i);
529 const Grammar::Production& prod = at(grammar->productions, config.production);
530 int dot = config.dot;
531 if (dot == Teuchos::size(prod.rhs)) continue;
532 int s = at(prod.rhs, dot);
533 if (is_terminal(*grammar, s)) continue;
534 for (int cis_j = 0; cis_j < Teuchos::size(state.configs); ++cis_j) {
535 int config_j = at(state.configs, cis_j);
536 const Config& config2 = at(cs, config_j);
537 const Grammar::Production& prod2 = at(grammar->productions, config2.production);
538 if (prod2.lhs == s) {
539 int sc_i = at(states2scs, s_i, cis_i);
540 int sc_j = at(states2scs, s_i, cis_j);
541 add_edge(out, sc_j, sc_i);
542 }
543 }
544 }
545 }
546 return out;
547}
548
549static Graph find_transition_predecessors(
550 StateConfigs const& scs,
551 StatesInProgress const& states,
552 Graph const& states2scs,
553 Configs const& cs,
554 GrammarPtr grammar) {
555 Graph out = make_graph_with_nnodes(Teuchos::size(scs));
556 for (int state_i = 0; state_i < Teuchos::size(states); ++state_i) {
557 const StateInProgress& state = *at(states, state_i);
558 for (int a_i = 0; a_i < Teuchos::size(state.actions); ++a_i) {
559 const ActionInProgress& action = at(state.actions, a_i);
560 if (action.action.kind != ACTION_SHIFT) continue;
561 TEUCHOS_ASSERT(action.context.size() == 1);
562 int symbol = *(action.context.begin());
563 int state_j = action.action.next_state;
564 const StateInProgress& state2 = *at(states, state_j);
565 for (int cis_i = 0; cis_i < Teuchos::size(state.configs); ++cis_i) {
566 int config_i = at(state.configs, cis_i);
567 const Config& config = at(cs, config_i);
568 for (int cis_j = 0; cis_j < Teuchos::size(state2.configs); ++cis_j) {
569 int config_j = at(state2.configs, cis_j);
570 const Config& config2 = at(cs, config_j);
571 if (config.production == config2.production &&
572 config.dot + 1 == config2.dot) {
573 const Grammar::Production& prod = at(grammar->productions, config.production);
574 int rhs_symbol = at(prod.rhs, config.dot);
575 if (rhs_symbol == symbol) {
576 int sc_i = at(states2scs, state_i, cis_i);
577 int sc_j = at(states2scs, state_j, cis_j);
578 add_edge(out, sc_j, sc_i);
579 }
580 }
581 }
582 }
583 }
584 }
585 return out;
586}
587
588static Graph make_originator_graph(
589 StateConfigs const& scs,
590 StatesInProgress const& states,
591 Graph const& states2scs,
592 Configs const& cs,
593 GrammarPtr grammar) {
594 Graph out = make_graph_with_nnodes(Teuchos::size(scs));
595 Graph ipg = make_immediate_predecessor_graph(
596 scs, states, states2scs, cs, grammar);
597 Graph tpg = find_transition_predecessors(
598 scs, states, states2scs, cs, grammar);
599 for (int sc_i = 0; sc_i < Teuchos::size(scs); ++sc_i) {
600 std::set<int> originators;
601 /* breadth-first search through the Transition
602 Precessor Graph (tpg), followed by a single hop
603 along the Immediate Predecessor Graph (ipg) */
604 std::queue<int> tpq;
605 std::set<int> tps;
606 tpq.push(sc_i);
607 tps.insert(sc_i);
608 while (!tpq.empty()) {
609 int tpp = tpq.front(); tpq.pop();
610 for (int i = 0; i < count_edges(tpg, tpp); ++i) {
611 int tpc = at(tpg, tpp, i);
612 if (tps.count(tpc)) continue;
613 tpq.push(tpc);
614 tps.insert(tpc);
615 }
616 for (int i = 0; i < count_edges(ipg, tpp); ++i) {
617 int ip_i = at(ipg, tpp, i);
618 originators.insert(ip_i);
619 }
620 }
621 at(out, sc_i).assign(originators.begin(), originators.end());
622 }
623 return out;
624}
625
626static std::vector<int> get_follow_string(
627 int sc_addr,
628 StateConfigs const& scs,
629 StatesInProgress const& states,
630 Configs const& cs,
631 GrammarPtr grammar) {
632 const StateConfig& sc = at(scs, sc_addr);
633 const StateInProgress& state = *at(states, sc.state);
634 int config_i = at(state.configs, sc.config_in_state);
635 const Config& config = at(cs, config_i);
636 const Grammar::Production& prod = at(grammar->productions, config.production);
637 int out_size = Teuchos::size(prod.rhs) - (config.dot + 1);
638 std::vector<int> out;
639 /* out_size can be negative */
640 if (out_size < 1) return out;
641 reserve(out, out_size);
642 for (int i = config.dot + 1; i < Teuchos::size(prod.rhs); ++i) {
643 out.push_back(at(prod.rhs, i));
644 }
645 return out;
646}
647
648static void print_string(std::vector<int> const& str, GrammarPtr grammar) {
649 std::cerr << "\"";
650 for (int i = 0; i < Teuchos::size(str); ++i) {
651 int symb = at(str, i);
652 const std::string& symb_name = at(grammar->symbol_names, symb);
653 std::cerr << symb_name;
654 }
655 std::cerr << "\"";
656}
657
658static bool has_non_null_terminal_descendant(FirstSet const& first_set) {
659 if (first_set.empty()) return false;
660 if (first_set.size() > 1) return true;
661 return *(first_set.begin()) != FIRST_NULL;
662}
663
664static Context get_contexts(FirstSet first_set) {
665 FirstSet::iterator it = first_set.find(FIRST_NULL);
666 if (it != first_set.end()) first_set.erase(it);
667 return first_set;
668}
669
670enum { MARKER = -433 };
671enum { ZERO = -100 }; // actual zero is a valid index for us
672
673static void print_stack(std::vector<int> const& stack) {
674 for (int i = 0; i < Teuchos::size(stack); ++i) {
675 int symb = at(stack, i);
676 if (symb == MARKER) std::cerr << " M";
677 else if (symb == ZERO) std::cerr << " Z";
678 else std::cerr << " " << symb;
679 }
680 std::cerr << '\n';
681}
682
683static void move_markers(
684 std::vector<int>& lane,
685 std::vector<int>& in_lane,
686 int zeta_prime_addr,
687 int zeta_pointer,
688 bool tests_failed
689 ) {
690 int loc_of_zeta_prime = at(in_lane, zeta_prime_addr);
691 TEUCHOS_ASSERT(loc_of_zeta_prime != -1);
692 int r = 0;
693 for (int i = loc_of_zeta_prime + 1; i < zeta_pointer; ++i) {
694 if (at(lane, i) == MARKER) {
695 ++r;
696 at(lane, i) = ZERO;
697 }
698 }
699 int top_addr = -1;
700 if (tests_failed) {
701 top_addr = lane.back();
702 lane.resize(lane.size() - 1); // pop
703 }
704 for (int i = 0; i < r; ++i) lane.push_back(MARKER);
705 if (tests_failed) {
706 lane.push_back(top_addr);
707 at(in_lane, top_addr) = Teuchos::size(lane) - 1;
708 }
709}
710
711typedef std::vector<Context> Contexts;
712
713static void context_adding_routine(
714 std::vector<int> const& lane,
715 int zeta_pointer,
716 Context& contexts_generated,
717 Contexts& contexts,
718 bool verbose,
719 GrammarPtr grammar
720 ) {
721 if (verbose) {
722 std::cerr << " CONTEXT ADDING ROUTINE\n";
723 std::cerr << " LANE:";
724 print_stack(lane);
725 std::cerr << " $\\zeta$-POINTER = " << zeta_pointer << '\n';
726 }
727 for (int r = zeta_pointer; r >= 0 && (!contexts_generated.empty()); --r) {
728 int v_r = at(lane, r);
729 if (verbose) std::cerr << " r = " << r << ", $v_r$ = ";
730 if (v_r < 0) {
731 if (verbose) {
732 if (v_r == MARKER) std::cerr << "marker\n";
733 else if (v_r == ZERO) std::cerr << "zero\n";
734 }
735 continue;
736 }
737 int tau_r_addr = v_r;
738 if (verbose) {
739 std::cerr << "$\\tau_r$ = " << tau_r_addr << '\n';
740 std::cerr << " CONTEXTS_GENERATED = ";
741 print_set(contexts_generated, *grammar);
742 std::cerr << "\n CONTEXTS_$\\tau_r$ = ";
743 print_set(at(contexts, tau_r_addr), *grammar);
744 std::cerr << "\n CONTEXTS_GENERATED <- CONTEXTS_GENERATED - CONTEXTS_$\\tau_r$";
745 }
746 subtract_from(contexts_generated, at(contexts, tau_r_addr));
747 if (verbose) {
748 std::cerr << "\n CONTEXTS_GENERATED = ";
749 print_set(contexts_generated, *grammar);
750 std::cerr << "\n CONTEXTS_$\\tau_r$ <- CONTEXTS_$\\tau_r$ U CONTEXTS_GENERATED";
751 }
752 unite_with(at(contexts, tau_r_addr), contexts_generated);
753 if (verbose) {
754 std::cerr << "\n CONTEXTS_$\\tau_r$ = ";
755 print_set(at(contexts, tau_r_addr), *grammar);
756 std::cerr << "\n";
757 }
758 }
759}
760
761static void deal_with_tests_failed(
762 int& num_originators_failed,
763 int& first_originator_failed,
764 int zeta_prime_addr,
765 bool& tests_failed,
766 std::vector<int>& lane,
767 std::vector<int>& in_lane,
768 int zeta_addr,
769 std::vector<int>& stack,
770 bool verbose
771 ) {
772 if (verbose) std::cerr << " Dealing with test failures\n";
773 if (num_originators_failed == 0) {
774 if (verbose) std::cerr << " " << zeta_prime_addr << " is the first originator of " << zeta_addr << " to fail the tests\n";
775 first_originator_failed = zeta_prime_addr;
776 if (verbose) std::cerr << " pushing " << zeta_prime_addr << " onto LANE:\n ";
777 lane.push_back(zeta_prime_addr);
778 if (verbose) print_stack(lane);
779 at(in_lane, zeta_prime_addr) = Teuchos::size(lane) - 1;
780 if (verbose) std::cerr << " IN_LANE(" << zeta_prime_addr << ") <- ON\n";
781 tests_failed = true;
782 if (verbose) std::cerr << " TESTS_FAILED <- ON\n";
783 } else if (num_originators_failed == 1) {
784 if (verbose) std::cerr << " " << zeta_prime_addr << " is the second originator of " << zeta_addr << " to fail the tests\n";
785 int zeta_double_prime_addr = first_originator_failed;
786 if (verbose) std::cerr << " the first was " << zeta_double_prime_addr << '\n';
787 TEUCHOS_ASSERT(at(lane, Teuchos::size(lane) - 1) == zeta_double_prime_addr);
788 TEUCHOS_ASSERT(at(lane, Teuchos::size(lane) - 2) == zeta_addr);
789 if (verbose) std::cerr << " pop LANE, push {marker, " << zeta_double_prime_addr << "} onto it:\n ";
790 lane.resize(lane.size() - 1);
791 lane.push_back(MARKER);
792 lane.push_back(zeta_double_prime_addr);
793 if (verbose) print_stack(lane);
794 if (verbose) std::cerr << " push {marker, " << zeta_prime_addr << "} onto STACK:\n ";
795 stack.push_back(MARKER);
796 stack.push_back(zeta_prime_addr);
797 if (verbose) print_stack(stack);
798 } else {
799 if (verbose) std::cerr << " " << zeta_prime_addr << " is the third or later originator of " << zeta_addr << " to fail the tests\n";
800 if (verbose) std::cerr << " pushing " << zeta_prime_addr << " onto STACK:\n ";
801 stack.push_back(zeta_prime_addr);
802 if (verbose) print_stack(stack);
803 }
804 ++num_originators_failed;
805}
806
807static void heuristic_propagation_of_context_sets(
808 int tau_addr,
809 Contexts& contexts,
810 std::vector<bool>& complete,
811 StateConfigs const& scs,
812 StatesInProgress const& states,
813 Graph const& states2scs,
814 Configs const& cs,
815 GrammarPtr grammar
816 ) {
817 const StateConfig& tau = at(scs, tau_addr);
818 const StateInProgress& state = *at(states, tau.state);
819 int config_i = at(state.configs, tau.config_in_state);
820 const Config& config = at(cs, config_i);
821 if (config.dot != 0) return;
822 const Grammar::Production& prod = at(grammar->productions, config.production);
823 for (int cis_j = 0; cis_j < Teuchos::size(state.configs); ++cis_j) {
824 int config_j = at(state.configs, cis_j);
825 if (config_j == config_i) continue;
826 const Config& config2 = at(cs, config_j);
827 if (config2.dot != 0) continue;
828 const Grammar::Production& prod2 = at(grammar->productions, config2.production);
829 if (prod.lhs != prod2.lhs) continue;
830 int tau_prime_addr = at(states2scs, tau.state, cis_j);
831 at(contexts, tau_prime_addr) = at(contexts, tau_addr);
832 at(complete, tau_prime_addr) = true;
833 }
834}
835
836/* Here it is! The magical algorithm described by a flowchart in
837 Figure 7 of David Pager's paper. */
838static void compute_context_set(
839 int zeta_j_addr,
840 Contexts& contexts,
841 std::vector<bool>& complete,
842 StateConfigs const& scs,
843 Graph const& originator_graph,
844 StatesInProgress const& states,
845 Graph const& states2scs,
846 Configs const& cs,
847 std::vector<FirstSet> const& first_sets,
848 GrammarPtr grammar,
849 bool verbose,
850 ParserInProgress const& pip
851 ) {
852 if (verbose) std::cerr << "Computing context set for $\\zeta_j$ = " << zeta_j_addr << "...\n";
853 if (verbose) std::cerr << "BEGIN PROGRAM\n";
854 if (at(complete, zeta_j_addr)) {
855 if (verbose) std::cerr << zeta_j_addr << " was already complete!\nEND PROGRAM\n\n";
856 return;
857 }
858 std::vector<int> stack;
859 // need random access, inner insert, which std::stack doesn't provide
860 std::vector<int> lane;
861 std::vector<int> in_lane = make_vector<int>(Teuchos::size(scs), -1);
862 lane.push_back(zeta_j_addr);
863 at(in_lane, zeta_j_addr) = Teuchos::size(lane) - 1;
864 bool tests_failed = false;
865 Context contexts_generated;
866 if (verbose) {
867 std::cerr << "Initial LANE:";
868 print_stack(lane);
869 }
870 while (true) {
871 TEUCHOS_ASSERT(!lane.empty());
872 int zeta_addr = lane.back();
873 if (verbose) {
874 std::cerr << "Top of LANE is $\\zeta$ = " << zeta_addr << '\n';
875 }
876 int zeta_pointer = Teuchos::size(lane) - 1;
877 if (verbose) std::cerr << "$\\zeta$-POINTER <- " << zeta_pointer << '\n';
878 int num_originators_failed = 0;
879 int first_originator_failed = -1;
880 if (verbose) std::cerr << "DO_LOOP:\n";
881 /* DO_LOOP */
882 for (int i = 0; i < count_edges(originator_graph, zeta_addr); ++i) {
883 int zeta_prime_addr = at(originator_graph, zeta_addr, i);
884 if (verbose) {
885 std::cerr << "Next originator of $\\zeta$ = " << zeta_addr << " is $\\zeta'$ = " << zeta_prime_addr << '\n';
886 }
887 std::vector<int> gamma = get_follow_string(zeta_prime_addr, scs, states, cs, grammar);
888 if (verbose) {
889 std::cerr << " FOLLOW string of $\\zeta'$ = " << zeta_prime_addr << " is ";
890 print_string(gamma, grammar);
891 std::cerr << '\n';
892 }
893 FirstSet gamma_first = get_first_set_of_string(gamma, first_sets);
894 if (verbose) {
895 std::cerr << " FIRST set of ";
896 print_string(gamma, grammar);
897 std::cerr << " is ";
898 print_set(gamma_first, *grammar);
899 std::cerr << "\n";
900 }
901 if (has_non_null_terminal_descendant(gamma_first)) { // test A
902 if (verbose) {
903 std::cerr << " ";
904 print_string(gamma, grammar);
905 std::cerr << " has a non-null terminal descendant\n";
906 }
907 contexts_generated = get_contexts(gamma_first);
908 if (verbose) {
909 std::cerr << " CONTEXTS_GENERATED = ";
910 print_set(contexts_generated, *grammar);
911 std::cerr << " = 1-heads of non-null descendants of ";
912 print_string(gamma, grammar);
913 std::cerr << '\n';
914 }
915 if (gamma_first.count(FIRST_NULL)) {
916 if (verbose) {
917 std::cerr << " ";
918 print_string(gamma, grammar);
919 std::cerr << " has a null terminal descendant\n";
920 }
921 if (at(complete, zeta_prime_addr)) {
922 unite_with(contexts_generated, at(contexts, zeta_prime_addr));
923 context_adding_routine(lane, zeta_pointer, contexts_generated, contexts,
924 verbose, grammar);
925 } else if (-1 == at(in_lane, zeta_prime_addr)) {
926 context_adding_routine(lane, zeta_pointer, contexts_generated, contexts,
927 verbose, grammar);
928 /* TRACE_FURTHER */
929 deal_with_tests_failed(num_originators_failed, first_originator_failed,
930 zeta_prime_addr, tests_failed, lane, in_lane, zeta_addr, stack,
931 verbose);
932 } else {
933 print_graphviz("ambiguous.dot", pip, true, std::cerr);
934 std::stringstream ss;
935 ss << "error: grammar is ambiguous.\n";
936 ss << "zeta_j is " << zeta_j_addr << ", zeta is " << zeta_addr << ", and zeta prime is " << zeta_prime_addr << '\n';
937 throw ParserBuildFail(ss.str());
938 }
939 } else {
940 context_adding_routine(lane, zeta_pointer, contexts_generated, contexts,
941 verbose, grammar);
942 }
943 } else {
944 if (verbose) {
945 std::cerr << " ";
946 print_string(gamma, grammar);
947 std::cerr << " does not have a non-null terminal descendant\n";
948 }
949 if (at(complete, zeta_prime_addr)) { // test B
950 if (verbose) std::cerr << " COMPLETE(" << zeta_prime_addr << ") is ON\n";
951 contexts_generated = at(contexts, zeta_prime_addr);
952 context_adding_routine(lane, zeta_pointer, contexts_generated, contexts,
953 verbose, grammar);
954 } else {
955 if (verbose) std::cerr << " COMPLETE(" << zeta_prime_addr << ") is OFF\n";
956 if (-1 != at(in_lane, zeta_prime_addr)) { // test C
957 if (verbose) std::cerr << " IN_LANE(" << zeta_prime_addr << ") is ON\n";
958 move_markers(lane, in_lane, zeta_prime_addr, zeta_pointer, tests_failed);
959 contexts_generated = at(contexts, zeta_prime_addr);
960 context_adding_routine(lane, zeta_pointer, contexts_generated, contexts,
961 verbose, grammar);
962 } else {
963 if (verbose) std::cerr << " IN_LANE(" << zeta_prime_addr << ") is OFF\n";
964 deal_with_tests_failed(num_originators_failed, first_originator_failed,
965 zeta_prime_addr, tests_failed, lane, in_lane, zeta_addr, stack,
966 verbose);
967 }
968 }
969 }
970 } /* END DO_LOOP */
971 if (verbose) std::cerr << "END DO_LOOP\n";
972 if (tests_failed) {
973 if (verbose) {
974 std::cerr << " TESTS_FAILED was on, turning it off and going to next configuration\n";
975 }
976 tests_failed = false;
977 continue;
978 }
979 bool keep_lane_popping = true;
980 if (verbose) std::cerr << " Start LANE popping\n";
981 while (keep_lane_popping) { // LANE popping loop
982 TEUCHOS_ASSERT(!lane.empty());
983 if (verbose) {
984 std::cerr << " LANE:";
985 print_stack(lane);
986 }
987 if (at(lane, Teuchos::size(lane) - 1) == MARKER) {
988 if (verbose) std::cerr << " Top of LANE is a marker\n";
989 if (verbose) std::cerr << " Start STACK popping\n";
990 while (true) { // STACK popping loop
991 TEUCHOS_ASSERT(!stack.empty());
992 if (verbose) {
993 std::cerr << " STACK:";
994 print_stack(stack);
995 std::cerr << " LANE:";
996 print_stack(lane);
997 }
998 if (stack.back() == MARKER) {
999 if (verbose) std::cerr << " Top of STACK is a marker, pop STACK and LANE\n";
1000 resize(stack, Teuchos::size(stack) - 1);
1001 resize(lane, Teuchos::size(lane) - 1);
1002 break; // out of STACK popping, back into LANE popping
1003 } else if (at(complete, stack.back())) {
1004 if (verbose) std::cerr << " Top of STACK is has COMPLETE flag, pop STACK\n";
1005 resize(stack, Teuchos::size(stack) - 1);
1006 // back into STACK popping
1007 } else {
1008 int addr = stack.back();
1009 if (verbose) std::cerr << " Top of STACK is " << addr << ", pop STACK\n";
1010 resize(stack, Teuchos::size(stack) - 1);
1011 if (verbose) std::cerr << " Push " << addr << " onto LANE\n";
1012 lane.push_back(addr);
1013 if (verbose) std::cerr << " IN_LANE(" << addr << ") <- ON\n";
1014 at(in_lane, addr) = Teuchos::size(lane) - 1;
1015 keep_lane_popping = false;
1016 break; // out of STACK and LANE popping, into top-level loop
1017 } // end STACK top checks
1018 } // end STACK popping loop
1019 } else if (at(lane, Teuchos::size(lane) - 1) == ZERO) {
1020 if (verbose) std::cerr << " Top of LANE is a zero\n";
1021 if (verbose) std::cerr << " Pop LANE\n";
1022 resize(lane, Teuchos::size(lane) - 1); // pop LANE
1023 // back to top of LANE popping loop
1024 } else { // top of LANE neither marker nor zero
1025 int tau_addr = lane.back();
1026 if (verbose) std::cerr << " Top of LANE is $\\tau$ = " << tau_addr << "\n";
1027 at(in_lane, tau_addr) = -1;
1028 if (verbose) std::cerr << " IN_LANE(" << tau_addr << ") <- OFF\n";
1029 at(complete, tau_addr) = true;
1030 if (verbose) std::cerr << " COMPLETE(" << tau_addr << ") <- ON\n";
1031 if (verbose) std::cerr << " HEURISTIC PROPAGATION OF CONTEXT SETS\n";
1032 heuristic_propagation_of_context_sets(
1033 tau_addr, contexts, complete,
1034 scs, states, states2scs, cs, grammar);
1035 if (Teuchos::size(lane) == 1 && at(lane, 0) == zeta_j_addr) {
1036 if (verbose) std::cerr << "END PROGRAM\n\n";
1037 return;
1038 }
1039 if (verbose) std::cerr << " Pop LANE\n";
1040 resize(lane, Teuchos::size(lane) - 1); // pop LANE
1041 // back to top of LANE popping loop
1042 } // end top of LANE checks
1043 } // end LANE popping loop
1044 } // end top-level while(1) loop
1045}
1046
1047static std::vector<bool> determine_adequate_states(
1048 StatesInProgress const& states,
1049 GrammarPtr grammar,
1050 bool verbose,
1051 std::ostream& os) {
1052 std::vector<bool> out = make_vector<bool>(Teuchos::size(states));
1053 for (int s_i = 0; s_i < Teuchos::size(states); ++s_i) {
1054 const StateInProgress& state = *at(states, s_i);
1055 bool state_is_adequate = true;
1056 for (int a_i = 0; a_i < Teuchos::size(state.actions); ++a_i) {
1057 const ActionInProgress& action = at(state.actions, a_i);
1058 if (action.action.kind == ACTION_SHIFT &&
1059 is_nonterminal(*grammar, *(action.context.begin()))) {
1060 continue;
1061 }
1062 for (int a_j = a_i + 1; a_j < Teuchos::size(state.actions); ++a_j) {
1063 const ActionInProgress& action2 = at(state.actions, a_j);
1064 if (action2.action.kind == ACTION_SHIFT &&
1065 is_nonterminal(*grammar, *(action2.context.begin()))) {
1066 continue;
1067 }
1068 if (intersects(action2.context, action.context)) {
1069 if (verbose) {
1070 const ActionInProgress* ap1 = &action;
1071 const ActionInProgress* ap2 = &action2;
1072 if (ap1->action.kind == ACTION_SHIFT) {
1073 std::swap(ap1, ap2);
1074 }
1075 TEUCHOS_ASSERT(ap1->action.kind == ACTION_REDUCE);
1076 os << "shift-reduce conflict in state " << s_i << ":\n";
1077 os << "reduce ";
1078 const Grammar::Production& prod = at(grammar->productions, ap1->action.production);
1079 const std::string& lhs_name = at(grammar->symbol_names, prod.lhs);
1080 os << lhs_name << " ::=";
1081 for (int rhs_i = 0; rhs_i < Teuchos::size(prod.rhs); ++rhs_i) {
1082 int rhs_symb = at(prod.rhs, rhs_i);
1083 const std::string& rhs_symb_name = at(grammar->symbol_names, rhs_symb);
1084 os << " " << rhs_symb_name;
1085 }
1086 int shift_symb = *(ap2->context.begin());
1087 const std::string& shift_name = at(grammar->symbol_names, shift_symb);
1088 os << "\nshift " << shift_name << '\n';
1089 }
1090 state_is_adequate = false;
1091 break;
1092 }
1093 }
1094 if (!state_is_adequate) break;
1095 }
1096 at(out, s_i) = state_is_adequate;
1097 }
1098 if (verbose) os << '\n';
1099 return out;
1100}
1101
1102ParserInProgress draft_lalr1_parser(GrammarPtr grammar, bool verbose) {
1103 ParserInProgress out;
1104 Configs& cs = out.configs;
1105 StatesInProgress& states = out.states;
1106 StateConfigs& scs = out.state_configs;
1107 Graph& states2scs = out.states2state_configs;
1108 out.grammar = grammar;
1109 cs = make_configs(*grammar);
1110 Graph lhs2cs = get_left_hand_sides_to_start_configs(cs, *grammar);
1111 if (verbose) std::cerr << "Building LR(0) parser\n";
1112 states = make_lr0_parser(cs, *grammar, lhs2cs);
1113 scs = form_state_configs(states);
1114 states2scs = form_states_to_state_configs(scs, states);
1115 if (verbose) print_graphviz("lr0.dot", out, true, std::cerr);
1116 if (verbose) std::cerr << "Checking adequacy of LR(0) machine\n";
1117 std::vector<bool> adequate = determine_adequate_states(states, grammar, verbose,
1118 std::cerr);
1119 if (*(std::min_element(adequate.begin(), adequate.end()))) {
1120 if (verbose) std::cerr << "The grammar is LR(0)!\n";
1121 return out;
1122 }
1123 std::vector<bool> complete = make_vector<bool>(Teuchos::size(scs), false);
1124 std::vector<Context> contexts = make_vector<Context>(Teuchos::size(scs));
1125 int accept_prod_i = get_accept_production(*grammar);
1126 /* initialize the accepting state-configs as described in
1127 footnote 8 at the bottom of page 37 */
1128 for (int sc_i = 0; sc_i < Teuchos::size(scs); ++sc_i) {
1129 StateConfig& sc = at(scs, sc_i);
1130 StateInProgress& state = *at(states, sc.state);
1131 int config_i = at(state.configs, sc.config_in_state);
1132 Config& config = at(cs, config_i);
1133 if (config.production == accept_prod_i) {
1134 at(complete, sc_i) = true;
1135 at(contexts, sc_i).insert(get_end_terminal(*grammar));
1136 }
1137 }
1138 Graph og = make_originator_graph(scs, states, states2scs, cs, grammar);
1139 if (verbose) std::cerr << "Originator Graph:\n";
1140 if (verbose) std::cerr << og << '\n';
1141 std::vector<FirstSet> first_sets = compute_first_sets(*grammar, verbose);
1142 /* compute context sets for all state-configs associated with reduction
1143 actions that are part of an inadequate state */
1144 for (int s_i = 0; s_i < Teuchos::size(states); ++s_i) {
1145 if (at(adequate, s_i)) continue;
1146 StateInProgress& state = *at(states, s_i);
1147 for (int cis_i = 0; cis_i < Teuchos::size(state.configs); ++cis_i) {
1148 int config_i = at(state.configs, cis_i);
1149 const Config& config = at(cs, config_i);
1150 const Grammar::Production& prod = at(grammar->productions, config.production);
1151 if (config.dot != Teuchos::size(prod.rhs)) continue;
1152 int zeta_j_addr = at(states2scs, s_i, cis_i);
1153 compute_context_set(zeta_j_addr, contexts, complete, scs,
1154 og, states, states2scs, cs, first_sets, grammar, verbose, out);
1155 }
1156 }
1157 /* update the context sets for all reduction state-configs
1158 which are marked complete, even if they aren't in inadequate states */
1159 for (int s_i = 0; s_i < Teuchos::size(states); ++s_i) {
1160 StateInProgress& state = *at(states, s_i);
1161 for (int cis_i = 0; cis_i < Teuchos::size(state.configs); ++cis_i) {
1162 int sc_i = at(states2scs, s_i, cis_i);
1163 if (!at(complete, sc_i)) continue;
1164 int config_i = at(state.configs, cis_i);
1165 Config& config = at(cs, config_i);
1166 const Grammar::Production& prod = at(grammar->productions, config.production);
1167 if (config.dot != Teuchos::size(prod.rhs)) continue;
1168 for (int a_i = 0; a_i < Teuchos::size(state.actions); ++a_i) {
1169 ActionInProgress& action = at(state.actions, a_i);
1170 if (action.action.kind == ACTION_REDUCE &&
1171 action.action.production == config.production) {
1172 action.context = at(contexts, sc_i);
1173 }
1174 }
1175 }
1176 }
1177 if (verbose) std::cerr << "Checking adequacy of LALR(1) machine\n";
1178 adequate = determine_adequate_states(states, grammar, verbose, std::cerr);
1179 if (!(*(std::min_element(adequate.begin(), adequate.end())))) {
1180 std::stringstream ss;
1181 ss << "error: The grammar is not LALR(1).\n";
1182 determine_adequate_states(states, grammar, true, ss);
1183 print_graphviz("error.dot", out, true, ss);
1184 std::string s = ss.str();
1185 throw ParserBuildFail(s);
1186 }
1187 if (verbose) std::cerr << "The grammar is LALR(1)!\n";
1188 if (verbose) print_graphviz("lalr1.dot", out, true, std::cerr);
1189 return out;
1190}
1191
1192Parser accept_parser(ParserInProgress const& pip) {
1193 const StatesInProgress& sips = pip.states;
1194 GrammarPtr grammar = pip.grammar;
1195 Parser out(grammar, Teuchos::size(sips));
1196 for (int s_i = 0; s_i < Teuchos::size(sips); ++s_i) {
1197 add_state(out);
1198 }
1199 for (int s_i = 0; s_i < Teuchos::size(sips); ++s_i) {
1200 const StateInProgress& sip = *at(sips, s_i);
1201 for (int a_i = 0; a_i < Teuchos::size(sip.actions); ++a_i) {
1202 const ActionInProgress& action = at(sip.actions, a_i);
1203 if (action.action.kind == ACTION_SHIFT &&
1204 is_nonterminal(*grammar, *(action.context.begin()))) {
1205 int nt = as_nonterminal(*grammar, *(action.context.begin()));
1206 add_nonterminal_action(out, s_i, nt, action.action.next_state);
1207 } else {
1208 for (Context::const_iterator it = action.context.begin();
1209 it != action.context.end(); ++it) {
1210 int terminal = *it;
1211 TEUCHOS_ASSERT(is_terminal(*grammar, terminal));
1212 add_terminal_action(out, s_i, terminal, action.action);
1213 }
1214 }
1215 }
1216 }
1217 return out;
1218}
1219
1220ParserBuildFail::ParserBuildFail(const std::string& msg):
1221 std::invalid_argument(msg) {
1222}
1223
1224Parser make_lalr1_parser(GrammarPtr grammar, bool verbose) {
1225 ParserInProgress pip = draft_lalr1_parser(grammar, verbose);
1226 return accept_parser(pip);
1227}
1228
1229}
#define TEUCHOS_ASSERT(assertion_test)
This macro is throws when an assert fails.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
Macro for throwing an exception with breakpointing to ease debugging.
TypeTo as(const TypeFrom &t)
Convert from one value type to another.
The Teuchos namespace contains all of the classes, structs and enums used by Teuchos,...
Parser make_lalr1_parser(GrammarPtr grammar, bool verbose)
Tries to create LALR(1) parser tables for a given grammar.