Teuchos - Trilinos Tools Package Version of the Day
Loading...
Searching...
No Matches
Teuchos_FiniteAutomaton.cpp
1#include "Teuchos_FiniteAutomaton.hpp"
2
3#include <set>
4#include <map>
5#include <queue>
6#include <utility>
7#include <memory>
8#include <limits>
9
10#include "Teuchos_chartab.hpp"
11#include "Teuchos_RCP.hpp"
12#include "Teuchos_Table.hpp"
13
14namespace Teuchos {
15
16template struct Table<int>;
17
18FiniteAutomaton::FiniteAutomaton(int nsymbols_init, bool is_deterministic_init,
19 int nstates_reserve):
20 table(nsymbols_init + (is_deterministic_init ? 0 : 2), nstates_reserve),
21 is_deterministic(is_deterministic_init)
22{
23 reserve(accepted_tokens, nstates_reserve);
24}
25
26void FiniteAutomaton::swap(FiniteAutomaton& other) {
27 using std::swap;
28 swap(table, other.table);
29 swap(accepted_tokens, other.accepted_tokens);
30 swap(is_deterministic, other.is_deterministic);
31}
32
33int get_nstates(FiniteAutomaton const& fa) {
34 return get_nrows(fa.table);
35}
36
37int get_nsymbols(FiniteAutomaton const& fa) {
38 return get_ncols(fa.table) - (fa.is_deterministic ? 0 : 2);
39}
40
41bool get_determinism(FiniteAutomaton const& fa) {
42 return fa.is_deterministic;
43}
44
45int get_epsilon0(FiniteAutomaton const& fa) {
46 TEUCHOS_ASSERT(!fa.is_deterministic);
47 return get_ncols(fa.table) - 2;
48}
49
50int get_epsilon1(FiniteAutomaton const& fa) {
51 TEUCHOS_ASSERT(!fa.is_deterministic);
52 return get_ncols(fa.table) - 1;
53}
54
55int add_state(FiniteAutomaton& fa) {
56 int state = get_nstates(fa);
57 resize(fa.table, state + 1, get_ncols(fa.table));
58 for (int j = 0; j < get_ncols(fa.table); ++j) {
59 at(fa.table, state, j) = -1;
60 }
61 fa.accepted_tokens.push_back(-1);
62 return state;
63}
64
65void add_transition(FiniteAutomaton& fa, int from_state, int at_symbol, int to_state) {
66 TEUCHOS_ASSERT(0 <= to_state);
67 TEUCHOS_ASSERT(to_state < get_nstates(fa));
68 TEUCHOS_ASSERT(0 <= at_symbol);
69 TEUCHOS_ASSERT(at_symbol < get_ncols(fa.table)); // allow setting epsilon transitions
70 TEUCHOS_ASSERT(at(fa.table, from_state, at_symbol) == -1);
71 at(fa.table, from_state, at_symbol) = to_state;
72}
73
74void add_accept(FiniteAutomaton& fa, int state, int token) {
75 TEUCHOS_ASSERT(0 <= token);
76 at(fa.accepted_tokens, state) = token;
77}
78
79void remove_accept(FiniteAutomaton& fa, int state) {
80 at(fa.accepted_tokens, state) = -1;
81}
82
83int step(FiniteAutomaton const& fa, int state, int symbol) {
84 TEUCHOS_ASSERT(0 <= state);
85 TEUCHOS_ASSERT(state < get_nstates(fa));
86 TEUCHOS_ASSERT(0 <= symbol);
87 TEUCHOS_ASSERT(symbol < get_ncols(fa.table)); // allow getting epsilon transitions
88 return at(fa.table, state, symbol);
89}
90
91int accepts(FiniteAutomaton const& fa, int state) {
92 return at(fa.accepted_tokens, state);
93}
94
95int get_nsymbols_eps(FiniteAutomaton const& fa) {
96 return get_ncols(fa.table);
97}
98
99void append_states(FiniteAutomaton& fa, FiniteAutomaton const& other) {
100 TEUCHOS_ASSERT(get_nsymbols(other) == get_nsymbols(fa));
101 bool other_determ = get_determinism(other);
102 if (!other_determ) TEUCHOS_ASSERT(!fa.is_deterministic);
103 int offset = get_nstates(fa);
104 for (int other_state = 0; other_state < get_nstates(other); ++other_state) {
105 int my_state = add_state(fa);
106 int token = accepts(other, other_state);
107 if (0 <= token) add_accept(fa, my_state, token);
108 }
109 for (int other_state = 0; other_state < get_nstates(other); ++other_state) {
110 int my_state = other_state + offset;
111 for (int symbol = 0; symbol < get_nsymbols_eps(other); ++symbol) {
112 int other_next = step(other, other_state, symbol);
113 if (other_next < 0) continue;
114 int my_next = other_next + offset;
115 add_transition(fa, my_state, symbol, my_next);
116 }
117 }
118}
119
120void make_single_nfa(FiniteAutomaton& result, int nsymbols, int symbol, int token) {
121 make_range_nfa(result, nsymbols, symbol, symbol, token);
122}
123
124void make_set_nfa(FiniteAutomaton& result, int nsymbols, std::set<int> const& accepted, int token) {
125 using std::swap;
126 FiniteAutomaton out(nsymbols, true, 2);
127 int start_state = add_state(out);
128 int accept_state = add_state(out);
129 for (std::set<int>::const_iterator it = accepted.begin(); it != accepted.end(); ++it) {
130 int i = *it;
131 add_transition(out, start_state, i, accept_state);
132 }
133 add_accept(out, accept_state, token);
134 swap(result, out);
135}
136
137void make_range_nfa(FiniteAutomaton& result, int nsymbols, int range_start, int range_end, int token) {
138 using std::swap;
139 TEUCHOS_ASSERT(0 <= range_start);
140 TEUCHOS_ASSERT(range_start <= range_end);
141 TEUCHOS_ASSERT(range_end <= nsymbols);
142 FiniteAutomaton out(nsymbols, true, 2);
143 int start_state = add_state(out);
144 int accept_state = add_state(out);
145 for (int i = range_start; i <= range_end; ++i) {
146 add_transition(out, start_state, i, accept_state);
147 }
148 add_accept(out, accept_state, token);
149 swap(result, out);
150}
151
152void unite(FiniteAutomaton& result, FiniteAutomaton const& a, FiniteAutomaton const& b) {
153 using std::swap;
154 int nsymbols = get_nsymbols(a);
155 FiniteAutomaton out(nsymbols, false, 1 + get_nstates(a) + get_nstates(b));
156 int start_state = add_state(out);
157 int a_offset = get_nstates(out);
158 append_states(out, a);
159 int b_offset = get_nstates(out);
160 append_states(out, b);
161 int epsilon0 = get_epsilon0(out);
162 int epsilon1 = get_epsilon1(out);
163 add_transition(out, start_state, epsilon0, a_offset);
164 add_transition(out, start_state, epsilon1, b_offset);
165 using std::swap;
166 swap(out, result);
167}
168
169void concat(FiniteAutomaton& result, FiniteAutomaton const& a, FiniteAutomaton const& b, int token) {
170 int nsymbols = get_nsymbols(a);
171 FiniteAutomaton out(nsymbols, false, get_nstates(a) + get_nstates(b));
172 append_states(out, a);
173 int b_offset = get_nstates(out);
174 append_states(out, b);
175 int epsilon0 = get_epsilon0(out);
176 for (int i = 0; i < get_nstates(a); ++i) {
177 if (accepts(a, i) != -1) {
178 add_transition(out, i, epsilon0, b_offset);
179 remove_accept(out, i);
180 }
181 }
182 for (int i = 0; i < get_nstates(b); ++i) {
183 if (accepts(b, i) != -1) {
184 add_accept(out, i + b_offset, token);
185 }
186 }
187 swap(result, out);
188}
189
190void plus(FiniteAutomaton& result, FiniteAutomaton const& a, int token) {
191 using std::swap;
192 FiniteAutomaton out(get_nsymbols(a), false, get_nstates(a) + 1);
193 append_states(out, a);
194 int new_accept_state = add_state(out);
195 add_accept(out, new_accept_state, token);
196 int epsilon0 = get_epsilon0(out);
197 int epsilon1 = get_epsilon1(out);
198 for (int i = 0; i < get_nstates(a); ++i) {
199 if (accepts(a, i) != -1) {
200 add_transition(out, i, epsilon0, new_accept_state);
201 /* we follow a convention that accepting
202 states should not have epsilon transitions */
203 add_transition(out, i, epsilon1, 0);
204 remove_accept(out, i);
205 }
206 }
207 swap(result, out);
208}
209
210void maybe(FiniteAutomaton& result, FiniteAutomaton const& a, int token) {
211 using std::swap;
212 FiniteAutomaton out(get_nsymbols(a), false, get_nstates(a) + 2);
213 int new_start_state = add_state(out);
214 int offset = get_nstates(out);
215 append_states(out, a);
216 int new_accept_state = add_state(out);
217 int epsilon0 = get_epsilon0(out);
218 int epsilon1 = get_epsilon1(out);
219 add_transition(out, new_start_state, epsilon1, offset);
220 /* form an epsilon0 linked list of new start state,
221 all old accepting states, and new accepting state */
222 int last = new_start_state;
223 for (int i = 0; i < get_nstates(a); ++i) {
224 if (accepts(a, i) != -1) {
225 add_transition(out, last, epsilon0, i + offset);
226 remove_accept(out, i + offset);
227 last = i + offset;
228 }
229 }
230 add_transition(out, last, epsilon0, new_accept_state);
231 add_accept(out, new_accept_state, token);
232 swap(result, out);
233}
234
235void star(FiniteAutomaton& result, FiniteAutomaton const& a, int token) {
236 using std::swap;
237 plus(result, a, token);
238 maybe(result, result, token);
239}
240
241typedef std::set<int> StateSet;
242
243static StateSet step(StateSet const& ss, int symbol, FiniteAutomaton const& fa) {
244 StateSet next_ss;
245 for (StateSet::const_iterator it = ss.begin(); it != ss.end(); ++it) {
246 int state = *it;
247 int next_state = step(fa, state, symbol);
248 if (next_state != -1) next_ss.insert(next_state);
249 }
250 return next_ss;
251}
252
253typedef std::queue<int> StateQueue;
254
255static StateSet get_epsilon_closure(StateSet ss, FiniteAutomaton const& fa) {
256 StateQueue q;
257 for (StateSet::const_iterator it = ss.begin(); it != ss.end(); ++it) {
258 int state = *it;
259 q.push(state);
260 }
261 int epsilon0 = get_epsilon0(fa);
262 int epsilon1 = get_epsilon1(fa);
263 while (!q.empty()) {
264 int state = q.front(); q.pop();
265 for (int epsilon = epsilon0; epsilon <= epsilon1; ++epsilon) {
266 int next_state = step(fa, state, epsilon);
267 if (next_state == -1) continue;
268 if (!ss.count(next_state)) {
269 ss.insert(next_state);
270 q.push(next_state);
271 }
272 }
273 }
274 return ss;
275}
276
277struct StateSetPtrLess {
278 bool operator()(StateSet* a, StateSet* b) const {
279 return *a < *b;
280 }
281};
282
283typedef std::map<StateSet*,int,StateSetPtrLess> StateSetPtr2State;
284typedef RCP<StateSet> StateSetPtr;
285typedef std::vector<StateSetPtr> StateSetUniqPtrVector;
286
287static void add_back(StateSetUniqPtrVector& ssupv, StateSet& ss) {
288 using std::swap;
289 StateSetPtr ptr(new StateSet());
290 swap(*ptr, ss);
291 ssupv.push_back(ptr);
292}
293
294/* powerset construction, NFA -> DFA */
295void make_deterministic(FiniteAutomaton& result, FiniteAutomaton& nfa) {
296 using std::swap;
297 if (get_determinism(nfa)) {
298 swap(result, nfa);
299 return;
300 }
301 StateSetPtr2State ssp2s;
302 StateSetUniqPtrVector ssupv;
303 FiniteAutomaton out(get_nsymbols(nfa), true, 0);
304 StateSet start_ss;
305 start_ss.insert(0);
306 start_ss = get_epsilon_closure(start_ss, nfa);
307 add_back(ssupv, start_ss);
308 ssp2s[ssupv.back().get()] = add_state(out);
309 int front = 0;
310 while (front < int(ssupv.size())) {
311 int state = front;
312 StateSet& ss = *at(ssupv, front);
313 ++front;
314 for (int symbol = 0; symbol < get_nsymbols(nfa); ++symbol) {
315 StateSet next_ss = Teuchos::step(ss, symbol, nfa);
316 if (next_ss.empty()) continue;
317 next_ss = get_epsilon_closure(next_ss, nfa);
318 int next_state;
319 StateSetPtr2State::iterator it = ssp2s.find(&next_ss);
320 if (it == ssp2s.end()) {
321 next_state = add_state(out);
322 add_back(ssupv, next_ss);
323 ssp2s[ssupv.back().get()] = next_state;
324 } else {
325 next_state = it->second;
326 }
327 add_transition(out, state, symbol, next_state);
328 }
329 int min_accepted = -1;
330 for (StateSet::const_iterator it = ss.begin(); it != ss.end(); ++it) {
331 int nfa_state = *it;
332 int nfa_token = accepts(nfa, nfa_state);
333 if (nfa_token == -1) continue;
334 if (min_accepted == -1 || nfa_token < min_accepted) {
335 min_accepted = nfa_token;
336 }
337 }
338 if (min_accepted != -1) add_accept(out, state, min_accepted);
339 }
340 swap(result, out);
341}
342
343struct StateRowLess {
344 Table<int> const& table;
345 std::vector<int> const& accepted;
346 bool operator()(int const& a, int const& b) const {
347 int aa = at(accepted, a);
348 int ab = at(accepted, b);
349 if (aa != ab) return aa < ab;
350 for (int symbol = 0, ncols = get_ncols(table); symbol < ncols; ++symbol) {
351 int na = at(table, a, symbol);
352 int nb = at(table, b, symbol);
353 if (na != nb) return na < nb;
354 }
355 return false;
356 }
357 StateRowLess(Table<int> const& t, std::vector<int> const& a):
358 table(t),accepted(a) {
359 }
360};
361
362typedef std::map<int, int, StateRowLess> StateRow2SimpleState;
363
364void simplify_once(FiniteAutomaton& result, FiniteAutomaton const& fa) {
365 using std::swap;
366 StateRow2SimpleState sr2ss(StateRowLess(fa.table, fa.accepted_tokens));
367 int nsimple = 0;
368 for (int state = 0; state < get_nstates(fa); ++state) {
369 std::pair<StateRow2SimpleState::iterator, bool> res =
370 sr2ss.insert(std::make_pair(state, nsimple));
371 if (res.second) {
372 ++nsimple;
373 }
374 }
375 FiniteAutomaton out(get_nsymbols(fa), get_determinism(fa), nsimple);
376 for (int simple = 0; simple < nsimple; ++simple) {
377 add_state(out);
378 }
379 std::vector<bool> did_simple(size_t(nsimple), false);
380 for (int state = 0; state < get_nstates(fa); ++state) {
381 TEUCHOS_ASSERT(sr2ss.count(state));
382 int simple = sr2ss[state];
383 if (at(did_simple, simple)) continue;
384 for (int symbol = 0; symbol < get_nsymbols_eps(fa); ++symbol) {
385 int next_state = step(fa, state, symbol);
386 if (next_state == -1) continue;
387 TEUCHOS_ASSERT(sr2ss.count(next_state));
388 int next_simple = sr2ss[next_state];
389 add_transition(out, simple, symbol, next_simple);
390 }
391 int token = accepts(fa, state);
392 if (token != -1) {
393 add_accept(out, simple, token);
394 }
395 at(did_simple, simple) = true;
396 }
397 swap(result, out);
398}
399
400void simplify(FiniteAutomaton& result, FiniteAutomaton const& fa) {
401 using std::swap;
402 FiniteAutomaton prev;
403 FiniteAutomaton next = fa;
404 int nstates_next = get_nstates(next);
405 int nstates;
406 int i = 0;
407 do {
408 swap(prev, next);
409 nstates = nstates_next;
410 simplify_once(next, prev);
411 ++i;
412 nstates_next = get_nstates(next);
413 } while (nstates_next < nstates);
414 swap(result, next);
415}
416
417void add_char_transition(FiniteAutomaton& fa, int from_state, char at_char, int to_state) {
418 add_transition(fa, from_state, get_symbol(at_char), to_state);
419}
420
421template <typename T, bool is_signed = std::numeric_limits<T>::is_signed>
422struct IsSymbol;
423
424template <typename T>
425struct IsSymbol<T, true> {
426 static bool eval(T c) {
427 if (c < 0) return false;
428 return 0 <= Teuchos::chartab[int(c)];
429 }
430};
431
432template <typename T>
433struct IsSymbol<T, false> {
434 static bool eval(T c) {
435 if (c >= TEUCHOS_CHARTAB_SIZE) return false;
436 return 0 <= Teuchos::chartab[int(c)];
437 }
438};
439
440bool is_symbol(char c) {
441 return IsSymbol<char>::eval(c);
442}
443
444template <typename T, bool is_signed = std::numeric_limits<T>::is_signed>
445struct GetSymbol;
446
447template <typename T>
448struct GetSymbol<T, true> {
449 static int eval(T c) {
450 TEUCHOS_ASSERT(0 <= c);
451 int symbol = Teuchos::chartab[int(c)];
452 TEUCHOS_ASSERT(0 <= symbol);
453 return symbol;
454 }
455};
456
457template <typename T>
458struct GetSymbol<T, false> {
459 static int eval(T c) {
460 int symbol = Teuchos::chartab[int(c)];
461 TEUCHOS_ASSERT(0 <= symbol);
462 return symbol;
463 }
464};
465
466int get_symbol(char c) {
467 return GetSymbol<char>::eval(c);
468}
469
470char get_char(int symbol) {
471 TEUCHOS_ASSERT(0 <= symbol);
472 TEUCHOS_ASSERT(symbol < Teuchos::NCHARS);
473 return inv_chartab[symbol];
474}
475
476void make_char_set_nfa(FiniteAutomaton& result, std::set<char> const& accepted, int token) {
477 std::set<int> symbol_set;
478 for (std::set<char>::const_iterator it = accepted.begin(); it != accepted.end(); ++it) {
479 char c = *it;
480 symbol_set.insert(get_symbol(c));
481 }
482 return make_set_nfa(result, Teuchos::NCHARS, symbol_set, token);
483}
484
485void make_char_range_nfa(FiniteAutomaton& result, char range_start, char range_end, int token) {
486 return make_range_nfa(result, Teuchos::NCHARS, get_symbol(range_start), get_symbol(range_end), token);
487}
488
489void make_char_single_nfa(FiniteAutomaton& result, char symbol_char, int token) {
490 return make_range_nfa(result, Teuchos::NCHARS, get_symbol(symbol_char), get_symbol(symbol_char), token);
491}
492
493void negate_set(std::set<char>& result, std::set<char> const& s) {
494 using std::swap;
495 std::set<char> out;
496 for (int symbol = 0; symbol < NCHARS; ++symbol) {
497 char c = inv_chartab[symbol];
498 if (!s.count(c)) out.insert(c);
499 }
500 swap(result, out);
501}
502
503std::ostream& operator<<(std::ostream& os, FiniteAutomaton const& fa) {
504 if (get_determinism(fa)) os << "dfa ";
505 else os << "nfa ";
506 os << get_nstates(fa) << " states " << get_nsymbols(fa) << " symbols\n";
507 for (int state = 0; state < get_nstates(fa); ++state) {
508 for (int symbol = 0; symbol < get_nsymbols(fa); ++symbol) {
509 int next_state = step(fa, state, symbol);
510 if (next_state != -1) os << "(" << state << ", " << symbol << ") -> " << next_state << '\n';
511 }
512 if (!get_determinism(fa)) {
513 for (int symbol = get_epsilon0(fa); symbol <= get_epsilon1(fa); ++symbol) {
514 int next_state = step(fa, state, symbol);
515 if (next_state != -1) os << "(" << state << ", eps" << (symbol - get_epsilon0(fa)) << ") -> " << next_state << '\n';
516 }
517 }
518 int token = accepts(fa, state);
519 if (token != -1) os << state << " accepts " << token << '\n';
520 }
521 return os;
522}
523
524} // end namespace Teuchos
Reference-counted pointer class and non-member templated function implementations.
#define TEUCHOS_ASSERT(assertion_test)
This macro is throws when an assert fails.
The Teuchos namespace contains all of the classes, structs and enums used by Teuchos,...