1 | // Copyright 2004-9 Trustees of Indiana University |
2 | |
3 | // Distributed under the Boost Software License, Version 1.0. |
4 | // (See accompanying file LICENSE_1_0.txt or copy at |
5 | // http://www.boost.org/LICENSE_1_0.txt) |
6 | |
7 | // |
8 | // read_graphviz_new.cpp - |
9 | // Initialize a model of the BGL's MutableGraph concept and an associated |
10 | // collection of property maps using a graph expressed in the GraphViz |
11 | // DOT Language. |
12 | // |
13 | // Based on the grammar found at: |
14 | // https://web.archive.org/web/20041213234742/http://www.graphviz.org/cvs/doc/info/lang.html |
15 | // |
16 | // Jeremiah rewrite used grammar found at: |
17 | // http://www.graphviz.org/doc/info/lang.html |
18 | // and page 34 or http://www.graphviz.org/pdf/dotguide.pdf |
19 | // |
20 | // See documentation for this code at: |
21 | // http://www.boost.org/libs/graph/doc/read_graphviz.html |
22 | // |
23 | |
24 | // Author: Jeremiah Willcock |
25 | // Ronald Garcia |
26 | // |
27 | |
28 | #define BOOST_GRAPH_SOURCE |
29 | #include <boost/assert.hpp> |
30 | #include <boost/ref.hpp> |
31 | #include <boost/function/function2.hpp> |
32 | #include <boost/property_map/dynamic_property_map.hpp> |
33 | #include <boost/graph/graph_traits.hpp> |
34 | #include <boost/detail/workaround.hpp> |
35 | #include <boost/algorithm/string/case_conv.hpp> |
36 | #include <cstdlib> |
37 | #include <algorithm> |
38 | #include <exception> // for std::exception |
39 | #include <iostream> |
40 | #include <map> |
41 | #include <set> |
42 | #include <string> |
43 | #include <vector> |
44 | #include <utility> |
45 | #include <boost/throw_exception.hpp> |
46 | #include <boost/regex.hpp> |
47 | #include <boost/function.hpp> |
48 | #include <boost/graph/dll_import_export.hpp> |
49 | #include <boost/graph/graphviz.hpp> |
50 | |
51 | namespace boost |
52 | { |
53 | |
54 | namespace read_graphviz_detail |
55 | { |
56 | struct token |
57 | { |
58 | enum token_type |
59 | { |
60 | kw_strict, |
61 | kw_graph, |
62 | kw_digraph, |
63 | kw_node, |
64 | kw_edge, |
65 | kw_subgraph, |
66 | left_brace, |
67 | right_brace, |
68 | semicolon, |
69 | equal, |
70 | left_bracket, |
71 | right_bracket, |
72 | comma, |
73 | colon, |
74 | dash_greater, |
75 | dash_dash, |
76 | plus, |
77 | left_paren, |
78 | right_paren, |
79 | at, |
80 | identifier, |
81 | quoted_string, // Only used internally in tokenizer |
82 | eof, |
83 | invalid |
84 | }; |
85 | token_type type; |
86 | std::string normalized_value; // May have double-quotes removed and/or |
87 | // some escapes replaced |
88 | token(token_type type, const std::string& normalized_value) |
89 | : type(type), normalized_value(normalized_value) |
90 | { |
91 | } |
92 | token() : type(invalid), normalized_value("" ) {} |
93 | friend std::ostream& operator<<(std::ostream& o, const token& t) |
94 | { |
95 | switch (t.type) |
96 | { |
97 | case token::kw_strict: |
98 | o << "<strict>" ; |
99 | break; |
100 | case token::kw_graph: |
101 | o << "<graph>" ; |
102 | break; |
103 | case token::kw_digraph: |
104 | o << "<digraph>" ; |
105 | break; |
106 | case token::kw_node: |
107 | o << "<node>" ; |
108 | break; |
109 | case token::kw_edge: |
110 | o << "<edge>" ; |
111 | break; |
112 | case token::kw_subgraph: |
113 | o << "<subgraph>" ; |
114 | break; |
115 | case token::left_brace: |
116 | o << "<left_brace>" ; |
117 | break; |
118 | case token::right_brace: |
119 | o << "<right_brace>" ; |
120 | break; |
121 | case token::semicolon: |
122 | o << "<semicolon>" ; |
123 | break; |
124 | case token::equal: |
125 | o << "<equal>" ; |
126 | break; |
127 | case token::left_bracket: |
128 | o << "<left_bracket>" ; |
129 | break; |
130 | case token::right_bracket: |
131 | o << "<right_bracket>" ; |
132 | break; |
133 | case token::comma: |
134 | o << "<comma>" ; |
135 | break; |
136 | case token::colon: |
137 | o << "<colon>" ; |
138 | break; |
139 | case token::dash_greater: |
140 | o << "<dash-greater>" ; |
141 | break; |
142 | case token::dash_dash: |
143 | o << "<dash-dash>" ; |
144 | break; |
145 | case token::plus: |
146 | o << "<plus>" ; |
147 | break; |
148 | case token::left_paren: |
149 | o << "<left_paren>" ; |
150 | break; |
151 | case token::right_paren: |
152 | o << "<right_paren>" ; |
153 | break; |
154 | case token::at: |
155 | o << "<at>" ; |
156 | break; |
157 | case token::identifier: |
158 | o << "<identifier>" ; |
159 | break; |
160 | case token::quoted_string: |
161 | o << "<quoted_string>" ; |
162 | break; |
163 | case token::eof: |
164 | o << "<eof>" ; |
165 | break; |
166 | default: |
167 | o << "<invalid type>" ; |
168 | break; |
169 | } |
170 | o << " '" << t.normalized_value << "'" ; |
171 | return o; |
172 | } |
173 | }; |
174 | |
175 | bad_graphviz_syntax lex_error(const std::string& errmsg, char bad_char) |
176 | { |
177 | if (bad_char == '\0') |
178 | { |
179 | return bad_graphviz_syntax(errmsg + " (at end of input)" ); |
180 | } |
181 | else |
182 | { |
183 | return bad_graphviz_syntax( |
184 | errmsg + " (char is '" + bad_char + "')" ); |
185 | } |
186 | } |
187 | |
188 | bad_graphviz_syntax parse_error( |
189 | const std::string& errmsg, const token& bad_token) |
190 | { |
191 | return bad_graphviz_syntax(errmsg + " (token is \"" |
192 | + boost::lexical_cast< std::string >(arg: bad_token) + "\")" ); |
193 | } |
194 | |
195 | struct tokenizer |
196 | { |
197 | std::string::const_iterator begin, end; |
198 | std::vector< token > lookahead; |
199 | // Precomputed regexes |
200 | boost::regex stuff_to_skip; |
201 | boost::regex basic_id_token; |
202 | boost::regex punctuation_token; |
203 | boost::regex number_token; |
204 | boost::regex quoted_string_token; |
205 | boost::regex xml_tag_token; |
206 | boost::regex cdata; |
207 | |
208 | tokenizer(const std::string& str) : begin(str.begin()), end(str.end()) |
209 | { |
210 | std::string end_of_token = "(?=(?:\\W))" ; |
211 | std::string whitespace = "(?:\\s+)" ; |
212 | std::string = "(?://.*?$)" ; |
213 | std::string = "(?:/\\*.*?\\*/)" ; |
214 | std::string = "(?:^#.*?$)" ; |
215 | std::string backslash_newline = "(?:[\\\\][\\n])" ; |
216 | stuff_to_skip = "\\A(?:" + whitespace + "|" + slash_slash_comment |
217 | + "|" + slash_star_comment + "|" + hash_comment + "|" |
218 | + backslash_newline + ")*" ; |
219 | basic_id_token = "\\A([[:alpha:]_](?:\\w*))" ; |
220 | punctuation_token = "\\A([][{};=,:+()@]|[-][>-])" ; |
221 | number_token = "\\A([-]?(?:(?:\\.\\d+)|(?:\\d+(?:\\.\\d*)?)))" ; |
222 | quoted_string_token = "\\A(\"(?:[^\"\\\\]|(?:[\\\\].))*\")" ; |
223 | xml_tag_token |
224 | = "\\A<(/?)(?:[^!?'\"]|(?:'[^']*?')|(?:\"[^\"]*?\"))*?(/?)>" ; |
225 | cdata = "\\A\\Q<![CDATA[\\E.*?\\Q]]>\\E" ; |
226 | } |
227 | |
228 | void skip() |
229 | { |
230 | boost::match_results< std::string::const_iterator > results; |
231 | #ifndef NDEBUG |
232 | bool found = |
233 | #endif |
234 | boost::regex_search(first: begin, last: end, m&: results, e: stuff_to_skip); |
235 | #ifndef NDEBUG |
236 | BOOST_ASSERT(found); |
237 | #endif |
238 | boost::sub_match< std::string::const_iterator > sm1 |
239 | = results.suffix(); |
240 | BOOST_ASSERT(sm1.second == end); |
241 | begin = sm1.first; |
242 | } |
243 | |
244 | token get_token_raw() |
245 | { |
246 | if (!lookahead.empty()) |
247 | { |
248 | token t = lookahead.front(); |
249 | lookahead.erase(position: lookahead.begin()); |
250 | return t; |
251 | } |
252 | skip(); |
253 | if (begin == end) |
254 | return token(token::eof, "" ); |
255 | // Look for keywords first |
256 | bool found; |
257 | boost::match_results< std::string::const_iterator > results; |
258 | found = boost::regex_search(first: begin, last: end, m&: results, e: basic_id_token); |
259 | if (found) |
260 | { |
261 | std::string str = results[1].str(); |
262 | std::string str_lower = boost::algorithm::to_lower_copy(Input: str); |
263 | begin = results.suffix().first; |
264 | if (str_lower == "strict" ) |
265 | { |
266 | return token(token::kw_strict, str); |
267 | } |
268 | else if (str_lower == "graph" ) |
269 | { |
270 | return token(token::kw_graph, str); |
271 | } |
272 | else if (str_lower == "digraph" ) |
273 | { |
274 | return token(token::kw_digraph, str); |
275 | } |
276 | else if (str_lower == "node" ) |
277 | { |
278 | return token(token::kw_node, str); |
279 | } |
280 | else if (str_lower == "edge" ) |
281 | { |
282 | return token(token::kw_edge, str); |
283 | } |
284 | else if (str_lower == "subgraph" ) |
285 | { |
286 | return token(token::kw_subgraph, str); |
287 | } |
288 | else |
289 | { |
290 | return token(token::identifier, str); |
291 | } |
292 | } |
293 | found = boost::regex_search(first: begin, last: end, m&: results, e: punctuation_token); |
294 | if (found) |
295 | { |
296 | std::string str = results[1].str(); |
297 | begin = results.suffix().first; |
298 | switch (str[0]) |
299 | { |
300 | case '[': |
301 | return token(token::left_bracket, str); |
302 | case ']': |
303 | return token(token::right_bracket, str); |
304 | case '{': |
305 | return token(token::left_brace, str); |
306 | case '}': |
307 | return token(token::right_brace, str); |
308 | case ';': |
309 | return token(token::semicolon, str); |
310 | case '=': |
311 | return token(token::equal, str); |
312 | case ',': |
313 | return token(token::comma, str); |
314 | case ':': |
315 | return token(token::colon, str); |
316 | case '+': |
317 | return token(token::plus, str); |
318 | case '(': |
319 | return token(token::left_paren, str); |
320 | case ')': |
321 | return token(token::right_paren, str); |
322 | case '@': |
323 | return token(token::at, str); |
324 | case '-': |
325 | { |
326 | switch (str[1]) |
327 | { |
328 | case '-': |
329 | return token(token::dash_dash, str); |
330 | case '>': |
331 | return token(token::dash_greater, str); |
332 | default: |
333 | BOOST_ASSERT(!"Definition of punctuation_token does " |
334 | "not match switch statement" ); |
335 | } |
336 | // Prevent static analyzers complaining about fallthrough: |
337 | break; |
338 | } |
339 | default: |
340 | BOOST_ASSERT(!"Definition of punctuation_token does not " |
341 | "match switch statement" ); |
342 | } |
343 | } |
344 | found = boost::regex_search(first: begin, last: end, m&: results, e: number_token); |
345 | if (found) |
346 | { |
347 | std::string str = results[1].str(); |
348 | begin = results.suffix().first; |
349 | return token(token::identifier, str); |
350 | } |
351 | found |
352 | = boost::regex_search(first: begin, last: end, m&: results, e: quoted_string_token); |
353 | if (found) |
354 | { |
355 | std::string str = results[1].str(); |
356 | begin = results.suffix().first; |
357 | // Remove the beginning and ending quotes |
358 | BOOST_ASSERT(str.size() >= 2); |
359 | str.erase(position: str.begin()); |
360 | str.erase(position: str.end() - 1); |
361 | // Unescape quotes in the middle, but nothing else (see format |
362 | // spec) |
363 | for (size_t i = 0; i + 1 < str.size() /* May change */; ++i) |
364 | { |
365 | if (str[i] == '\\' && str[i + 1] == '"') |
366 | { |
367 | str.erase(position: str.begin() + i); |
368 | // Don't need to adjust i |
369 | } |
370 | else if (str[i] == '\\' && str[i + 1] == '\n') |
371 | { |
372 | str.erase(position: str.begin() + i); |
373 | str.erase(position: str.begin() + i); |
374 | --i; // Invert ++ that will be applied |
375 | } |
376 | } |
377 | return token(token::quoted_string, str); |
378 | } |
379 | if (*begin == '<') |
380 | { |
381 | std::string::const_iterator saved_begin = begin; |
382 | int counter = 0; |
383 | do |
384 | { |
385 | if (begin == end) |
386 | throw_lex_error(errmsg: "Unclosed HTML string" ); |
387 | if (*begin != '<') |
388 | { |
389 | ++begin; |
390 | continue; |
391 | } |
392 | found = boost::regex_search( |
393 | first: begin, last: end, m&: results, e: xml_tag_token); |
394 | if (found) |
395 | { |
396 | begin = results.suffix().first; |
397 | if (results[1].str() == "/" ) |
398 | { // Close tag |
399 | --counter; |
400 | } |
401 | else if (results[2].str() == "/" ) |
402 | { // Empty tag |
403 | } |
404 | else |
405 | { // Open tag |
406 | ++counter; |
407 | } |
408 | continue; |
409 | } |
410 | found = boost::regex_search(first: begin, last: end, m&: results, e: cdata); |
411 | if (found) |
412 | { |
413 | begin = results.suffix().first; |
414 | continue; |
415 | } |
416 | throw_lex_error(errmsg: "Invalid contents in HTML string" ); |
417 | } while (counter > 0); |
418 | return token( |
419 | token::identifier, std::string(saved_begin, begin)); |
420 | } |
421 | else |
422 | { |
423 | throw_lex_error(errmsg: "Invalid character" ); |
424 | return token(); |
425 | } |
426 | } |
427 | |
428 | token peek_token_raw() |
429 | { |
430 | if (lookahead.empty()) |
431 | { |
432 | token t = get_token_raw(); |
433 | lookahead.push_back(x: t); |
434 | } |
435 | return lookahead.front(); |
436 | } |
437 | |
438 | token get_token() |
439 | { // Handle string concatenation |
440 | token t = get_token_raw(); |
441 | if (t.type != token::quoted_string) |
442 | return t; |
443 | std::string str = t.normalized_value; |
444 | while (peek_token_raw().type == token::plus) |
445 | { |
446 | get_token_raw(); |
447 | token t2 = get_token_raw(); |
448 | if (t2.type != token::quoted_string) |
449 | { |
450 | throw_lex_error( |
451 | errmsg: "Must have quoted string after string concatenation" ); |
452 | } |
453 | str += t2.normalized_value; |
454 | } |
455 | return token( |
456 | token::identifier, str); // Note that quoted_string does not get |
457 | // passed to the parser |
458 | } |
459 | |
460 | void throw_lex_error(const std::string& errmsg) |
461 | { |
462 | boost::throw_exception( |
463 | e: lex_error(errmsg, bad_char: (begin == end ? '\0' : *begin))); |
464 | } |
465 | }; |
466 | |
467 | struct edge_endpoint |
468 | { |
469 | bool is_subgraph; |
470 | node_and_port node_ep; |
471 | subgraph_name subgraph_ep; |
472 | |
473 | static edge_endpoint node(const node_and_port& ep) |
474 | { |
475 | edge_endpoint r; |
476 | r.is_subgraph = false; |
477 | r.node_ep = ep; |
478 | return r; |
479 | } |
480 | |
481 | static edge_endpoint subgraph(const subgraph_name& ep) |
482 | { |
483 | edge_endpoint r; |
484 | r.is_subgraph = true; |
485 | r.subgraph_ep = ep; |
486 | return r; |
487 | } |
488 | }; |
489 | |
490 | struct node_or_subgraph_ref |
491 | { |
492 | bool is_subgraph; |
493 | std::string |
494 | name; // Name for subgraphs or nodes, "___root___" for root graph |
495 | }; |
496 | |
497 | static node_or_subgraph_ref noderef(const node_name& n) |
498 | { |
499 | node_or_subgraph_ref r; |
500 | r.is_subgraph = false; |
501 | r.name = n; |
502 | return r; |
503 | } |
504 | |
505 | static node_or_subgraph_ref subgraphref(const subgraph_name& n) |
506 | { |
507 | node_or_subgraph_ref r; |
508 | r.is_subgraph = true; |
509 | r.name = n; |
510 | return r; |
511 | } |
512 | |
513 | typedef std::vector< node_or_subgraph_ref > subgraph_member_list; |
514 | |
515 | struct subgraph_info |
516 | { |
517 | properties def_node_props; |
518 | properties def_edge_props; |
519 | subgraph_member_list members; |
520 | }; |
521 | |
522 | struct parser |
523 | { |
524 | tokenizer the_tokenizer; |
525 | std::vector< token > lookahead; |
526 | parser_result& r; |
527 | std::map< subgraph_name, subgraph_info > subgraphs; |
528 | std::string current_subgraph_name; |
529 | int sgcounter; // Counter for anonymous subgraphs |
530 | std::set< std::pair< node_name, node_name > > |
531 | existing_edges; // Used for checking in strict graphs |
532 | |
533 | subgraph_info& current() { return subgraphs[current_subgraph_name]; } |
534 | properties& current_graph_props() |
535 | { |
536 | return r.graph_props[current_subgraph_name]; |
537 | } |
538 | subgraph_member_list& current_members() { return current().members; } |
539 | |
540 | parser(const std::string& gr, parser_result& result) |
541 | : the_tokenizer(gr), lookahead(), r(result), sgcounter(0) |
542 | { |
543 | current_subgraph_name = "___root___" ; |
544 | current() = subgraph_info(); // Initialize root graph |
545 | current_graph_props().clear(); |
546 | current_members().clear(); |
547 | } |
548 | |
549 | token get() |
550 | { |
551 | if (lookahead.empty()) |
552 | { |
553 | token t = the_tokenizer.get_token(); |
554 | return t; |
555 | } |
556 | else |
557 | { |
558 | token t = lookahead.front(); |
559 | lookahead.erase(position: lookahead.begin()); |
560 | return t; |
561 | } |
562 | } |
563 | |
564 | token peek() |
565 | { |
566 | if (lookahead.empty()) |
567 | { |
568 | lookahead.push_back(x: the_tokenizer.get_token()); |
569 | } |
570 | return lookahead.front(); |
571 | } |
572 | |
573 | void error(const std::string& str) |
574 | { |
575 | boost::throw_exception(e: parse_error(errmsg: str, bad_token: peek())); |
576 | } |
577 | |
578 | void parse_graph(bool want_directed) |
579 | { |
580 | bool is_strict = false; |
581 | bool is_directed = false; |
582 | std::string name; |
583 | if (peek().type == token::kw_strict) |
584 | { |
585 | get(); |
586 | is_strict = true; |
587 | } |
588 | switch (peek().type) |
589 | { |
590 | case token::kw_graph: |
591 | is_directed = false; |
592 | break; |
593 | case token::kw_digraph: |
594 | is_directed = true; |
595 | break; |
596 | default: |
597 | error(str: "Wanted \"graph\" or \"digraph\"" ); |
598 | } |
599 | r.graph_is_directed = is_directed; // Used to check edges |
600 | r.graph_is_strict = is_strict; |
601 | if (want_directed != r.graph_is_directed) |
602 | { |
603 | if (want_directed) |
604 | { |
605 | boost::throw_exception(e: boost::undirected_graph_error()); |
606 | } |
607 | else |
608 | { |
609 | boost::throw_exception(e: boost::directed_graph_error()); |
610 | } |
611 | } |
612 | get(); |
613 | switch (peek().type) |
614 | { |
615 | case token::identifier: |
616 | name = peek().normalized_value; |
617 | get(); |
618 | break; |
619 | case token::left_brace: |
620 | break; |
621 | default: |
622 | error(str: "Wanted a graph name or left brace" ); |
623 | } |
624 | if (peek().type == token::left_brace) |
625 | get(); |
626 | else |
627 | error(str: "Wanted a left brace to start the graph" ); |
628 | parse_stmt_list(); |
629 | if (peek().type == token::right_brace) |
630 | get(); |
631 | else |
632 | error(str: "Wanted a right brace to end the graph" ); |
633 | if (peek().type == token::eof) |
634 | { |
635 | } |
636 | else |
637 | error(str: "Wanted end of file" ); |
638 | } |
639 | |
640 | void parse_stmt_list() |
641 | { |
642 | while (true) |
643 | { |
644 | if (peek().type == token::right_brace) |
645 | return; |
646 | parse_stmt(); |
647 | if (peek().type == token::semicolon) |
648 | get(); |
649 | } |
650 | } |
651 | |
652 | void parse_stmt() |
653 | { |
654 | switch (peek().type) |
655 | { |
656 | case token::kw_node: |
657 | case token::kw_edge: |
658 | case token::kw_graph: |
659 | parse_attr_stmt(); |
660 | break; |
661 | case token::kw_subgraph: |
662 | case token::left_brace: |
663 | case token::identifier: |
664 | { |
665 | token id = get(); |
666 | if (id.type == token::identifier && peek().type == token::equal) |
667 | { // Graph property |
668 | get(); |
669 | if (peek().type != token::identifier) |
670 | error(str: "Wanted identifier as right side of =" ); |
671 | token id2 = get(); |
672 | current_graph_props()[id.normalized_value] |
673 | = id2.normalized_value; |
674 | } |
675 | else |
676 | { |
677 | edge_endpoint ep = parse_endpoint_rest(first_token: id); |
678 | if (peek().type == token::dash_dash |
679 | || peek().type == token::dash_greater) |
680 | { // Edge |
681 | parse_edge_stmt(lhs: ep); |
682 | } |
683 | else |
684 | { |
685 | if (!ep.is_subgraph) |
686 | { // Only nodes can have attribute lists |
687 | // This node already exists because of its first |
688 | // mention (properties set to defaults by |
689 | // parse_node_and_port, called by |
690 | // parse_endpoint_rest) |
691 | properties this_node_props; |
692 | if (peek().type == token::left_bracket) |
693 | { |
694 | parse_attr_list(props&: this_node_props); |
695 | } |
696 | for (properties::const_iterator i |
697 | = this_node_props.begin(); |
698 | i != this_node_props.end(); ++i) |
699 | { |
700 | // Override old properties with same names |
701 | r.nodes[ep.node_ep.name][i->first] = i->second; |
702 | } |
703 | current_members().push_back( |
704 | x: noderef(n: ep.node_ep.name)); |
705 | } |
706 | else |
707 | { |
708 | current_members().push_back( |
709 | x: subgraphref(n: ep.subgraph_ep)); |
710 | } |
711 | } |
712 | } |
713 | break; |
714 | } |
715 | default: |
716 | error(str: "Invalid start token for statement" ); |
717 | } |
718 | } |
719 | |
720 | void parse_attr_stmt() |
721 | { |
722 | switch (get().type) |
723 | { |
724 | case token::kw_graph: |
725 | parse_attr_list(props&: current_graph_props()); |
726 | break; |
727 | case token::kw_node: |
728 | parse_attr_list(props&: current().def_node_props); |
729 | break; |
730 | case token::kw_edge: |
731 | parse_attr_list(props&: current().def_edge_props); |
732 | break; |
733 | default: |
734 | BOOST_ASSERT(!"Bad attr_stmt case" ); |
735 | } |
736 | } |
737 | |
738 | edge_endpoint parse_endpoint() |
739 | { |
740 | switch (peek().type) |
741 | { |
742 | case token::kw_subgraph: |
743 | case token::left_brace: |
744 | case token::identifier: |
745 | { |
746 | token first = get(); |
747 | return parse_endpoint_rest(first_token: first); |
748 | } |
749 | default: |
750 | { |
751 | error(str: "Wanted \"subgraph\", \"{\", or identifier to start node " |
752 | "or subgraph" ); |
753 | return edge_endpoint(); |
754 | } |
755 | } |
756 | } |
757 | |
758 | edge_endpoint parse_endpoint_rest(const token& first_token) |
759 | { |
760 | switch (first_token.type) |
761 | { |
762 | case token::kw_subgraph: |
763 | case token::left_brace: |
764 | return edge_endpoint::subgraph(ep: parse_subgraph(first_token)); |
765 | default: |
766 | return edge_endpoint::node(ep: parse_node_and_port(name: first_token)); |
767 | } |
768 | } |
769 | |
770 | subgraph_name parse_subgraph(const token& first_token) |
771 | { |
772 | std::string name; |
773 | bool is_anonymous = true; |
774 | if (first_token.type == token::kw_subgraph) |
775 | { |
776 | if (peek().type == token::identifier) |
777 | { |
778 | name = get().normalized_value; |
779 | is_anonymous = false; |
780 | } |
781 | } |
782 | if (is_anonymous) |
783 | { |
784 | name = "___subgraph_" |
785 | + boost::lexical_cast< std::string >(arg: ++sgcounter); |
786 | } |
787 | if (subgraphs.find(x: name) == subgraphs.end()) |
788 | { |
789 | subgraphs[name] |
790 | = current(); // Initialize properties and defaults |
791 | subgraphs[name].members.clear(); // Except member list |
792 | } |
793 | if (first_token.type == token::kw_subgraph |
794 | && peek().type != token::left_brace) |
795 | { |
796 | if (is_anonymous) |
797 | error(str: "Subgraph reference needs a name" ); |
798 | return name; |
799 | } |
800 | subgraph_name old_sg = current_subgraph_name; |
801 | current_subgraph_name = name; |
802 | if (peek().type == token::left_brace) |
803 | get(); |
804 | else |
805 | error(str: "Wanted left brace to start subgraph" ); |
806 | parse_stmt_list(); |
807 | if (peek().type == token::right_brace) |
808 | get(); |
809 | else |
810 | error(str: "Wanted right brace to end subgraph" ); |
811 | current_subgraph_name = old_sg; |
812 | return name; |
813 | } |
814 | |
815 | node_and_port parse_node_and_port(const token& name) |
816 | { |
817 | // A node ID is a node name, followed optionally by a port angle and |
818 | // a port location (in either order); a port location is either :id, |
819 | // :id:id, or :(id,id); the last two forms are treated as equivalent |
820 | // although I am not sure about that. |
821 | node_and_port id; |
822 | id.name = name.normalized_value; |
823 | parse_more: |
824 | switch (peek().type) |
825 | { |
826 | case token::at: |
827 | { |
828 | get(); |
829 | if (peek().type != token::identifier) |
830 | error(str: "Wanted identifier as port angle" ); |
831 | if (!id.angle.empty()) |
832 | error(str: "Duplicate port angle" ); |
833 | id.angle = get().normalized_value; |
834 | goto parse_more; |
835 | } |
836 | case token::colon: |
837 | { |
838 | get(); |
839 | if (!id.location.empty()) |
840 | error(str: "Duplicate port location" ); |
841 | switch (peek().type) |
842 | { |
843 | case token::identifier: |
844 | { |
845 | id.location.push_back(x: get().normalized_value); |
846 | switch (peek().type) |
847 | { |
848 | case token::colon: |
849 | { |
850 | get(); |
851 | if (peek().type != token::identifier) |
852 | error(str: "Wanted identifier as port location" ); |
853 | id.location.push_back(x: get().normalized_value); |
854 | goto parse_more; |
855 | } |
856 | default: |
857 | goto parse_more; |
858 | } |
859 | } |
860 | case token::left_paren: |
861 | { |
862 | get(); |
863 | if (peek().type != token::identifier) |
864 | error(str: "Wanted identifier as first element of port " |
865 | "location" ); |
866 | id.location.push_back(x: get().normalized_value); |
867 | if (peek().type != token::comma) |
868 | error(str: "Wanted comma between parts of port location" ); |
869 | get(); |
870 | if (peek().type != token::identifier) |
871 | error(str: "Wanted identifier as second element of port " |
872 | "location" ); |
873 | id.location.push_back(x: get().normalized_value); |
874 | if (peek().type != token::right_paren) |
875 | error( |
876 | str: "Wanted right parenthesis to close port location" ); |
877 | get(); |
878 | goto parse_more; |
879 | } |
880 | default: |
881 | error(str: "Wanted identifier or left parenthesis as start of " |
882 | "port location" ); |
883 | } |
884 | } |
885 | default: |
886 | break; |
887 | } |
888 | if (r.nodes.find(x: id.name) == r.nodes.end()) |
889 | { // First mention |
890 | r.nodes[id.name] = current().def_node_props; |
891 | } |
892 | return id; |
893 | } |
894 | |
895 | void parse_edge_stmt(const edge_endpoint& lhs) |
896 | { |
897 | std::vector< edge_endpoint > nodes_in_chain(1, lhs); |
898 | while (true) |
899 | { |
900 | bool leave_loop = true; |
901 | switch (peek().type) |
902 | { |
903 | case token::dash_dash: |
904 | { |
905 | if (r.graph_is_directed) |
906 | error(str: "Using -- in directed graph" ); |
907 | get(); |
908 | nodes_in_chain.push_back(x: parse_endpoint()); |
909 | leave_loop = false; |
910 | break; |
911 | } |
912 | case token::dash_greater: |
913 | { |
914 | if (!r.graph_is_directed) |
915 | error(str: "Using -> in undirected graph" ); |
916 | get(); |
917 | nodes_in_chain.push_back(x: parse_endpoint()); |
918 | leave_loop = false; |
919 | break; |
920 | } |
921 | default: |
922 | leave_loop = true; |
923 | break; |
924 | } |
925 | if (leave_loop) |
926 | break; |
927 | } |
928 | properties this_edge_props = current().def_edge_props; |
929 | if (peek().type == token::left_bracket) |
930 | parse_attr_list(props&: this_edge_props); |
931 | BOOST_ASSERT(nodes_in_chain.size() |
932 | >= 2); // Should be in node parser otherwise |
933 | for (size_t i = 0; i + 1 < nodes_in_chain.size(); ++i) |
934 | { |
935 | do_orig_edge( |
936 | src: nodes_in_chain[i], tgt: nodes_in_chain[i + 1], props: this_edge_props); |
937 | } |
938 | } |
939 | |
940 | // Do an edge from the file, the edge may need to be expanded if it |
941 | // connects to a subgraph |
942 | void do_orig_edge(const edge_endpoint& src, const edge_endpoint& tgt, |
943 | const properties& props) |
944 | { |
945 | std::set< node_and_port > sources = get_recursive_members(orig_ep: src); |
946 | std::set< node_and_port > targets = get_recursive_members(orig_ep: tgt); |
947 | for (std::set< node_and_port >::const_iterator i = sources.begin(); |
948 | i != sources.end(); ++i) |
949 | { |
950 | for (std::set< node_and_port >::const_iterator j |
951 | = targets.begin(); |
952 | j != targets.end(); ++j) |
953 | { |
954 | do_edge(src: *i, tgt: *j, props); |
955 | } |
956 | } |
957 | } |
958 | |
959 | // Get nodes in an edge_endpoint, recursively |
960 | std::set< node_and_port > get_recursive_members( |
961 | const edge_endpoint& orig_ep) |
962 | { |
963 | std::set< node_and_port > result; |
964 | std::vector< edge_endpoint > worklist(1, orig_ep); |
965 | std::set< subgraph_name > done; |
966 | while (!worklist.empty()) |
967 | { |
968 | edge_endpoint ep = worklist.back(); |
969 | worklist.pop_back(); |
970 | if (ep.is_subgraph) |
971 | { |
972 | if (done.find(x: ep.subgraph_ep) == done.end()) |
973 | { |
974 | done.insert(x: ep.subgraph_ep); |
975 | std::map< subgraph_name, subgraph_info >::const_iterator |
976 | info_i |
977 | = subgraphs.find(x: ep.subgraph_ep); |
978 | if (info_i != subgraphs.end()) |
979 | { |
980 | const subgraph_member_list& members |
981 | = info_i->second.members; |
982 | for (subgraph_member_list::const_iterator i |
983 | = members.begin(); |
984 | i != members.end(); ++i) |
985 | { |
986 | node_or_subgraph_ref ref = *i; |
987 | if (ref.is_subgraph) |
988 | { |
989 | worklist.push_back( |
990 | x: edge_endpoint::subgraph(ep: ref.name)); |
991 | } |
992 | else |
993 | { |
994 | node_and_port np; |
995 | np.name = ref.name; |
996 | worklist.push_back(x: edge_endpoint::node(ep: np)); |
997 | } |
998 | } |
999 | } |
1000 | } |
1001 | } |
1002 | else |
1003 | { |
1004 | result.insert(x: ep.node_ep); |
1005 | } |
1006 | } |
1007 | return result; |
1008 | } |
1009 | |
1010 | // Do a fixed-up edge, with only nodes as endpoints |
1011 | void do_edge(const node_and_port& src, const node_and_port& tgt, |
1012 | const properties& props) |
1013 | { |
1014 | if (r.graph_is_strict) |
1015 | { |
1016 | if (src.name == tgt.name) |
1017 | return; |
1018 | std::pair< node_name, node_name > tag(src.name, tgt.name); |
1019 | if (existing_edges.find(x: tag) != existing_edges.end()) |
1020 | { |
1021 | return; // Parallel edge |
1022 | } |
1023 | existing_edges.insert(x: tag); |
1024 | } |
1025 | edge_info e; |
1026 | e.source = src; |
1027 | e.target = tgt; |
1028 | e.props = props; |
1029 | r.edges.push_back(x: e); |
1030 | } |
1031 | |
1032 | void parse_attr_list(properties& props) |
1033 | { |
1034 | while (true) |
1035 | { |
1036 | if (peek().type == token::left_bracket) |
1037 | get(); |
1038 | else |
1039 | error(str: "Wanted left bracket to start attribute list" ); |
1040 | while (true) |
1041 | { |
1042 | switch (peek().type) |
1043 | { |
1044 | case token::right_bracket: |
1045 | break; |
1046 | case token::identifier: |
1047 | { |
1048 | std::string lhs = get().normalized_value; |
1049 | std::string rhs = "true" ; |
1050 | if (peek().type == token::equal) |
1051 | { |
1052 | get(); |
1053 | if (peek().type != token::identifier) |
1054 | error( |
1055 | str: "Wanted identifier as value of attribute" ); |
1056 | rhs = get().normalized_value; |
1057 | } |
1058 | props[lhs] = rhs; |
1059 | break; |
1060 | } |
1061 | default: |
1062 | error(str: "Wanted identifier as name of attribute" ); |
1063 | } |
1064 | if (peek().type == token::comma |
1065 | || peek().type == token::semicolon) |
1066 | get(); |
1067 | else if (peek().type == token::right_bracket) |
1068 | break; |
1069 | } |
1070 | if (peek().type == token::right_bracket) |
1071 | get(); |
1072 | else |
1073 | error(str: "Wanted right bracket to end attribute list" ); |
1074 | if (peek().type != token::left_bracket) |
1075 | break; |
1076 | } |
1077 | } |
1078 | }; |
1079 | |
1080 | void parse_graphviz_from_string( |
1081 | const std::string& str, parser_result& result, bool want_directed) |
1082 | { |
1083 | parser p(str, result); |
1084 | p.parse_graph(want_directed); |
1085 | } |
1086 | |
1087 | // Some debugging stuff |
1088 | std::ostream& operator<<(std::ostream& o, const node_and_port& n) |
1089 | { |
1090 | o << n.name; |
1091 | for (size_t i = 0; i < n.location.size(); ++i) |
1092 | { |
1093 | o << ":" << n.location[i]; |
1094 | } |
1095 | if (!n.angle.empty()) |
1096 | o << "@" << n.angle; |
1097 | return o; |
1098 | } |
1099 | |
1100 | // Can't be operator<< because properties is just an std::map |
1101 | std::string props_to_string(const properties& props) |
1102 | { |
1103 | std::string result = "[" ; |
1104 | for (properties::const_iterator i = props.begin(); i != props.end(); |
1105 | ++i) |
1106 | { |
1107 | if (i != props.begin()) |
1108 | result += ", " ; |
1109 | result += i->first + "=" + i->second; |
1110 | } |
1111 | result += "]" ; |
1112 | return result; |
1113 | } |
1114 | |
1115 | void translate_results_to_graph( |
1116 | const parser_result& r, ::boost::detail::graph::mutate_graph* mg) |
1117 | { |
1118 | typedef boost::detail::graph::edge_t edge; |
1119 | for (std::map< node_name, properties >::const_iterator i |
1120 | = r.nodes.begin(); |
1121 | i != r.nodes.end(); ++i) |
1122 | { |
1123 | // std::cerr << i->first << " " << props_to_string(i->second) << |
1124 | // std::endl; |
1125 | mg->do_add_vertex(node: i->first); |
1126 | for (properties::const_iterator j = i->second.begin(); |
1127 | j != i->second.end(); ++j) |
1128 | { |
1129 | mg->set_node_property(key: j->first, node: i->first, value: j->second); |
1130 | } |
1131 | } |
1132 | for (std::vector< edge_info >::const_iterator i = r.edges.begin(); |
1133 | i != r.edges.end(); ++i) |
1134 | { |
1135 | const edge_info& ei = *i; |
1136 | // std::cerr << ei.source << " -> " << ei.target << " " << |
1137 | // props_to_string(ei.props) << std::endl; |
1138 | edge e = edge::new_edge(); |
1139 | mg->do_add_edge(edge: e, source: ei.source.name, target: ei.target.name); |
1140 | for (properties::const_iterator j = ei.props.begin(); |
1141 | j != ei.props.end(); ++j) |
1142 | { |
1143 | mg->set_edge_property(key: j->first, edge: e, value: j->second); |
1144 | } |
1145 | } |
1146 | std::map< subgraph_name, properties >::const_iterator root_graph_props_i |
1147 | = r.graph_props.find(x: "___root___" ); |
1148 | BOOST_ASSERT( |
1149 | root_graph_props_i != r.graph_props.end()); // Should not happen |
1150 | const properties& root_graph_props = root_graph_props_i->second; |
1151 | // std::cerr << "ending graph " << props_to_string(root_graph_props) << |
1152 | // std::endl; |
1153 | for (properties::const_iterator i = root_graph_props.begin(); |
1154 | i != root_graph_props.end(); ++i) |
1155 | { |
1156 | mg->set_graph_property(key: i->first, value: i->second); |
1157 | } |
1158 | mg->finish_building_graph(); |
1159 | } |
1160 | |
1161 | } // end namespace read_graphviz_detail |
1162 | |
1163 | namespace detail |
1164 | { |
1165 | namespace graph |
1166 | { |
1167 | |
1168 | BOOST_GRAPH_DECL bool read_graphviz_new( |
1169 | const std::string& str, boost::detail::graph::mutate_graph* mg) |
1170 | { |
1171 | read_graphviz_detail::parser_result parsed_file; |
1172 | read_graphviz_detail::parse_graphviz_from_string( |
1173 | str, result&: parsed_file, want_directed: mg->is_directed()); |
1174 | read_graphviz_detail::translate_results_to_graph(r: parsed_file, mg); |
1175 | return true; |
1176 | } |
1177 | |
1178 | } // end namespace graph |
1179 | } // end namespace detail |
1180 | |
1181 | } // end namespace boost |
1182 | |
1183 | // GraphViz format notes (tested using "dot version 1.13 (v16) (Mon August 23, |
1184 | // 2004)", grammar from references in read_graphviz_new.hpp): |
1185 | |
1186 | // Subgraphs don't nest (all a0 subgraphs are the same thing), but a node or |
1187 | // subgraph can have multiple parents (sources online say that the layout |
1188 | // algorithms can't handle non-tree structures of clusters, but it seems to |
1189 | // read them the same from the file). The containment relation is required to |
1190 | // be a DAG, though; it appears that a node or subgraph can't be moved into an |
1191 | // ancestor of a subgraph where it already was (we don't enforce that but do a |
1192 | // DFS when finding members to prevent cycles). Nodes get their properties by |
1193 | // when they are first mentioned, and can only have them overridden after that |
1194 | // by explicit properties on that particular node. Closing and reopening the |
1195 | // same subgraph name adds to its members, and graph properties and node/edge |
1196 | // defaults are preserved in that subgraph. The members of a subgraph used in |
1197 | // an edge are gathered when the edge is read, even if new members are added to |
1198 | // the subgraph later. Ports are allowed in a lot more places in the grammar |
1199 | // than Dot uses. For example, declaring a node seems to ignore ports, and I |
1200 | // don't think it's possible to set properties on a particular port. Adding an |
1201 | // edge between two ports on the same node seems to make Dot unhappy (crashed |
1202 | // for me). |
1203 | |
1204 | // Test graph for GraphViz behavior on strange combinations of subgraphs and |
1205 | // such. I don't have anywhere else to put this file. |
1206 | |
1207 | #if 0 |
1208 | dIGRaph foo { |
1209 | node [color=blue] |
1210 | subgraph a -> b |
1211 | subgraph a {c} |
1212 | subgraph a -> d |
1213 | subgraph a {node [color=red]; e} |
1214 | subgraph a -> f |
1215 | subgraph a {g} -> h |
1216 | subgraph a {node [color=green]; i} -> j |
1217 | subgraph a {node [color=yellow]} |
1218 | |
1219 | subgraph a0 {node [color=black]; subgraph a1 {node [color=white]}} |
1220 | node [color=pink] zz |
1221 | subgraph a0 {x1} |
1222 | subgraph a0 {subgraph a1 {x2}} |
1223 | |
1224 | subgraph a0 -> x3 |
1225 | subgraph a0 {subgraph a1 -> x3} |
1226 | x3 |
1227 | subgraph a0 {subgraph a0 {node [color=xxx]; x2} x7} |
1228 | x2 [color=yyy] |
1229 | subgraph cluster_ax {foo; subgraph a0} |
1230 | subgraph a0 {foo2} |
1231 | subgraph cluster_ax {foo3} |
1232 | // subgraph a0 -> subgraph a0 |
1233 | |
1234 | bgcolor=yellow |
1235 | subgraph cluster_a2 {y1} |
1236 | // y1:n -> y1:(s,3)@se |
1237 | y1@se [color=green] |
1238 | y1@n [color=red] |
1239 | } |
1240 | #endif |
1241 | |