read_graphviz_new.cpp source code [boost/libs/graph/src/read_graphviz_new.cpp]

1	// Copyright 2004-9 Trustees of Indiana University
2
3	// Distributed under the Boost Software License, Version 1.0.
4	// (See accompanying file LICENSE_1_0.txt or copy at
5	// http://www.boost.org/LICENSE_1_0.txt)
6
7	//
8	// read_graphviz_new.cpp -
9	// Initialize a model of the BGL's MutableGraph concept and an associated
10	// collection of property maps using a graph expressed in the GraphViz
11	// DOT Language.
12	//
13	// Based on the grammar found at:
14	// https://web.archive.org/web/20041213234742/http://www.graphviz.org/cvs/doc/info/lang.html
15	//
16	// Jeremiah rewrite used grammar found at:
17	// http://www.graphviz.org/doc/info/lang.html
18	// and page 34 or http://www.graphviz.org/pdf/dotguide.pdf
19	//
20	// See documentation for this code at:
21	// http://www.boost.org/libs/graph/doc/read_graphviz.html
22	//
23
24	// Author: Jeremiah Willcock
25	// Ronald Garcia
26	//
27
28	#define BOOST_GRAPH_SOURCE
29	#include <boost/assert.hpp>
30	#include <boost/ref.hpp>
31	#include <boost/function/function2.hpp>
32	#include <boost/property_map/dynamic_property_map.hpp>
33	#include <boost/graph/graph_traits.hpp>
34	#include <boost/detail/workaround.hpp>
35	#include <boost/algorithm/string/case_conv.hpp>
36	#include <cstdlib>
37	#include <algorithm>
38	#include <exception> // for std::exception
39	#include <iostream>
40	#include <map>
41	#include <set>
42	#include <string>
43	#include <vector>
44	#include <utility>
45	#include <boost/throw_exception.hpp>
46	#include <boost/regex.hpp>
47	#include <boost/function.hpp>
48	#include <boost/graph/dll_import_export.hpp>
49	#include <boost/graph/graphviz.hpp>
50
51	namespace boost
52	{
53
54	namespace read_graphviz_detail
55	{
56	struct token
57	{
58	enum token_type
59	{
60	kw_strict,
61	kw_graph,
62	kw_digraph,
63	kw_node,
64	kw_edge,
65	kw_subgraph,
66	left_brace,
67	right_brace,
68	semicolon,
69	equal,
70	left_bracket,
71	right_bracket,
72	comma,
73	colon,
74	dash_greater,
75	dash_dash,
76	plus,
77	left_paren,
78	right_paren,
79	at,
80	identifier,
81	quoted_string, // Only used internally in tokenizer
82	eof,
83	invalid
84	};
85	token_type type;
86	std::string normalized_value; // May have double-quotes removed and/or
87	// some escapes replaced
88	token(token_type type, const std::string& normalized_value)
89	: type(type), normalized_value (normalized_value)
90	{
91	}
92	token() : type(invalid), normalized_value ("") {}
93	friend std::ostream& operator<<(std::ostream& o, const token& t)
94	{
95	switch (t.type)
96	{
97	case token::kw_strict:
98	o << "<strict>";
99	break;
100	case token::kw_graph:
101	o << "<graph>";
102	break;
103	case token::kw_digraph:
104	o << "<digraph>";
105	break;
106	case token::kw_node:
107	o << "<node>";
108	break;
109	case token::kw_edge:
110	o << "<edge>";
111	break;
112	case token::kw_subgraph:
113	o << "<subgraph>";
114	break;
115	case token::left_brace:
116	o << "<left_brace>";
117	break;
118	case token::right_brace:
119	o << "<right_brace>";
120	break;
121	case token::semicolon:
122	o << "<semicolon>";
123	break;
124	case token::equal:
125	o << "<equal>";
126	break;
127	case token::left_bracket:
128	o << "<left_bracket>";
129	break;
130	case token::right_bracket:
131	o << "<right_bracket>";
132	break;
133	case token::comma:
134	o << "<comma>";
135	break;
136	case token::colon:
137	o << "<colon>";
138	break;
139	case token::dash_greater:
140	o << "<dash-greater>";
141	break;
142	case token::dash_dash:
143	o << "<dash-dash>";
144	break;
145	case token::plus:
146	o << "<plus>";
147	break;
148	case token::left_paren:
149	o << "<left_paren>";
150	break;
151	case token::right_paren:
152	o << "<right_paren>";
153	break;
154	case token::at:
155	o << "<at>";
156	break;
157	case token::identifier:
158	o << "<identifier>";
159	break;
160	case token::quoted_string:
161	o << "<quoted_string>";
162	break;
163	case token::eof:
164	o << "<eof>";
165	break;
166	default:
167	o << "<invalid type>";
168	break;
169	}
170	o << " '" << t.normalized_value << "'";
171	return o;
172	}
173	};
174
175	bad_graphviz_syntax lex_error(const std::string& errmsg, char bad_char)
176	{
177	if (bad_char == `'\0'`)
178	{
179	return bad_graphviz_syntax (errmsg + " (at end of input)");
180	}
181	else
182	{
183	return bad_graphviz_syntax (
184	errmsg + " (char is '" + bad_char + "')");
185	}
186	}
187
188	bad_graphviz_syntax parse_error(
189	const std::string& errmsg, const token& bad_token)
190	{
191	return bad_graphviz_syntax (errmsg + " (token is \""
192	+ boost::lexical_cast< std::string >(arg: bad_token) + "\")");
193	}
194
195	struct tokenizer
196	{
197	std::string::const_iterator begin, end;
198	std::vector< token > lookahead;
199	// Precomputed regexes
200	boost::regex stuff_to_skip;
201	boost::regex basic_id_token;
202	boost::regex punctuation_token;
203	boost::regex number_token;
204	boost::regex quoted_string_token;
205	boost::regex xml_tag_token;
206	boost::regex cdata;
207
208	tokenizer(const std::string& str) : begin(str.begin()), end(str.end())
209	{
210	std::string end_of_token = "(?=(?:\\W))";
211	std::string whitespace = "(?:\\s+)";
212	std::string slash_slash_comment = "(?://.*?$)";
213	std::string slash_star_comment = "(?:/\\.?\\*/)";
214	std::string hash_comment = "(?:^#.*?$)";
215	std::string backslash_newline = "(?:[\\\\][\\n])";
216	stuff_to_skip = "\\A(?:" + whitespace + "\|" + slash_slash_comment
217	+ "\|" + slash_star_comment + "\|" + hash_comment + "\|"
218	+ backslash_newline + ")*";
219	basic_id_token = "\\A([[:alpha:]_](?:\\w*))";
220	punctuation_token = "\\A([][{};=,:+()@]\|[-][>-])";
221	number_token = "\\A([-]?(?:(?:\\.\\d+)\|(?:\\d+(?:\\.\\d*)?)))";
222	quoted_string_token = "\\A(\"(?:[^\"\\\\]\|(?:[\\\\].))*\")";
223	xml_tag_token
224	= "\\A<(/?)(?:[^!?'\"]\|(?:'[^']?')\|(?:\"[^\"]?\"))*?(/?)>";
225	cdata = "\\A\\Q<![CDATA[\\E.*?\\Q]]>\\E";
226	}
227
228	void skip()
229	{
230	boost::match_results< std::string::const_iterator > results;
231	#ifndef NDEBUG
232	bool found =
233	#endif
234	boost::regex_search(first: begin, last: end, m&: results, e: stuff_to_skip);
235	#ifndef NDEBUG
236	BOOST_ASSERT(found);
237	#endif
238	boost::sub_match< std::string::const_iterator > sm1
239	= results.suffix();
240	BOOST_ASSERT(sm1.second == end);
241	begin = sm1.first;
242	}
243
244	token get_token_raw()
245	{
246	if (!lookahead.empty())
247	{
248	token t = lookahead.front();
249	lookahead.erase(position: lookahead.begin());
250	return t;
251	}
252	skip();
253	if (begin == end)
254	return token (token::eof, "");
255	// Look for keywords first
256	bool found;
257	boost::match_results< std::string::const_iterator > results;
258	found = boost::regex_search(first: begin, last: end, m&: results, e: basic_id_token);
259	if (found)
260	{
261	std::string str = results [`1`].str();
262	std::string str_lower = boost::algorithm::to_lower_copy(Input: str);
263	begin = results.suffix().first;
264	if (str_lower == "strict")
265	{
266	return token (token::kw_strict, str);
267	}
268	else if (str_lower == "graph")
269	{
270	return token (token::kw_graph, str);
271	}
272	else if (str_lower == "digraph")
273	{
274	return token (token::kw_digraph, str);
275	}
276	else if (str_lower == "node")
277	{
278	return token (token::kw_node, str);
279	}
280	else if (str_lower == "edge")
281	{
282	return token (token::kw_edge, str);
283	}
284	else if (str_lower == "subgraph")
285	{
286	return token (token::kw_subgraph, str);
287	}
288	else
289	{
290	return token (token::identifier, str);
291	}
292	}
293	found = boost::regex_search(first: begin, last: end, m&: results, e: punctuation_token);
294	if (found)
295	{
296	std::string str = results [`1`].str();
297	begin = results.suffix().first;
298	switch (str [`0`])
299	{
300	case `'['`:
301	return token (token::left_bracket, str);
302	case `']'`:
303	return token (token::right_bracket, str);
304	case `'{'`:
305	return token (token::left_brace, str);
306	case `'}'`:
307	return token (token::right_brace, str);
308	case `';'`:
309	return token (token::semicolon, str);
310	case `'='`:
311	return token (token::equal, str);
312	case `','`:
313	return token (token::comma, str);
314	case `':'`:
315	return token (token::colon, str);
316	case `'+'`:
317	return token (token::plus, str);
318	case `'('`:
319	return token (token::left_paren, str);
320	case `')'`:
321	return token (token::right_paren, str);
322	case `'@'`:
323	return token (token::at, str);
324	case `'-'`:
325	{
326	switch (str [`1`])
327	{
328	case `'-'`:
329	return token (token::dash_dash, str);
330	case `'>'`:
331	return token (token::dash_greater, str);
332	default:
333	BOOST_ASSERT(!"Definition of punctuation_token does "
334	"not match switch statement");
335	}
336	// Prevent static analyzers complaining about fallthrough:
337	break;
338	}
339	default:
340	BOOST_ASSERT(!"Definition of punctuation_token does not "
341	"match switch statement");
342	}
343	}
344	found = boost::regex_search(first: begin, last: end, m&: results, e: number_token);
345	if (found)
346	{
347	std::string str = results [`1`].str();
348	begin = results.suffix().first;
349	return token (token::identifier, str);
350	}
351	found
352	= boost::regex_search(first: begin, last: end, m&: results, e: quoted_string_token);
353	if (found)
354	{
355	std::string str = results [`1`].str();
356	begin = results.suffix().first;
357	// Remove the beginning and ending quotes
358	BOOST_ASSERT(str.size() >= `2`);
359	str.erase(position: str.begin());
360	str.erase(position: str.end() - `1`);
361	// Unescape quotes in the middle, but nothing else (see format
362	// spec)
363	for (size_t i = `0`; i + `1` < str.size() / May change /; ++i)
364	{
365	if (str [i] == `'\\'` && str [i + `1`] == `'"'`)
366	{
367	str.erase(position: str.begin() + i);
368	// Don't need to adjust i
369	}
370	else if (str [i] == `'\\'` && str [i + `1`] == `'\n'`)
371	{
372	str.erase(position: str.begin() + i);
373	str.erase(position: str.begin() + i);
374	--i; // Invert ++ that will be applied
375	}
376	}
377	return token (token::quoted_string, str);
378	}
379	if (*begin == `'<'`)
380	{
381	std::string::const_iterator saved_begin = begin;
382	int counter = `0`;
383	do
384	{
385	if (begin == end)
386	throw_lex_error(errmsg: "Unclosed HTML string");
387	if (*begin != `'<'`)
388	{
389	++begin;
390	continue;
391	}
392	found = boost::regex_search(
393	first: begin, last: end, m&: results, e: xml_tag_token);
394	if (found)
395	{
396	begin = results.suffix().first;
397	if (results [`1`].str() == "/")
398	{ // Close tag
399	--counter;
400	}
401	else if (results [`2`].str() == "/")
402	{ // Empty tag
403	}
404	else
405	{ // Open tag
406	++counter;
407	}
408	continue;
409	}
410	found = boost::regex_search(first: begin, last: end, m&: results, e: cdata);
411	if (found)
412	{
413	begin = results.suffix().first;
414	continue;
415	}
416	throw_lex_error(errmsg: "Invalid contents in HTML string");
417	} while (counter > `0`);
418	return token (
419	token::identifier, std::string (saved_begin, begin));
420	}
421	else
422	{
423	throw_lex_error(errmsg: "Invalid character");
424	return token ();
425	}
426	}
427
428	token peek_token_raw()
429	{
430	if (lookahead.empty())
431	{
432	token t = get_token_raw();
433	lookahead.push_back(x: t);
434	}
435	return lookahead.front();
436	}
437
438	token get_token()
439	{ // Handle string concatenation
440	token t = get_token_raw();
441	if (t.type != token::quoted_string)
442	return t;
443	std::string str = t.normalized_value;
444	while (peek_token_raw().type == token::plus)
445	{
446	get_token_raw();
447	token t2 = get_token_raw();
448	if (t2.type != token::quoted_string)
449	{
450	throw_lex_error(
451	errmsg: "Must have quoted string after string concatenation");
452	}
453	str += t2.normalized_value;
454	}
455	return token (
456	token::identifier, str); // Note that quoted_string does not get
457	// passed to the parser
458	}
459
460	void throw_lex_error(const std::string& errmsg)
461	{
462	boost::throw_exception(
463	e: lex_error(errmsg, bad_char: (begin == end ? `'\0'` : *begin)));
464	}
465	};
466
467	struct edge_endpoint
468	{
469	bool is_subgraph;
470	node_and_port node_ep;
471	subgraph_name subgraph_ep;
472
473	static edge_endpoint node(const node_and_port& ep)
474	{
475	edge_endpoint r;
476	r.is_subgraph = false;
477	r.node_ep = ep;
478	return r;
479	}
480
481	static edge_endpoint subgraph(const subgraph_name& ep)
482	{
483	edge_endpoint r;
484	r.is_subgraph = true;
485	r.subgraph_ep = ep;
486	return r;
487	}
488	};
489
490	struct node_or_subgraph_ref
491	{
492	bool is_subgraph;
493	std::string
494	name; // Name for subgraphs or nodes, "___root___" for root graph
495	};
496
497	static node_or_subgraph_ref noderef(const node_name& n)
498	{
499	node_or_subgraph_ref r;
500	r.is_subgraph = false;
501	r.name = n;
502	return r;
503	}
504
505	static node_or_subgraph_ref subgraphref(const subgraph_name& n)
506	{
507	node_or_subgraph_ref r;
508	r.is_subgraph = true;
509	r.name = n;
510	return r;
511	}
512
513	typedef std::vector< node_or_subgraph_ref > subgraph_member_list;
514
515	struct subgraph_info
516	{
517	properties def_node_props;
518	properties def_edge_props;
519	subgraph_member_list members;
520	};
521
522	struct parser
523	{
524	tokenizer the_tokenizer;
525	std::vector< token > lookahead;
526	parser_result& r;
527	std::map< subgraph_name, subgraph_info > subgraphs;
528	std::string current_subgraph_name;
529	int sgcounter; // Counter for anonymous subgraphs
530	std::set< std::pair< node_name, node_name > >
531	existing_edges; // Used for checking in strict graphs
532
533	subgraph_info& current() { return subgraphs [current_subgraph_name]; }
534	properties& current_graph_props()
535	{
536	return r.graph_props [current_subgraph_name];
537	}
538	subgraph_member_list& current_members() { return current().members; }
539
540	parser(const std::string& gr, parser_result& result)
541	: the_tokenizer (gr), lookahead (), r(result), sgcounter(`0`)
542	{
543	current_subgraph_name = "___root___";
544	current() = subgraph_info (); // Initialize root graph
545	current_graph_props().clear();
546	current_members().clear();
547	}
548
549	token get()
550	{
551	if (lookahead.empty())
552	{
553	token t = the_tokenizer.get_token();
554	return t;
555	}
556	else
557	{
558	token t = lookahead.front();
559	lookahead.erase(position: lookahead.begin());
560	return t;
561	}
562	}
563
564	token peek()
565	{
566	if (lookahead.empty())
567	{
568	lookahead.push_back(x: the_tokenizer.get_token());
569	}
570	return lookahead.front();
571	}
572
573	void error(const std::string& str)
574	{
575	boost::throw_exception(e: parse_error(errmsg: str, bad_token: peek()));
576	}
577
578	void parse_graph(bool want_directed)
579	{
580	bool is_strict = false;
581	bool is_directed = false;
582	std::string name;
583	if (peek().type == token::kw_strict)
584	{
585	get();
586	is_strict = true;
587	}
588	switch (peek().type)
589	{
590	case token::kw_graph:
591	is_directed = false;
592	break;
593	case token::kw_digraph:
594	is_directed = true;
595	break;
596	default:
597	error(str: "Wanted \"graph\" or \"digraph\"");
598	}
599	r.graph_is_directed = is_directed; // Used to check edges
600	r.graph_is_strict = is_strict;
601	if (want_directed != r.graph_is_directed)
602	{
603	if (want_directed)
604	{
605	boost::throw_exception(e: boost::undirected_graph_error ());
606	}
607	else
608	{
609	boost::throw_exception(e: boost::directed_graph_error ());
610	}
611	}
612	get();
613	switch (peek().type)
614	{
615	case token::identifier:
616	name = peek().normalized_value;
617	get();
618	break;
619	case token::left_brace:
620	break;
621	default:
622	error(str: "Wanted a graph name or left brace");
623	}
624	if (peek().type == token::left_brace)
625	get();
626	else
627	error(str: "Wanted a left brace to start the graph");
628	parse_stmt_list();
629	if (peek().type == token::right_brace)
630	get();
631	else
632	error(str: "Wanted a right brace to end the graph");
633	if (peek().type == token::eof)
634	{
635	}
636	else
637	error(str: "Wanted end of file");
638	}
639
640	void parse_stmt_list()
641	{
642	while (true)
643	{
644	if (peek().type == token::right_brace)
645	return;
646	parse_stmt();
647	if (peek().type == token::semicolon)
648	get();
649	}
650	}
651
652	void parse_stmt()
653	{
654	switch (peek().type)
655	{
656	case token::kw_node:
657	case token::kw_edge:
658	case token::kw_graph:
659	parse_attr_stmt();
660	break;
661	case token::kw_subgraph:
662	case token::left_brace:
663	case token::identifier:
664	{
665	token id = get();
666	if (id.type == token::identifier && peek().type == token::equal)
667	{ // Graph property
668	get();
669	if (peek().type != token::identifier)
670	error(str: "Wanted identifier as right side of =");
671	token id2 = get();
672	current_graph_props()[id.normalized_value]
673	= id2.normalized_value;
674	}
675	else
676	{
677	edge_endpoint ep = parse_endpoint_rest(first_token: id);
678	if (peek().type == token::dash_dash
679	\|\| peek().type == token::dash_greater)
680	{ // Edge
681	parse_edge_stmt(lhs: ep);
682	}
683	else
684	{
685	if (!ep.is_subgraph)
686	{ // Only nodes can have attribute lists
687	// This node already exists because of its first
688	// mention (properties set to defaults by
689	// parse_node_and_port, called by
690	// parse_endpoint_rest)
691	properties this_node_props;
692	if (peek().type == token::left_bracket)
693	{
694	parse_attr_list(props&: this_node_props);
695	}
696	for (properties::const_iterator i
697	= this_node_props.begin();
698	i != this_node_props.end(); ++i)
699	{
700	// Override old properties with same names
701	r.nodes [ep.node_ep.name][i ->first] = i ->second;
702	}
703	current_members().push_back(
704	x: noderef(n: ep.node_ep.name));
705	}
706	else
707	{
708	current_members().push_back(
709	x: subgraphref(n: ep.subgraph_ep));
710	}
711	}
712	}
713	break;
714	}
715	default:
716	error(str: "Invalid start token for statement");
717	}
718	}
719
720	void parse_attr_stmt()
721	{
722	switch (get().type)
723	{
724	case token::kw_graph:
725	parse_attr_list(props&: current_graph_props());
726	break;
727	case token::kw_node:
728	parse_attr_list(props&: current().def_node_props);
729	break;
730	case token::kw_edge:
731	parse_attr_list(props&: current().def_edge_props);
732	break;
733	default:
734	BOOST_ASSERT(!"Bad attr_stmt case");
735	}
736	}
737
738	edge_endpoint parse_endpoint()
739	{
740	switch (peek().type)
741	{
742	case token::kw_subgraph:
743	case token::left_brace:
744	case token::identifier:
745	{
746	token first = get();
747	return parse_endpoint_rest(first_token: first);
748	}
749	default:
750	{
751	error(str: "Wanted \"subgraph\", \"{\", or identifier to start node "
752	"or subgraph");
753	return edge_endpoint ();
754	}
755	}
756	}
757
758	edge_endpoint parse_endpoint_rest(const token& first_token)
759	{
760	switch (first_token.type)
761	{
762	case token::kw_subgraph:
763	case token::left_brace:
764	return edge_endpoint::subgraph(ep: parse_subgraph(first_token));
765	default:
766	return edge_endpoint::node(ep: parse_node_and_port(name: first_token));
767	}
768	}
769
770	subgraph_name parse_subgraph(const token& first_token)
771	{
772	std::string name;
773	bool is_anonymous = true;
774	if (first_token.type == token::kw_subgraph)
775	{
776	if (peek().type == token::identifier)
777	{
778	name = get().normalized_value;
779	is_anonymous = false;
780	}
781	}
782	if (is_anonymous)
783	{
784	name = "___subgraph_"
785	+ boost::lexical_cast< std::string >(arg: ++sgcounter);
786	}
787	if (subgraphs.find(x: name) == subgraphs.end())
788	{
789	subgraphs [name]
790	= current(); // Initialize properties and defaults
791	subgraphs [name].members.clear(); // Except member list
792	}
793	if (first_token.type == token::kw_subgraph
794	&& peek().type != token::left_brace)
795	{
796	if (is_anonymous)
797	error(str: "Subgraph reference needs a name");
798	return name;
799	}
800	subgraph_name old_sg = current_subgraph_name;
801	current_subgraph_name = name;
802	if (peek().type == token::left_brace)
803	get();
804	else
805	error(str: "Wanted left brace to start subgraph");
806	parse_stmt_list();
807	if (peek().type == token::right_brace)
808	get();
809	else
810	error(str: "Wanted right brace to end subgraph");
811	current_subgraph_name = old_sg;
812	return name;
813	}
814
815	node_and_port parse_node_and_port(const token& name)
816	{
817	// A node ID is a node name, followed optionally by a port angle and
818	// a port location (in either order); a port location is either :id,
819	// :id:id, or :(id,id); the last two forms are treated as equivalent
820	// although I am not sure about that.
821	node_and_port id;
822	id.name = name.normalized_value;
823	parse_more:
824	switch (peek().type)
825	{
826	case token::at:
827	{
828	get();
829	if (peek().type != token::identifier)
830	error(str: "Wanted identifier as port angle");
831	if (!id.angle.empty())
832	error(str: "Duplicate port angle");
833	id.angle = get().normalized_value;
834	goto parse_more;
835	}
836	case token::colon:
837	{
838	get();
839	if (!id.location.empty())
840	error(str: "Duplicate port location");
841	switch (peek().type)
842	{
843	case token::identifier:
844	{
845	id.location.push_back(x: get().normalized_value);
846	switch (peek().type)
847	{
848	case token::colon:
849	{
850	get();
851	if (peek().type != token::identifier)
852	error(str: "Wanted identifier as port location");
853	id.location.push_back(x: get().normalized_value);
854	goto parse_more;
855	}
856	default:
857	goto parse_more;
858	}
859	}
860	case token::left_paren:
861	{
862	get();
863	if (peek().type != token::identifier)
864	error(str: "Wanted identifier as first element of port "
865	"location");
866	id.location.push_back(x: get().normalized_value);
867	if (peek().type != token::comma)
868	error(str: "Wanted comma between parts of port location");
869	get();
870	if (peek().type != token::identifier)
871	error(str: "Wanted identifier as second element of port "
872	"location");
873	id.location.push_back(x: get().normalized_value);
874	if (peek().type != token::right_paren)
875	error(
876	str: "Wanted right parenthesis to close port location");
877	get();
878	goto parse_more;
879	}
880	default:
881	error(str: "Wanted identifier or left parenthesis as start of "
882	"port location");
883	}
884	}
885	default:
886	break;
887	}
888	if (r.nodes.find(x: id.name) == r.nodes.end())
889	{ // First mention
890	r.nodes [id.name] = current().def_node_props;
891	}
892	return id;
893	}
894
895	void parse_edge_stmt(const edge_endpoint& lhs)
896	{
897	std::vector< edge_endpoint > nodes_in_chain(`1`, lhs);
898	while (true)
899	{
900	bool leave_loop = true;
901	switch (peek().type)
902	{
903	case token::dash_dash:
904	{
905	if (r.graph_is_directed)
906	error(str: "Using -- in directed graph");
907	get();
908	nodes_in_chain.push_back(x: parse_endpoint());
909	leave_loop = false;
910	break;
911	}
912	case token::dash_greater:
913	{
914	if (!r.graph_is_directed)
915	error(str: "Using -> in undirected graph");
916	get();
917	nodes_in_chain.push_back(x: parse_endpoint());
918	leave_loop = false;
919	break;
920	}
921	default:
922	leave_loop = true;
923	break;
924	}
925	if (leave_loop)
926	break;
927	}
928	properties this_edge_props = current().def_edge_props;
929	if (peek().type == token::left_bracket)
930	parse_attr_list(props&: this_edge_props);
931	BOOST_ASSERT(nodes_in_chain.size()
932	>= `2`); // Should be in node parser otherwise
933	for (size_t i = `0`; i + `1` < nodes_in_chain.size(); ++i)
934	{
935	do_orig_edge(
936	src: nodes_in_chain [i], tgt: nodes_in_chain [i + `1`], props: this_edge_props);
937	}
938	}
939
940	// Do an edge from the file, the edge may need to be expanded if it
941	// connects to a subgraph
942	void do_orig_edge(const edge_endpoint& src, const edge_endpoint& tgt,
943	const properties& props)
944	{
945	std::set< node_and_port > sources = get_recursive_members(orig_ep: src);
946	std::set< node_and_port > targets = get_recursive_members(orig_ep: tgt);
947	for (std::set< node_and_port >::const_iterator i = sources.begin();
948	i != sources.end(); ++i)
949	{
950	for (std::set< node_and_port >::const_iterator j
951	= targets.begin();
952	j != targets.end(); ++j)
953	{
954	do_edge(src: i, tgt: j, props);
955	}
956	}
957	}
958
959	// Get nodes in an edge_endpoint, recursively
960	std::set< node_and_port > get_recursive_members(
961	const edge_endpoint& orig_ep)
962	{
963	std::set< node_and_port > result;
964	std::vector< edge_endpoint > worklist(`1`, orig_ep);
965	std::set< subgraph_name > done;
966	while (!worklist.empty())
967	{
968	edge_endpoint ep = worklist.back();
969	worklist.pop_back();
970	if (ep.is_subgraph)
971	{
972	if (done.find(x: ep.subgraph_ep) == done.end())
973	{
974	done.insert(x: ep.subgraph_ep);
975	std::map< subgraph_name, subgraph_info >::const_iterator
976	info_i
977	= subgraphs.find(x: ep.subgraph_ep);
978	if (info_i != subgraphs.end())
979	{
980	const subgraph_member_list& members
981	= info_i ->second.members;
982	for (subgraph_member_list::const_iterator i
983	= members.begin();
984	i != members.end(); ++i)
985	{
986	node_or_subgraph_ref ref = *i;
987	if (ref.is_subgraph)
988	{
989	worklist.push_back(
990	x: edge_endpoint::subgraph(ep: ref.name));
991	}
992	else
993	{
994	node_and_port np;
995	np.name = ref.name;
996	worklist.push_back(x: edge_endpoint::node(ep: np));
997	}
998	}
999	}
1000	}
1001	}
1002	else
1003	{
1004	result.insert(x: ep.node_ep);
1005	}
1006	}
1007	return result;
1008	}
1009
1010	// Do a fixed-up edge, with only nodes as endpoints
1011	void do_edge(const node_and_port& src, const node_and_port& tgt,
1012	const properties& props)
1013	{
1014	if (r.graph_is_strict)
1015	{
1016	if (src.name == tgt.name)
1017	return;
1018	std::pair< node_name, node_name > tag(src.name, tgt.name);
1019	if (existing_edges.find(x: tag) != existing_edges.end())
1020	{
1021	return; // Parallel edge
1022	}
1023	existing_edges.insert(x: tag);
1024	}
1025	edge_info e;
1026	e.source = src;
1027	e.target = tgt;
1028	e.props = props;
1029	r.edges.push_back(x: e);
1030	}
1031
1032	void parse_attr_list(properties& props)
1033	{
1034	while (true)
1035	{
1036	if (peek().type == token::left_bracket)
1037	get();
1038	else
1039	error(str: "Wanted left bracket to start attribute list");
1040	while (true)
1041	{
1042	switch (peek().type)
1043	{
1044	case token::right_bracket:
1045	break;
1046	case token::identifier:
1047	{
1048	std::string lhs = get().normalized_value;
1049	std::string rhs = "true";
1050	if (peek().type == token::equal)
1051	{
1052	get();
1053	if (peek().type != token::identifier)
1054	error(
1055	str: "Wanted identifier as value of attribute");
1056	rhs = get().normalized_value;
1057	}
1058	props [lhs] = rhs;
1059	break;
1060	}
1061	default:
1062	error(str: "Wanted identifier as name of attribute");
1063	}
1064	if (peek().type == token::comma
1065	\|\| peek().type == token::semicolon)
1066	get();
1067	else if (peek().type == token::right_bracket)
1068	break;
1069	}
1070	if (peek().type == token::right_bracket)
1071	get();
1072	else
1073	error(str: "Wanted right bracket to end attribute list");
1074	if (peek().type != token::left_bracket)
1075	break;
1076	}
1077	}
1078	};
1079
1080	void parse_graphviz_from_string(
1081	const std::string& str, parser_result& result, bool want_directed)
1082	{
1083	parser p(str, result);
1084	p.parse_graph(want_directed);
1085	}
1086
1087	// Some debugging stuff
1088	std::ostream& operator<<(std::ostream& o, const node_and_port& n)
1089	{
1090	o << n.name;
1091	for (size_t i = `0`; i < n.location.size(); ++i)
1092	{
1093	o << ":" << n.location [i];
1094	}
1095	if (!n.angle.empty())
1096	o << "@" << n.angle;
1097	return o;
1098	}
1099
1100	// Can't be operator<< because properties is just an std::map
1101	std::string props_to_string(const properties& props)
1102	{
1103	std::string result = "[";
1104	for (properties::const_iterator i = props.begin(); i != props.end();
1105	++i)
1106	{
1107	if (i != props.begin())
1108	result += ", ";
1109	result += i ->first + "=" + i ->second;
1110	}
1111	result += "]";
1112	return result;
1113	}
1114
1115	void translate_results_to_graph(
1116	const parser_result& r, ::boost::detail::graph::mutate_graph* mg)
1117	{
1118	typedef boost::detail::graph::edge_t edge;
1119	for (std::map< node_name, properties >::const_iterator i
1120	= r.nodes.begin();
1121	i != r.nodes.end(); ++i)
1122	{
1123	// std::cerr << i->first << " " << props_to_string(i->second) <<
1124	// std::endl;
1125	mg->do_add_vertex(node: i ->first);
1126	for (properties::const_iterator j = i ->second.begin();
1127	j != i ->second.end(); ++j)
1128	{
1129	mg->set_node_property(key: j ->first, node: i ->first, value: j ->second);
1130	}
1131	}
1132	for (std::vector< edge_info >::const_iterator i = r.edges.begin();
1133	i != r.edges.end(); ++i)
1134	{
1135	const edge_info& ei = *i;
1136	// std::cerr << ei.source << " -> " << ei.target << " " <<
1137	// props_to_string(ei.props) << std::endl;
1138	edge e = edge::new_edge();
1139	mg->do_add_edge(edge: e, source: ei.source.name, target: ei.target.name);
1140	for (properties::const_iterator j = ei.props.begin();
1141	j != ei.props.end(); ++j)
1142	{
1143	mg->set_edge_property(key: j ->first, edge: e, value: j ->second);
1144	}
1145	}
1146	std::map< subgraph_name, properties >::const_iterator root_graph_props_i
1147	= r.graph_props.find(x: "___root___");
1148	BOOST_ASSERT(
1149	root_graph_props_i != r.graph_props.end()); // Should not happen
1150	const properties& root_graph_props = root_graph_props_i ->second;
1151	// std::cerr << "ending graph " << props_to_string(root_graph_props) <<
1152	// std::endl;
1153	for (properties::const_iterator i = root_graph_props.begin();
1154	i != root_graph_props.end(); ++i)
1155	{
1156	mg->set_graph_property(key: i ->first, value: i ->second);
1157	}
1158	mg->finish_building_graph();
1159	}
1160
1161	} // end namespace read_graphviz_detail
1162
1163	namespace detail
1164	{
1165	namespace graph
1166	{
1167
1168	BOOST_GRAPH_DECL bool read_graphviz_new(
1169	const std::string& str, boost::detail::graph::mutate_graph* mg)
1170	{
1171	read_graphviz_detail::parser_result parsed_file;
1172	read_graphviz_detail::parse_graphviz_from_string(
1173	str, result&: parsed_file, want_directed: mg->is_directed());
1174	read_graphviz_detail::translate_results_to_graph(r: parsed_file, mg);
1175	return true;
1176	}
1177
1178	} // end namespace graph
1179	} // end namespace detail
1180
1181	} // end namespace boost
1182
1183	// GraphViz format notes (tested using "dot version 1.13 (v16) (Mon August 23,
1184	// 2004)", grammar from references in read_graphviz_new.hpp):
1185
1186	// Subgraphs don't nest (all a0 subgraphs are the same thing), but a node or
1187	// subgraph can have multiple parents (sources online say that the layout
1188	// algorithms can't handle non-tree structures of clusters, but it seems to
1189	// read them the same from the file). The containment relation is required to
1190	// be a DAG, though; it appears that a node or subgraph can't be moved into an
1191	// ancestor of a subgraph where it already was (we don't enforce that but do a
1192	// DFS when finding members to prevent cycles). Nodes get their properties by
1193	// when they are first mentioned, and can only have them overridden after that
1194	// by explicit properties on that particular node. Closing and reopening the
1195	// same subgraph name adds to its members, and graph properties and node/edge
1196	// defaults are preserved in that subgraph. The members of a subgraph used in
1197	// an edge are gathered when the edge is read, even if new members are added to
1198	// the subgraph later. Ports are allowed in a lot more places in the grammar
1199	// than Dot uses. For example, declaring a node seems to ignore ports, and I
1200	// don't think it's possible to set properties on a particular port. Adding an
1201	// edge between two ports on the same node seems to make Dot unhappy (crashed
1202	// for me).
1203
1204	// Test graph for GraphViz behavior on strange combinations of subgraphs and
1205	// such. I don't have anywhere else to put this file.
1206
1207	#if 0
1208	dIGRaph foo {
1209	node [color=blue]
1210	subgraph a -> b
1211	subgraph a {c}
1212	subgraph a -> d
1213	subgraph a {node [color=red]; e}
1214	subgraph a -> f
1215	subgraph a {g} -> h
1216	subgraph a {node [color=green]; i} -> j
1217	subgraph a {node [color=yellow]}
1218
1219	subgraph a0 {node [color=black]; subgraph a1 {node [color=white]}}
1220	node [color=pink] zz
1221	subgraph a0 {x1}
1222	subgraph a0 {subgraph a1 {x2}}
1223
1224	subgraph a0 -> x3
1225	subgraph a0 {subgraph a1 -> x3}
1226	x3
1227	subgraph a0 {subgraph a0 {node [color=xxx]; x2} x7}
1228	x2 [color=yyy]
1229	subgraph cluster_ax {foo; subgraph a0}
1230	subgraph a0 {foo2}
1231	subgraph cluster_ax {foo3}
1232	// subgraph a0 -> subgraph a0
1233
1234	bgcolor=yellow
1235	subgraph cluster_a2 {y1}
1236	// y1:n -> y1:(s,3)@se
1237	y1@se [color=green]
1238	y1@n [color=red]
1239	}
1240	#endif
1241

source code of boost/libs/graph/src/read_graphviz_new.cpp