tz_db_base.hpp source code [boost/boost/date_time/tz_db_base.hpp]

1	#ifndef DATE_TIME_TZ_DB_BASE_HPP__
2	#define DATE_TIME_TZ_DB_BASE_HPP__
3
/* Copyright (c) 2003-2005 CrystalClear Software, Inc.
 * Subject to the Boost Software License, Version 1.0.
 * (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
 * Author: Jeff Garland, Bart Garst
 * $Date$
 */
10
11	#include <map>
12	#include <vector>
13	#include <string>
14	#include <sstream>
15	#include <fstream>
16	#include <stdexcept>
17	#include <boost/tokenizer.hpp>
18	#include <boost/shared_ptr.hpp>
19	#include <boost/throw_exception.hpp>
20	#include <boost/date_time/compiler_config.hpp>
21	#include <boost/date_time/time_zone_names.hpp>
22	#include <boost/date_time/time_zone_base.hpp>
23	#include <boost/date_time/time_parsing.hpp>
24
25	namespace boost {
26	namespace date_time {
27
28	//! Exception thrown when tz database cannot locate requested data file
class data_not_accessible : public std::logic_error
{
public:
  //! Builds the exception with a generic message (no file named).
  data_not_accessible() :
    std::logic_error("Unable to locate or access the required datafile.")
  {}
  //! Builds the exception with a message that names the offending file.
  /*! \param filespec path of the data file that could not be opened;
   *  it is appended verbatim to the message returned by what(). */
  data_not_accessible(const std::string& filespec) :
    std::logic_error("Unable to locate or access the required datafile. Filespec: " + filespec)
  {}
};
39
40	//! Exception thrown when tz database locates incorrect field structure in data file
class bad_field_count : public std::out_of_range
{
public:
  //! Builds the exception; \a s becomes the text returned by what().
  bad_field_count(const std::string& s) :
    std::out_of_range(s)
  {}
};
48
//! Creates a database of time_zones from csv datafile
/*! The csv file containing the zone_specs used by the
 * tz_db_base is intended to be customized by the
 * library user. When customizing this file (or creating your own) the
 * file must follow a specific format.
 *
 * The first line is expected to contain column headings and is therefore
 * not processed by the tz_db_base (load_from_file skips it;
 * load_from_stream processes every line it is given).
 *
 * Each record (line) must have eleven fields. Some of those fields can
 * be empty. Every field (even empty ones) must be enclosed in
 * double-quotes.
 * Ex:
 * @code
 *  "America/Phoenix" <- string enclosed in quotes
 *  ""                <- empty field
 * @endcode
 *
 * Some fields represent a length of time. The format of these fields
 * must be:
 * @code
 *  "{+|-}hh:mm[:ss]" <- length-of-time format
 * @endcode
 * Where the plus or minus is mandatory and the seconds are optional.
 *
 * Since some time zones do not use daylight savings it is not always
 * necessary for every field in a zone_spec to contain a value. All
 * zone_specs must have at least ID and GMT offset. Zones that use
 * daylight savings must have all fields filled except:
 * STD ABBR, STD NAME, DST NAME. You should take note
 * that DST ABBR is mandatory for zones that use daylight savings
 * (see field descriptions for further details).
 *
 * ****** Fields and their description/details ******
 *
 * ID:
 *  Contains the identifying string for the zone_spec. Any string will
 *  do as long as it's unique. No two IDs can be the same.
 *
 * STD ABBR:
 * STD NAME:
 * DST ABBR:
 * DST NAME:
 *  These four are all the names and abbreviations used by the time
 *  zone being described. While any string will do in these fields,
 *  care should be taken. These fields hold the strings that will be
 *  used in the output of many of the local_time classes.
 *  Ex:
 *  @code
 *   time_zone nyc = tz_db.time_zone_from_region("America/New_York");
 *   local_time ny_time(date(2004, Aug, 30), IS_DST, nyc);
 *   cout << ny_time.to_long_string() << endl;
 *   // 2004-Aug-30 00:00:00 Eastern Daylight Time
 *   cout << ny_time.to_short_string() << endl;
 *   // 2004-Aug-30 00:00:00 EDT
 *  @endcode
 *
 *  NOTE: The exact format/function names may vary - see local_time
 *  documentation for further details.
 *
 * GMT offset:
 *  This is the number of hours added to utc to get the local time
 *  before any daylight savings adjustments are made. Some examples
 *  are: America/New_York offset -5 hours, & Africa/Cairo offset +2 hours.
 *  The format must follow the length-of-time format described above.
 *
 * DST adjustment:
 *  The amount of time added to gmt_offset when daylight savings is in
 *  effect. The format must follow the length-of-time format described
 *  above.
 *
 * DST Start Date rule:
 *  This is a specially formatted string that describes the day of year
 *  on which the transition takes place. It holds three fields of its own,
 *  separated by semicolons.
 *  The first field indicates the "nth" weekday of the month. The possible
 *  values are: 1 (first), 2 (second), 3 (third), 4 (fourth), 5 (fifth),
 *  and -1 (last).
 *  The second field indicates the day-of-week from 0-6 (Sun=0).
 *  The third field indicates the month from 1-12 (Jan=1).
 *
 *  Examples are: "-1;5;9"="Last Friday of September",
 *                "2;1;3"="Second Monday of March"
 *
 * Start time:
 *  Start time is the number of hours past midnight, on the day of the
 *  start transition, at which the transition takes place. More simply put,
 *  the time of day the transition is made (in 24 hours format). The format
 *  must follow the length-of-time format described above with the
 *  exception that it must always be positive.
 *
 * DST End date rule:
 *  See DST Start date rule. The difference here is this is the day
 *  daylight savings ends (transition to STD).
 *
 * End time:
 *  Same as Start time.
 */
147	template<class time_zone_type, class rule_type>
148	class tz_db_base {
149	public:
150	/ Having CharT as a template parameter created problems*
151	* with posix_time::duration_from_string. Templatizing
152	* duration_from_string was not possible at this time, however,
153	* it should be possible in the future (when poor compilers get
154	* fixed or stop being used).
155	* Since this class was designed to use CharT as a parameter it
156	* is simply typedef'd here to ease converting in back to a
157	* parameter the future */
158	typedef char char_type;
159
160	typedef typename time_zone_type::base_type time_zone_base_type;
161	typedef typename time_zone_type::time_duration_type time_duration_type;
162	typedef time_zone_names_base<char_type> time_zone_names;
163	typedef boost::date_time::dst_adjustment_offsets<time_duration_type> dst_adjustment_offsets;
164	typedef std::basic_string<char_type> string_type;
165
166	//! Constructs an empty database
167	tz_db_base() {}
168
169	//! Process csv data file, may throw exceptions
170	/! May throw bad_field_count exceptions /
171	void load_from_stream(std::istream &in)
172	{
173	std::string buff;
174	while( std::getline(is&: in, str&: buff)) {
175	parse_string(s&: buff);
176	}
177	}
178
179	//! Process csv data file, may throw exceptions
180	/! May throw data_not_accessible, or bad_field_count exceptions /
181	void load_from_file(const std::string& pathspec)
182	{
183	std::string buff;
184
185	std::ifstream ifs(pathspec.c_str());
186	if(!ifs){
187	boost::throw_exception(e: data_not_accessible (pathspec));
188	}
189	std::getline(is&: ifs, str&: buff); // first line is column headings
190	this->load_from_stream(ifs);
191	}
192
193	//! returns true if record successfully added to map
194	/! Takes a region name in the form of "America/Phoenix", and a*
195	* time_zone object for that region. The id string must be a unique
196	* name that does not already exist in the database. */
197	bool add_record(const string_type& region,
198	boost::shared_ptr<time_zone_base_type> tz)
199	{
200	typename map_type::value_type p(region, tz);
201	return (m_zone_map.insert(p)).second;
202	}
203
204	//! Returns a time_zone object built from the specs for the given region
205	/! Returns a time_zone object built from the specs for the given*
206	* region. If region does not exist a local_time::record_not_found
207	* exception will be thrown */
208	boost::shared_ptr<time_zone_base_type>
209	time_zone_from_region(const string_type& region) const
210	{
211	// get the record
212	typename map_type::const_iterator record = m_zone_map.find(region);
213	if(record == m_zone_map.end()){
214	return boost::shared_ptr<time_zone_base_type>(); //null pointer
215	}
216	return record->second;
217	}
218
219	//! Returns a vector of strings holding the time zone regions in the database
220	std::vector<std::string> region_list() const
221	{
222	typedef std::vector<std::string> vector_type;
223	vector_type regions;
224	typename map_type::const_iterator itr = m_zone_map.begin();
225	while(itr != m_zone_map.end()) {
226	regions.push_back(itr->first);
227	++itr;
228	}
229	return regions;
230	}
231
232	private:
233	typedef std::map<string_type, boost::shared_ptr<time_zone_base_type> > map_type;
234	map_type m_zone_map;
235
236	// start and end rule are of the same type
237	typedef typename rule_type::start_rule::week_num week_num;
238
239	/ TODO: mechanisms need to be put in place to handle different*
240	* types of rule specs. parse_rules() only handles nth_kday
241	* rule types. */
242
243	//! parses rule specs for transition day rules
244	rule_type* parse_rules(const string_type& sr, const string_type& er) const
245	{
246	using namespace gregorian;
247	// start and end rule are of the same type,
248	// both are included here for readability
249	typedef typename rule_type::start_rule start_rule;
250	typedef typename rule_type::end_rule end_rule;
251
252	// these are: [start\|end] nth, day, month
253	int s_nth = `0`, s_d = `0`, s_m = `0`;
254	int e_nth = `0`, e_d = `0`, e_m = `0`;
255	split_rule_spec(nth&: s_nth, d&: s_d, m&: s_m, rule: sr);
256	split_rule_spec(nth&: e_nth, d&: e_d, m&: e_m, rule: er);
257
258	typename start_rule::week_num s_wn, e_wn;
259	s_wn = get_week_num(nth: s_nth);
260	e_wn = get_week_num(nth: e_nth);
261
262
263	return new rule_type(start_rule(s_wn,
264	static_cast<unsigned short>(s_d),
265	static_cast<unsigned short>(s_m)),
266	end_rule(e_wn,
267	static_cast<unsigned short>(e_d),
268	static_cast<unsigned short>(e_m)));
269	}
270	//! helper function for parse_rules()
271	week_num get_week_num(int nth) const
272	{
273	typedef typename rule_type::start_rule start_rule;
274	switch(nth){
275	case `1`:
276	return start_rule::first;
277	case `2`:
278	return start_rule::second;
279	case `3`:
280	return start_rule::third;
281	case `4`:
282	return start_rule::fourth;
283	case `5`:
284	case -`1`:
285	return start_rule::fifth;
286	default:
287	// shouldn't get here - add error handling later
288	break;
289	}
290	return start_rule::fifth; // silence warnings
291	}
292
293	//! splits the [start\|end]_date_rule string into 3 ints
294	void split_rule_spec(int& nth, int& d, int& m, string_type rule) const
295	{
296	typedef boost::char_separator<char_type, std::char_traits<char_type> > char_separator_type;
297	typedef boost::tokenizer<char_separator_type,
298	std::basic_string<char_type>::const_iterator,
299	std::basic_string<char_type> > tokenizer;
300	typedef boost::tokenizer<char_separator_type,
301	std::basic_string<char_type>::const_iterator,
302	std::basic_string<char_type> >::iterator tokenizer_iterator;
303
304	const char_type sep_char[] = { `';'`, `'\0'`};
305	char_separator_type sep(sep_char);
306	tokenizer tokens(rule, sep); // 3 fields
307
308	if ( std::distance ( first: tokens.begin(), last: tokens.end ()) != `3` ) {
309	std::ostringstream msg;
310	msg << "Expecting 3 fields, got "
311	<< std::distance ( first: tokens.begin(), last: tokens.end ())
312	<< " fields in line: " << rule;
313	boost::throw_exception(e: bad_field_count (msg.str()));
314	}
315
316	tokenizer_iterator tok_iter = tokens.begin();
317	nth = std::atoi(nptr: tok_iter ->c_str()); ++tok_iter;
318	d = std::atoi(nptr: tok_iter ->c_str()); ++tok_iter;
319	m = std::atoi(nptr: tok_iter ->c_str());
320	}
321
322
323	//! Take a line from the csv, turn it into a time_zone_type.
324	/! Take a line from the csv, turn it into a time_zone_type,*
325	* and add it to the map. Zone_specs in csv file are expected to
326	* have eleven fields that describe the time zone. Returns true if
327	* zone_spec successfully added to database */
328	bool parse_string(string_type& s)
329	{
330	std::vector<string_type> result;
331	typedef boost::token_iterator_generator<boost::escaped_list_separator<char_type>, string_type::const_iterator, string_type >::type token_iter_type;
332
333	token_iter_type i = boost::make_token_iterator<string_type>(begin: s.begin(), end: s.end(),fun: boost::escaped_list_separator<char_type>());
334
335	token_iter_type end;
336	while (i != end) {
337	result.push_back(x: *i);
338	i ++;
339	}
340
341	enum db_fields { ID, STDABBR, STDNAME, DSTABBR, DSTNAME, GMTOFFSET,
342	DSTADJUST, START_DATE_RULE, START_TIME, END_DATE_RULE,
343	END_TIME, FIELD_COUNT };
344
345	//take a shot at fixing gcc 4.x error
346	const unsigned int expected_fields = static_cast<unsigned int>(FIELD_COUNT);
347	if (result.size() != expected_fields) {
348	std::ostringstream msg;
349	msg << "Expecting " << FIELD_COUNT << " fields, got "
350	<< result.size() << " fields in line: " << s;
351	boost::throw_exception(e: bad_field_count (msg.str()));
352	BOOST_DATE_TIME_UNREACHABLE_EXPRESSION(return false); // should never reach
353	}
354
355	// initializations
356	bool has_dst = true;
357	if(result[DSTABBR] == std::string ()){
358	has_dst = false;
359	}
360
361
362	// start building components of a time_zone
363	time_zone_names names(result[STDNAME], result[STDABBR],
364	result[DSTNAME], result[DSTABBR]);
365
366	time_duration_type utc_offset =
367	str_from_delimited_time_duration<time_duration_type,char_type>(result[GMTOFFSET]);
368
369	dst_adjustment_offsets adjust(time_duration_type(`0`,`0`,`0`),
370	time_duration_type(`0`,`0`,`0`),
371	time_duration_type(`0`,`0`,`0`));
372
373	boost::shared_ptr<rule_type> rules;
374
375	if(has_dst){
376	adjust = dst_adjustment_offsets(
377	str_from_delimited_time_duration<time_duration_type,char_type>(result[DSTADJUST]),
378	str_from_delimited_time_duration<time_duration_type,char_type>(result[START_TIME]),
379	str_from_delimited_time_duration<time_duration_type,char_type>(result[END_TIME])
380	);
381
382	rules =
383	boost::shared_ptr<rule_type>(parse_rules(sr: result[START_DATE_RULE],
384	er: result[END_DATE_RULE]));
385	}
386	string_type id(result[ID]);
387	boost::shared_ptr<time_zone_base_type> zone(new time_zone_type(names, utc_offset, adjust, rules));
388	return (add_record(region: id, tz: zone));
389
390	}
391
392	};
393
394	} } // namespace
395
396	#endif // DATE_TIME_TZ_DB_BASE_HPP__
397

source code of boost/boost/date_time/tz_db_base.hpp