1 | #ifndef DATE_TIME_TZ_DB_BASE_HPP__ |
2 | #define DATE_TIME_TZ_DB_BASE_HPP__ |
3 | |
4 | /* Copyright (c) 2003-2005 CrystalClear Software, Inc. |
5 | * Subject to the Boost Software License, Version 1.0. |
6 | * (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) |
7 | * Author: Jeff Garland, Bart Garst |
8 | * $Date$ |
9 | */ |
10 | |
#include <cstdlib>
#include <fstream>
#include <istream>
#include <iterator>
#include <map>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>
#include <boost/tokenizer.hpp>
#include <boost/shared_ptr.hpp>
#include <boost/throw_exception.hpp>
#include <boost/date_time/compiler_config.hpp>
#include <boost/date_time/time_zone_names.hpp>
#include <boost/date_time/time_zone_base.hpp>
#include <boost/date_time/time_parsing.hpp>
24 | |
25 | namespace boost { |
26 | namespace date_time { |
27 | |
28 | //! Exception thrown when tz database cannot locate requested data file |
class data_not_accessible : public std::logic_error
{
public:
  //! Builds the exception with a generic message that names no file
  data_not_accessible()
    : std::logic_error("Unable to locate or access the required datafile.")
  {}

  //! Builds the exception with a message that ends with the given filespec
  data_not_accessible(const std::string& filespec)
    : std::logic_error("Unable to locate or access the required datafile. Filespec: " + filespec)
  {}
};
39 | |
40 | //! Exception thrown when tz database locates incorrect field structure in data file |
class bad_field_count : public std::out_of_range
{
public:
  //! Builds the exception; \c s becomes the text reported by what()
  bad_field_count(const std::string& s)
    : std::out_of_range(s)
  {}
};
48 | |
49 | //! Creates a database of time_zones from csv datafile |
50 | /*! The csv file containing the zone_specs used by the |
51 | * tz_db_base is intended to be customized by the |
52 | * library user. When customizing this file (or creating your own) the |
53 | * file must follow a specific format. |
54 | * |
 * The first line of the file is expected to contain column headings and
 * is therefore skipped by load_from_file(). Note that load_from_stream()
 * skips nothing: it treats every line it reads as a record.
57 | * |
58 | * Each record (line) must have eleven fields. Some of those fields can |
59 | * be empty. Every field (even empty ones) must be enclosed in |
60 | * double-quotes. |
61 | * Ex: |
62 | * @code |
63 | * "America/Phoenix" <- string enclosed in quotes |
64 | * "" <- empty field |
65 | * @endcode |
66 | * |
67 | * Some fields represent a length of time. The format of these fields |
68 | * must be: |
69 | * @code |
70 | * "{+|-}hh:mm[:ss]" <- length-of-time format |
71 | * @endcode |
72 | * Where the plus or minus is mandatory and the seconds are optional. |
73 | * |
74 | * Since some time zones do not use daylight savings it is not always |
75 | * necessary for every field in a zone_spec to contain a value. All |
76 | * zone_specs must have at least ID and GMT offset. Zones that use |
77 | * daylight savings must have all fields filled except: |
78 | * STD ABBR, STD NAME, DST NAME. You should take note |
79 | * that DST ABBR is mandatory for zones that use daylight savings |
80 | * (see field descriptions for further details). |
81 | * |
82 | * ******* Fields and their description/details ********* |
83 | * |
84 | * ID: |
 * Contains the identifying string for the zone_spec. Any string will
 * do as long as it's unique. No two IDs can be the same.
87 | * |
88 | * STD ABBR: |
89 | * STD NAME: |
90 | * DST ABBR: |
91 | * DST NAME: |
92 | * These four are all the names and abbreviations used by the time |
93 | * zone being described. While any string will do in these fields, |
94 | * care should be taken. These fields hold the strings that will be |
95 | * used in the output of many of the local_time classes. |
96 | * Ex: |
97 | * @code |
98 | * time_zone nyc = tz_db.time_zone_from_region("America/New_York"); |
99 | * local_time ny_time(date(2004, Aug, 30), IS_DST, nyc); |
100 | * cout << ny_time.to_long_string() << endl; |
101 | * // 2004-Aug-30 00:00:00 Eastern Daylight Time |
102 | * cout << ny_time.to_short_string() << endl; |
103 | * // 2004-Aug-30 00:00:00 EDT |
104 | * @endcode |
105 | * |
106 | * NOTE: The exact format/function names may vary - see local_time |
107 | * documentation for further details. |
108 | * |
109 | * GMT offset: |
110 | * This is the number of hours added to utc to get the local time |
111 | * before any daylight savings adjustments are made. Some examples |
112 | * are: America/New_York offset -5 hours, & Africa/Cairo offset +2 hours. |
113 | * The format must follow the length-of-time format described above. |
114 | * |
115 | * DST adjustment: |
116 | * The amount of time added to gmt_offset when daylight savings is in |
117 | * effect. The format must follow the length-of-time format described |
118 | * above. |
119 | * |
120 | * DST Start Date rule: |
 * This is a specially formatted string that describes the day of year
 * on which the transition takes place. It holds three fields of its own,
 * separated by semicolons.
124 | * The first field indicates the "nth" weekday of the month. The possible |
125 | * values are: 1 (first), 2 (second), 3 (third), 4 (fourth), 5 (fifth), |
126 | * and -1 (last). |
127 | * The second field indicates the day-of-week from 0-6 (Sun=0). |
128 | * The third field indicates the month from 1-12 (Jan=1). |
129 | * |
130 | * Examples are: "-1;5;9"="Last Friday of September", |
131 | * "2;1;3"="Second Monday of March" |
132 | * |
133 | * Start time: |
 * Start time is the time of day, on the day of the start transition,
 * at which the transition takes place, expressed as a length of time
 * past midnight (24 hour format). The format must follow the
 * length-of-time format described above with the exception that it
 * must always be positive.
139 | * |
140 | * DST End date rule: |
141 | * See DST Start date rule. The difference here is this is the day |
142 | * daylight savings ends (transition to STD). |
143 | * |
144 | * End time: |
145 | * Same as Start time. |
146 | */ |
147 | template<class time_zone_type, class rule_type> |
148 | class tz_db_base { |
149 | public: |
150 | /* Having CharT as a template parameter created problems |
151 | * with posix_time::duration_from_string. Templatizing |
152 | * duration_from_string was not possible at this time, however, |
153 | * it should be possible in the future (when poor compilers get |
154 | * fixed or stop being used). |
155 | * Since this class was designed to use CharT as a parameter it |
156 | * is simply typedef'd here to ease converting in back to a |
157 | * parameter the future */ |
158 | typedef char char_type; |
159 | |
160 | typedef typename time_zone_type::base_type time_zone_base_type; |
161 | typedef typename time_zone_type::time_duration_type time_duration_type; |
162 | typedef time_zone_names_base<char_type> time_zone_names; |
163 | typedef boost::date_time::dst_adjustment_offsets<time_duration_type> dst_adjustment_offsets; |
164 | typedef std::basic_string<char_type> string_type; |
165 | |
166 | //! Constructs an empty database |
167 | tz_db_base() {} |
168 | |
169 | //! Process csv data file, may throw exceptions |
170 | /*! May throw bad_field_count exceptions */ |
171 | void load_from_stream(std::istream &in) |
172 | { |
173 | std::string buff; |
174 | while( std::getline(is&: in, str&: buff)) { |
175 | parse_string(s&: buff); |
176 | } |
177 | } |
178 | |
179 | //! Process csv data file, may throw exceptions |
180 | /*! May throw data_not_accessible, or bad_field_count exceptions */ |
181 | void load_from_file(const std::string& pathspec) |
182 | { |
183 | std::string buff; |
184 | |
185 | std::ifstream ifs(pathspec.c_str()); |
186 | if(!ifs){ |
187 | boost::throw_exception(e: data_not_accessible(pathspec)); |
188 | } |
189 | std::getline(is&: ifs, str&: buff); // first line is column headings |
190 | this->load_from_stream(ifs); |
191 | } |
192 | |
193 | //! returns true if record successfully added to map |
194 | /*! Takes a region name in the form of "America/Phoenix", and a |
195 | * time_zone object for that region. The id string must be a unique |
196 | * name that does not already exist in the database. */ |
197 | bool add_record(const string_type& region, |
198 | boost::shared_ptr<time_zone_base_type> tz) |
199 | { |
200 | typename map_type::value_type p(region, tz); |
201 | return (m_zone_map.insert(p)).second; |
202 | } |
203 | |
204 | //! Returns a time_zone object built from the specs for the given region |
205 | /*! Returns a time_zone object built from the specs for the given |
206 | * region. If region does not exist a local_time::record_not_found |
207 | * exception will be thrown */ |
208 | boost::shared_ptr<time_zone_base_type> |
209 | time_zone_from_region(const string_type& region) const |
210 | { |
211 | // get the record |
212 | typename map_type::const_iterator record = m_zone_map.find(region); |
213 | if(record == m_zone_map.end()){ |
214 | return boost::shared_ptr<time_zone_base_type>(); //null pointer |
215 | } |
216 | return record->second; |
217 | } |
218 | |
219 | //! Returns a vector of strings holding the time zone regions in the database |
220 | std::vector<std::string> region_list() const |
221 | { |
222 | typedef std::vector<std::string> vector_type; |
223 | vector_type regions; |
224 | typename map_type::const_iterator itr = m_zone_map.begin(); |
225 | while(itr != m_zone_map.end()) { |
226 | regions.push_back(itr->first); |
227 | ++itr; |
228 | } |
229 | return regions; |
230 | } |
231 | |
232 | private: |
233 | typedef std::map<string_type, boost::shared_ptr<time_zone_base_type> > map_type; |
234 | map_type m_zone_map; |
235 | |
236 | // start and end rule are of the same type |
237 | typedef typename rule_type::start_rule::week_num week_num; |
238 | |
239 | /* TODO: mechanisms need to be put in place to handle different |
240 | * types of rule specs. parse_rules() only handles nth_kday |
241 | * rule types. */ |
242 | |
243 | //! parses rule specs for transition day rules |
244 | rule_type* parse_rules(const string_type& sr, const string_type& er) const |
245 | { |
246 | using namespace gregorian; |
247 | // start and end rule are of the same type, |
248 | // both are included here for readability |
249 | typedef typename rule_type::start_rule start_rule; |
250 | typedef typename rule_type::end_rule end_rule; |
251 | |
252 | // these are: [start|end] nth, day, month |
253 | int s_nth = 0, s_d = 0, s_m = 0; |
254 | int e_nth = 0, e_d = 0, e_m = 0; |
255 | split_rule_spec(nth&: s_nth, d&: s_d, m&: s_m, rule: sr); |
256 | split_rule_spec(nth&: e_nth, d&: e_d, m&: e_m, rule: er); |
257 | |
258 | typename start_rule::week_num s_wn, e_wn; |
259 | s_wn = get_week_num(nth: s_nth); |
260 | e_wn = get_week_num(nth: e_nth); |
261 | |
262 | |
263 | return new rule_type(start_rule(s_wn, |
264 | static_cast<unsigned short>(s_d), |
265 | static_cast<unsigned short>(s_m)), |
266 | end_rule(e_wn, |
267 | static_cast<unsigned short>(e_d), |
268 | static_cast<unsigned short>(e_m))); |
269 | } |
270 | //! helper function for parse_rules() |
271 | week_num get_week_num(int nth) const |
272 | { |
273 | typedef typename rule_type::start_rule start_rule; |
274 | switch(nth){ |
275 | case 1: |
276 | return start_rule::first; |
277 | case 2: |
278 | return start_rule::second; |
279 | case 3: |
280 | return start_rule::third; |
281 | case 4: |
282 | return start_rule::fourth; |
283 | case 5: |
284 | case -1: |
285 | return start_rule::fifth; |
286 | default: |
287 | // shouldn't get here - add error handling later |
288 | break; |
289 | } |
290 | return start_rule::fifth; // silence warnings |
291 | } |
292 | |
293 | //! splits the [start|end]_date_rule string into 3 ints |
294 | void split_rule_spec(int& nth, int& d, int& m, string_type rule) const |
295 | { |
296 | typedef boost::char_separator<char_type, std::char_traits<char_type> > char_separator_type; |
297 | typedef boost::tokenizer<char_separator_type, |
298 | std::basic_string<char_type>::const_iterator, |
299 | std::basic_string<char_type> > tokenizer; |
300 | typedef boost::tokenizer<char_separator_type, |
301 | std::basic_string<char_type>::const_iterator, |
302 | std::basic_string<char_type> >::iterator tokenizer_iterator; |
303 | |
304 | const char_type sep_char[] = { ';', '\0'}; |
305 | char_separator_type sep(sep_char); |
306 | tokenizer tokens(rule, sep); // 3 fields |
307 | |
308 | if ( std::distance ( first: tokens.begin(), last: tokens.end ()) != 3 ) { |
309 | std::ostringstream msg; |
310 | msg << "Expecting 3 fields, got " |
311 | << std::distance ( first: tokens.begin(), last: tokens.end ()) |
312 | << " fields in line: " << rule; |
313 | boost::throw_exception(e: bad_field_count(msg.str())); |
314 | } |
315 | |
316 | tokenizer_iterator tok_iter = tokens.begin(); |
317 | nth = std::atoi(nptr: tok_iter->c_str()); ++tok_iter; |
318 | d = std::atoi(nptr: tok_iter->c_str()); ++tok_iter; |
319 | m = std::atoi(nptr: tok_iter->c_str()); |
320 | } |
321 | |
322 | |
323 | //! Take a line from the csv, turn it into a time_zone_type. |
324 | /*! Take a line from the csv, turn it into a time_zone_type, |
325 | * and add it to the map. Zone_specs in csv file are expected to |
326 | * have eleven fields that describe the time zone. Returns true if |
327 | * zone_spec successfully added to database */ |
328 | bool parse_string(string_type& s) |
329 | { |
330 | std::vector<string_type> result; |
331 | typedef boost::token_iterator_generator<boost::escaped_list_separator<char_type>, string_type::const_iterator, string_type >::type token_iter_type; |
332 | |
333 | token_iter_type i = boost::make_token_iterator<string_type>(begin: s.begin(), end: s.end(),fun: boost::escaped_list_separator<char_type>()); |
334 | |
335 | token_iter_type end; |
336 | while (i != end) { |
337 | result.push_back(x: *i); |
338 | i++; |
339 | } |
340 | |
341 | enum db_fields { ID, STDABBR, STDNAME, DSTABBR, DSTNAME, GMTOFFSET, |
342 | DSTADJUST, START_DATE_RULE, START_TIME, END_DATE_RULE, |
343 | END_TIME, FIELD_COUNT }; |
344 | |
345 | //take a shot at fixing gcc 4.x error |
346 | const unsigned int expected_fields = static_cast<unsigned int>(FIELD_COUNT); |
347 | if (result.size() != expected_fields) { |
348 | std::ostringstream msg; |
349 | msg << "Expecting " << FIELD_COUNT << " fields, got " |
350 | << result.size() << " fields in line: " << s; |
351 | boost::throw_exception(e: bad_field_count(msg.str())); |
352 | BOOST_DATE_TIME_UNREACHABLE_EXPRESSION(return false); // should never reach |
353 | } |
354 | |
355 | // initializations |
356 | bool has_dst = true; |
357 | if(result[DSTABBR] == std::string()){ |
358 | has_dst = false; |
359 | } |
360 | |
361 | |
362 | // start building components of a time_zone |
363 | time_zone_names names(result[STDNAME], result[STDABBR], |
364 | result[DSTNAME], result[DSTABBR]); |
365 | |
366 | time_duration_type utc_offset = |
367 | str_from_delimited_time_duration<time_duration_type,char_type>(result[GMTOFFSET]); |
368 | |
369 | dst_adjustment_offsets adjust(time_duration_type(0,0,0), |
370 | time_duration_type(0,0,0), |
371 | time_duration_type(0,0,0)); |
372 | |
373 | boost::shared_ptr<rule_type> rules; |
374 | |
375 | if(has_dst){ |
376 | adjust = dst_adjustment_offsets( |
377 | str_from_delimited_time_duration<time_duration_type,char_type>(result[DSTADJUST]), |
378 | str_from_delimited_time_duration<time_duration_type,char_type>(result[START_TIME]), |
379 | str_from_delimited_time_duration<time_duration_type,char_type>(result[END_TIME]) |
380 | ); |
381 | |
382 | rules = |
383 | boost::shared_ptr<rule_type>(parse_rules(sr: result[START_DATE_RULE], |
384 | er: result[END_DATE_RULE])); |
385 | } |
386 | string_type id(result[ID]); |
387 | boost::shared_ptr<time_zone_base_type> zone(new time_zone_type(names, utc_offset, adjust, rules)); |
388 | return (add_record(region: id, tz: zone)); |
389 | |
390 | } |
391 | |
392 | }; |
393 | |
394 | } } // namespace |
395 | |
396 | #endif // DATE_TIME_TZ_DB_BASE_HPP__ |
397 | |