123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724 |
- #ifndef DATE_TIME_FORMAT_DATE_PARSER_HPP__
- #define DATE_TIME_FORMAT_DATE_PARSER_HPP__
- /* Copyright (c) 2004-2005 CrystalClear Software, Inc.
- * Use, modification and distribution is subject to the
- * Boost Software License, Version 1.0. (See accompanying
- * file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
- * Author: Jeff Garland, Bart Garst
- * $Date$
- */
- #include "boost/lexical_cast.hpp"
- #include "boost/date_time/string_parse_tree.hpp"
- #include "boost/date_time/strings_from_facet.hpp"
- #include "boost/date_time/special_values_parser.hpp"
- #include <string>
- #include <vector>
- #include <sstream>
- #include <iterator>
- #ifndef BOOST_NO_STDC_NAMESPACE
- # include <cctype>
- #else
- # include <ctype.h>
- #endif
- #ifdef BOOST_NO_STDC_NAMESPACE
- namespace std {
- using ::isspace;
- using ::isdigit;
- }
- #endif
- namespace boost { namespace date_time {
- //! Helper function for parsing fixed length strings into integers
- /*! Will consume 'length' number of characters from stream. Consumed
- * character are transfered to parse_match_result struct.
- * Returns '-1' if no number can be parsed or incorrect number of
- * digits in stream. */
- template<typename int_type, typename charT>
- inline
- int_type
- fixed_string_to_int(std::istreambuf_iterator<charT>& itr,
- std::istreambuf_iterator<charT>& stream_end,
- parse_match_result<charT>& mr,
- unsigned int length,
- const charT& fill_char)
- {
- //typedef std::basic_string<charT> string_type;
- unsigned int j = 0;
- //string_type s;
- while (j < length && itr != stream_end &&
- (std::isdigit(*itr) || *itr == fill_char)) {
- if(*itr == fill_char) {
- /* Since a fill_char can be anything, we convert it to a zero.
- * lexical_cast will behave predictably when zero is used as fill. */
- mr.cache += ('0');
- }
- else {
- mr.cache += (*itr);
- }
- itr++;
- j++;
- }
- int_type i = static_cast<int_type>(-1);
- // mr.cache will hold leading zeros. size() tells us when input is too short.
- if(mr.cache.size() < length) {
- return i;
- }
- try {
- i = boost::lexical_cast<int_type>(mr.cache);
- }catch(bad_lexical_cast&){
- // we want to return -1 if the cast fails so nothing to do here
- }
- return i;
- }
- //! Helper function for parsing fixed length strings into integers
- /*! Will consume 'length' number of characters from stream. Consumed
- * character are transfered to parse_match_result struct.
- * Returns '-1' if no number can be parsed or incorrect number of
- * digits in stream. */
- template<typename int_type, typename charT>
- inline
- int_type
- fixed_string_to_int(std::istreambuf_iterator<charT>& itr,
- std::istreambuf_iterator<charT>& stream_end,
- parse_match_result<charT>& mr,
- unsigned int length)
- {
- return fixed_string_to_int<int_type, charT>(itr, stream_end, mr, length, '0');
- }
- //! Helper function for parsing varied length strings into integers
- /*! Will consume 'max_length' characters from stream only if those
- * characters are digits. Returns '-1' if no number can be parsed.
- * Will not parse a number preceeded by a '+' or '-'. */
- template<typename int_type, typename charT>
- inline
- int_type
- var_string_to_int(std::istreambuf_iterator<charT>& itr,
- const std::istreambuf_iterator<charT>& stream_end,
- unsigned int max_length)
- {
- typedef std::basic_string<charT> string_type;
- unsigned int j = 0;
- string_type s;
- while (itr != stream_end && (j < max_length) && std::isdigit(*itr)) {
- s += (*itr);
- ++itr;
- ++j;
- }
- int_type i = static_cast<int_type>(-1);
- if(!s.empty()) {
- i = boost::lexical_cast<int_type>(s);
- }
- return i;
- }
- //! Class with generic date parsing using a format string
- /*! The following is the set of recognized format specifiers
- - %a - Short weekday name
- - %A - Long weekday name
- - %b - Abbreviated month name
- - %B - Full month name
- - %d - Day of the month as decimal 01 to 31
- - %j - Day of year as decimal from 001 to 366
- - %m - Month name as a decimal 01 to 12
- - %U - Week number 00 to 53 with first Sunday as the first day of week 1?
- - %w - Weekday as decimal number 0 to 6 where Sunday == 0
- - %W - Week number 00 to 53 where Monday is first day of week 1
- - %x - facet default date representation
- - %y - Year without the century - eg: 04 for 2004
- - %Y - Year with century
- The weekday specifiers (%a and %A) do not add to the date construction,
- but they provide a way to skip over the weekday names for formats that
- provide them.
- todo -- Another interesting feature that this approach could provide is
- an option to fill in any missing fields with the current values
- from the clock. So if you have %m-%d the parser would detect
- the missing year value and fill it in using the clock.
- todo -- What to do with the %x. %x in the classic facet is just bad...
- */
- template<class date_type, typename charT>
- class format_date_parser
- {
- public:
- typedef std::basic_string<charT> string_type;
- typedef std::basic_istringstream<charT> stringstream_type;
- typedef std::istreambuf_iterator<charT> stream_itr_type;
- typedef typename string_type::const_iterator const_itr;
- typedef typename date_type::year_type year_type;
- typedef typename date_type::month_type month_type;
- typedef typename date_type::day_type day_type;
- typedef typename date_type::duration_type duration_type;
- typedef typename date_type::day_of_week_type day_of_week_type;
- typedef typename date_type::day_of_year_type day_of_year_type;
- typedef string_parse_tree<charT> parse_tree_type;
- typedef typename parse_tree_type::parse_match_result_type match_results;
- typedef std::vector<std::basic_string<charT> > input_collection_type;
- // TODO sv_parser uses its default constructor - write the others
- format_date_parser(const string_type& format_str,
- const input_collection_type& month_short_names,
- const input_collection_type& month_long_names,
- const input_collection_type& weekday_short_names,
- const input_collection_type& weekday_long_names) :
- m_format(format_str),
- m_month_short_names(month_short_names, 1),
- m_month_long_names(month_long_names, 1),
- m_weekday_short_names(weekday_short_names),
- m_weekday_long_names(weekday_long_names)
- {}
- format_date_parser(const string_type& format_str,
- const std::locale& locale) :
- m_format(format_str),
- m_month_short_names(gather_month_strings<charT>(locale), 1),
- m_month_long_names(gather_month_strings<charT>(locale, false), 1),
- m_weekday_short_names(gather_weekday_strings<charT>(locale)),
- m_weekday_long_names(gather_weekday_strings<charT>(locale, false))
- {}
- format_date_parser(const format_date_parser<date_type,charT>& fdp)
- {
- this->m_format = fdp.m_format;
- this->m_month_short_names = fdp.m_month_short_names;
- this->m_month_long_names = fdp.m_month_long_names;
- this->m_weekday_short_names = fdp.m_weekday_short_names;
- this->m_weekday_long_names = fdp.m_weekday_long_names;
- }
- string_type format() const
- {
- return m_format;
- }
- void format(string_type format_str)
- {
- m_format = format_str;
- }
- void short_month_names(const input_collection_type& month_names)
- {
- m_month_short_names = parse_tree_type(month_names, 1);
- }
- void long_month_names(const input_collection_type& month_names)
- {
- m_month_long_names = parse_tree_type(month_names, 1);
- }
- void short_weekday_names(const input_collection_type& weekday_names)
- {
- m_weekday_short_names = parse_tree_type(weekday_names);
- }
- void long_weekday_names(const input_collection_type& weekday_names)
- {
- m_weekday_long_names = parse_tree_type(weekday_names);
- }
- date_type
- parse_date(const string_type& value,
- const string_type& format_str,
- const special_values_parser<date_type,charT>& sv_parser) const
- {
- stringstream_type ss(value);
- stream_itr_type sitr(ss);
- stream_itr_type stream_end;
- return parse_date(sitr, stream_end, format_str, sv_parser);
- }
- date_type
- parse_date(std::istreambuf_iterator<charT>& sitr,
- std::istreambuf_iterator<charT>& stream_end,
- const special_values_parser<date_type,charT>& sv_parser) const
- {
- return parse_date(sitr, stream_end, m_format, sv_parser);
- }
- /*! Of all the objects that the format_date_parser can parse, only a
- * date can be a special value. Therefore, only parse_date checks
- * for special_values. */
- date_type
- parse_date(std::istreambuf_iterator<charT>& sitr,
- std::istreambuf_iterator<charT>& stream_end,
- string_type format_str,
- const special_values_parser<date_type,charT>& sv_parser) const
- {
- bool use_current_char = false;
- // skip leading whitespace
- while(std::isspace(*sitr) && sitr != stream_end) { ++sitr; }
- short year(0), month(0), day(0), day_of_year(0);// wkday(0);
- /* Initialized the following to their minimum values. These intermediate
- * objects are used so we get specific exceptions when part of the input
- * is unparsable.
- * Ex: "205-Jan-15" will throw a bad_year, "2005-Jsn-15"- bad_month, etc.*/
- year_type t_year(1400);
- month_type t_month(1);
- day_type t_day(1);
- day_of_week_type wkday(0);
- const_itr itr(format_str.begin());
- while (itr != format_str.end() && (sitr != stream_end)) {
- if (*itr == '%') {
- if ( ++itr == format_str.end())
- break;
- if (*itr != '%') {
- switch(*itr) {
- case 'a':
- {
- //this value is just throw away. It could be used for
- //error checking potentially, but it isn't helpful in
- //actually constructing the date - we just need to get it
- //out of the stream
- match_results mr = m_weekday_short_names.match(sitr, stream_end);
- if(mr.current_match == match_results::PARSE_ERROR) {
- // check special_values
- if(sv_parser.match(sitr, stream_end, mr)) {
- return date_type(static_cast<special_values>(mr.current_match));
- }
- }
- wkday = mr.current_match;
- if (mr.has_remaining()) {
- use_current_char = true;
- }
- break;
- }
- case 'A':
- {
- //this value is just throw away. It could be used for
- //error checking potentially, but it isn't helpful in
- //actually constructing the date - we just need to get it
- //out of the stream
- match_results mr = m_weekday_long_names.match(sitr, stream_end);
- if(mr.current_match == match_results::PARSE_ERROR) {
- // check special_values
- if(sv_parser.match(sitr, stream_end, mr)) {
- return date_type(static_cast<special_values>(mr.current_match));
- }
- }
- wkday = mr.current_match;
- if (mr.has_remaining()) {
- use_current_char = true;
- }
- break;
- }
- case 'b':
- {
- match_results mr = m_month_short_names.match(sitr, stream_end);
- if(mr.current_match == match_results::PARSE_ERROR) {
- // check special_values
- if(sv_parser.match(sitr, stream_end, mr)) {
- return date_type(static_cast<special_values>(mr.current_match));
- }
- }
- t_month = month_type(mr.current_match);
- if (mr.has_remaining()) {
- use_current_char = true;
- }
- break;
- }
- case 'B':
- {
- match_results mr = m_month_long_names.match(sitr, stream_end);
- if(mr.current_match == match_results::PARSE_ERROR) {
- // check special_values
- if(sv_parser.match(sitr, stream_end, mr)) {
- return date_type(static_cast<special_values>(mr.current_match));
- }
- }
- t_month = month_type(mr.current_match);
- if (mr.has_remaining()) {
- use_current_char = true;
- }
- break;
- }
- case 'd':
- {
- match_results mr;
- day = fixed_string_to_int<short, charT>(sitr, stream_end, mr, 2);
- if(day == -1) {
- if(sv_parser.match(sitr, stream_end, mr)) {
- return date_type(static_cast<special_values>(mr.current_match));
- }
- }
- t_day = day_type(day);
- break;
- }
- case 'e':
- {
- match_results mr;
- day = fixed_string_to_int<short, charT>(sitr, stream_end, mr, 2, ' ');
- if(day == -1) {
- if(sv_parser.match(sitr, stream_end, mr)) {
- return date_type(static_cast<special_values>(mr.current_match));
- }
- }
- t_day = day_type(day);
- break;
- }
- case 'j':
- {
- match_results mr;
- day_of_year = fixed_string_to_int<short, charT>(sitr, stream_end, mr, 3);
- if(day_of_year == -1) {
- if(sv_parser.match(sitr, stream_end, mr)) {
- return date_type(static_cast<special_values>(mr.current_match));
- }
- }
- // these next two lines are so we get an exception with bad input
- day_of_year_type t_day_of_year(1);
- t_day_of_year = day_of_year_type(day_of_year);
- break;
- }
- case 'm':
- {
- match_results mr;
- month = fixed_string_to_int<short, charT>(sitr, stream_end, mr, 2);
- if(month == -1) {
- if(sv_parser.match(sitr, stream_end, mr)) {
- return date_type(static_cast<special_values>(mr.current_match));
- }
- }
- t_month = month_type(month);
- break;
- }
- case 'Y':
- {
- match_results mr;
- year = fixed_string_to_int<short, charT>(sitr, stream_end, mr, 4);
- if(year == -1) {
- if(sv_parser.match(sitr, stream_end, mr)) {
- return date_type(static_cast<special_values>(mr.current_match));
- }
- }
- t_year = year_type(year);
- break;
- }
- case 'y':
- {
- match_results mr;
- year = fixed_string_to_int<short, charT>(sitr, stream_end, mr, 2);
- if(year == -1) {
- if(sv_parser.match(sitr, stream_end, mr)) {
- return date_type(static_cast<special_values>(mr.current_match));
- }
- }
- year += 2000; //make 2 digit years in this century
- t_year = year_type(year);
- break;
- }
- default:
- {} //ignore those we don't understand
- }//switch
- }
- else { // itr == '%', second consecutive
- sitr++;
- }
- itr++; //advance past format specifier
- }
- else { //skip past chars in format and in buffer
- itr++;
- if (use_current_char) {
- use_current_char = false;
- }
- else {
- sitr++;
- }
- }
- }
- if (day_of_year > 0) {
- date_type d(static_cast<unsigned short>(year-1),12,31); //end of prior year
- return d + duration_type(day_of_year);
- }
- return date_type(t_year, t_month, t_day); // exceptions were thrown earlier
- // if input was no good
- }
- //! Throws bad_month if unable to parse
- month_type
- parse_month(std::istreambuf_iterator<charT>& sitr,
- std::istreambuf_iterator<charT>& stream_end,
- string_type format_str) const
- {
- match_results mr;
- return parse_month(sitr, stream_end, format_str, mr);
- }
- //! Throws bad_month if unable to parse
- month_type
- parse_month(std::istreambuf_iterator<charT>& sitr,
- std::istreambuf_iterator<charT>& stream_end,
- string_type format_str,
- match_results& mr) const
- {
- bool use_current_char = false;
- // skip leading whitespace
- while(std::isspace(*sitr) && sitr != stream_end) { ++sitr; }
- short month(0);
- const_itr itr(format_str.begin());
- while (itr != format_str.end() && (sitr != stream_end)) {
- if (*itr == '%') {
- if ( ++itr == format_str.end())
- break;
- if (*itr != '%') {
- switch(*itr) {
- case 'b':
- {
- mr = m_month_short_names.match(sitr, stream_end);
- month = mr.current_match;
- if (mr.has_remaining()) {
- use_current_char = true;
- }
- break;
- }
- case 'B':
- {
- mr = m_month_long_names.match(sitr, stream_end);
- month = mr.current_match;
- if (mr.has_remaining()) {
- use_current_char = true;
- }
- break;
- }
- case 'm':
- {
- month = var_string_to_int<short, charT>(sitr, stream_end, 2);
- // var_string_to_int returns -1 if parse failed. That will
- // cause a bad_month exception to be thrown so we do nothing here
- break;
- }
- default:
- {} //ignore those we don't understand
- }//switch
- }
- else { // itr == '%', second consecutive
- sitr++;
- }
- itr++; //advance past format specifier
- }
- else { //skip past chars in format and in buffer
- itr++;
- if (use_current_char) {
- use_current_char = false;
- }
- else {
- sitr++;
- }
- }
- }
- return month_type(month); // throws bad_month exception when values are zero
- }
- //! Expects 1 or 2 digits 1-31. Throws bad_day_of_month if unable to parse
- day_type
- parse_var_day_of_month(std::istreambuf_iterator<charT>& sitr,
- std::istreambuf_iterator<charT>& stream_end) const
- {
- // skip leading whitespace
- while(std::isspace(*sitr) && sitr != stream_end) { ++sitr; }
- return day_type(var_string_to_int<short, charT>(sitr, stream_end, 2));
- }
- //! Expects 2 digits 01-31. Throws bad_day_of_month if unable to parse
- day_type
- parse_day_of_month(std::istreambuf_iterator<charT>& sitr,
- std::istreambuf_iterator<charT>& stream_end) const
- {
- // skip leading whitespace
- while(std::isspace(*sitr) && sitr != stream_end) { ++sitr; }
- //return day_type(var_string_to_int<short, charT>(sitr, stream_end, 2));
- match_results mr;
- return day_type(fixed_string_to_int<short, charT>(sitr, stream_end, mr, 2));
- }
- day_of_week_type
- parse_weekday(std::istreambuf_iterator<charT>& sitr,
- std::istreambuf_iterator<charT>& stream_end,
- string_type format_str) const
- {
- match_results mr;
- return parse_weekday(sitr, stream_end, format_str, mr);
- }
- day_of_week_type
- parse_weekday(std::istreambuf_iterator<charT>& sitr,
- std::istreambuf_iterator<charT>& stream_end,
- string_type format_str,
- match_results& mr) const
- {
- bool use_current_char = false;
- // skip leading whitespace
- while(std::isspace(*sitr) && sitr != stream_end) { ++sitr; }
- short wkday(0);
- const_itr itr(format_str.begin());
- while (itr != format_str.end() && (sitr != stream_end)) {
- if (*itr == '%') {
- if ( ++itr == format_str.end())
- break;
- if (*itr != '%') {
- switch(*itr) {
- case 'a':
- {
- //this value is just throw away. It could be used for
- //error checking potentially, but it isn't helpful in
- //actually constructing the date - we just need to get it
- //out of the stream
- mr = m_weekday_short_names.match(sitr, stream_end);
- wkday = mr.current_match;
- if (mr.has_remaining()) {
- use_current_char = true;
- }
- break;
- }
- case 'A':
- {
- //this value is just throw away. It could be used for
- //error checking potentially, but it isn't helpful in
- //actually constructing the date - we just need to get it
- //out of the stream
- mr = m_weekday_long_names.match(sitr, stream_end);
- wkday = mr.current_match;
- if (mr.has_remaining()) {
- use_current_char = true;
- }
- break;
- }
- case 'w':
- {
- // weekday as number 0-6, Sunday == 0
- wkday = var_string_to_int<short, charT>(sitr, stream_end, 2);
- break;
- }
- default:
- {} //ignore those we don't understand
- }//switch
- }
- else { // itr == '%', second consecutive
- sitr++;
- }
- itr++; //advance past format specifier
- }
- else { //skip past chars in format and in buffer
- itr++;
- if (use_current_char) {
- use_current_char = false;
- }
- else {
- sitr++;
- }
- }
- }
- return day_of_week_type(wkday); // throws bad_day_of_month exception
- // when values are zero
- }
- //! throws bad_year if unable to parse
- year_type
- parse_year(std::istreambuf_iterator<charT>& sitr,
- std::istreambuf_iterator<charT>& stream_end,
- string_type format_str) const
- {
- match_results mr;
- return parse_year(sitr, stream_end, format_str, mr);
- }
- //! throws bad_year if unable to parse
- year_type
- parse_year(std::istreambuf_iterator<charT>& sitr,
- std::istreambuf_iterator<charT>& stream_end,
- string_type format_str,
- match_results& mr) const
- {
- // skip leading whitespace
- while(std::isspace(*sitr) && sitr != stream_end) { ++sitr; }
- unsigned short year(0);
- const_itr itr(format_str.begin());
- while (itr != format_str.end() && (sitr != stream_end)) {
- if (*itr == '%') {
- if ( ++itr == format_str.end())
- break;
- if (*itr != '%') {
- //match_results mr;
- switch(*itr) {
- case 'Y':
- {
- // year from 4 digit string
- year = fixed_string_to_int<short, charT>(sitr, stream_end, mr, 4);
- break;
- }
- case 'y':
- {
- // year from 2 digit string (no century)
- year = fixed_string_to_int<short, charT>(sitr, stream_end, mr, 2);
- year += 2000; //make 2 digit years in this century
- break;
- }
- default:
- {} //ignore those we don't understand
- }//switch
- }
- else { // itr == '%', second consecutive
- sitr++;
- }
- itr++; //advance past format specifier
- }
- else { //skip past chars in format and in buffer
- itr++;
- sitr++;
- }
- }
- return year_type(year); // throws bad_year exception when values are zero
- }
- private:
- string_type m_format;
- parse_tree_type m_month_short_names;
- parse_tree_type m_month_long_names;
- parse_tree_type m_weekday_short_names;
- parse_tree_type m_weekday_long_names;
- };
- } } //namespace
- #endif
|