Add constructor to create GncDate from string and predefined date format

Primary use case is for parsing dates from external sources (importers)
This commit is contained in:
Geert Janssens 2017-04-21 16:58:20 +02:00
parent 9b30bb2b62
commit 209f971589
3 changed files with 303 additions and 4 deletions

View File

@ -29,13 +29,19 @@ extern "C"
}
#include <boost/date_time/gregorian/gregorian.hpp>
#include <boost/date_time/posix_time/posix_time.hpp>
#include <boost/regex.hpp>
#include <libintl.h>
#include <map>
#include <memory>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>
#include "gnc-timezone.hpp"
#include "gnc-datetime.hpp"
#define N_(string) string //So that xgettext will find it
using Date = boost::gregorian::date;
using Month = boost::gregorian::greg_month;
using PTime = boost::posix_time::ptime;
@ -58,6 +64,77 @@ static constexpr auto ticks_per_second = INT64_C(1000000);
static constexpr auto ticks_per_second = INT64_C(1000000000);
#endif
/* Vector of date formats understood by gnucash and corresponding regex
* to parse each from an external source
* Note: while the format names are using a "-" as separator, the
* regexes will accept any of "-/.' " and will also work for dates
* without separators.
*
* Every regex consists of two alternatives capturing the same named
* groups (YEAR, MONTH and DAY): the first one for components divided by
* separators, the second one for fixed-width components without
* separators. The string constructor of GncDateImpl selects an entry by
* comparing its format name with the requested format and reads the date
* components through these group names, so the names and the groups have
* to stay in sync with that constructor.
*/
const std::vector<GncDateFormat> GncDate::c_formats ({
// Formats with a year: the separated alternative takes any number of
// digits per component, the unseparated one requires exactly 8 digits.
GncDateFormat {
N_("y-m-d"),
"(?:" // either y-m-d
"(?<YEAR>[0-9]+)[-/.' ]+"
"(?<MONTH>[0-9]+)[-/.' ]+"
"(?<DAY>[0-9]+)"
"|" // or CCYYMMDD
"(?<YEAR>[0-9]{4})"
"(?<MONTH>[0-9]{2})"
"(?<DAY>[0-9]{2})"
")"
},
GncDateFormat {
N_("d-m-y"),
"(?:" // either d-m-y
"(?<DAY>[0-9]+)[-/.' ]+"
"(?<MONTH>[0-9]+)[-/.' ]+"
"(?<YEAR>[0-9]+)"
"|" // or DDMMCCYY
"(?<DAY>[0-9]{2})"
"(?<MONTH>[0-9]{2})"
"(?<YEAR>[0-9]{4})"
")"
},
GncDateFormat {
N_("m-d-y"),
"(?:" // either m-d-y
"(?<MONTH>[0-9]+)[-/.' ]+"
"(?<DAY>[0-9]+)[-/.' ]+"
"(?<YEAR>[0-9]+)"
"|" // or MMDDCCYY
"(?<MONTH>[0-9]{2})"
"(?<DAY>[0-9]{2})"
"(?<YEAR>[0-9]{4})"
")"
},
// Note year is still checked for in the regexes below
// This is to be able to raise an error if one is found for a yearless date format
// (the YEAR group is optional here; the string constructor of GncDateImpl
// throws when it captured anything for a format without a 'y')
GncDateFormat {
(N_("d-m")),
"(?:" // either d-m(-y)
"(?<DAY>[0-9]+)[-/.' ]+"
"(?<MONTH>[0-9]+)(?:[-/.' ]+"
"(?<YEAR>[0-9]+))?"
"|" // or DDMM(CCYY)
"(?<DAY>[0-9]{2})"
"(?<MONTH>[0-9]{2})"
"(?<YEAR>[0-9]+)?" // any trailing digits are captured as a year
")"
},
GncDateFormat {
(N_("m-d")),
"(?:" // either m-d(-y)
"(?<MONTH>[0-9]+)[-/.' ]+"
"(?<DAY>[0-9]+)(?:[-/.' ]+"
"(?<YEAR>[0-9]+))?"
"|" // or MMDD(CCYY)
"(?<MONTH>[0-9]{2})"
"(?<DAY>[0-9]{2})"
"(?<YEAR>[0-9]+)?" // any trailing digits are captured as a year
")"
}
});
/** Private implementation of GncDateTime. See the documentation for that class.
*/
static LDT
@ -126,6 +203,7 @@ public:
GncDateImpl(const int year, const int month, const int day) :
m_greg(year, static_cast<Month>(month), day) {}
GncDateImpl(Date d) : m_greg(d) {}
GncDateImpl(const std::string str, const std::string fmt);
void today() { m_greg = boost::gregorian::day_clock::local_day(); }
ymd year_month_day() const;
@ -290,8 +368,46 @@ GncDateTimeImpl::format_zulu(const char* format) const
return ss.str();
}
/* Member function definitions for GncDateImpl.
*/
GncDateImpl::GncDateImpl(const std::string str, const std::string fmt) :
m_greg(boost::gregorian::day_clock::local_day()) /* Temporarily initialized to today, will be used and adjusted in the code below */
{
auto iter = std::find_if(GncDate::c_formats.cbegin(), GncDate::c_formats.cend(),
[&fmt](const GncDateFormat& v){ return (v.m_fmt == fmt); } );
if (iter == GncDate::c_formats.cend())
throw std::invalid_argument(N_("Unknown date format specifier passed as argument."));
boost::regex r(iter->m_re);
boost::smatch what;
if(!boost::regex_search(str, what, r)) // regex didn't find a match
throw std::invalid_argument (N_("Value can't be parsed into a date using the selected date format."));
// Bail out if a year was found with a yearless format specifier
auto fmt_has_year = (fmt.find('y') != std::string::npos);
if (!fmt_has_year && (what.length("YEAR") != 0))
throw std::invalid_argument (N_("Value appears to contain a year while the selected format forbids this."));
int year;
if (fmt_has_year)
{
/* The input dates have a year, so use that one */
year = std::stoi (what.str("YEAR"));
/* We assume two-digit years to be in the range 1969 - 2068. */
if (year < 69)
year += 2000;
else if (year < 100)
year += 1900;
}
else /* The input dates have no year, so use current year */
year = m_greg.year(); // Can use m_greg here as it was already initialized in the initializer list earlier
m_greg = Date(year,
static_cast<Month>(std::stoi (what.str("MONTH"))),
std::stoi (what.str("DAY")));
}
ymd
GncDateImpl::year_month_day() const
{
@ -376,6 +492,8 @@ GncDateTime::format_zulu(const char* format) const
/* GncDate constructors: each one only sets up the private implementation
 * object, all the work is done by the matching GncDateImpl constructor.
 */

// Default constructed implementation.
GncDate::GncDate()
    : m_impl{new GncDateImpl}
{
}

// From year, month and day numbers.
GncDate::GncDate(int year, int month, int day)
    : m_impl{new GncDateImpl(year, month, day)}
{
}

// From a string holding a date written in the format named fmt.
GncDate::GncDate(const std::string str, const std::string fmt)
    : m_impl{new GncDateImpl(str, fmt)}
{
}

// Takes over an already constructed implementation object.
GncDate::GncDate(std::unique_ptr<GncDateImpl> impl)
    : m_impl{std::move(impl)}
{
}

GncDate::GncDate(GncDate&&) = default;

View File

@ -28,6 +28,7 @@
#include <cstdint>
#include <memory>
#include <string>
#include <vector>
typedef struct
{
@ -152,11 +153,28 @@ private:
std::unique_ptr<GncDateTimeImpl> m_impl;
};
/** Pairs the name of a date format with the regular expression used to
 * parse dates written in that format.
 */
class GncDateFormat
{
    /* GncDateImpl is the only code that gets to read the regex. */
    friend class GncDateImpl;

public:
    /** Construct a date format description.
     *
     * @param fmt The name of the format.
     * @param re The regular expression matching dates in this format.
     */
    GncDateFormat (const char* fmt, const char* re)
        : m_fmt{fmt}, m_re{re}
    {
    }

    /** The name of the format. */
    const std::string m_fmt;

private:
    /** The regular expression used to parse dates in this format. */
    const std::string m_re;
};
class GncDate
{
public:/** Construct a GncDate representing the current day.
*/
GncDate();;
public:
/** A vector with all the date formats supported by the string constructor
*/
static const std::vector<GncDateFormat> c_formats;
/** Construct a GncDate representing the current day.
*/
GncDate();
/** Construct a GncDate representing the given year, month, and day in
* the proleptic Gregorian calendar.
*
@ -171,6 +189,23 @@ class GncDate
* of the constrained range.
*/
GncDate(int year, int month, int day);
/** Construct a GncDate by parsing a string assumed to be in the format
* passed in.
*
* The currently recognized formats are d-m-y, m-d-y, y-m-d, m-d, d-m.
* Note while the format descriptions use "-" as separator any of
* "-" (hyphen), "/" (slash), "'" (single quote), " " (space) or
* "." will be accepted.
*
* @param str The string to be interpreted.
* @param fmt The expected date format of the string passed in.
* @exception std::invalid_argument if
* - fmt is not one of the formats listed in c_formats
* - the string couldn't be parsed using the provided format
* - fmt doesn't specify a year, yet a year was found in the string
* @exception std::out_of_range if any of the date components is outside
* of its limit (like month being 13, or day being 31 in February)
*/
GncDate(const std::string str, const std::string fmt);
GncDate(std::unique_ptr<GncDateImpl> impl);
GncDate(GncDate&&);
~GncDate();

View File

@ -37,6 +37,152 @@ TEST(gnc_date_constructors, test_ymd_constructor)
EXPECT_FALSE(date.isnull());
}
/* One test case for the string constructor of GncDate: the input and the
 * date expected to come out of it.
 */
struct parse_date_data
{
    const char* date_fmt; // name of the date format to parse with
    const char* date_str; // string to be parsed
    int exp_year;         // expected year, month and day
    int exp_month;
    int exp_day;
};
/* Test the GncDate constructor taking a string and a date format name.
 *
 * Each entry of the table is parsed with the given format. Entries that
 * are expected to be refused have -1 for year, month and day; a refusal is
 * an exception of type std::invalid_argument or std::out_of_range.
 */
TEST(gnc_date_constructors, test_str_format_constructor)
{
    auto today = GncDate();
    auto today_ymd = today.year_month_day();
    auto curr_year = today_ymd.year;

    parse_date_data test_dates[] =
    {
        // supported combinations  -/.'
        { "y-m-d", "2013-08-01", 2013,  8,  1},
        { "y-m-d",  "2013-8-01", 2013,  8,  1},
        { "y-m-d",  "2013-08-1", 2013,  8,  1},
        { "y-m-d",   "2013-8-1", 2013,  8,  1},
        { "y-m-d",   "13-08-01", 2013,  8,  1},
        { "y-m-d",    "13-8-01", 2013,  8,  1},
        { "y-m-d",    "13-08-1", 2013,  8,  1},
        { "y-m-d",     "13-8-1", 2013,  8,  1},
        { "y-m-d", "2009/11/04", 2009, 11,  4},
        { "y-m-d",  "1985.3.12", 1985,  3, 12},
        { "y-m-d",      "3'6'8", 2003,  6,  8},
        { "y-m-d",   "20130801", 2013,  8,  1},
        { "d-m-y", "01-08-2013", 2013,  8,  1},
        { "d-m-y",  "01-8-2013", 2013,  8,  1},
        { "d-m-y",  "1-08-2013", 2013,  8,  1},
        { "d-m-y",   "1-8-2013", 2013,  8,  1},
        { "d-m-y",   "01-08-13", 2013,  8,  1},
        { "d-m-y",    "01-8-13", 2013,  8,  1},
        { "d-m-y",    "1-08-13", 2013,  8,  1},
        { "d-m-y",     "1-8-13", 2013,  8,  1},
        { "d-m-y", "04/11/2009", 2009, 11,  4},
        { "d-m-y",  "12.3.1985", 1985,  3, 12},
        { "d-m-y",      "8'6'3", 2003,  6,  8},
        { "d-m-y",   "01082013", 2013,  8,  1},
        { "m-d-y", "08-01-2013", 2013,  8,  1},
        { "m-d-y",  "8-01-2013", 2013,  8,  1},
        { "m-d-y",  "08-1-2013", 2013,  8,  1},
        { "m-d-y",   "8-1-2013", 2013,  8,  1},
        { "m-d-y",   "08-01-13", 2013,  8,  1},
        { "m-d-y",    "8-01-13", 2013,  8,  1},
        { "m-d-y",    "08-1-13", 2013,  8,  1},
        { "m-d-y",     "8-1-13", 2013,  8,  1},
        { "m-d-y", "11/04/2009", 2009, 11,  4},
        { "m-d-y",  "3.12.1985", 1985,  3, 12},
        { "m-d-y",      "6'8'3", 2003,  6,  8},
        { "m-d-y",   "08012013", 2013,  8,  1},
        {   "d-m",      "01-08", curr_year,  8,  1},
        {   "d-m",       "01-8", curr_year,  8,  1},
        {   "d-m",       "1-08", curr_year,  8,  1},
        {   "d-m",        "1-8", curr_year,  8,  1},
        {   "d-m",      "04/11", curr_year, 11,  4},
        {   "d-m",       "12.3", curr_year,  3, 12},
        {   "d-m",        "8'6", curr_year,  6,  8},
        {   "d-m",       "0108", curr_year,  8,  1},
        {   "m-d",      "08-01", curr_year,  8,  1},
        {   "m-d",       "8-01", curr_year,  8,  1},
        {   "m-d",       "08-1", curr_year,  8,  1},
        {   "m-d",        "8-1", curr_year,  8,  1},
        {   "m-d",      "11/04", curr_year, 11,  4},
        {   "m-d",       "3.12", curr_year,  3, 12},
        {   "m-d",        "6'8", curr_year,  6,  8},
        {   "m-d",       "0801", curr_year,  8,  1},

        // ambiguous date formats
        // current parser doesn't know how to disambiguate
        // and hence refuses to parse
        // can possibly improved with a smarter parser
        { "y-m-d",     "130801", -1, -1, -1},
        { "d-m-y",     "010813", -1, -1, -1},
        { "m-d-y",     "080113", -1, -1, -1},

        // Combinations that don't make sense
        // but can still be entered by a user
        // Should ideally all result in refusal to parse...
        { "y-m-d",      "08-01", -1, -1, -1},
        { "y-m-d",       "0801", -1, -1, -1},
        { "d-m-y",      "01-08", -1, -1, -1},
        { "d-m-y",       "0108", -1, -1, -1},
        { "m-d-y",      "08-01", -1, -1, -1},
        { "m-d-y",       "0801", -1, -1, -1},
        {   "d-m", "01-08-2013", -1, -1, -1},
        {   "d-m",   "01-08-13", -1, -1, -1},
        {   "d-m",   "08-08-08", -1, -1, -1},
        {   "d-m",   "01082013", -1, -1, -1},
        {   "d-m",     "010813", -1, -1, -1},
        {   "d-m",   "20130108", -1, -1, -1},
        {   "m-d", "08-01-2013", -1, -1, -1},
        {   "m-d",   "08-01-13", -1, -1, -1},
        {   "m-d", "2013-08-01", -1, -1, -1},
        {   "m-d",   "09-08-01", -1, -1, -1},
        {   "m-d",   "08012013", -1, -1, -1},
        {   "m-d",     "080113", -1, -1, -1},
        {   "m-d",   "20130801", -1, -1, -1},

        // Date components outside of their limits should be refused as well
        { "y-m-d", "2013-13-01", -1, -1, -1},
        { "y-m-d", "2013-02-31", -1, -1, -1},
        { "d-m-y", "32-01-2013", -1, -1, -1},

        // Unknown date format specifier should also trigger an exception
        { "y-d-m H:M:S", "20130801", -1, -1, -1},
    };

    for (const auto& test : test_dates)
    {
        int got_year = 0, got_month = 0, got_day = 0;

        try
        {
            auto test_date = GncDate (std::string(test.date_str), test.date_fmt);
            auto test_ymd = test_date.year_month_day();
            got_year = test_ymd.year;
            got_month = test_ymd.month;
            got_day = test_ymd.day;
        }
        catch (const std::invalid_argument& e)
        {
            got_year = got_month = got_day = -1;
        }
        catch (const std::out_of_range& e)
        {
            // Thrown for date components outside of their limits. Treat it
            // as a refusal so it's reported by the expectation below rather
            // than aborting the whole test.
            got_year = got_month = got_day = -1;
        }

        EXPECT_TRUE ((got_year  == test.exp_year) &&
                     (got_month == test.exp_month) &&
                     (got_day   == test.exp_day))
            << "GncDate constructor failed for str " << test.date_str
            << " and fmt " << test.date_fmt << ".\n"
            << "Expected: year " << test.exp_year
            << ", month " << test.exp_month
            << ", day " << test.exp_day << "\n"
            << "Actual: year " << got_year << ", month "
            << got_month << ", day " << got_day << "\n";
    }
}
TEST(gnc_datetime_constructors, test_default_constructor)
{
GncDateTime atime;