gnucash/gnucash/import-export/import-parse.c
luz.paz 65bb60d621 Fix typos for gnuchash/ doc/ bindings/ and misc.
Typos found via `codespell -q 3 -D ~/Projects/codespell/codespell_lib/data/dictionary.txt -S *.po,./po,*.min.js,./ChangeLog*,./NEWS,./doc/README*,./AUTHORS,./libgnucash/tax/us/txf-de*,./data/accounts -L ans,cas,dragable,gae,iff,iif,mut,nd,numer,startd,stoll`
2019-09-13 20:26:03 -04:00

414 lines
12 KiB
C

/*
* import-parse.c -- a generic "parser" API for importers.. Allows importers
* to parse dates and numbers, and provides a UI to ask for users to
* resolve ambiguities.
*
* Created by: Derek Atkins <derek@ihtfp.com>
* Copyright (c) 2003 Derek Atkins <warlord@MIT.EDU>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of
* the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, contact:
*
* Free Software Foundation Voice: +1-617-542-5942
* 51 Franklin Street, Fifth Floor Fax: +1-617-542-2652
* Boston, MA 02110-1301, USA gnu@gnu.org
*/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include <glib.h>
#include <string.h>
/* For regex */
#include <sys/types.h>
#include <regex.h>
#include "gnc-engine.h"
#include "gnc-ui-util.h"
#include "import-parse.h"
static QofLogModule log_module = GNC_MOD_IMPORT;
/* numeric regular expressions */
static regex_t decimal_radix_regex;
static regex_t comma_radix_regex;
/* date regular expressions */
static regex_t date_regex;
static regex_t date_mdy_regex;
static regex_t date_ymd_regex;
static gboolean regex_compiled = FALSE;
static void
compile_regex(void)
{
int flags = REG_EXTENDED;
/* compile the numeric regular expressions */
regcomp(&decimal_radix_regex,
"^ *\\$?[+-]?\\$?[0-9]+ *$|^ *\\$?[+-]?\\$?[0-9]?[0-9]?[0-9]?(,[0-9][0-9][0-9])*(\\.[0-9]*)? *$|^ *\\$?[+-]?\\$?[0-9]+\\.[0-9]* *$", flags);
regcomp(&comma_radix_regex,
"^ *\\$?[+-]?\\$?[0-9]+ *$|^ *\\$?[+-]?\\$?[0-9]?[0-9]?[0-9]?(\\.[0-9][0-9][0-9])*(,[0-9]*)? *$|^ *\\$?[+-]?\\$?[0-9]+,[0-9]* *$", flags);
/* compile the date-parsing regular expressions */
regcomp(&date_regex,
"^ *([0-9]+) *[-/.'] *([0-9]+) *[-/.'] *([0-9]+).*$|^ *([0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]).*$", flags);
regcomp(&date_mdy_regex, "([0-9][0-9])([0-9][0-9])([0-9][0-9][0-9][0-9])", flags);
regcomp(&date_ymd_regex, "([0-9][0-9][0-9][0-9])([0-9][0-9])([0-9][0-9])", flags);
regex_compiled = TRUE;
}
static gint
my_strntol(const char *str, int len)
{
gint res = 0;
g_return_val_if_fail(str, 0);
g_return_val_if_fail(len, 0);
while (len--)
{
if (*str < '0' || *str > '9')
{
str++;
continue;
}
res *= 10;
res += *(str++) - '0';
}
return res;
}
/*
* based on a trio match (matches in spaces 1, 2, and 3), and a list
* of possible date formats, return the list of formats that this string
* could actually be.
*/
static GncImportFormat
check_date_format(const char * str, regmatch_t *match, GncImportFormat fmts)
{
GncImportFormat res = 0;
int len0 = 0, len1 = 0, len2 = 0;
int val0 = 0, val1 = 0, val2 = 0;
g_return_val_if_fail(match, res);
g_return_val_if_fail(fmts, res);
/* Compute the lengths */
len0 = match[1].rm_eo - match[1].rm_so;
len1 = match[2].rm_eo - match[2].rm_so;
len2 = match[3].rm_eo - match[3].rm_so;
/* compute the numeric values */
val0 = my_strntol(str + match[1].rm_so, len0);
val1 = my_strntol(str + match[2].rm_so, len1);
val2 = my_strntol(str + match[3].rm_so, len2);
/* Filter out the possibilities. Hopefully only one will remain */
if (val0 > 12) import_clear_flag(fmts, GNCIF_DATE_MDY);
if (val0 > 31) import_clear_flag(fmts, GNCIF_DATE_DMY);
if (val0 < 1)
{
import_clear_flag(fmts, GNCIF_DATE_DMY);
import_clear_flag(fmts, GNCIF_DATE_MDY);
}
if (val1 > 12)
{
import_clear_flag(fmts, GNCIF_DATE_DMY);
import_clear_flag(fmts, GNCIF_DATE_YMD);
}
if (val1 > 31)
{
import_clear_flag(fmts, GNCIF_DATE_MDY);
import_clear_flag(fmts, GNCIF_DATE_YDM);
}
if (val2 > 12) import_clear_flag(fmts, GNCIF_DATE_YDM);
if (val2 > 31) import_clear_flag(fmts, GNCIF_DATE_YMD);
if (val2 < 1)
{
import_clear_flag(fmts, GNCIF_DATE_YMD);
import_clear_flag(fmts, GNCIF_DATE_YDM);
}
/* if we've got a 4-character year, make sure the value is greater
* than 1930 and less than 2100. XXX: be sure to fix this by 2100!
*/
if (len0 == 4 && (val0 < 1930 || val0 > 2100))
{
import_clear_flag(fmts, GNCIF_DATE_YMD);
import_clear_flag(fmts, GNCIF_DATE_YDM);
}
if (len2 == 4 && (val2 < 1930 || val2 > 2100))
{
import_clear_flag(fmts, GNCIF_DATE_MDY);
import_clear_flag(fmts, GNCIF_DATE_DMY);
}
/* If the first string has a length of only 1, then it is definitely
* not a year (although it could be a month or day).
*/
if (len0 == 1)
{
import_clear_flag(fmts, GNCIF_DATE_YMD);
import_clear_flag(fmts, GNCIF_DATE_YDM);
}
return fmts;
}
GncImportFormat
gnc_import_test_numeric(const char* str, GncImportFormat fmts)
{
GncImportFormat res = 0;
g_return_val_if_fail(str, fmts);
if (!regex_compiled)
compile_regex();
if ((fmts & GNCIF_NUM_PERIOD) && !regexec(&decimal_radix_regex, str, 0, NULL, 0))
res |= GNCIF_NUM_PERIOD;
if ((fmts & GNCIF_NUM_COMMA) && !regexec(&comma_radix_regex, str, 0, NULL, 0))
res |= GNCIF_NUM_COMMA;
return res;
}
GncImportFormat
gnc_import_test_date(const char* str, GncImportFormat fmts)
{
regmatch_t match[5];
GncImportFormat res = 0;
g_return_val_if_fail(str, fmts);
g_return_val_if_fail(strlen(str) > 1, fmts);
if (!regex_compiled)
compile_regex();
if (!regexec(&date_regex, str, 5, match, 0))
{
if (match[1].rm_so != -1)
res = check_date_format(str, match, fmts);
else
{
/* Hmm, it matches XXXXXXXX, but is this YYYYxxxx or xxxxYYYY?
* let's try both ways and let the parser check that YYYY is
* valid.
*/
char temp[9];
g_return_val_if_fail(match[4].rm_so != -1, fmts);
g_return_val_if_fail(match[4].rm_eo - match[4].rm_so == 8, fmts);
/* make a temp copy of the XXXXXXXX string */
strncpy(temp, str + match[4].rm_so, 8);
temp[8] = '\0';
/* then check it against the ymd or mdy formats, as necessary */
if (((fmts & GNCIF_DATE_YDM) || (fmts & GNCIF_DATE_YMD)) &&
!regexec(&date_ymd_regex, temp, 4, match, 0))
res |= check_date_format(temp, match, fmts);
if (((fmts & GNCIF_DATE_DMY) || (fmts & GNCIF_DATE_MDY)) &&
!regexec(&date_mdy_regex, temp, 4, match, 0))
res |= check_date_format(temp, match, fmts);
}
}
return res;
}
gboolean
gnc_import_parse_numeric(const char* str, GncImportFormat fmt, gnc_numeric *val)
{
g_return_val_if_fail(str, FALSE);
g_return_val_if_fail(val, FALSE);
g_return_val_if_fail(fmt, FALSE);
g_return_val_if_fail(!(fmt & (fmt - 1)), FALSE);
switch (fmt)
{
case GNCIF_NUM_PERIOD:
return xaccParseAmountExtended(str, TRUE, '-', '.', ',', NULL, "$+",
val, NULL);
case GNCIF_NUM_COMMA:
return xaccParseAmountExtended(str, TRUE, '-', ',', '.', NULL, "$+",
val, NULL);
default:
PERR("invalid format: %d", fmt);
return FALSE;
}
}
/* Handle y2k fixes, etc.
* obtaining the year "00", "2000", and "19100" all mean the same thing.
* output is an integer representing the year in the C.E.
*/
static int
fix_year(int y)
{
/* two-digit numbers less than "70" are interpreted to be post-2000. */
if (y < 70)
return (y + 2000);
/* fix a common bug in printing post-2000 dates as 19100, etc. */
if (y > 19000)
return (1900 + (y - 19000));
/* At this point we just want to make sure that this is a real date.
* y _should_ be a 'unix year' (which is the number of years since
* 1900), but it _COULD_ be a full date (1999, 2001, etc.). At some
* point in the future we can't tell the difference, but are we really
* going to care if this code fails in 3802?
*/
if (y < 1902)
return (y + 1900);
/* y is good as it is */
return y;
}
gboolean
gnc_import_parse_date(const char *str, GncImportFormat fmt, time64 *val)
{
regmatch_t match[5];
char temp[9];
const char *datestr;
int v0 = 0, v1 = 0, v2 = 0;
int m = 0, d = 0, y = 0;
g_return_val_if_fail(str, FALSE);
g_return_val_if_fail(val, FALSE);
g_return_val_if_fail(fmt, FALSE);
g_return_val_if_fail(!(fmt & (fmt - 1)), FALSE);
if (!regexec(&date_regex, str, 5, match, 0))
{
if (match[1].rm_so != -1)
datestr = str;
else
{
/* date is of the form XXXXXXX; save it to a temp string and
* split it based on the format, either YYYYaabb or aabbYYYY
*/
g_return_val_if_fail(match[4].rm_so != -1, FALSE);
g_return_val_if_fail(match[4].rm_eo - match[4].rm_so == 8, FALSE);
strncpy(temp, str + match[4].rm_so, 8);
temp[8] = '\0';
switch (fmt)
{
case GNCIF_DATE_DMY:
case GNCIF_DATE_MDY:
g_return_val_if_fail(!regexec(&date_mdy_regex, temp, 4, match, 0), FALSE);
break;
case GNCIF_DATE_YMD:
case GNCIF_DATE_YDM:
g_return_val_if_fail(!regexec(&date_ymd_regex, temp, 4, match, 0), FALSE);
break;
default:
PERR("Invalid date format provided: %d", fmt);
return FALSE;
}
datestr = temp;
}
/* datestr points to the date string, and match[123] contains the matches. */
if (match[1].rm_so == -1 || match[2].rm_so == -1 || match[3].rm_so == -1)
{
PERR("can't interpret date %s", str);
return FALSE;
}
/* grab the numerics */
v0 = my_strntol(datestr + match[1].rm_so, match[1].rm_eo - match[1].rm_so);
v1 = my_strntol(datestr + match[2].rm_so, match[2].rm_eo - match[2].rm_so);
v2 = my_strntol(datestr + match[3].rm_so, match[3].rm_eo - match[3].rm_so);
switch (fmt)
{
case GNCIF_DATE_DMY:
if (v0 > 0 && v0 <= 31 && v1 > 0 && v1 <= 12 && v2 > 0)
{
d = v0;
m = v1;
y = v2;
}
else
PERR("format is d/m/y but date is %s", str);
break;
case GNCIF_DATE_MDY:
if (v0 > 0 && v0 <= 12 && v1 > 0 && v1 <= 31 && v2 > 0)
{
m = v0;
d = v1;
y = v2;
}
else
PERR("format is m/d/y but date is %s", str);
break;
case GNCIF_DATE_YMD:
if (v0 > 0 && v1 > 0 && v1 <= 12 && v2 > 0 && v2 <= 31)
{
y = v0;
m = v1;
d = v2;
}
else
PERR("format is y/m/d but date is %s", str);
break;
case GNCIF_DATE_YDM:
if (v0 > 0 && v1 > 0 && v1 <= 31 && v2 > 0 && v2 <= 12)
{
y = v0;
d = v1;
m = v2;
}
else
PERR("format is y/d/m but date is %s", str);
break;
default:
PERR("invalid date format: %d", fmt);
}
if (!m || !d || !y)
return FALSE;
y = fix_year(y);
*val = gnc_dmy2time64(d, m, y);
return TRUE;
}
return FALSE;
}