mirror of
https://github.com/Gnucash/gnucash.git
synced 2025-02-25 18:55:30 -06:00
Add fixed width file parser based on boost::tokenize
Effective c++ notes: - explicitly declare special member functions - explicitly declare overriding functions as override
This commit is contained in:
parent
efcd266971
commit
48cfbc23d4
@ -438,6 +438,7 @@ src/import-export/csv-imp/gnc-csv-gnumeric-popup.c
|
||||
src/import-export/csv-imp/gnc-csv-model.c
|
||||
src/import-export/csv-imp/gnc-csv-tokenizer.cpp
|
||||
src/import-export/csv-imp/gnc-csv-trans-settings.c
|
||||
src/import-export/csv-imp/gnc-fw-tokenizer.cpp
|
||||
src/import-export/csv-imp/gncmod-csv-import.c
|
||||
src/import-export/csv-imp/gnc-plugin-csv-import.c
|
||||
src/import-export/csv-imp/gnc-tokenizer.cpp
|
||||
|
@ -14,6 +14,7 @@ SET(csv_import_SOURCES
|
||||
gnc-csv-gnumeric-popup.c
|
||||
gnc-csv-tokenizer.cpp
|
||||
gnc-csv-trans-settings.c
|
||||
gnc-fw-tokenizer.cpp
|
||||
gnc-tokenizer.cpp
|
||||
${CMAKE_SOURCE_DIR}/lib/stf/stf-parse.c
|
||||
${CMAKE_SOURCE_DIR}/lib/goffice/go-charmap-sel.c
|
||||
@ -36,6 +37,7 @@ SET(csv_import_noinst_HEADERS
|
||||
gnc-csv-gnumeric-popup.h
|
||||
gnc-csv-tokenizer.hpp
|
||||
gnc-csv-trans-settings.h
|
||||
gnc-fw-tokenizer.hpp
|
||||
gnc-tokenizer.hpp
|
||||
${CMAKE_SOURCE_DIR}/lib/stf/stf-parse.h
|
||||
${CMAKE_SOURCE_DIR}/lib/goffice/go-charmap-sel.h
|
||||
|
@ -14,6 +14,7 @@ libgncmod_csv_import_la_SOURCES = \
|
||||
gnc-csv-model.c \
|
||||
gnc-csv-tokenizer.cpp \
|
||||
gnc-csv-gnumeric-popup.c \
|
||||
gnc-fw-tokenizer.cpp \
|
||||
gnc-tokenizer.cpp \
|
||||
gnc-csv-trans-settings.c
|
||||
|
||||
@ -28,6 +29,7 @@ noinst_HEADERS = \
|
||||
gnc-csv-model.h \
|
||||
gnc-csv-tokenizer.hpp \
|
||||
gnc-csv-gnumeric-popup.h \
|
||||
gnc-fw-tokenizer.hpp \
|
||||
gnc-tokenizer.hpp \
|
||||
gnc-csv-trans-settings.h
|
||||
|
||||
|
48
src/import-export/csv-imp/gnc-fw-tokenizer.cpp
Normal file
48
src/import-export/csv-imp/gnc-fw-tokenizer.cpp
Normal file
@ -0,0 +1,48 @@
|
||||
#include "gnc-fw-tokenizer.hpp"
|
||||
|
||||
#include <iostream>
|
||||
#include <fstream> // fstream
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <algorithm> // copy
|
||||
#include <iterator> // ostream_operator
|
||||
|
||||
#include <boost/tokenizer.hpp>
|
||||
#include <boost/locale.hpp>
|
||||
|
||||
void
|
||||
GncFwTokenizer::columns(const std::vector<uint>& cols)
|
||||
{
|
||||
col_vec = cols;
|
||||
}
|
||||
|
||||
|
||||
int GncFwTokenizer::tokenize()
|
||||
{
|
||||
typedef boost::tokenizer< boost::offset_separator > Tokenizer;
|
||||
|
||||
boost::offset_separator sep(col_vec.begin(), col_vec.end(), false);
|
||||
|
||||
std::vector<std::string> vec;
|
||||
std::string line;
|
||||
std::string buffer;
|
||||
|
||||
tokenized_contents.clear();
|
||||
std::istringstream in_stream(utf8_contents);
|
||||
|
||||
while (std::getline (in_stream, line))
|
||||
{
|
||||
Tokenizer tok(line, sep);
|
||||
vec.assign(tok.begin(),tok.end());
|
||||
|
||||
line.clear(); // clear here, next check could fail
|
||||
|
||||
// example checking
|
||||
// for correctly parsed 3 fields per record
|
||||
if (vec.size() < 3) continue;
|
||||
|
||||
tokenized_contents.push_back(vec);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
67
src/import-export/csv-imp/gnc-fw-tokenizer.hpp
Normal file
67
src/import-export/csv-imp/gnc-fw-tokenizer.hpp
Normal file
@ -0,0 +1,67 @@
|
||||
/********************************************************************\
|
||||
* gnc-fw-tokenizer.hpp - takes a file and converts it into a *
|
||||
* two-dimensional vector of strings (table) *
|
||||
* splitting the contents on fixed width *
|
||||
* positions *
|
||||
* *
|
||||
* This program is free software; you can redistribute it and/or *
|
||||
* modify it under the terms of the GNU General Public License as *
|
||||
* published by the Free Software Foundation; either version 2 of *
|
||||
* the License, or (at your option) any later version. *
|
||||
* *
|
||||
* This program is distributed in the hope that it will be useful, *
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
|
||||
* GNU General Public License for more details. *
|
||||
* *
|
||||
* You should have received a copy of the GNU General Public License*
|
||||
* along with this program; if not, contact: *
|
||||
* *
|
||||
* Free Software Foundation Voice: +1-617-542-5942 *
|
||||
* 51 Franklin Street, Fifth Floor Fax: +1-617-542-2652 *
|
||||
* Boston, MA 02110-1301, USA gnu@gnu.org *
|
||||
\********************************************************************/
|
||||
|
||||
/** @file
|
||||
@brief Class to convert a file with fixed-width delimited contents
|
||||
into a vector of string vectors.
|
||||
One can define the widths of each column to use to split each line
|
||||
into multiple fields.
|
||||
However, no gnucash specific interpretation is done yet, that's up
|
||||
to the code using this class.
|
||||
*
|
||||
gnc-fw-tokenizer.hpp
|
||||
@author Copyright (c) 2015 Geert Janssens <geert@kobaltwit.be>
|
||||
*/
|
||||
|
||||
#ifndef GNC_FW_TOKENIZER_HPP
|
||||
#define GNC_FW_TOKENIZER_HPP
|
||||
|
||||
extern "C" {
|
||||
#include "config.h"
|
||||
}
|
||||
|
||||
#include <iostream>
|
||||
#include <fstream> // fstream
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include "gnc-tokenizer.hpp"
|
||||
|
||||
class GncFwTokenizer : public GncTokenizer
|
||||
{
|
||||
public:
|
||||
GncFwTokenizer() = default; // default constructor
|
||||
GncFwTokenizer(const GncFwTokenizer&) = default; // copy constructor
|
||||
GncFwTokenizer& operator=(const GncFwTokenizer&) = default; // copy assignment
|
||||
GncFwTokenizer(GncFwTokenizer&&) = default; // move constructor
|
||||
GncFwTokenizer& operator=(GncFwTokenizer&&) = default; // move assignment
|
||||
~GncFwTokenizer() = default; // destructor
|
||||
|
||||
void columns(const std::vector<uint>& cols);
|
||||
int tokenize() override;
|
||||
|
||||
private:
|
||||
std::vector<uint> col_vec;
|
||||
};
|
||||
|
||||
#endif
|
@ -1,5 +1,6 @@
|
||||
#include "gnc-tokenizer.hpp"
|
||||
#include "gnc-csv-tokenizer.hpp"
|
||||
#include "gnc-fw-tokenizer.hpp"
|
||||
|
||||
#include <iostream>
|
||||
#include <fstream> // fstream
|
||||
@ -23,6 +24,9 @@ std::unique_ptr<GncTokenizer> GncTokenizerFactory(GncImpFileFormat fmt)
|
||||
case GncImpFileFormat::CSV:
|
||||
tok.reset(new GncCsvTokenizer());
|
||||
break;
|
||||
case GncImpFileFormat::FIXED_WIDTH:
|
||||
tok.reset(new GncFwTokenizer());
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -51,6 +51,7 @@ using str_vec = std::vector<std::string>;
|
||||
/** File formats that can be selected for import.
 *
 *  GncTokenizerFactory switches on this value to pick the tokenizer class.
 */
enum class GncImpFileFormat {
    UNKNOWN,     // not handled explicitly by GncTokenizerFactory (default branch)
    CSV,         // GncTokenizerFactory creates a GncCsvTokenizer
    FIXED_WIDTH  // GncTokenizerFactory creates a GncFwTokenizer
};
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user