mirror of
https://github.com/Gnucash/gnucash.git
synced 2025-02-25 18:55:30 -06:00
Add csv file parser based on boost::tokenizer
Effective c++ notes: - explicitly declare special member functions - explicitly declare overriding functions as override
This commit is contained in:
parent
ed7b863d8f
commit
efcd266971
@ -436,6 +436,7 @@ src/import-export/csv-imp/csv-fixed-trans-import.c
|
||||
src/import-export/csv-imp/gnc-csv-account-map.c
|
||||
src/import-export/csv-imp/gnc-csv-gnumeric-popup.c
|
||||
src/import-export/csv-imp/gnc-csv-model.c
|
||||
src/import-export/csv-imp/gnc-csv-tokenizer.cpp
|
||||
src/import-export/csv-imp/gnc-csv-trans-settings.c
|
||||
src/import-export/csv-imp/gncmod-csv-import.c
|
||||
src/import-export/csv-imp/gnc-plugin-csv-import.c
|
||||
|
@ -12,6 +12,7 @@ SET(csv_import_SOURCES
|
||||
gnc-csv-account-map.c
|
||||
gnc-csv-model.c
|
||||
gnc-csv-gnumeric-popup.c
|
||||
gnc-csv-tokenizer.cpp
|
||||
gnc-csv-trans-settings.c
|
||||
gnc-tokenizer.cpp
|
||||
${CMAKE_SOURCE_DIR}/lib/stf/stf-parse.c
|
||||
@ -33,6 +34,7 @@ SET(csv_import_noinst_HEADERS
|
||||
gnc-csv-account-map.h
|
||||
gnc-csv-model.h
|
||||
gnc-csv-gnumeric-popup.h
|
||||
gnc-csv-tokenizer.hpp
|
||||
gnc-csv-trans-settings.h
|
||||
gnc-tokenizer.hpp
|
||||
${CMAKE_SOURCE_DIR}/lib/stf/stf-parse.h
|
||||
|
@ -12,6 +12,7 @@ libgncmod_csv_import_la_SOURCES = \
|
||||
csv-fixed-trans-import.c \
|
||||
gnc-csv-account-map.c \
|
||||
gnc-csv-model.c \
|
||||
gnc-csv-tokenizer.cpp \
|
||||
gnc-csv-gnumeric-popup.c \
|
||||
gnc-tokenizer.cpp \
|
||||
gnc-csv-trans-settings.c
|
||||
@ -25,6 +26,7 @@ noinst_HEADERS = \
|
||||
csv-fixed-trans-import.h \
|
||||
gnc-csv-account-map.h \
|
||||
gnc-csv-model.h \
|
||||
gnc-csv-tokenizer.hpp \
|
||||
gnc-csv-gnumeric-popup.h \
|
||||
gnc-tokenizer.hpp \
|
||||
gnc-csv-trans-settings.h
|
||||
|
71
src/import-export/csv-imp/gnc-csv-tokenizer.cpp
Normal file
71
src/import-export/csv-imp/gnc-csv-tokenizer.cpp
Normal file
@ -0,0 +1,71 @@
|
||||
#include "gnc-csv-tokenizer.hpp"

#include <iostream>
#include <fstream>      // fstream
#include <sstream>      // istringstream
#include <vector>
#include <string>
#include <algorithm>    // copy
#include <iterator>     // ostream_operator

#include <boost/tokenizer.hpp>
#include <boost/locale.hpp>
|
||||
|
||||
void
|
||||
GncCsvTokenizer::set_separators(const std::string& separators)
|
||||
{
|
||||
sep_str = separators;
|
||||
}
|
||||
|
||||
|
||||
int GncCsvTokenizer::tokenize()
|
||||
{
|
||||
typedef boost::tokenizer< boost::escaped_list_separator<char> > Tokenizer;
|
||||
|
||||
boost::escaped_list_separator<char> sep("\\", sep_str, "\"");
|
||||
|
||||
std::vector<std::string> vec;
|
||||
std::string line;
|
||||
std::string buffer;
|
||||
|
||||
bool inside_quotes(false);
|
||||
size_t last_quote(0);
|
||||
|
||||
tokenized_contents.clear();
|
||||
std::istringstream in_stream(utf8_contents);
|
||||
|
||||
while (std::getline (in_stream, buffer))
|
||||
{
|
||||
// --- deal with line breaks in quoted strings
|
||||
last_quote = buffer.find_first_of('"');
|
||||
while (last_quote != std::string::npos)
|
||||
{
|
||||
if (last_quote == 0) // Test separately because last_quote - 1 would be out of range
|
||||
inside_quotes = !inside_quotes;
|
||||
else if (buffer[ last_quote - 1 ] != '\\')
|
||||
inside_quotes = !inside_quotes;
|
||||
|
||||
last_quote = buffer.find_first_of('"',last_quote+1);
|
||||
}
|
||||
|
||||
line.append(buffer);
|
||||
if (inside_quotes)
|
||||
{
|
||||
line.append("\n");
|
||||
continue;
|
||||
}
|
||||
// ---
|
||||
|
||||
Tokenizer tok(line, sep);
|
||||
vec.assign(tok.begin(),tok.end());
|
||||
|
||||
line.clear(); // clear here, next check could fail
|
||||
|
||||
// example checking
|
||||
// for correctly parsed 3 fields per record
|
||||
if (vec.size() < 3) continue;
|
||||
|
||||
tokenized_contents.push_back(vec);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
64
src/import-export/csv-imp/gnc-csv-tokenizer.hpp
Normal file
64
src/import-export/csv-imp/gnc-csv-tokenizer.hpp
Normal file
@ -0,0 +1,64 @@
|
||||
/********************************************************************\
|
||||
* gnc-csv-tokenizer.hpp - takes a csv file and converts it into a *
|
||||
* two-dimensional vector of strings (table)*
|
||||
* *
|
||||
* This program is free software; you can redistribute it and/or *
|
||||
* modify it under the terms of the GNU General Public License as *
|
||||
* published by the Free Software Foundation; either version 2 of *
|
||||
* the License, or (at your option) any later version. *
|
||||
* *
|
||||
* This program is distributed in the hope that it will be useful, *
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
|
||||
* GNU General Public License for more details. *
|
||||
* *
|
||||
* You should have received a copy of the GNU General Public License*
|
||||
* along with this program; if not, contact: *
|
||||
* *
|
||||
* Free Software Foundation Voice: +1-617-542-5942 *
|
||||
* 51 Franklin Street, Fifth Floor Fax: +1-617-542-2652 *
|
||||
* Boston, MA 02110-1301, USA gnu@gnu.org *
|
||||
\********************************************************************/
|
||||
|
||||
/** @file
|
||||
@brief Class to convert a csv file into vector of string vectors.
|
||||
One can define the separator characters to use to split each line
|
||||
into multiple fields. Quote characters will be removed.
|
||||
However, no gnucash specific interpretation is done yet, that's up
|
||||
to the code using this class.
|
||||
*
|
||||
gnc-csv-tokenizer.hpp
|
||||
@author Copyright (c) 2015 Geert Janssens <geert@kobaltwit.be>
|
||||
*/
|
||||
|
||||
#ifndef GNC_CSV_TOKENIZER_HPP
|
||||
#define GNC_CSV_TOKENIZER_HPP
|
||||
|
||||
extern "C" {
|
||||
#include "config.h"
|
||||
}
|
||||
|
||||
#include <iostream>
|
||||
#include <fstream> // fstream
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include "gnc-tokenizer.hpp"
|
||||
|
||||
class GncCsvTokenizer : public GncTokenizer
|
||||
{
|
||||
public:
|
||||
GncCsvTokenizer() = default; // default constructor
|
||||
GncCsvTokenizer(const GncCsvTokenizer&) = default; // copy constructor
|
||||
GncCsvTokenizer& operator=(const GncCsvTokenizer&) = default; // copy assignment
|
||||
GncCsvTokenizer(GncCsvTokenizer&&) = default; // move constructor
|
||||
GncCsvTokenizer& operator=(GncCsvTokenizer&&) = default; // move assignment
|
||||
~GncCsvTokenizer() = default; // destructor
|
||||
|
||||
void set_separators(const std::string& separators);
|
||||
int tokenize() override;
|
||||
|
||||
private:
|
||||
std::string sep_str = ",";
|
||||
};
|
||||
|
||||
#endif
|
@ -1,4 +1,5 @@
|
||||
#include "gnc-tokenizer.hpp"
|
||||
#include "gnc-csv-tokenizer.hpp"
|
||||
|
||||
#include <iostream>
|
||||
#include <fstream> // fstream
|
||||
@ -19,6 +20,9 @@ std::unique_ptr<GncTokenizer> GncTokenizerFactory(GncImpFileFormat fmt)
|
||||
std::unique_ptr<GncTokenizer> tok(nullptr);
|
||||
switch (fmt)
|
||||
{
|
||||
case GncImpFileFormat::CSV:
|
||||
tok.reset(new GncCsvTokenizer());
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -50,6 +50,7 @@ using str_vec = std::vector<std::string>;
|
||||
/** File formats this importer can distinguish.
 *
 *  GncTokenizerFactory() uses the value to decide which tokenizer to create.
 */
enum class GncImpFileFormat
{
    UNKNOWN, /**< no specific format */
    CSV,     /**< character-separated values, handled by GncCsvTokenizer */
};
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user