add experimental getline_csv implementation which handles newline characters inside single/double quoted strings

This commit is contained in:
artemp 2015-10-01 18:33:32 +01:00
parent a4e15b5a47
commit be437eb6b0
3 changed files with 45 additions and 8 deletions

View file

@ -170,7 +170,7 @@ void csv_datasource::parse_csv(T & stream,
stream.seekg(0, std::ios::beg);
// get first line
std::string csv_line;
std::getline(stream,csv_line,stream.widen(newline));
csv_utils::getline_csv(stream,csv_line,stream.widen(newline));
// if user has not passed a separator manually
// then attempt to detect by reading first line
@ -204,7 +204,7 @@ void csv_datasource::parse_csv(T & stream,
}
else // parse first line as headers
{
while (std::getline(stream,csv_line,stream.widen(newline)))
while (csv_utils::getline_csv(stream,csv_line,stream.widen(newline)))
{
try
{
@ -274,7 +274,7 @@ void csv_datasource::parse_csv(T & stream,
auto pos = stream.tellg();
// handle rare case of a single line of data and user-provided headers
// where a lack of a newline will mean that std::getline returns false
// where a lack of a newline will mean that csv_utils::getline_csv returns false
bool is_first_row = false;
if (!has_newline)
{
@ -289,7 +289,7 @@ void csv_datasource::parse_csv(T & stream,
if (has_disk_index_) return;
std::vector<item_type> boxes;
while (is_first_row || std::getline(stream, csv_line, stream.widen(newline)))
while (is_first_row || csv_utils::getline_csv(stream, csv_line, stream.widen(newline)))
{
if ((row_limit_ > 0) && (line_number++ > row_limit_))
{

View file

@ -92,6 +92,43 @@ inline bool ignore_case_equal(std::string const& s0, std::string const& s1)
s1.begin(), ignore_case_equal_pred());
}
// Extracts one logical CSV record from `is` into `s`.
//
// Works like std::getline(is, s, delim), except that a `delim` character found
// inside a quoted section is treated as data and appended to `s` instead of
// terminating the record. A quoted section is opened by either a double quote
// (") or a single quote (') and is closed only by the *same* character, so an
// apostrophe inside a double-quoted field (e.g. "it's") does not end the
// section. Quote characters themselves are kept in `s`.
//
// Parameters:
//   is    - input stream to read from (leading whitespace is not skipped)
//   s     - receives the record; cleared first if the stream is readable
//   delim - record terminator; extracted from the stream but not stored
//
// Returns `is`. eofbit is set when the end of input is reached; failbit is set
// when no character could be extracted or when `s.max_size()` characters were
// consumed.
template <class CharT, class Traits, class Allocator>
std::basic_istream<CharT, Traits>& getline_csv(std::basic_istream<CharT, Traits>& is,
                                               std::basic_string<CharT, Traits, Allocator>& s,
                                               CharT delim)
{
    using int_type = typename Traits::int_type;
    using size_type = typename std::basic_string<CharT, Traits, Allocator>::size_type;

    CharT const double_quote = static_cast<CharT>('"');
    CharT const single_quote = static_cast<CharT>('\'');

    size_type nread = 0;
    // noskipws = true: whitespace is significant in CSV data.
    typename std::basic_istream<CharT, Traits>::sentry sentry(is, true);
    if (sentry)
    {
        std::basic_streambuf<CharT, Traits>* buf = is.rdbuf();
        s.clear();
        bool in_quote = false;
        CharT open_quote = CharT(); // only meaningful while in_quote is true
        while (nread < s.max_size())
        {
            int_type const c1 = buf->sbumpc();
            if (Traits::eq_int_type(c1, Traits::eof()))
            {
                is.setstate(std::ios_base::eofbit);
                break;
            }
            ++nread;
            CharT const c = Traits::to_char_type(c1);
            if (in_quote)
            {
                // Only the character that opened the section may close it.
                if (Traits::eq(c, open_quote)) in_quote = false;
            }
            else if (Traits::eq(c, double_quote) || Traits::eq(c, single_quote))
            {
                in_quote = true;
                open_quote = c;
            }
            if (in_quote || !Traits::eq(c, delim))
            {
                s.push_back(c);
            }
            else
            {
                break; // Delimiter is extracted but not appended.
            }
        }
    }
    if (nread == 0 || nread >= s.max_size())
    {
        is.setstate(std::ios_base::failbit);
    }
    return is;
}
}

View file

@ -183,7 +183,7 @@ int main (int argc, char** argv)
csv_file.seekg(0, std::ios::beg);
// get first line
std::string csv_line;
std::getline(csv_file, csv_line, csv_file.widen(newline));
csv_utils::getline_csv(csv_file, csv_line, csv_file.widen(newline));
mapnik::util::trim(separator);
if (separator.empty()) separator = detail::detect_separator(csv_line);
csv_file.seekg(0, std::ios::beg);
@ -209,7 +209,7 @@ int main (int argc, char** argv)
}
else // parse first line as headers
{
while (std::getline(csv_file,csv_line,csv_file.widen(newline)))
while (csv_utils::getline_csv(csv_file,csv_line,csv_file.widen(newline)))
{
try
{
@ -260,7 +260,7 @@ int main (int argc, char** argv)
auto pos = csv_file.tellg();
// handle rare case of a single line of data and user-provided headers
// where a lack of a newline will mean that std::getline returns false
// where a lack of a newline will mean that csv_utils::getline_csv returns false
bool is_first_row = false;
if (!has_newline)
{
@ -277,7 +277,7 @@ int main (int argc, char** argv)
using item_type = std::pair<box_type, std::pair<unsigned, unsigned>>;
std::vector<item_type> boxes;
while (is_first_row || std::getline(csv_file, csv_line, csv_file.widen(newline)))
while (is_first_row || csv_utils::getline_csv(csv_file, csv_line, csv_file.widen(newline)))
{
auto record_offset = pos;
auto record_size = csv_line.length();