Add experimental getline_csv implementation that handles newline characters inside single- or double-quoted strings
This commit is contained in:
parent
a4e15b5a47
commit
be437eb6b0
3 changed files with 45 additions and 8 deletions
|
@ -170,7 +170,7 @@ void csv_datasource::parse_csv(T & stream,
|
|||
stream.seekg(0, std::ios::beg);
|
||||
// get first line
|
||||
std::string csv_line;
|
||||
std::getline(stream,csv_line,stream.widen(newline));
|
||||
csv_utils::getline_csv(stream,csv_line,stream.widen(newline));
|
||||
// if user has not passed a separator manually
|
||||
// then attempt to detect by reading first line
|
||||
|
||||
|
@ -204,7 +204,7 @@ void csv_datasource::parse_csv(T & stream,
|
|||
}
|
||||
else // parse first line as headers
|
||||
{
|
||||
while (std::getline(stream,csv_line,stream.widen(newline)))
|
||||
while (csv_utils::getline_csv(stream,csv_line,stream.widen(newline)))
|
||||
{
|
||||
try
|
||||
{
|
||||
|
@ -274,7 +274,7 @@ void csv_datasource::parse_csv(T & stream,
|
|||
auto pos = stream.tellg();
|
||||
|
||||
// handle rare case of a single line of data and user-provided headers
|
||||
// where a lack of a newline will mean that std::getline returns false
|
||||
// where a lack of a newline will mean that csv_utils::getline_csv returns false
|
||||
bool is_first_row = false;
|
||||
if (!has_newline)
|
||||
{
|
||||
|
@ -289,7 +289,7 @@ void csv_datasource::parse_csv(T & stream,
|
|||
if (has_disk_index_) return;
|
||||
|
||||
std::vector<item_type> boxes;
|
||||
while (is_first_row || std::getline(stream, csv_line, stream.widen(newline)))
|
||||
while (is_first_row || csv_utils::getline_csv(stream, csv_line, stream.widen(newline)))
|
||||
{
|
||||
if ((row_limit_ > 0) && (line_number++ > row_limit_))
|
||||
{
|
||||
|
|
|
@ -92,6 +92,43 @@ inline bool ignore_case_equal(std::string const& s0, std::string const& s1)
|
|||
s1.begin(), ignore_case_equal_pred());
|
||||
}
|
||||
|
||||
/// Reads one CSV record from `is` into `s`.
///
/// Works like std::getline, except that a `delim` character appearing inside
/// a single- or double-quoted section does not terminate the record; it is
/// appended to `s` instead. A quoted section is closed only by the same quote
/// character that opened it, so an apostrophe inside a double-quoted field
/// (e.g. "it's") does not disturb the quoting state. Quote characters are
/// kept in `s`.
///
/// @param is     Stream to read from (whitespace is not skipped).
/// @param s      Receives the record; cleared first if the stream is usable.
/// @param delim  Record terminator; extracted from the stream but not stored.
/// @return `is`. eofbit is set when the end of input is reached; failbit is
///         set when no character was extracted or `s.max_size()` was reached.
template <class CharT, class Traits, class Allocator>
std::basic_istream<CharT, Traits>& getline_csv(std::basic_istream<CharT, Traits>& is, std::basic_string<CharT,Traits,Allocator>& s, CharT delim)
{
    using int_type = typename Traits::int_type;
    typename std::basic_string<CharT,Traits,Allocator>::size_type nread = 0;
    // noskipws = true: leading whitespace is part of the record.
    typename std::basic_istream<CharT, Traits>::sentry sentry(is, true);
    if (sentry)
    {
        std::basic_streambuf<CharT, Traits>* buf = is.rdbuf();
        s.clear();
        bool in_quote = false;
        CharT open_quote = CharT(); // only meaningful while in_quote is true
        while (nread < s.max_size())
        {
            int_type const c1 = buf->sbumpc();
            if (Traits::eq_int_type(c1, Traits::eof()))
            {
                is.setstate(std::ios_base::eofbit);
                break;
            }
            ++nread;
            CharT const c = Traits::to_char_type(c1);
            if (in_quote)
            {
                // Only the matching quote character ends the quoted section.
                if (Traits::eq(c, open_quote)) in_quote = false;
            }
            else if (Traits::eq(c, CharT('"')) || Traits::eq(c, CharT('\'')))
            {
                in_quote = true;
                open_quote = c;
            }
            else if (Traits::eq(c, delim))
            {
                break; // Character is extracted but not appended.
            }
            s.push_back(c);
        }
    }
    // Mirror std::getline: nothing extracted, or string capacity exhausted.
    if (nread == 0 || nread >= s.max_size())
        is.setstate(std::ios_base::failbit);

    return is;
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -183,7 +183,7 @@ int main (int argc, char** argv)
|
|||
csv_file.seekg(0, std::ios::beg);
|
||||
// get first line
|
||||
std::string csv_line;
|
||||
std::getline(csv_file, csv_line, csv_file.widen(newline));
|
||||
csv_utils::getline_csv(csv_file, csv_line, csv_file.widen(newline));
|
||||
mapnik::util::trim(separator);
|
||||
if (separator.empty()) separator = detail::detect_separator(csv_line);
|
||||
csv_file.seekg(0, std::ios::beg);
|
||||
|
@ -209,7 +209,7 @@ int main (int argc, char** argv)
|
|||
}
|
||||
else // parse first line as headers
|
||||
{
|
||||
while (std::getline(csv_file,csv_line,csv_file.widen(newline)))
|
||||
while (csv_utils::getline_csv(csv_file,csv_line,csv_file.widen(newline)))
|
||||
{
|
||||
try
|
||||
{
|
||||
|
@ -260,7 +260,7 @@ int main (int argc, char** argv)
|
|||
auto pos = csv_file.tellg();
|
||||
|
||||
// handle rare case of a single line of data and user-provided headers
|
||||
// where a lack of a newline will mean that std::getline returns false
|
||||
// where a lack of a newline will mean that csv_utils::getline_csv returns false
|
||||
bool is_first_row = false;
|
||||
if (!has_newline)
|
||||
{
|
||||
|
@ -277,7 +277,7 @@ int main (int argc, char** argv)
|
|||
using item_type = std::pair<box_type, std::pair<unsigned, unsigned>>;
|
||||
std::vector<item_type> boxes;
|
||||
|
||||
while (is_first_row || std::getline(csv_file, csv_line, csv_file.widen(newline)))
|
||||
while (is_first_row || csv_utils::getline_csv(csv_file, csv_line, csv_file.widen(newline)))
|
||||
{
|
||||
auto record_offset = pos;
|
||||
auto record_size = csv_line.length();
|
||||
|
|
Loading…
Add table
Reference in a new issue