Fix splitting words containing ', e.g., I've (#389)
This commit is contained in:
@@ -163,6 +163,8 @@ template bool SplitStringToFloats(const std::string &full, const char *delim,
|
|||||||
bool omit_empty_strings,
|
bool omit_empty_strings,
|
||||||
std::vector<double> *out);
|
std::vector<double> *out);
|
||||||
|
|
||||||
|
// Returns true if `c` is a punctuation character other than the apostrophe.
// The apostrophe is deliberately excluded so that a lone ' (as in "I've")
// is not classified as punctuation by callers of this predicate.
static bool IsPunct(char c) {
  // std::ispunct has undefined behavior when given a negative value other
  // than EOF, and plain `char` may be signed (non-ASCII bytes become
  // negative), so convert to unsigned char before classifying.
  return c != '\'' && std::ispunct(static_cast<unsigned char>(c)) != 0;
}
|
||||||
|
|
||||||
static std::vector<std::string> MergeCharactersIntoWords(
|
static std::vector<std::string> MergeCharactersIntoWords(
|
||||||
const std::vector<std::string> &words) {
|
const std::vector<std::string> &words) {
|
||||||
std::vector<std::string> ans;
|
std::vector<std::string> ans;
|
||||||
@@ -174,7 +176,7 @@ static std::vector<std::string> MergeCharactersIntoWords(
|
|||||||
while (i < n) {
|
while (i < n) {
|
||||||
const auto &w = words[i];
|
const auto &w = words[i];
|
||||||
if (w.size() > 1 ||
|
if (w.size() > 1 ||
|
||||||
(w.size() == 1 && (std::ispunct(w[0]) || std::isspace(w[0])))) {
|
(w.size() == 1 && (IsPunct(w[0]) || std::isspace(w[0])))) {
|
||||||
if (prev != -1) {
|
if (prev != -1) {
|
||||||
std::string t;
|
std::string t;
|
||||||
for (; prev < i; ++prev) {
|
for (; prev < i; ++prev) {
|
||||||
|
|||||||
Reference in New Issue
Block a user