Getting the next word respects quoting and escaping.

Quote with double quotes (").  Quoted sections can be interspersed with
other non-whitespace characters, just like shell quoting.

A backslash (\) always escapes the next character.

The end of the stream always terminates the word.

Add AutoText struct to unit test utilities, to easily
make spv_text_t values and reference them as spv_text values.
This commit is contained in:
David Neto 2015-08-25 14:21:13 -04:00
parent 7951050a95
commit e7ee4c4476
4 changed files with 174 additions and 12 deletions

View File

@ -151,20 +151,34 @@ spv_result_t spvTextWordGet(const spv_text text,
*endPosition = *startPosition;
bool quoting = false;
bool escaping = false;
// NOTE: Assumes first character is not white space!
while (true) {
switch (text->str[endPosition->index]) {
case ' ':
case ';':
case '\t':
case '\n':
case '\0': { // NOTE: End of word found!
word.assign(text->str + startPosition->index,
(size_t)(endPosition->index - startPosition->index));
return SPV_SUCCESS;
const char ch = text->str[endPosition->index];
if (ch == '\\')
escaping = !escaping;
else {
switch (ch) {
case '"':
if (!escaping) quoting = !quoting;
break;
case ' ':
case ';':
case '\t':
case '\n':
if (escaping || quoting) break;
// Fall through.
case '\0': { // NOTE: End of word found!
word.assign(text->str + startPosition->index,
(size_t)(endPosition->index - startPosition->index));
return SPV_SUCCESS;
}
default:
break;
}
default:
break;
escaping = false;
}
endPosition->column++;

View File

@ -87,7 +87,8 @@ spv_result_t spvTextAdvance(const spv_text text, spv_position_t *pPosition);
/// @brief Fetch the next word from the text stream.
///
/// A word ends at the next comment or whitespace.
/// A word ends at the next comment or whitespace. However, double-quoted
/// strings remain intact, and a backslash always escapes the next character.
///
/// @param[in] text stream to read from
/// @param[in] startPosition current position in text stream

View File

@ -28,6 +28,11 @@
namespace {
#define TAB "\t"
#define NEWLINE "\n"
#define BACKSLASH R"(\)"
#define QUOTE R"(")"
TEST(TextWordGet, NullTerminator) {
char textStr[] = "Word";
spv_text_t text = {textStr, strlen(textStr)};
@ -112,4 +117,137 @@ TEST(TextWordGet, MultipleWords) {
}
}
// Double-quoted runs come back as single words with the quote characters
// still attached, even when the quoted run contains a space.
TEST(TextWordGet, QuotesAreKept) {
  AutoText source(R"("quotes" "around words")");
  const char* const firstWord = R"("quotes")";
  const char* const secondWord = R"("around words")";

  std::string token;
  spv_position_t begin{};
  spv_position_t end{};

  // First word spans indices 0..7, so scanning stops on the space at 8.
  ASSERT_EQ(SPV_SUCCESS, spvTextWordGet(source, &begin, token, &end));
  EXPECT_EQ(8, end.column);
  EXPECT_EQ(0, end.line);
  EXPECT_EQ(8, end.index);
  EXPECT_STREQ(firstWord, token.c_str());

  // Step over the separating space to reach the start of the second word.
  begin = end;
  ++begin.index;
  ++begin.column;

  // The embedded space is quoted, so the word runs to the end of the input.
  ASSERT_EQ(SPV_SUCCESS, spvTextWordGet(source, &begin, token, &end));
  EXPECT_EQ(23, end.column);
  EXPECT_EQ(0, end.line);
  EXPECT_EQ(23, end.index);
  EXPECT_STREQ(secondWord, token.c_str());
}
// A quoted space in the middle of otherwise unquoted text joins the text on
// either side of it into one word.
TEST(TextWordGet, QuotesBetweenWordsActLikeGlue) {
  AutoText source(R"(quotes" "between words)");
  const char* const gluedWord = R"(quotes" "between)";
  const char* const trailingWord = "words";

  std::string token;
  spv_position_t begin{};
  spv_position_t end{};

  // The only unquoted space is at index 16; that is where the word stops.
  ASSERT_EQ(SPV_SUCCESS, spvTextWordGet(source, &begin, token, &end));
  EXPECT_EQ(16, end.column);
  EXPECT_EQ(0, end.line);
  EXPECT_EQ(16, end.index);
  EXPECT_STREQ(gluedWord, token.c_str());

  // Step over the separating space to reach the start of the second word.
  begin = end;
  ++begin.index;
  ++begin.column;

  // The remaining word is terminated by the end of the input.
  ASSERT_EQ(SPV_SUCCESS, spvTextWordGet(source, &begin, token, &end));
  EXPECT_EQ(22, end.column);
  EXPECT_EQ(0, end.line);
  EXPECT_EQ(22, end.index);
  EXPECT_STREQ(trailingWord, token.c_str());
}
// Spaces, a newline and a tab that sit between a pair of quotes do not end
// the word: the whole input is expected back as one word.
TEST(TextWordGet, QuotingWhitespace) {
  AutoText source(QUOTE "white " NEWLINE TAB " space" QUOTE);
  const auto wholeLength = source.str.length();

  std::string token;
  spv_position_t begin{};
  spv_position_t end{};

  ASSERT_EQ(SPV_SUCCESS, spvTextWordGet(source, &begin, token, &end));
  // Both the column and the index are expected to equal the input length,
  // with the line still 0 despite the quoted newline.
  EXPECT_EQ(wholeLength, end.column);
  EXPECT_EQ(0, end.line);
  EXPECT_EQ(wholeLength, end.index);
  EXPECT_EQ(source.str, token);
}
// An unterminated quote is closed off by the end of the input; the lone
// quote character is returned as the word.
TEST(TextWordGet, QuoteAlone) {
  AutoText source(QUOTE);

  std::string token;
  spv_position_t begin{};
  spv_position_t end{};

  ASSERT_EQ(SPV_SUCCESS, spvTextWordGet(source, &begin, token, &end));
  ASSERT_EQ(1, end.column);
  ASSERT_EQ(0, end.line);
  ASSERT_EQ(1, end.index);
  ASSERT_STREQ(QUOTE, token.c_str());
}
// A backslash with nothing after it is still terminated by the end of the
// input; the lone backslash is returned as the word.
TEST(TextWordGet, EscapeAlone) {
  AutoText source(BACKSLASH);

  std::string token;
  spv_position_t begin{};
  spv_position_t end{};

  ASSERT_EQ(SPV_SUCCESS, spvTextWordGet(source, &begin, token, &end));
  ASSERT_EQ(1, end.column);
  ASSERT_EQ(0, end.line);
  ASSERT_EQ(1, end.index);
  ASSERT_STREQ(BACKSLASH, token.c_str());
}
// A trailing backslash cannot escape the end of the input: the word is the
// four letters plus the backslash, ending at index 5.
TEST(TextWordGet, EscapeAtEndOfInput) {
  AutoText source("word" BACKSLASH);
  const char* const wanted = "word" BACKSLASH;

  std::string token;
  spv_position_t begin{};
  spv_position_t end{};

  ASSERT_EQ(SPV_SUCCESS, spvTextWordGet(source, &begin, token, &end));
  ASSERT_EQ(5, end.column);
  ASSERT_EQ(0, end.line);
  ASSERT_EQ(5, end.index);
  ASSERT_STREQ(wanted, token.c_str());
}
// A backslash in front of a quote, a newline and a semicolon keeps each of
// them inside the word, so all ten input characters form a single word.
TEST(TextWordGet, Escaping) {
  AutoText source("w" BACKSLASH QUOTE "o" BACKSLASH NEWLINE "r" BACKSLASH ";d");

  std::string token;
  spv_position_t begin{};
  spv_position_t end{};

  ASSERT_EQ(SPV_SUCCESS, spvTextWordGet(source, &begin, token, &end));
  ASSERT_EQ(10, end.column);
  ASSERT_EQ(0, end.line);
  ASSERT_EQ(10, end.index);
  ASSERT_EQ(source.str, token);
}
// The second backslash is consumed by the first, so the space that follows
// is not escaped and ends the word at index 6.
TEST(TextWordGet, EscapingEscape) {
  AutoText source("word" BACKSLASH BACKSLASH " abc");
  const char* const wanted = "word" BACKSLASH BACKSLASH;

  std::string token;
  spv_position_t begin{};
  spv_position_t end{};

  ASSERT_EQ(SPV_SUCCESS, spvTextWordGet(source, &begin, token, &end));
  ASSERT_EQ(6, end.column);
  ASSERT_EQ(0, end.line);
  ASSERT_EQ(6, end.index);
  ASSERT_STREQ(wanted, token.c_str());
}
} // anonymous namespace

View File

@ -91,6 +91,15 @@ inline ::std::ostream& operator<<(::std::ostream& os,
}
}
// A type for easily creating spv_text_t values, with an implicit conversion to
// spv_text.
struct AutoText {
AutoText(std::string value) : str(value), text({str.data(), str.size()}) {}
operator spv_text() { return &text; }
std::string str;
spv_text_t text;
};
#define I32_ENDIAN_HOST (o32_host_order.value)
#endif