Initial checkin of SkSL lexical analyzer generator (not actually in use yet).

Bug: skia:
Change-Id: I283feb9ac1cef134588204107ea019ce657e37b3
Reviewed-on: https://skia-review.googlesource.com/42084
Commit-Queue: Ethan Nicholas <ethannicholas@google.com>
Reviewed-by: Brian Salomon <bsalomon@google.com>
This commit is contained in:
Ethan Nicholas 2017-09-06 16:59:14 -04:00 committed by Skia Commit-Bot
parent bb3dc768fd
commit 3de0e496b5
14 changed files with 1251 additions and 0 deletions

View File

@ -412,6 +412,16 @@ optional("fontmgr_fuchsia") {
}
if (skia_compile_processors) {
executable("sksllex") {
sources = [
"src/sksl/lex/Main.cpp",
"src/sksl/lex/NFA.cpp",
"src/sksl/lex/RegexNode.cpp",
"src/sksl/lex/RegexParser.cpp",
]
include_dirs = [ "src/sksl/lex" ]
}
executable("skslc") {
defines = [ "SKSL_STANDALONE" ]
sources = [

32
src/sksl/lex/DFA.h Normal file
View File

@ -0,0 +1,32 @@
/*
* Copyright 2017 Google Inc.
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#ifndef SKSL_DFA
#define SKSL_DFA
#include <string>
#include <vector>
/**
* Tables representing a deterministic finite automaton for matching regular expressions.
*/
struct DFA {
DFA(std::vector<std::vector<int>> transitions, std::vector<int> accepts)
: fTransitions(transitions)
, fAccepts(accepts) {}
static constexpr char END_CHAR = 126;
// one row per character, one column per state
std::vector<std::vector<int>> fTransitions;
// contains, for each state, the token id we should report when matching ends in that state (-1
// for no match)
std::vector<int> fAccepts;
};
#endif

68
src/sksl/lex/DFAState.h Normal file
View File

@ -0,0 +1,68 @@
/*
* Copyright 2017 Google Inc.
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#ifndef SKSL_DFASTATE
#define SKSL_DFASTATE
struct DFAState {
struct Label {
std::vector<int> fStates;
Label(std::vector<int> states)
: fStates(std::move(states)) {}
bool operator==(const Label& other) const {
return fStates == other.fStates;
}
bool operator!=(const Label& other) const {
return !(*this == other);
}
std::string description() const {
std::string result = "<";
const char* separator = "";
for (int s : fStates) {
result += separator;
result += std::to_string(s);
separator = ", ";
}
result += ">";
return result;
}
};
DFAState()
: fId(-1)
, fLabel({}) {}
DFAState(int id, Label label)
: fId(id)
, fLabel(std::move(label)) {}
DFAState(const DFAState& other) = delete;
int fId;
Label fLabel;
bool fIsScanned = false;
};
namespace std {
template<> struct hash<DFAState::Label> {
size_t operator()(const DFAState::Label& s) const {
int result = 0;
for (int i : s.fStates) {
result = result * 101 + i;
}
return result;
}
};
} // namespace
#endif

14
src/sksl/lex/LexUtil.h Normal file
View File

@ -0,0 +1,14 @@
/*
* Copyright 2017 Google Inc.
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#ifndef SKSL_LEXUTIL
#define SKSL_LEXUTIL
#define ABORT(...) (fprintf(stderr, __VA_ARGS__), abort())
#define ASSERT(x) (void)((x) || (ABORT("failed assert(%s): %s:%d\n", #x, __FILE__, __LINE__), 0))
#endif

197
src/sksl/lex/Main.cpp Normal file
View File

@ -0,0 +1,197 @@
/*
* Copyright 2017 Google Inc.
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#include "NFAtoDFA.h"
#include "RegexParser.h"
#include <fstream>
#include <sstream>
#include <string>
/**
* Processes a .lex file and produces .h and .cpp files which implement a lexical analyzer. The .lex
* file is a text file with one token definition per line. Each line is of the form:
* <TOKEN_NAME> = <pattern>
* where <pattern> is either a regular expression (e.g [0-9]) or a double-quoted literal string.
*/
static constexpr const char* HEADER =
"/*\n"
" * Copyright 2017 Google Inc.\n"
" *\n"
" * Use of this source code is governed by a BSD-style license that can be\n"
" * found in the LICENSE file.\n"
" */\n"
"/*****************************************************************************************\n"
" ******************** This file was generated by sksllex. Do not edit. *******************\n"
" *****************************************************************************************/\n";
void writeH(const DFA& dfa, const char* lexer, const char* token,
const std::vector<std::string>& tokens, const char* hPath) {
std::ofstream out(hPath);
ASSERT(out.good());
out << HEADER;
out << "#ifndef SKSL_" << lexer << "\n";
out << "#define SKSL_" << lexer << "\n";
out << "#include <cstddef>\n";
out << "#include <cstdint>\n";
out << "namespace SkSL {\n";
out << "\n";
out << "struct " << token << " {\n";
out << " enum Kind {\n";
for (const std::string& t : tokens) {
out << " #undef " << t << "\n";
out << " " << t << ",\n";
}
out << " };\n";
out << "\n";
out << " " << token << "()\n";
out << " : fKind(Kind::INVALID)\n";
out << " , fOffset(-1)\n";
out << " , fLength(-1) {}\n";
out << "\n";
out << " " << token << "(Kind kind, int offset, int length)\n";
out << " : fKind(kind)\n";
out << " , fOffset(offset)\n";
out << " , fLength(length) {}\n";
out << "\n";
out << " Kind fKind;\n";
out << " int fOffset;\n";
out << " int fLength;\n";
out << "};\n";
out << "\n";
out << "class " << lexer << " {\n";
out << "public:\n";
out << " void start(const char* text, size_t length) {\n";
out << " fText = text;\n";
out << " fLength = length;\n";
out << " fOffset = 0;\n";
out << " }\n";
out << "\n";
out << " " << token << " next();\n";
out << "\n";
out << "private:\n";
out << " const char* fText;\n";
out << " int fLength;\n";
out << " int fOffset;\n";
out << "};\n";
out << "\n";
out << "} // namespace\n";
out << "#endif\n";
}
void writeCPP(const DFA& dfa, const char* lexer, const char* token, const char* include,
const char* cppPath) {
std::ofstream out(cppPath);
ASSERT(out.good());
out << HEADER;
out << "#include \"" << include << "\"\n";
out << "\n";
out << "namespace SkSL {\n";
out << "\n";
size_t states = 0;
for (const auto& row : dfa.fTransitions) {
states = std::max(states, row.size());
}
out << "static int16_t transitions[" << DFA::END_CHAR + 1 << "][" << states << "] = {\n";
for (char c = 0; c <= DFA::END_CHAR; ++c) {
out << " {";
for (size_t i = 0; i < states; ++i) {
if ((size_t) c < dfa.fTransitions.size() && i < dfa.fTransitions[c].size()) {
out << " " << dfa.fTransitions[c][i] << ",";
} else {
out << " 0,";
}
}
out << " },\n";
}
out << "};\n";
out << "\n";
out << "static int16_t accepts[" << states << "] = {";
for (size_t i = 0; i < states; ++i) {
if (i < dfa.fAccepts.size()) {
out << " " << dfa.fAccepts[i] << ",";
} else {
out << " -1,";
}
}
out << " };\n";
out << "\n";
out << token << " " << lexer << "::next() {\n";;
out << " int startOffset = fOffset;\n";
out << " if (startOffset == fLength) {\n";
out << " return " << token << "(" << token << "::END_OF_FILE, startOffset, 0);\n";
out << " }\n";
out << " int offset = startOffset;\n";
out << " int state = 1;\n";
out << " " << token << "::Kind lastAccept = " << token << "::Kind::INVALID;\n";
out << " int lastAcceptEnd = startOffset + 1;\n";
out << " while (offset < fLength) {\n";
out << " state = transitions[(int) fText[offset]][state];\n";
out << " ++offset;\n";
out << " if (!state) {\n";
out << " break;\n";
out << " }\n";
out << " if (accepts[state]) {\n";
out << " lastAccept = (" << token << "::Kind) accepts[state];\n";
out << " lastAcceptEnd = offset;\n";
out << " }\n";
out << " }\n";
out << " fOffset = lastAcceptEnd;\n";
out << " return " << token << "(lastAccept, startOffset, lastAcceptEnd - startOffset);\n";
out << "}\n";
out << "\n";
out << "} // namespace\n";
}
void process(const char* inPath, const char* lexer, const char* token, const char* hPath,
const char* cppPath) {
NFA nfa;
std::vector<std::string> tokens;
tokens.push_back("END_OF_FILE");
std::string line;
std::ifstream in(inPath);
while (std::getline(in, line)) {
std::istringstream split(line);
std::string name, delimiter, pattern;
if (split >> name >> delimiter >> pattern) {
ASSERT(split.eof());
ASSERT(name != "");
ASSERT(delimiter == "=");
ASSERT(pattern != "");
tokens.push_back(name);
if (pattern[0] == '"') {
ASSERT(pattern.size() > 2 && pattern[pattern.size() - 1] == '"');
RegexNode node = RegexNode(RegexNode::kChar_Kind, pattern[1]);
for (size_t i = 2; i < pattern.size() - 1; ++i) {
node = RegexNode(RegexNode::kConcat_Kind, node,
RegexNode(RegexNode::kChar_Kind, pattern[i]));
}
nfa.addRegex(node);
}
else {
nfa.addRegex(RegexParser().parse(pattern));
}
}
}
NFAtoDFA converter(&nfa);
DFA dfa = converter.convert();
writeH(dfa, lexer, token, tokens, hPath);
writeCPP(dfa, lexer, token, (std::string("SkSL") + lexer + ".h").c_str(), cppPath);
}
int main(int argc, const char** argv) {
if (argc != 6) {
printf("usage: sksllex <input.lex> <lexername> <tokenname> <output.h> <output.cpp>\n");
exit(1);
}
process(argv[1], argv[2], argv[3], argv[4], argv[5]);
return 0;
}

41
src/sksl/lex/NFA.cpp Normal file
View File

@ -0,0 +1,41 @@
/*
* Copyright 2017 Google Inc.
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#include "NFA.h"
int NFA::match(std::string s) const {
std::vector<int> states = fStartStates;
for (size_t i = 0; i < s.size(); ++i) {
std::vector<int> next;
for (int id : states) {
if (fStates[id].accept(s[i])) {
for (int nextId : fStates[id].fNext) {
if (fStates[nextId].fKind != NFAState::kRemapped_Kind) {
next.push_back(nextId);
} else {
next.insert(next.end(), fStates[nextId].fData.begin(),
fStates[nextId].fData.end());
}
}
}
}
if (!next.size()) {
return -1;
}
states = next;
}
int accept = -1;
for (int id : states) {
if (fStates[id].fKind == NFAState::kAccept_Kind) {
int result = fStates[id].fData[0];
if (accept == -1 || result < accept) {
accept = result;
}
}
}
return accept;
}

54
src/sksl/lex/NFA.h Normal file
View File

@ -0,0 +1,54 @@
/*
* Copyright 2017 Google Inc.
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#ifndef SKSL_NFA
#define SKSL_NFA
#include "NFAState.h"
#include "RegexNode.h"
/**
* A nondeterministic finite automaton for matching regular expressions. The NFA is initialized with
* a number of regular expressions, and then matches a string against all of them simultaneously.
*/
struct NFA {
/**
* Adds a new regular expression to the set of expressions matched by this automaton, returning
* its index.
*/
int addRegex(const RegexNode& regex) {
std::vector<int> accept;
// we reserve token 0 for END_OF_FILE, so this starts at 1
accept.push_back(this->addState(NFAState(++fRegexCount)));
std::vector<int> startStates = regex.createStates(this, accept);
fStartStates.insert(fStartStates.end(), startStates.begin(), startStates.end());
return fStartStates.size() - 1;
}
/**
* Adds a new state to the NFA, returning its index.
*/
int addState(NFAState s) {
fStates.push_back(std::move(s));
return fStates.size() - 1;
}
/**
* Matches a string against all of the regexes added to this NFA. Returns the index of the first
* (in addRegex order) matching expression, or -1 if no match. This is relatively slow and used
* only for debugging purposes; the NFA should be converted to a DFA before actual use.
*/
int match(std::string s) const;
int fRegexCount = 0;
std::vector<NFAState> fStates;
std::vector<int> fStartStates;
};
#endif

150
src/sksl/lex/NFAState.h Normal file
View File

@ -0,0 +1,150 @@
/*
* Copyright 2017 Google Inc.
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#ifndef SKSL_NFASTATE
#define SKSL_NFASTATE
#include <string>
#include <vector>
#include "LexUtil.h"
struct NFAState {
enum Kind {
// represents an accept state - if the NFA ends up in this state, we have successfully
// matched the token indicated by fData[0]
kAccept_Kind,
// matches the single character fChar
kChar_Kind,
// the regex '.'; matches any char but '\n'
kDot_Kind,
// a state which serves as a placeholder for the states indicated in fData. When we
// transition to this state, we instead transition to all of the fData states.
kRemapped_Kind,
// contains a list of true/false values in fData. fData[c] tells us whether we accept the
// character c.
kTable_Kind
};
NFAState(Kind kind, std::vector<int> next)
: fKind(kind)
, fNext(std::move(next)) {}
NFAState(char c, std::vector<int> next)
: fKind(kChar_Kind)
, fChar(c)
, fNext(std::move(next)) {}
NFAState(std::vector<int> states)
: fKind(kRemapped_Kind)
, fData(std::move(states)) {}
NFAState(bool inverse, std::vector<bool> accepts, std::vector<int> next)
: fKind(kTable_Kind)
, fInverse(inverse)
, fNext(std::move(next)) {
for (bool b : accepts) {
fData.push_back(b);
}
}
NFAState(int token)
: fKind(kAccept_Kind) {
fData.push_back(token);
}
bool accept(char c) const {
switch (fKind) {
case kAccept_Kind:
return false;
case kChar_Kind:
return c == fChar;
case kDot_Kind:
return c != '\n';
case kTable_Kind: {
bool value;
if ((size_t) c < fData.size()) {
value = fData[c];
} else {
value = false;
}
return value != fInverse;
}
default:
ABORT("unreachable");
}
}
std::string description() const {
switch (fKind) {
case kAccept_Kind:
return "Accept(" + std::to_string(fData[0]) + ")";
case kChar_Kind: {
std::string result = "Char('" + std::string(1, fChar) + "'";
for (int v : fNext) {
result += ", ";
result += std::to_string(v);
}
result += ")";
return result;
}
case kDot_Kind: {
std::string result = "Dot(";
const char* separator = "";
for (int v : fNext) {
result += separator;
result += std::to_string(v);
separator = ", ";
}
result += ")";
return result;
}
case kRemapped_Kind: {
std::string result = "Remapped(";
const char* separator = "";
for (int v : fData) {
result += separator;
result += std::to_string(v);
separator = ", ";
}
result += ")";
return result;
}
case kTable_Kind: {
std::string result = std::string("Table(") + (fInverse ? "true" : "false") + ", [";
const char* separator = "";
for (int v : fData) {
result += separator;
result += v ? "true" : "false";
separator = ", ";
}
result += "]";
for (int n : fNext) {
result += ", ";
result += std::to_string(n);
}
result += ")";
return result;
}
default:
ABORT("unreachable");
}
}
Kind fKind;
char fChar = 0;
bool fInverse = false;
std::vector<int> fData;
// states we transition to upon a succesful match from this state
std::vector<int> fNext;
};
#endif

130
src/sksl/lex/NFAtoDFA.h Normal file
View File

@ -0,0 +1,130 @@
/*
* Copyright 2017 Google Inc.
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#include "DFA.h"
#include "DFAState.h"
#include "NFA.h"
#include "NFAState.h"
#include <algorithm>
#include <climits>
#include <memory>
#include <unordered_map>
#include <vector>
/**
* Converts a nondeterministic finite automaton to a deterministic finite automaton. Since NFAs and
* DFAs differ only in that an NFA allows multiple states at the same time, we can find each
* possible combination of simultaneous NFA states and give this combination a label. These labelled
* nodes are our DFA nodes, since we can only be in one such unique set of NFA states at a time.
*
* As an NFA can end up in multiple accept states at the same time (for instance, the token "while"
* is valid for both WHILE and IDENTIFIER), we disambiguate by preferring the first matching regex
* (in terms of the order in which they were added to the NFA).
*/
class NFAtoDFA {
public:
NFAtoDFA(NFA* nfa)
: fNFA(*nfa) {}
/**
* Returns a DFA created from the NFA.
*/
DFA convert() {
// create state 0, the "reject" state
getState(DFAState::Label({}));
// create a state representing being in all of the NFA's start states at once
std::vector<int> startStates = fNFA.fStartStates;
std::sort(startStates.begin(), startStates.end());
// this becomes state 1, our start state
DFAState* start = getState(DFAState::Label(startStates));
this->scanState(start);
int stateCount = 0;
for (const auto& row : fTransitions) {
stateCount = std::max(stateCount, (int) row.size());
}
return DFA(fTransitions, fAccepts);
}
private:
/**
* Returns an existing state with the given label, or creates a new one and returns it.
*/
DFAState* getState(DFAState::Label label) {
auto found = fStates.find(label);
if (found == fStates.end()) {
int id = fStates.size();
fStates[label] = std::unique_ptr<DFAState>(new DFAState(id, label));
return fStates[label].get();
}
return found->second.get();
}
void add(int nfaState, std::vector<int>* states) {
NFAState state = fNFA.fStates[nfaState];
if (state.fKind == NFAState::kRemapped_Kind) {
for (int next : state.fData) {
this->add(next, states);
}
} else {
for (int state : *states) {
if (nfaState == state) {
return;
}
}
states->push_back(nfaState);
}
}
void addTransition(char c, int start, int next) {
while (fTransitions.size() <= (size_t) c) {
fTransitions.push_back(std::vector<int>());
}
std::vector<int>& row = fTransitions[c];
while (row.size() <= (size_t) start) {
row.push_back(-1);
}
row[start] = next;
}
void scanState(DFAState* state) {
state->fIsScanned = true;
for (char c = 9; c <= DFA::END_CHAR; ++c) {
std::vector<int> next;
int bestAccept = INT_MAX;
for (int idx : state->fLabel.fStates) {
const NFAState& nfaState = fNFA.fStates[idx];
if (nfaState.accept(c)) {
for (int nextState : nfaState.fNext) {
if (fNFA.fStates[nextState].fKind == NFAState::kAccept_Kind) {
bestAccept = std::min(bestAccept, fNFA.fStates[nextState].fData[0]);
}
this->add(nextState, &next);
}
}
}
std::sort(next.begin(), next.end());
DFAState* nextState = this->getState(DFAState::Label(next));
this->addTransition(c, state->fId, nextState->fId);
if (bestAccept != INT_MAX) {
while (fAccepts.size() <= (size_t) nextState->fId) {
fAccepts.push_back(-1);
}
fAccepts[nextState->fId] = bestAccept;
}
if (!nextState->fIsScanned) {
this->scanState(nextState);
}
}
}
const NFA& fNFA;
std::unordered_map<DFAState::Label, std::unique_ptr<DFAState>> fStates;
std::vector<std::vector<int>> fTransitions;
std::vector<int> fAccepts;
};

117
src/sksl/lex/RegexNode.cpp Normal file
View File

@ -0,0 +1,117 @@
/*
* Copyright 2017 Google Inc.
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#include "RegexNode.h"
#include "NFA.h"
std::vector<int> RegexNode::createStates(NFA* nfa, const std::vector<int>& accept) const {
std::vector<int> result;
switch (fKind) {
case kChar_Kind:
result.push_back(nfa->addState(NFAState(fPayload.fChar, accept)));
break;
case kCharset_Kind: {
std::vector<bool> chars;
for (const RegexNode& child : fChildren) {
if (child.fKind == kChar_Kind) {
while (chars.size() <= (size_t) child.fPayload.fChar) {
chars.push_back(false);
}
chars[child.fPayload.fChar] = true;
} else {
ASSERT(child.fKind == kRange_Kind);
while (chars.size() <= (size_t) child.fChildren[1].fPayload.fChar) {
chars.push_back(false);
}
for (char c = child.fChildren[0].fPayload.fChar;
c <= child.fChildren[1].fPayload.fChar;
++c) {
chars[c] = true;
}
}
}
result.push_back(nfa->addState(NFAState(fPayload.fBool, chars, accept)));
break;
}
case kConcat_Kind: {
std::vector<int> right = fChildren[1].createStates(nfa, accept);
result = fChildren[0].createStates(nfa, right);
break;
}
case kDot_Kind:
result.push_back(nfa->addState(NFAState(NFAState::kDot_Kind, accept)));
break;
case kOr_Kind: {
std::vector<int> states = fChildren[0].createStates(nfa, accept);
result.insert(result.end(), states.begin(), states.end());
states = fChildren[1].createStates(nfa, accept);
result.insert(result.end(), states.begin(), states.end());
break;
}
case kPlus_Kind: {
std::vector<int> next = accept;
std::vector<int> placeholder;
int id = nfa->addState(NFAState(placeholder));
next.push_back(id);
result = fChildren[0].createStates(nfa, next);
nfa->fStates[id] = NFAState(result);
break;
}
case kQuestion_Kind:
result = fChildren[0].createStates(nfa, accept);
result.insert(result.end(), accept.begin(), accept.end());
break;
case kRange_Kind:
ABORT("unreachable");
case kStar_Kind: {
std::vector<int> next = accept;
std::vector<int> placeholder;
int id = nfa->addState(NFAState(placeholder));
next.push_back(id);
result = fChildren[0].createStates(nfa, next);
result.insert(result.end(), accept.begin(), accept.end());
nfa->fStates[id] = NFAState(result);
break;
}
}
return result;
}
std::string RegexNode::description() const {
switch (fKind) {
case kChar_Kind:
return std::string(1, fPayload.fChar);
case kCharset_Kind: {
std::string result("[");
if (fPayload.fBool) {
result += "^";
}
for (const RegexNode& c : fChildren) {
result += c.description();
}
result += "]";
return result;
}
case kConcat_Kind:
return fChildren[0].description() + fChildren[1].description();
case kDot_Kind:
return ".";
case kOr_Kind:
return "(" + fChildren[0].description() + "|" + fChildren[1].description() + ")";
case kPlus_Kind:
return fChildren[0].description() + "+";
case kQuestion_Kind:
return fChildren[0].description() + "?";
case kRange_Kind:
return fChildren[0].description() + "-" + fChildren[1].description();
case kStar_Kind:
return fChildren[0].description() + "*";
default:
return "<" + std::to_string(fKind) + ">";
}
}

78
src/sksl/lex/RegexNode.h Normal file
View File

@ -0,0 +1,78 @@
/*
* Copyright 2017 Google Inc.
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#ifndef SKSL_REGEXNODE
#define SKSL_REGEXNODE
#include <string>
#include <vector>
struct NFA;
/**
* Represents a node in the parse tree of a regular expression.
*/
struct RegexNode {
enum Kind {
kChar_Kind,
kCharset_Kind,
kConcat_Kind,
kDot_Kind,
kOr_Kind,
kPlus_Kind,
kRange_Kind,
kQuestion_Kind,
kStar_Kind
};
RegexNode(Kind kind)
: fKind(kind) {}
RegexNode(Kind kind, char payload)
: fKind(kind) {
fPayload.fChar = payload;
}
RegexNode(Kind kind, const char* children)
: fKind(kind) {
fPayload.fBool = false;
while (*children != '\0') {
fChildren.emplace_back(kChar_Kind, *children);
++children;
}
}
RegexNode(Kind kind, RegexNode child)
: fKind(kind) {
fChildren.push_back(std::move(child));
}
RegexNode(Kind kind, RegexNode child1, RegexNode child2)
: fKind(kind) {
fChildren.push_back(std::move(child1));
fChildren.push_back(std::move(child2));
}
/**
* Creates NFA states for this node, with a successful match against this node resulting in a
* transition to all of the states in the accept vector.
*/
std::vector<int> createStates(NFA* nfa, const std::vector<int>& accept) const;
std::string description() const;
Kind fKind;
union Payload {
char fChar;
bool fBool;
} fPayload;
std::vector<RegexNode> fChildren;
};
#endif

View File

@ -0,0 +1,176 @@
/*
* Copyright 2017 Google Inc.
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#include "RegexParser.h"
#include "LexUtil.h"
RegexNode RegexParser::parse(std::string source) {
fSource = source;
fIndex = 0;
ASSERT(fStack.size() == 0);
this->regex();
ASSERT(fStack.size() == 1);
ASSERT(fIndex == source.size());
return this->pop();
}
char RegexParser::peek() {
if (fIndex >= fSource.size()) {
return END;
}
return fSource[fIndex];
}
void RegexParser::expect(char c) {
if (this->peek() != c) {
printf("expected '%c' at index %d, but found '%c'", c, (int) fIndex, this->peek());
exit(1);
}
++fIndex;
}
RegexNode RegexParser::pop() {
RegexNode result = fStack.top();
fStack.pop();
return result;
}
void RegexParser::term() {
switch (this->peek()) {
case '(': this->group(); break;
case '[': this->set(); break;
case '.': this->dot(); break;
default: this->literal();
}
}
void RegexParser::quantifiedTerm() {
this->term();
switch (this->peek()) {
case '*': fStack.push(RegexNode(RegexNode::kStar_Kind, this->pop())); ++fIndex; break;
case '+': fStack.push(RegexNode(RegexNode::kPlus_Kind, this->pop())); ++fIndex; break;
case '?': fStack.push(RegexNode(RegexNode::kQuestion_Kind, this->pop())); ++fIndex; break;
default: break;
}
}
void RegexParser::sequence() {
this->quantifiedTerm();
for (;;) {
switch (this->peek()) {
case END: // fall through
case '|': // fall through
case ')': return;
default:
this->sequence();
RegexNode right = this->pop();
RegexNode left = this->pop();
fStack.emplace(RegexNode::kConcat_Kind, std::move(left), std::move(right));
}
}
}
RegexNode RegexParser::escapeSequence(char c) {
switch (c) {
case 'n': return RegexNode(RegexNode::kChar_Kind, '\n');
case 'r': return RegexNode(RegexNode::kChar_Kind, '\r');
case 't': return RegexNode(RegexNode::kChar_Kind, '\t');
case 's': return RegexNode(RegexNode::kCharset_Kind, " \t\n\r");
default: return RegexNode(RegexNode::kChar_Kind, c);
}
}
void RegexParser::literal() {
char c = this->peek();
if (c == '\\') {
++fIndex;
fStack.push(this->escapeSequence(peek()));
++fIndex;
}
else {
fStack.push(RegexNode(RegexNode::kChar_Kind, c));
++fIndex;
}
}
void RegexParser::dot() {
this->expect('.');
fStack.push(RegexNode(RegexNode::kDot_Kind));
}
void RegexParser::group() {
this->expect('(');
this->regex();
this->expect(')');
}
void RegexParser::setItem() {
this->literal();
if (this->peek() == '-') {
++fIndex;
if (peek() == ']') {
fStack.push(RegexNode(RegexNode::kChar_Kind, '-'));
}
else {
literal();
RegexNode end = this->pop();
ASSERT(end.fKind == RegexNode::kChar_Kind);
RegexNode start = this->pop();
ASSERT(start.fKind == RegexNode::kChar_Kind);
fStack.push(RegexNode(RegexNode::kRange_Kind, std::move(start), std::move(end)));
}
}
}
void RegexParser::set() {
expect('[');
size_t depth = fStack.size();
RegexNode set(RegexNode::kCharset_Kind);
if (this->peek() == '^') {
++fIndex;
set.fPayload.fBool = true;
}
else {
set.fPayload.fBool = false;
}
for (;;) {
switch (this->peek()) {
case ']':
++fIndex;
while (fStack.size() > depth) {
set.fChildren.push_back(this->pop());
}
fStack.push(std::move(set));
return;
case END:
printf("unterminated character set\n");
exit(1);
default:
this->setItem();
}
}
}
void RegexParser::regex() {
this->sequence();
switch (this->peek()) {
case '|': {
++fIndex;
this->regex();
RegexNode right = this->pop();
RegexNode left = this->pop();
fStack.push(RegexNode(RegexNode::kOr_Kind, left, right));
break;
}
case END: // fall through
case ')':
return;
default:
ASSERT(false);
}
}

View File

@ -0,0 +1,89 @@
/*
* Copyright 2017 Google Inc.
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#ifndef SKSL_REGEXPARSER
#define SKSL_REGEXPARSER
#include "RegexNode.h"
#include <stack>
#include <string>
/**
* Turns a simple regular expression into a parse tree. The regular expression syntax supports only
* the basic quantifiers ('*', '+', and '?'), alternation ('|'), character sets ('[a-z]'), and
* groups ('()').
*/
class RegexParser {
public:
RegexNode parse(std::string source);
private:
static constexpr char END = '\0';
char peek();
void expect(char c);
RegexNode pop();
/**
* Matches a char literal, parenthesized group, character set, or dot ('.').
*/
void term();
/**
* Matches a term followed by an optional quantifier ('*', '+', or '?').
*/
void quantifiedTerm();
/**
* Matches a sequence of quantifiedTerms.
*/
void sequence();
/**
* Returns a node representing the given escape character (e.g. escapeSequence('n') returns a
* node which matches a newline character).
*/
RegexNode escapeSequence(char c);
/**
* Matches a literal character or escape sequence.
*/
void literal();
/**
* Matches a dot ('.').
*/
void dot();
/**
* Matches a parenthesized group.
*/
void group();
/**
* Matches a literal character, escape sequence, or character range from a character set.
*/
void setItem();
/**
* Matches a character set.
*/
void set();
void regex();
std::string fSource;
size_t fIndex;
std::stack<RegexNode> fStack;
};
#endif

95
src/sksl/lex/sksl.lex Normal file
View File

@ -0,0 +1,95 @@
FLOAT_LITERAL = [0-9]*\.[0-9]+([eE][+-]?[0-9]+)?|[0-9]+\.[0-9]*([eE][+-]?[0-9]+)?|[0-9]+([eE][+-]?[0-9]+)
INT_LITERAL = [0-9]+|0x[0-9a-fA-F]+
TRUE_LITERAL = "true"
FALSE_LITERAL = "false"
IF = "if"
STATIC_IF = "@if"
ELSE = "else"
FOR = "for"
WHILE = "while"
DO = "do"
SWITCH = "switch"
STATIC_SWITCH = "@switch"
CASE = "case"
DEFAULT = "default"
BREAK = "break"
CONTINUE = "continue"
DISCARD = "discard"
RETURN = "return"
IN = "in"
OUT = "out"
INOUT = "inout"
UNIFORM = "uniform"
CONST = "const"
LOWP = "lowp"
MEDIUMP = "mediump"
HIGHP = "highp"
FLAT = "flat"
NOPERSPECTIVE = "noperspective"
READONLY = "readonly"
WRITEONLY = "writeonly"
COHERENT = "coherent"
VOLATILE = "volatile"
RESTRICT = "restrict"
BUFFER = "buffer"
HASSIDEEFFECTS = "sk_has_side_effects"
STRUCT = "struct"
LAYOUT = "layout"
PRECISION = "precision"
IDENTIFIER = [a-zA-Z_$]([0-9]|[a-zA-Z_$])*
DIRECTIVE = #[a-zA-Z_$]([0-9]|[a-zA-Z_$])*
SECTION = @[a-zA-Z_$]([0-9]|[a-zA-Z_$])*
LPAREN = "("
RPAREN = ")"
LBRACE = "{"
RBRACE = "}"
LBRACKET = "["
RBRACKET = "]"
DOT = "."
COMMA = ","
PLUSPLUS = "++"
MINUSMINUS = "--"
PLUS = "+"
MINUS = "-"
STAR = "*"
SLASH = "/"
PERCENT = "%"
SHL = "<<"
SHR = ">>"
BITWISEOR = "|"
BITWISEXOR = "^"
BITWISEAND = "&"
BITWISENOT = "~"
LOGICALOR = "||"
LOGICALXOR = "^^"
LOGICALAND = "&&"
LOGICALNOT = "!"
QUESTION = "?"
COLON = ":"
EQ = "="
EQEQ = "=="
NEQ = "!="
GT = ">"
LT = "<"
GTEQ = ">="
LTEQ = "<="
PLUSEQ = "+="
MINUSEQ = "-="
STAREQ = "*="
SLASHEQ = "/="
PERCENTEQ = "%="
SHLEQ = "<<="
SHREQ = ">>="
BITWISEOREQ = "|="
BITWISEXOREQ = "^="
BITWISEANDEQ = "&="
LOGICALOREQ = "||="
LOGICALXOREQ = "^^="
LOGICALANDEQ = "&&="
SEMICOLON = ";"
ARROW = "->"
COLONCOLON = "::"
WHITESPACE = \s+
LINE_COMMENT = //.*
BLOCK_COMMENT = /\*([^*]|\*[^/])*\*/
INVALID = .