Revert "Initial checkin of SkSL lexical analyzer generator (not actually in use yet)."
This reverts commit 3de0e496b5.
Reason for revert: Google3 roll failure
Original change's description:
> Initial checkin of SkSL lexical analyzer generator (not actually in use yet).
>
> Bug: skia:
> Change-Id: I283feb9ac1cef134588204107ea019ce657e37b3
> Reviewed-on: https://skia-review.googlesource.com/42084
> Commit-Queue: Ethan Nicholas <ethannicholas@google.com>
> Reviewed-by: Brian Salomon <bsalomon@google.com>
TBR=bsalomon@google.com,ethannicholas@google.com
Change-Id: Ifa9a054aba9baa1e0d17309a2e1838b5d3b0bfec
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Bug: skia:
Reviewed-on: https://skia-review.googlesource.com/43340
Reviewed-by: Ethan Nicholas <ethannicholas@google.com>
Commit-Queue: Ethan Nicholas <ethannicholas@google.com>
parent 276066b517
commit 3ed4781ee1

BUILD.gn (10 lines changed)
@@ -412,16 +412,6 @@ optional("fontmgr_fuchsia") {
}

if (skia_compile_processors) {
  executable("sksllex") {
    sources = [
      "src/sksl/lex/Main.cpp",
      "src/sksl/lex/NFA.cpp",
      "src/sksl/lex/RegexNode.cpp",
      "src/sksl/lex/RegexParser.cpp",
    ]
    include_dirs = [ "src/sksl/lex" ]
  }

  executable("skslc") {
    defines = [ "SKSL_STANDALONE" ]
    sources = [

@@ -1,32 +0,0 @@
/*
 * Copyright 2017 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#ifndef SKSL_DFA
#define SKSL_DFA

#include <string>
#include <vector>

/**
 * Tables representing a deterministic finite automaton for matching regular expressions.
 */
struct DFA {
    DFA(std::vector<std::vector<int>> transitions, std::vector<int> accepts)
    : fTransitions(transitions)
    , fAccepts(accepts) {}

    static constexpr char END_CHAR = 126;

    // one row per character, one column per state
    std::vector<std::vector<int>> fTransitions;

    // contains, for each state, the token id we should report when matching ends in that state (-1
    // for no match)
    std::vector<int> fAccepts;
};

#endif
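
To make the table layout concrete, here is a minimal standalone sketch (not part of the change; the tables are hand-written for a single digits-only token rather than generated by sksllex) of how tables indexed as transitions[character][state], plus a per-state accepts array, drive longest-match scanning:

#include <cstdio>
#include <vector>

int main() {
    // Toy DFA: state 0 is the reject state, state 1 is the start state, and
    // state 2 is reached after one or more digits. Indexed as
    // transitions[character][state], mirroring DFA::fTransitions.
    std::vector<std::vector<int>> transitions(128, std::vector<int>(3, 0));
    for (char c = '0'; c <= '9'; ++c) {
        transitions[c][1] = 2;   // start -> digits
        transitions[c][2] = 2;   // digits -> digits
    }
    // accepts[state] is the token id reported when matching ends in that state
    // (-1 = no match), mirroring DFA::fAccepts. Token 1 stands in for an
    // INT_LITERAL-style token here.
    std::vector<int> accepts = { -1, -1, 1 };

    const char* text = "123;";
    int state = 1;
    int offset = 0;
    int lastAccept = -1;
    int lastAcceptEnd = 0;
    while (text[offset]) {
        state = transitions[(int) text[offset]][state];
        ++offset;
        if (!state) {
            break;                       // fell into the reject state
        }
        if (accepts[state] != -1) {
            lastAccept = accepts[state]; // remember the longest match so far
            lastAcceptEnd = offset;
        }
    }
    printf("token %d, length %d\n", lastAccept, lastAcceptEnd);  // token 1, length 3
    return 0;
}
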
@@ -1,68 +0,0 @@
/*
 * Copyright 2017 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#ifndef SKSL_DFASTATE
#define SKSL_DFASTATE

struct DFAState {
    struct Label {
        std::vector<int> fStates;

        Label(std::vector<int> states)
        : fStates(std::move(states)) {}

        bool operator==(const Label& other) const {
            return fStates == other.fStates;
        }

        bool operator!=(const Label& other) const {
            return !(*this == other);
        }

        std::string description() const {
            std::string result = "<";
            const char* separator = "";
            for (int s : fStates) {
                result += separator;
                result += std::to_string(s);
                separator = ", ";
            }
            result += ">";
            return result;
        }
    };

    DFAState()
    : fId(-1)
    , fLabel({}) {}

    DFAState(int id, Label label)
    : fId(id)
    , fLabel(std::move(label)) {}

    DFAState(const DFAState& other) = delete;

    int fId;

    Label fLabel;

    bool fIsScanned = false;
};

namespace std {
    template<> struct hash<DFAState::Label> {
        size_t operator()(const DFAState::Label& s) const {
            int result = 0;
            for (int i : s.fStates) {
                result = result * 101 + i;
            }
            return result;
        }
    };
} // namespace

#endif
@@ -1,14 +0,0 @@
/*
 * Copyright 2017 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#ifndef SKSL_LEXUTIL
#define SKSL_LEXUTIL

#define ABORT(...) (fprintf(stderr, __VA_ARGS__), abort())
#define ASSERT(x) (void)((x) || (ABORT("failed assert(%s): %s:%d\n", #x, __FILE__, __LINE__), 0))

#endif
@@ -1,197 +0,0 @@
/*
 * Copyright 2017 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#include "NFAtoDFA.h"
#include "RegexParser.h"

#include <fstream>
#include <sstream>
#include <string>

/**
 * Processes a .lex file and produces .h and .cpp files which implement a lexical analyzer. The .lex
 * file is a text file with one token definition per line. Each line is of the form:
 * <TOKEN_NAME> = <pattern>
 * where <pattern> is either a regular expression (e.g [0-9]) or a double-quoted literal string.
 */

static constexpr const char* HEADER =
    "/*\n"
    " * Copyright 2017 Google Inc.\n"
    " *\n"
    " * Use of this source code is governed by a BSD-style license that can be\n"
    " * found in the LICENSE file.\n"
    " */\n"
    "/*****************************************************************************************\n"
    " ******************** This file was generated by sksllex. Do not edit. *******************\n"
    " *****************************************************************************************/\n";

void writeH(const DFA& dfa, const char* lexer, const char* token,
            const std::vector<std::string>& tokens, const char* hPath) {
    std::ofstream out(hPath);
    ASSERT(out.good());
    out << HEADER;
    out << "#ifndef SKSL_" << lexer << "\n";
    out << "#define SKSL_" << lexer << "\n";
    out << "#include <cstddef>\n";
    out << "#include <cstdint>\n";
    out << "namespace SkSL {\n";
    out << "\n";
    out << "struct " << token << " {\n";
    out << " enum Kind {\n";
    for (const std::string& t : tokens) {
        out << " #undef " << t << "\n";
        out << " " << t << ",\n";
    }
    out << " };\n";
    out << "\n";
    out << " " << token << "()\n";
    out << " : fKind(Kind::INVALID)\n";
    out << " , fOffset(-1)\n";
    out << " , fLength(-1) {}\n";
    out << "\n";
    out << " " << token << "(Kind kind, int offset, int length)\n";
    out << " : fKind(kind)\n";
    out << " , fOffset(offset)\n";
    out << " , fLength(length) {}\n";
    out << "\n";
    out << " Kind fKind;\n";
    out << " int fOffset;\n";
    out << " int fLength;\n";
    out << "};\n";
    out << "\n";
    out << "class " << lexer << " {\n";
    out << "public:\n";
    out << " void start(const char* text, size_t length) {\n";
    out << " fText = text;\n";
    out << " fLength = length;\n";
    out << " fOffset = 0;\n";
    out << " }\n";
    out << "\n";
    out << " " << token << " next();\n";
    out << "\n";
    out << "private:\n";
    out << " const char* fText;\n";
    out << " int fLength;\n";
    out << " int fOffset;\n";
    out << "};\n";
    out << "\n";
    out << "} // namespace\n";
    out << "#endif\n";
}

void writeCPP(const DFA& dfa, const char* lexer, const char* token, const char* include,
              const char* cppPath) {
    std::ofstream out(cppPath);
    ASSERT(out.good());
    out << HEADER;
    out << "#include \"" << include << "\"\n";
    out << "\n";
    out << "namespace SkSL {\n";
    out << "\n";

    size_t states = 0;
    for (const auto& row : dfa.fTransitions) {
        states = std::max(states, row.size());
    }
    out << "static int16_t transitions[" << DFA::END_CHAR + 1 << "][" << states << "] = {\n";
    for (char c = 0; c <= DFA::END_CHAR; ++c) {
        out << " {";
        for (size_t i = 0; i < states; ++i) {
            if ((size_t) c < dfa.fTransitions.size() && i < dfa.fTransitions[c].size()) {
                out << " " << dfa.fTransitions[c][i] << ",";
            } else {
                out << " 0,";
            }
        }
        out << " },\n";
    }
    out << "};\n";
    out << "\n";

    out << "static int16_t accepts[" << states << "] = {";
    for (size_t i = 0; i < states; ++i) {
        if (i < dfa.fAccepts.size()) {
            out << " " << dfa.fAccepts[i] << ",";
        } else {
            out << " -1,";
        }
    }
    out << " };\n";
    out << "\n";

    out << token << " " << lexer << "::next() {\n";;
    out << " int startOffset = fOffset;\n";
    out << " if (startOffset == fLength) {\n";
    out << " return " << token << "(" << token << "::END_OF_FILE, startOffset, 0);\n";
    out << " }\n";
    out << " int offset = startOffset;\n";
    out << " int state = 1;\n";
    out << " " << token << "::Kind lastAccept = " << token << "::Kind::INVALID;\n";
    out << " int lastAcceptEnd = startOffset + 1;\n";
    out << " while (offset < fLength) {\n";
    out << " state = transitions[(int) fText[offset]][state];\n";
    out << " ++offset;\n";
    out << " if (!state) {\n";
    out << " break;\n";
    out << " }\n";
    out << " if (accepts[state]) {\n";
    out << " lastAccept = (" << token << "::Kind) accepts[state];\n";
    out << " lastAcceptEnd = offset;\n";
    out << " }\n";
    out << " }\n";
    out << " fOffset = lastAcceptEnd;\n";
    out << " return " << token << "(lastAccept, startOffset, lastAcceptEnd - startOffset);\n";
    out << "}\n";
    out << "\n";
    out << "} // namespace\n";
}

void process(const char* inPath, const char* lexer, const char* token, const char* hPath,
             const char* cppPath) {
    NFA nfa;
    std::vector<std::string> tokens;
    tokens.push_back("END_OF_FILE");
    std::string line;
    std::ifstream in(inPath);
    while (std::getline(in, line)) {
        std::istringstream split(line);
        std::string name, delimiter, pattern;
        if (split >> name >> delimiter >> pattern) {
            ASSERT(split.eof());
            ASSERT(name != "");
            ASSERT(delimiter == "=");
            ASSERT(pattern != "");
            tokens.push_back(name);
            if (pattern[0] == '"') {
                ASSERT(pattern.size() > 2 && pattern[pattern.size() - 1] == '"');
                RegexNode node = RegexNode(RegexNode::kChar_Kind, pattern[1]);
                for (size_t i = 2; i < pattern.size() - 1; ++i) {
                    node = RegexNode(RegexNode::kConcat_Kind, node,
                                     RegexNode(RegexNode::kChar_Kind, pattern[i]));
                }
                nfa.addRegex(node);
            }
            else {
                nfa.addRegex(RegexParser().parse(pattern));
            }
        }
    }
    NFAtoDFA converter(&nfa);
    DFA dfa = converter.convert();
    writeH(dfa, lexer, token, tokens, hPath);
    writeCPP(dfa, lexer, token, (std::string("SkSL") + lexer + ".h").c_str(), cppPath);
}

int main(int argc, const char** argv) {
    if (argc != 6) {
        printf("usage: sksllex <input.lex> <lexername> <tokenname> <output.h> <output.cpp>\n");
        exit(1);
    }
    process(argv[1], argv[2], argv[3], argv[4], argv[5]);
    return 0;
}
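
For reference, this is roughly how the generated header would be consumed. The names Lexer, Token, and SkSLLexer.h below are hypothetical, standing in for whatever lexer name, token name, and output paths the caller passes to sksllex; the loop shape follows from the writeH output above, with END_OF_FILE being the first token defined:

// Hypothetical usage of a lexer generated by sksllex; "Lexer" and "Token" are
// the names given on the command line, and SkSLLexer.h is assumed to be the
// generated header.
#include "SkSLLexer.h"

#include <cstdio>
#include <cstring>

void dumpTokens(const char* text) {
    SkSL::Lexer lexer;
    lexer.start(text, strlen(text));
    for (;;) {
        SkSL::Token t = lexer.next();
        if (t.fKind == SkSL::Token::END_OF_FILE) {
            break;
        }
        // each token reports its kind plus the offset/length of the matched text
        printf("kind=%d offset=%d length=%d\n", (int) t.fKind, t.fOffset, t.fLength);
    }
}
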
@@ -1,41 +0,0 @@
/*
 * Copyright 2017 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#include "NFA.h"

int NFA::match(std::string s) const {
    std::vector<int> states = fStartStates;
    for (size_t i = 0; i < s.size(); ++i) {
        std::vector<int> next;
        for (int id : states) {
            if (fStates[id].accept(s[i])) {
                for (int nextId : fStates[id].fNext) {
                    if (fStates[nextId].fKind != NFAState::kRemapped_Kind) {
                        next.push_back(nextId);
                    } else {
                        next.insert(next.end(), fStates[nextId].fData.begin(),
                                    fStates[nextId].fData.end());
                    }
                }
            }
        }
        if (!next.size()) {
            return -1;
        }
        states = next;
    }
    int accept = -1;
    for (int id : states) {
        if (fStates[id].fKind == NFAState::kAccept_Kind) {
            int result = fStates[id].fData[0];
            if (accept == -1 || result < accept) {
                accept = result;
            }
        }
    }
    return accept;
}
@@ -1,54 +0,0 @@
/*
 * Copyright 2017 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#ifndef SKSL_NFA
#define SKSL_NFA

#include "NFAState.h"
#include "RegexNode.h"

/**
 * A nondeterministic finite automaton for matching regular expressions. The NFA is initialized with
 * a number of regular expressions, and then matches a string against all of them simultaneously.
 */
struct NFA {
    /**
     * Adds a new regular expression to the set of expressions matched by this automaton, returning
     * its index.
     */
    int addRegex(const RegexNode& regex) {
        std::vector<int> accept;
        // we reserve token 0 for END_OF_FILE, so this starts at 1
        accept.push_back(this->addState(NFAState(++fRegexCount)));
        std::vector<int> startStates = regex.createStates(this, accept);
        fStartStates.insert(fStartStates.end(), startStates.begin(), startStates.end());
        return fStartStates.size() - 1;
    }

    /**
     * Adds a new state to the NFA, returning its index.
     */
    int addState(NFAState s) {
        fStates.push_back(std::move(s));
        return fStates.size() - 1;
    }

    /**
     * Matches a string against all of the regexes added to this NFA. Returns the index of the first
     * (in addRegex order) matching expression, or -1 if no match. This is relatively slow and used
     * only for debugging purposes; the NFA should be converted to a DFA before actual use.
     */
    int match(std::string s) const;

    int fRegexCount = 0;

    std::vector<NFAState> fStates;

    std::vector<int> fStartStates;
};

#endif
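
A short debugging sketch of this interface (it assumes the files in this change are compiled into the same target; the expected results follow from token 0 being reserved for END_OF_FILE and the lowest-numbered matching regex winning ties):

#include "NFA.h"
#include "RegexParser.h"

#include <cstdio>

int main() {
    NFA nfa;
    nfa.addRegex(RegexParser().parse("while"));                         // token 1
    nfa.addRegex(RegexParser().parse("[a-zA-Z_$]([0-9]|[a-zA-Z_$])*")); // token 2
    // "while" matches both patterns; the lower-numbered regex wins the tie.
    printf("%d\n", nfa.match("while"));   // 1
    printf("%d\n", nfa.match("whale"));   // 2
    printf("%d\n", nfa.match("42"));      // -1, no match
    return 0;
}
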
@@ -1,150 +0,0 @@
/*
 * Copyright 2017 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#ifndef SKSL_NFASTATE
#define SKSL_NFASTATE

#include <string>
#include <vector>

#include "LexUtil.h"

struct NFAState {
    enum Kind {
        // represents an accept state - if the NFA ends up in this state, we have successfully
        // matched the token indicated by fData[0]
        kAccept_Kind,
        // matches the single character fChar
        kChar_Kind,
        // the regex '.'; matches any char but '\n'
        kDot_Kind,
        // a state which serves as a placeholder for the states indicated in fData. When we
        // transition to this state, we instead transition to all of the fData states.
        kRemapped_Kind,
        // contains a list of true/false values in fData. fData[c] tells us whether we accept the
        // character c.
        kTable_Kind
    };

    NFAState(Kind kind, std::vector<int> next)
    : fKind(kind)
    , fNext(std::move(next)) {}

    NFAState(char c, std::vector<int> next)
    : fKind(kChar_Kind)
    , fChar(c)
    , fNext(std::move(next)) {}

    NFAState(std::vector<int> states)
    : fKind(kRemapped_Kind)
    , fData(std::move(states)) {}

    NFAState(bool inverse, std::vector<bool> accepts, std::vector<int> next)
    : fKind(kTable_Kind)
    , fInverse(inverse)
    , fNext(std::move(next)) {
        for (bool b : accepts) {
            fData.push_back(b);
        }
    }

    NFAState(int token)
    : fKind(kAccept_Kind) {
        fData.push_back(token);
    }

    bool accept(char c) const {
        switch (fKind) {
            case kAccept_Kind:
                return false;
            case kChar_Kind:
                return c == fChar;
            case kDot_Kind:
                return c != '\n';
            case kTable_Kind: {
                bool value;
                if ((size_t) c < fData.size()) {
                    value = fData[c];
                } else {
                    value = false;
                }
                return value != fInverse;
            }
            default:
                ABORT("unreachable");
        }
    }

    std::string description() const {
        switch (fKind) {
            case kAccept_Kind:
                return "Accept(" + std::to_string(fData[0]) + ")";
            case kChar_Kind: {
                std::string result = "Char('" + std::string(1, fChar) + "'";
                for (int v : fNext) {
                    result += ", ";
                    result += std::to_string(v);
                }
                result += ")";
                return result;
            }
            case kDot_Kind: {
                std::string result = "Dot(";
                const char* separator = "";
                for (int v : fNext) {
                    result += separator;
                    result += std::to_string(v);
                    separator = ", ";
                }
                result += ")";
                return result;
            }
            case kRemapped_Kind: {
                std::string result = "Remapped(";
                const char* separator = "";
                for (int v : fData) {
                    result += separator;
                    result += std::to_string(v);
                    separator = ", ";
                }
                result += ")";
                return result;
            }
            case kTable_Kind: {
                std::string result = std::string("Table(") + (fInverse ? "true" : "false") + ", [";
                const char* separator = "";
                for (int v : fData) {
                    result += separator;
                    result += v ? "true" : "false";
                    separator = ", ";
                }
                result += "]";
                for (int n : fNext) {
                    result += ", ";
                    result += std::to_string(n);
                }
                result += ")";
                return result;
            }
            default:
                ABORT("unreachable");
        }
    }

    Kind fKind;

    char fChar = 0;

    bool fInverse = false;

    std::vector<int> fData;

    // states we transition to upon a successful match from this state
    std::vector<int> fNext;
};

#endif
@@ -1,130 +0,0 @@
/*
 * Copyright 2017 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#include "DFA.h"
#include "DFAState.h"
#include "NFA.h"
#include "NFAState.h"

#include <algorithm>
#include <climits>
#include <memory>
#include <unordered_map>
#include <vector>

/**
 * Converts a nondeterministic finite automaton to a deterministic finite automaton. Since NFAs and
 * DFAs differ only in that an NFA allows multiple states at the same time, we can find each
 * possible combination of simultaneous NFA states and give this combination a label. These labelled
 * nodes are our DFA nodes, since we can only be in one such unique set of NFA states at a time.
 *
 * As an NFA can end up in multiple accept states at the same time (for instance, the token "while"
 * is valid for both WHILE and IDENTIFIER), we disambiguate by preferring the first matching regex
 * (in terms of the order in which they were added to the NFA).
 */
class NFAtoDFA {
public:
    NFAtoDFA(NFA* nfa)
    : fNFA(*nfa) {}

    /**
     * Returns a DFA created from the NFA.
     */
    DFA convert() {
        // create state 0, the "reject" state
        getState(DFAState::Label({}));
        // create a state representing being in all of the NFA's start states at once
        std::vector<int> startStates = fNFA.fStartStates;
        std::sort(startStates.begin(), startStates.end());
        // this becomes state 1, our start state
        DFAState* start = getState(DFAState::Label(startStates));
        this->scanState(start);

        int stateCount = 0;
        for (const auto& row : fTransitions) {
            stateCount = std::max(stateCount, (int) row.size());
        }
        return DFA(fTransitions, fAccepts);
    }

private:
    /**
     * Returns an existing state with the given label, or creates a new one and returns it.
     */
    DFAState* getState(DFAState::Label label) {
        auto found = fStates.find(label);
        if (found == fStates.end()) {
            int id = fStates.size();
            fStates[label] = std::unique_ptr<DFAState>(new DFAState(id, label));
            return fStates[label].get();
        }
        return found->second.get();
    }

    void add(int nfaState, std::vector<int>* states) {
        NFAState state = fNFA.fStates[nfaState];
        if (state.fKind == NFAState::kRemapped_Kind) {
            for (int next : state.fData) {
                this->add(next, states);
            }
        } else {
            for (int state : *states) {
                if (nfaState == state) {
                    return;
                }
            }
            states->push_back(nfaState);
        }
    }

    void addTransition(char c, int start, int next) {
        while (fTransitions.size() <= (size_t) c) {
            fTransitions.push_back(std::vector<int>());
        }
        std::vector<int>& row = fTransitions[c];
        while (row.size() <= (size_t) start) {
            row.push_back(-1);
        }
        row[start] = next;
    }

    void scanState(DFAState* state) {
        state->fIsScanned = true;
        for (char c = 9; c <= DFA::END_CHAR; ++c) {
            std::vector<int> next;
            int bestAccept = INT_MAX;
            for (int idx : state->fLabel.fStates) {
                const NFAState& nfaState = fNFA.fStates[idx];
                if (nfaState.accept(c)) {
                    for (int nextState : nfaState.fNext) {
                        if (fNFA.fStates[nextState].fKind == NFAState::kAccept_Kind) {
                            bestAccept = std::min(bestAccept, fNFA.fStates[nextState].fData[0]);
                        }
                        this->add(nextState, &next);
                    }
                }
            }
            std::sort(next.begin(), next.end());
            DFAState* nextState = this->getState(DFAState::Label(next));
            this->addTransition(c, state->fId, nextState->fId);
            if (bestAccept != INT_MAX) {
                while (fAccepts.size() <= (size_t) nextState->fId) {
                    fAccepts.push_back(-1);
                }
                fAccepts[nextState->fId] = bestAccept;
            }
            if (!nextState->fIsScanned) {
                this->scanState(nextState);
            }
        }
    }

    const NFA& fNFA;
    std::unordered_map<DFAState::Label, std::unique_ptr<DFAState>> fStates;
    std::vector<std::vector<int>> fTransitions;
    std::vector<int> fAccepts;
};
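
The conversion above is the classic subset construction. The following self-contained toy sketch (a hand-built two-token NFA over 'a' and 'b', independent of the classes in this change) shows the labelling of DFA states by NFA-state sets and the prefer-the-lowest-token disambiguation described in the comment:

#include <cstdio>
#include <map>
#include <set>
#include <vector>

// Toy NFA: edges[state] maps a character to the NFA states reachable on that
// character; acceptToken[state] is the token accepted in that state, or -1.
struct ToyNFA {
    std::vector<std::map<char, std::vector<int>>> edges;
    std::vector<int> acceptToken;
};

int main() {
    // Token 1 = "ab" (states 0 -a-> 1 -b-> 2), token 2 = "a+" (states 0 -a-> 3 -a-> 3).
    ToyNFA nfa;
    nfa.edges = {
        {{'a', {1, 3}}},  // 0: shared start
        {{'b', {2}}},     // 1: saw 'a', waiting for 'b'
        {},               // 2: accepted "ab"
        {{'a', {3}}},     // 3: accepted one or more 'a's
    };
    nfa.acceptToken = {-1, -1, 1, 2};

    // Subset construction: each DFA state is labelled by a set of NFA states.
    std::map<std::set<int>, int> dfaId;
    std::vector<std::set<int>> worklist = {{0}};
    dfaId[{0}] = 0;
    for (size_t i = 0; i < worklist.size(); ++i) {
        std::set<int> label = worklist[i];
        int best = -1;  // prefer the first-added (lowest-numbered) token
        std::map<char, std::set<int>> outgoing;
        for (int s : label) {
            if (nfa.acceptToken[s] != -1 && (best == -1 || nfa.acceptToken[s] < best)) {
                best = nfa.acceptToken[s];
            }
            for (const auto& edge : nfa.edges[s]) {
                outgoing[edge.first].insert(edge.second.begin(), edge.second.end());
            }
        }
        if (best != -1) {
            printf("DFA state %zu accepts token %d\n", i, best);
        }
        for (const auto& edge : outgoing) {
            if (!dfaId.count(edge.second)) {
                dfaId[edge.second] = (int) worklist.size();
                worklist.push_back(edge.second);
            }
            printf("DFA state %zu --%c--> DFA state %d\n", i, edge.first, dfaId[edge.second]);
        }
    }
    return 0;
}
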
@@ -1,117 +0,0 @@
/*
 * Copyright 2017 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#include "RegexNode.h"

#include "NFA.h"

std::vector<int> RegexNode::createStates(NFA* nfa, const std::vector<int>& accept) const {
    std::vector<int> result;
    switch (fKind) {
        case kChar_Kind:
            result.push_back(nfa->addState(NFAState(fPayload.fChar, accept)));
            break;
        case kCharset_Kind: {
            std::vector<bool> chars;
            for (const RegexNode& child : fChildren) {
                if (child.fKind == kChar_Kind) {
                    while (chars.size() <= (size_t) child.fPayload.fChar) {
                        chars.push_back(false);
                    }
                    chars[child.fPayload.fChar] = true;
                } else {
                    ASSERT(child.fKind == kRange_Kind);
                    while (chars.size() <= (size_t) child.fChildren[1].fPayload.fChar) {
                        chars.push_back(false);
                    }
                    for (char c = child.fChildren[0].fPayload.fChar;
                         c <= child.fChildren[1].fPayload.fChar;
                         ++c) {
                        chars[c] = true;
                    }
                }
            }
            result.push_back(nfa->addState(NFAState(fPayload.fBool, chars, accept)));
            break;
        }
        case kConcat_Kind: {
            std::vector<int> right = fChildren[1].createStates(nfa, accept);
            result = fChildren[0].createStates(nfa, right);
            break;
        }
        case kDot_Kind:
            result.push_back(nfa->addState(NFAState(NFAState::kDot_Kind, accept)));
            break;
        case kOr_Kind: {
            std::vector<int> states = fChildren[0].createStates(nfa, accept);
            result.insert(result.end(), states.begin(), states.end());
            states = fChildren[1].createStates(nfa, accept);
            result.insert(result.end(), states.begin(), states.end());
            break;
        }
        case kPlus_Kind: {
            std::vector<int> next = accept;
            std::vector<int> placeholder;
            int id = nfa->addState(NFAState(placeholder));
            next.push_back(id);
            result = fChildren[0].createStates(nfa, next);
            nfa->fStates[id] = NFAState(result);
            break;
        }
        case kQuestion_Kind:
            result = fChildren[0].createStates(nfa, accept);
            result.insert(result.end(), accept.begin(), accept.end());
            break;
        case kRange_Kind:
            ABORT("unreachable");
        case kStar_Kind: {
            std::vector<int> next = accept;
            std::vector<int> placeholder;
            int id = nfa->addState(NFAState(placeholder));
            next.push_back(id);
            result = fChildren[0].createStates(nfa, next);
            result.insert(result.end(), accept.begin(), accept.end());
            nfa->fStates[id] = NFAState(result);
            break;
        }
    }
    return result;
}

std::string RegexNode::description() const {
    switch (fKind) {
        case kChar_Kind:
            return std::string(1, fPayload.fChar);
        case kCharset_Kind: {
            std::string result("[");
            if (fPayload.fBool) {
                result += "^";
            }
            for (const RegexNode& c : fChildren) {
                result += c.description();
            }
            result += "]";
            return result;
        }
        case kConcat_Kind:
            return fChildren[0].description() + fChildren[1].description();
        case kDot_Kind:
            return ".";
        case kOr_Kind:
            return "(" + fChildren[0].description() + "|" + fChildren[1].description() + ")";
        case kPlus_Kind:
            return fChildren[0].description() + "+";
        case kQuestion_Kind:
            return fChildren[0].description() + "?";
        case kRange_Kind:
            return fChildren[0].description() + "-" + fChildren[1].description();
        case kStar_Kind:
            return fChildren[0].description() + "*";
        default:
            return "<" + std::to_string(fKind) + ">";
    }
}
@@ -1,78 +0,0 @@
/*
 * Copyright 2017 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#ifndef SKSL_REGEXNODE
#define SKSL_REGEXNODE

#include <string>
#include <vector>

struct NFA;

/**
 * Represents a node in the parse tree of a regular expression.
 */
struct RegexNode {
    enum Kind {
        kChar_Kind,
        kCharset_Kind,
        kConcat_Kind,
        kDot_Kind,
        kOr_Kind,
        kPlus_Kind,
        kRange_Kind,
        kQuestion_Kind,
        kStar_Kind
    };

    RegexNode(Kind kind)
    : fKind(kind) {}

    RegexNode(Kind kind, char payload)
    : fKind(kind) {
        fPayload.fChar = payload;
    }

    RegexNode(Kind kind, const char* children)
    : fKind(kind) {
        fPayload.fBool = false;
        while (*children != '\0') {
            fChildren.emplace_back(kChar_Kind, *children);
            ++children;
        }
    }

    RegexNode(Kind kind, RegexNode child)
    : fKind(kind) {
        fChildren.push_back(std::move(child));
    }

    RegexNode(Kind kind, RegexNode child1, RegexNode child2)
    : fKind(kind) {
        fChildren.push_back(std::move(child1));
        fChildren.push_back(std::move(child2));
    }

    /**
     * Creates NFA states for this node, with a successful match against this node resulting in a
     * transition to all of the states in the accept vector.
     */
    std::vector<int> createStates(NFA* nfa, const std::vector<int>& accept) const;

    std::string description() const;

    Kind fKind;

    union Payload {
        char fChar;
        bool fBool;
    } fPayload;

    std::vector<RegexNode> fChildren;
};

#endif
@@ -1,176 +0,0 @@
/*
 * Copyright 2017 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#include "RegexParser.h"

#include "LexUtil.h"

RegexNode RegexParser::parse(std::string source) {
    fSource = source;
    fIndex = 0;
    ASSERT(fStack.size() == 0);
    this->regex();
    ASSERT(fStack.size() == 1);
    ASSERT(fIndex == source.size());
    return this->pop();
}

char RegexParser::peek() {
    if (fIndex >= fSource.size()) {
        return END;
    }
    return fSource[fIndex];
}

void RegexParser::expect(char c) {
    if (this->peek() != c) {
        printf("expected '%c' at index %d, but found '%c'", c, (int) fIndex, this->peek());
        exit(1);
    }
    ++fIndex;
}

RegexNode RegexParser::pop() {
    RegexNode result = fStack.top();
    fStack.pop();
    return result;
}

void RegexParser::term() {
    switch (this->peek()) {
        case '(': this->group(); break;
        case '[': this->set(); break;
        case '.': this->dot(); break;
        default: this->literal();
    }
}

void RegexParser::quantifiedTerm() {
    this->term();
    switch (this->peek()) {
        case '*': fStack.push(RegexNode(RegexNode::kStar_Kind, this->pop())); ++fIndex; break;
        case '+': fStack.push(RegexNode(RegexNode::kPlus_Kind, this->pop())); ++fIndex; break;
        case '?': fStack.push(RegexNode(RegexNode::kQuestion_Kind, this->pop())); ++fIndex; break;
        default: break;
    }
}

void RegexParser::sequence() {
    this->quantifiedTerm();
    for (;;) {
        switch (this->peek()) {
            case END: // fall through
            case '|': // fall through
            case ')': return;
            default:
                this->sequence();
                RegexNode right = this->pop();
                RegexNode left = this->pop();
                fStack.emplace(RegexNode::kConcat_Kind, std::move(left), std::move(right));
        }
    }
}

RegexNode RegexParser::escapeSequence(char c) {
    switch (c) {
        case 'n': return RegexNode(RegexNode::kChar_Kind, '\n');
        case 'r': return RegexNode(RegexNode::kChar_Kind, '\r');
        case 't': return RegexNode(RegexNode::kChar_Kind, '\t');
        case 's': return RegexNode(RegexNode::kCharset_Kind, " \t\n\r");
        default: return RegexNode(RegexNode::kChar_Kind, c);
    }
}

void RegexParser::literal() {
    char c = this->peek();
    if (c == '\\') {
        ++fIndex;
        fStack.push(this->escapeSequence(peek()));
        ++fIndex;
    }
    else {
        fStack.push(RegexNode(RegexNode::kChar_Kind, c));
        ++fIndex;
    }
}

void RegexParser::dot() {
    this->expect('.');
    fStack.push(RegexNode(RegexNode::kDot_Kind));
}

void RegexParser::group() {
    this->expect('(');
    this->regex();
    this->expect(')');
}

void RegexParser::setItem() {
    this->literal();
    if (this->peek() == '-') {
        ++fIndex;
        if (peek() == ']') {
            fStack.push(RegexNode(RegexNode::kChar_Kind, '-'));
        }
        else {
            literal();
            RegexNode end = this->pop();
            ASSERT(end.fKind == RegexNode::kChar_Kind);
            RegexNode start = this->pop();
            ASSERT(start.fKind == RegexNode::kChar_Kind);
            fStack.push(RegexNode(RegexNode::kRange_Kind, std::move(start), std::move(end)));
        }
    }
}

void RegexParser::set() {
    expect('[');
    size_t depth = fStack.size();
    RegexNode set(RegexNode::kCharset_Kind);
    if (this->peek() == '^') {
        ++fIndex;
        set.fPayload.fBool = true;
    }
    else {
        set.fPayload.fBool = false;
    }
    for (;;) {
        switch (this->peek()) {
            case ']':
                ++fIndex;
                while (fStack.size() > depth) {
                    set.fChildren.push_back(this->pop());
                }
                fStack.push(std::move(set));
                return;
            case END:
                printf("unterminated character set\n");
                exit(1);
            default:
                this->setItem();
        }
    }
}

void RegexParser::regex() {
    this->sequence();
    switch (this->peek()) {
        case '|': {
            ++fIndex;
            this->regex();
            RegexNode right = this->pop();
            RegexNode left = this->pop();
            fStack.push(RegexNode(RegexNode::kOr_Kind, left, right));
            break;
        }
        case END: // fall through
        case ')':
            return;
        default:
            ASSERT(false);
    }
}
@@ -1,89 +0,0 @@
/*
 * Copyright 2017 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#ifndef SKSL_REGEXPARSER
#define SKSL_REGEXPARSER

#include "RegexNode.h"

#include <stack>
#include <string>

/**
 * Turns a simple regular expression into a parse tree. The regular expression syntax supports only
 * the basic quantifiers ('*', '+', and '?'), alternation ('|'), character sets ('[a-z]'), and
 * groups ('()').
 */
class RegexParser {
public:
    RegexNode parse(std::string source);

private:
    static constexpr char END = '\0';

    char peek();

    void expect(char c);

    RegexNode pop();

    /**
     * Matches a char literal, parenthesized group, character set, or dot ('.').
     */
    void term();

    /**
     * Matches a term followed by an optional quantifier ('*', '+', or '?').
     */
    void quantifiedTerm();

    /**
     * Matches a sequence of quantifiedTerms.
     */
    void sequence();

    /**
     * Returns a node representing the given escape character (e.g. escapeSequence('n') returns a
     * node which matches a newline character).
     */
    RegexNode escapeSequence(char c);

    /**
     * Matches a literal character or escape sequence.
     */
    void literal();

    /**
     * Matches a dot ('.').
     */
    void dot();

    /**
     * Matches a parenthesized group.
     */
    void group();

    /**
     * Matches a literal character, escape sequence, or character range from a character set.
     */
    void setItem();

    /**
     * Matches a character set.
     */
    void set();

    void regex();

    std::string fSource;

    size_t fIndex;

    std::stack<RegexNode> fStack;
};

#endif
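
A small usage sketch, again assuming these sources are built together; description() renders the resulting parse tree back as text, which is convenient for checking how a pattern was interpreted (the pattern below is the INT_LITERAL definition from the token list that follows):

#include "RegexParser.h"

#include <cstdio>

int main() {
    // Parse a pattern and print its reconstructed textual form.
    RegexNode node = RegexParser().parse("[0-9]+|0x[0-9a-fA-F]+");
    printf("%s\n", node.description().c_str());
    return 0;
}
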
@@ -1,95 +0,0 @@
FLOAT_LITERAL = [0-9]*\.[0-9]+([eE][+-]?[0-9]+)?|[0-9]+\.[0-9]*([eE][+-]?[0-9]+)?|[0-9]+([eE][+-]?[0-9]+)
INT_LITERAL = [0-9]+|0x[0-9a-fA-F]+
TRUE_LITERAL = "true"
FALSE_LITERAL = "false"
IF = "if"
STATIC_IF = "@if"
ELSE = "else"
FOR = "for"
WHILE = "while"
DO = "do"
SWITCH = "switch"
STATIC_SWITCH = "@switch"
CASE = "case"
DEFAULT = "default"
BREAK = "break"
CONTINUE = "continue"
DISCARD = "discard"
RETURN = "return"
IN = "in"
OUT = "out"
INOUT = "inout"
UNIFORM = "uniform"
CONST = "const"
LOWP = "lowp"
MEDIUMP = "mediump"
HIGHP = "highp"
FLAT = "flat"
NOPERSPECTIVE = "noperspective"
READONLY = "readonly"
WRITEONLY = "writeonly"
COHERENT = "coherent"
VOLATILE = "volatile"
RESTRICT = "restrict"
BUFFER = "buffer"
HASSIDEEFFECTS = "sk_has_side_effects"
STRUCT = "struct"
LAYOUT = "layout"
PRECISION = "precision"
IDENTIFIER = [a-zA-Z_$]([0-9]|[a-zA-Z_$])*
DIRECTIVE = #[a-zA-Z_$]([0-9]|[a-zA-Z_$])*
SECTION = @[a-zA-Z_$]([0-9]|[a-zA-Z_$])*
LPAREN = "("
RPAREN = ")"
LBRACE = "{"
RBRACE = "}"
LBRACKET = "["
RBRACKET = "]"
DOT = "."
COMMA = ","
PLUSPLUS = "++"
MINUSMINUS = "--"
PLUS = "+"
MINUS = "-"
STAR = "*"
SLASH = "/"
PERCENT = "%"
SHL = "<<"
SHR = ">>"
BITWISEOR = "|"
BITWISEXOR = "^"
BITWISEAND = "&"
BITWISENOT = "~"
LOGICALOR = "||"
LOGICALXOR = "^^"
LOGICALAND = "&&"
LOGICALNOT = "!"
QUESTION = "?"
COLON = ":"
EQ = "="
EQEQ = "=="
NEQ = "!="
GT = ">"
LT = "<"
GTEQ = ">="
LTEQ = "<="
PLUSEQ = "+="
MINUSEQ = "-="
STAREQ = "*="
SLASHEQ = "/="
PERCENTEQ = "%="
SHLEQ = "<<="
SHREQ = ">>="
BITWISEOREQ = "|="
BITWISEXOREQ = "^="
BITWISEANDEQ = "&="
LOGICALOREQ = "||="
LOGICALXOREQ = "^^="
LOGICALANDEQ = "&&="
SEMICOLON = ";"
ARROW = "->"
COLONCOLON = "::"
WHITESPACE = \s+
LINE_COMMENT = //.*
BLOCK_COMMENT = /\*([^*]|\*[^/])*\*/
INVALID = .