qt5base-lts/util/lexgen/main.cpp
Qt by Nokia 38be0d1383 Initial import from the monolithic Qt.
This is the beginning of revision history for this module. If you
want to look at revision history older than this, please refer to the
Qt Git wiki for how to use Git history grafting. At the time of
writing, this wiki is located here:

http://qt.gitorious.org/qt/pages/GitIntroductionWithQt

If you have already performed the grafting and you don't see any
history beyond this commit, try running "git log" with the "--follow"
argument.

Branched from the monolithic repo, Qt master branch, at commit
896db169ea224deb96c59ce8af800d019de63f12
2011-04-27 12:05:43 +02:00

324 lines
10 KiB
C++

/****************************************************************************
**
** Copyright (C) 2011 Nokia Corporation and/or its subsidiary(-ies).
** All rights reserved.
** Contact: Nokia Corporation (qt-info@nokia.com)
**
** This file is part of the utils of the Qt Toolkit.
**
** $QT_BEGIN_LICENSE:LGPL$
** No Commercial Usage
** This file contains pre-release code and may not be distributed.
** You may use this file in accordance with the terms and conditions
** contained in the Technology Preview License Agreement accompanying
** this package.
**
** GNU Lesser General Public License Usage
** Alternatively, this file may be used under the terms of the GNU Lesser
** General Public License version 2.1 as published by the Free Software
** Foundation and appearing in the file LICENSE.LGPL included in the
** packaging of this file. Please review the following information to
** ensure the GNU Lesser General Public License version 2.1 requirements
** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
**
** In addition, as a special exception, Nokia gives you certain additional
** rights. These rights are described in the Nokia Qt LGPL Exception
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
**
** If you have questions regarding the use of this file, please contact
** Nokia at qt-info@nokia.com.
**
**
**
**
**
**
**
**
** $QT_END_LICENSE$
**
****************************************************************************/
#include "nfa.h"
#include "re2nfa.h"
#include "configfile.h"
#include "generator.h"
#include <QFile>
#include <QCoreApplication>
#include <QFileInfo>
#include <QDateTime>
// One scanned unit as produced by tokenize(): the name of the token that was
// recognised together with the piece of input text that matched it.
struct Symbol
{
    QString token;  // symbol name taken from the accepting DFA state
    QString lexem;  // input characters that were consumed for this token
};
// Runs 'input' through 'dfa' and returns the recognised (token, lexem) pairs.
//
// The scanner follows transitions for as long as possible and remembers the
// most recent state that carried a non-empty symbol. When no transition exists
// for the current character it emits the symbol of that remembered state and
// resumes scanning right after the position where it was reached.
//
// dfa   - state table; state 0 is the start state and a transition value of 0
//         is treated as "no transition".
// input - text to scan. Characters are lower-cased for the transition lookup
//         when cfg->caseSensitivity is Qt::CaseInsensitive; the lexem always
//         keeps the original characters.
// cfg   - supplies caseSensitivity and the debug flag (debug output goes to
//         qDebug()).
// ok    - optional; set to false when the input cannot be tokenized (the
//         symbols collected so far are still returned) and to true otherwise.
static QList<Symbol> tokenize(const DFA &dfa, const QString &input, Config *cfg, bool *ok = 0)
{
    QList<Symbol> result;
    Symbol pending;             // token/lexem accumulated since the last emitted symbol
    int current = 0;            // current DFA state
    int acceptState = -1;       // last state seen with a non-empty symbol, -1 if none
    QString acceptLexem;        // lexem accumulated when acceptState was reached
    int acceptPos = -1;         // input index at which acceptState was reached

    for (int pos = 0; pos < input.length(); ++pos) {
        const QChar raw = input.at(pos);
        const QChar lookup = (cfg->caseSensitivity == Qt::CaseInsensitive) ? raw.toLower() : raw;

        const int target = dfa.at(current).transitions.value(lookup.unicode());
        if (cfg->debug)
            qDebug() << "input" << raw << "leads to state" << target;

        if (target) {
            // Consume the character and remember the state if it is accepting.
            pending.lexem.append(raw);
            pending.token = dfa.at(target).symbol;
            if (!pending.token.isEmpty()) {
                acceptState = target;
                acceptLexem = pending.lexem;
                acceptPos = pos;
            }
            current = target;
            continue;
        }

        // No transition for this character.
        if (acceptState != -1) {
            if (cfg->debug)
                qDebug() << "adding" << dfa.at(acceptState).symbol << "and backtracking to" << acceptPos;
            Symbol accepted;
            accepted.token = dfa.at(acceptState).symbol;
            accepted.lexem = acceptLexem;
            result << accepted;

            pending = Symbol();
            current = 0;
            // The loop's ++pos makes scanning resume at acceptPos + 1.
            pos = acceptPos;
            acceptPos = -1;
            acceptState = -1;
            continue;
        }

        if (current == 0 || pending.token.isEmpty()) {
            if (cfg->debug)
                qDebug() << "invalid input";
            if (ok)
                *ok = false;
            return result;
        }

        // NOTE(review): a non-empty pending.token appears to always come with
        // acceptState != -1, so this path looks unreachable -- confirm before
        // relying on it.
        if (cfg->debug)
            qDebug() << "appending symbol with token" << pending.token;
        result << pending;
        pending = Symbol();
        current = 0;
        acceptState = -1;
        --pos;  // re-examine the same character from the start state
    }

    // End of input: flush whatever is still pending.
    if (!pending.token.isEmpty()) {
        if (cfg->debug)
            qDebug() << "appending (last) symbol with token" << pending.token;
        result << pending;
    } else if (acceptState != -1) {
        // The scan ended in a non-accepting state. Only the lexem up to the
        // last accepting position is emitted; characters read after it are
        // not turned into symbols.
        if (cfg->debug)
            qDebug() << "appending last accepting state with token" << dfa.at(acceptState).symbol;
        Symbol accepted;
        accepted.lexem = acceptLexem;
        accepted.token = dfa.at(acceptState).symbol;
        result << accepted;
    }

    if (ok)
        *ok = true;
    return result;
}
// Builds the set of all input values the scanner has to handle, based on the
// "InputType" entry of the given config section.
//
// A missing (or empty) InputType defaults to "quint8", which yields the values
// 1..255. Returns an empty set, after printing a warning, when InputType is
// given more than once or names an unknown type.
static QSet<InputType> determineMaxInputSet(const ConfigFile::Section &section)
{
    QString typeName;
    foreach (const ConfigFile::Entry &entry, section) {
        if (entry.key != QLatin1String("InputType"))
            continue;
        if (!typeName.isEmpty()) {
            qWarning("Error: InputType field specified multiple times in config file");
            return QSet<InputType>();
        }
        typeName = entry.value;
    }

    if (typeName.isEmpty())
        typeName = "quint8";

    // Only quint8 is implemented so far.
    if (typeName != "quint8") {
        qWarning("Error: Unknown input type '%s'", qPrintable(typeName));
        return QSet<InputType>();
    }

    QSet<InputType> inputs;
    for (int value = 1; value < 256; ++value)
        inputs.insert(value);
    return inputs;
}
static bool loadConfig(const QString &ruleFile, Config *cfg)
{
ConfigFile::SectionMap sections = ConfigFile::parse(ruleFile);
if (sections.isEmpty()) {
qWarning("Error parsing %s", qPrintable(ruleFile));
return false;
}
QSet<InputType> maxInputSet = determineMaxInputSet(sections.value("Options"));
if (maxInputSet.isEmpty())
return false;
Qt::CaseSensitivity cs = Qt::CaseInsensitive;
if (sections.value("Options").contains("case-sensitive"))
cs = Qt::CaseSensitive;
cfg->configSections = sections;
cfg->caseSensitivity = cs;
cfg->className = sections.value("Options").value("classname", "Scanner");
cfg->maxInputSet = maxInputSet;
cfg->ruleFile = ruleFile;
return true;
}
static DFA generateMachine(const Config &cfg)
{
if (cfg.cache) {
QFileInfo ruleInfo(cfg.ruleFile);
QFileInfo cacheInfo(ruleInfo.baseName() + ".dfa");
if (cacheInfo.exists()
&& cacheInfo.lastModified() > ruleInfo.lastModified()) {
QFile f(cacheInfo.absoluteFilePath());
f.open(QIODevice::ReadOnly);
QDataStream stream(&f);
DFA machine;
stream >> machine;
return machine;
}
}
QMap<QString, NFA> macros;
foreach (ConfigFile::Entry e, cfg.configSections.value("Macros")) {
int errCol = 0;
if (cfg.debug)
qDebug() << "parsing" << e.value;
NFA nfa = RE2NFA(macros, cfg.maxInputSet, cfg.caseSensitivity).parse(e.value, &errCol);
if (nfa.isEmpty()) {
qWarning("Parse error in line %d column %d", e.lineNumber, errCol);
return DFA();
}
macros.insert(e.key, nfa);
}
if (!cfg.configSections.contains("Tokens")) {
qWarning("Rule file does not contain a [Tokens] section!");
return DFA();
}
QVector<NFA> tokens;
foreach (ConfigFile::Entry e, cfg.configSections.value("Tokens")) {
int errCol = 0;
if (cfg.debug)
qDebug() << "parsing" << e.value;
NFA tok = RE2NFA(macros, cfg.maxInputSet, cfg.caseSensitivity).parse(e.value, &errCol);
if (tok.isEmpty()) {
qWarning("Parse error in line %d column %d while parsing token %s", e.lineNumber, errCol, e.key.toLocal8Bit().constData());
return DFA();
}
tok.setTerminationSymbol(e.key);
tokens.append(tok);
}
NFA giganticStateMachine;
foreach (NFA nfa, tokens)
if (giganticStateMachine.isEmpty())
giganticStateMachine = nfa;
else
giganticStateMachine = NFA::createAlternatingNFA(giganticStateMachine, nfa);
DFA result = giganticStateMachine.toDFA().minimize();
if (cfg.cache) {
QFileInfo ruleInfo(cfg.ruleFile);
QFileInfo cacheInfo(ruleInfo.baseName() + ".dfa");
QFile f(cacheInfo.absoluteFilePath());
f.open(QIODevice::WriteOnly | QIODevice::Truncate);
QDataStream stream(&f);
stream << result;
}
return result;
}
#if !defined(AUTOTEST)
// Entry point of the lexgen tool.
//
// Command line: lexgen [-test] [-debug] [-cache] rulefile
// The first argument that does not start with '-' is taken as the rule file.
//
// Without -test the generated scanner source is written to standard output.
// With -test, standard input is tokenized with the generated machine and the
// resulting token/lexem pairs are printed instead.
//
// Returns 1 when no rule file is given, the configuration cannot be loaded or
// no machine could be generated; returns 0 otherwise.
int main(int argc, char **argv)
{
    QCoreApplication app(argc, argv);
    QString ruleFile;
    Config cfg;

    const QStringList arguments = app.arguments().mid(1);
    cfg.debug = arguments.contains("-debug");
    const bool testRules = arguments.contains("-test");
    cfg.cache = arguments.contains("-cache");

    foreach (const QString &arg, arguments)
        if (!arg.startsWith(QLatin1Char('-'))) {
            ruleFile = arg;
            break;
        }

    if (ruleFile.isEmpty()) {
        qWarning("usage: lexgen [-test] [-debug] [-cache] rulefile");
        qWarning(" ");
        qWarning(" the -test option will cause lexgen to interpret standard input");
        qWarning(" according to the specified rules and print out pairs of token and");
        qWarning(" lexical element");
        qWarning(" the -debug option prints diagnostic output while processing");
        qWarning(" the -cache option stores the generated state machine in a .dfa file");
        qWarning(" and reuses it as long as it is newer than the rule file");
        return 1;
    }

    if (!loadConfig(ruleFile, &cfg))
        return 1;

    DFA machine = generateMachine(cfg);
    if (machine.isEmpty())
        return 1;

    if (testRules) {
        qWarning("Testing:");
        QString input = QTextStream(stdin).readAll();
        if (cfg.debug)
            qDebug() << "tokenizing" << input;

        bool ok = false;
        QList<Symbol> symbols = tokenize(machine, input, &cfg, &ok);
        if (symbols.isEmpty()) {
            qWarning("No tokens produced!");
        } else {
            foreach (const Symbol &s, symbols)
                qDebug() << s.token << ":" << s.lexem;
        }
        if (ok)
            qDebug() << symbols.count() << "tokens produced.";
        else
            qDebug() << "Error while tokenizing!";
    } else {
        Generator gen(machine, cfg);
        QTextStream(stdout)
            << gen.generate();
    }
    return 0;
}
#endif