2019-11-23 20:27:39 +08:00
|
|
|
/****************************************************************************
|
|
|
|
Copyright (C) 2013 Henry van Merode. All rights reserved.
|
|
|
|
Copyright (c) 2015-2016 Chukong Technologies Inc.
|
|
|
|
Copyright (c) 2017-2018 Xiamen Yaji Software Co., Ltd.
|
2021-12-25 10:04:45 +08:00
|
|
|
|
2022-10-01 16:24:52 +08:00
|
|
|
https://axmolengine.github.io/
|
2021-12-25 10:04:45 +08:00
|
|
|
|
2019-11-23 20:27:39 +08:00
|
|
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
|
|
of this software and associated documentation files (the "Software"), to deal
|
|
|
|
in the Software without restriction, including without limitation the rights
|
|
|
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
|
|
copies of the Software, and to permit persons to whom the Software is
|
|
|
|
furnished to do so, subject to the following conditions:
|
2021-12-25 10:04:45 +08:00
|
|
|
|
2019-11-23 20:27:39 +08:00
|
|
|
The above copyright notice and this permission notice shall be included in
|
|
|
|
all copies or substantial portions of the Software.
|
2021-12-25 10:04:45 +08:00
|
|
|
|
2019-11-23 20:27:39 +08:00
|
|
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
|
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
|
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
|
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
|
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
|
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
|
|
THE SOFTWARE.
|
|
|
|
****************************************************************************/
|
|
|
|
|
|
|
|
#include "CCPUScriptLexer.h"
|
|
|
|
|
2022-07-11 17:50:21 +08:00
|
|
|
NS_AX_BEGIN
|
2021-12-25 10:04:45 +08:00
|
|
|
/** Creates a lexer; there is no state to initialise. */
PUScriptLexer::PUScriptLexer() = default;
|
2019-11-23 20:27:39 +08:00
|
|
|
|
2021-12-25 10:04:45 +08:00
|
|
|
/** Destroys the lexer; nothing needs to be released here. */
PUScriptLexer::~PUScriptLexer() = default;
|
2019-11-23 20:27:39 +08:00
|
|
|
|
2021-12-31 12:12:40 +08:00
|
|
|
void PUScriptLexer::openLexer(std::string_view str, std::string_view source, PUScriptTokenList& tokens)
|
2019-11-23 20:27:39 +08:00
|
|
|
{
|
2021-12-25 10:04:45 +08:00
|
|
|
enum
|
|
|
|
{
|
|
|
|
READY = 0,
|
|
|
|
COMMENT,
|
|
|
|
MULTICOMMENT,
|
|
|
|
WORD,
|
|
|
|
QUOTE,
|
|
|
|
VAR,
|
|
|
|
POSSIBLECOMMENT
|
|
|
|
};
|
2019-11-23 20:27:39 +08:00
|
|
|
|
2021-12-25 10:04:45 +08:00
|
|
|
const wchar_t varopener = '$', quote = '\"', slash = '/', backslash = '\\', openbrace = '{', closebrace = '}',
|
|
|
|
colon = ':', star = '*', cr = '\r', lf = '\n';
|
2019-11-23 20:27:39 +08:00
|
|
|
char c = 0, lastc = 0;
|
2021-12-25 10:04:45 +08:00
|
|
|
|
2019-11-23 20:27:39 +08:00
|
|
|
std::string lexeme;
|
|
|
|
unsigned int line = 1, state = READY, lastQuote = 0;
|
2021-12-25 10:04:45 +08:00
|
|
|
|
|
|
|
// ScriptTokenListPtr tokens(OGRE_NEW_T(ScriptTokenList, MEMCATEGORY_GENERAL)(), SPFM_DELETE_T);
|
|
|
|
//
|
2019-11-23 20:27:39 +08:00
|
|
|
// Iterate over the input
|
|
|
|
|
2021-12-31 12:12:40 +08:00
|
|
|
std::string_view::const_iterator i = str.begin(), end = str.end();
|
2021-12-25 10:04:45 +08:00
|
|
|
while (i != end)
|
2019-11-23 20:27:39 +08:00
|
|
|
{
|
|
|
|
lastc = c;
|
2021-12-25 10:04:45 +08:00
|
|
|
c = *i;
|
|
|
|
|
|
|
|
if (c == quote)
|
2019-11-23 20:27:39 +08:00
|
|
|
lastQuote = line;
|
2021-12-25 10:04:45 +08:00
|
|
|
|
|
|
|
switch (state)
|
2019-11-23 20:27:39 +08:00
|
|
|
{
|
2021-12-25 10:04:45 +08:00
|
|
|
case READY:
|
|
|
|
if (c == slash && lastc == slash)
|
|
|
|
{
|
|
|
|
// Comment start, clear out the lexeme
|
|
|
|
lexeme = "";
|
|
|
|
state = COMMENT;
|
|
|
|
}
|
|
|
|
else if (c == star && lastc == slash)
|
|
|
|
{
|
|
|
|
lexeme = "";
|
|
|
|
state = MULTICOMMENT;
|
|
|
|
}
|
|
|
|
else if (c == quote)
|
|
|
|
{
|
|
|
|
// Clear out the lexeme ready to be filled with quotes!
|
|
|
|
lexeme = c;
|
|
|
|
state = QUOTE;
|
|
|
|
}
|
|
|
|
else if (c == varopener)
|
|
|
|
{
|
|
|
|
// Set up to read in a variable
|
|
|
|
lexeme = c;
|
|
|
|
state = VAR;
|
|
|
|
}
|
|
|
|
else if (isNewline(c))
|
|
|
|
{
|
|
|
|
lexeme = c;
|
|
|
|
setToken(lexeme, line, source, &tokens);
|
|
|
|
}
|
|
|
|
else if (!isWhitespace(c))
|
|
|
|
{
|
|
|
|
lexeme = c;
|
|
|
|
if (c == slash)
|
|
|
|
state = POSSIBLECOMMENT;
|
2019-11-23 20:27:39 +08:00
|
|
|
else
|
|
|
|
state = WORD;
|
2021-12-25 10:04:45 +08:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
case COMMENT:
|
|
|
|
// This newline happens to be ignored automatically
|
|
|
|
if (isNewline(c))
|
|
|
|
state = READY;
|
|
|
|
break;
|
|
|
|
case MULTICOMMENT:
|
|
|
|
if (c == slash && lastc == star)
|
|
|
|
state = READY;
|
|
|
|
break;
|
|
|
|
case POSSIBLECOMMENT:
|
|
|
|
if (c == slash && lastc == slash)
|
|
|
|
{
|
|
|
|
lexeme = "";
|
|
|
|
state = COMMENT;
|
2019-11-23 20:27:39 +08:00
|
|
|
break;
|
2021-12-25 10:04:45 +08:00
|
|
|
}
|
|
|
|
else if (c == star && lastc == slash)
|
|
|
|
{
|
|
|
|
lexeme = "";
|
|
|
|
state = MULTICOMMENT;
|
2019-11-23 20:27:39 +08:00
|
|
|
break;
|
2021-12-25 10:04:45 +08:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
state = WORD;
|
|
|
|
}
|
|
|
|
case WORD:
|
|
|
|
if (isNewline(c))
|
|
|
|
{
|
|
|
|
setToken(lexeme, line, source, &tokens);
|
|
|
|
lexeme = c;
|
|
|
|
setToken(lexeme, line, source, &tokens);
|
|
|
|
state = READY;
|
|
|
|
}
|
|
|
|
else if (isWhitespace(c))
|
|
|
|
{
|
|
|
|
setToken(lexeme, line, source, &tokens);
|
|
|
|
state = READY;
|
|
|
|
}
|
|
|
|
else if (c == openbrace || c == closebrace || c == colon)
|
|
|
|
{
|
|
|
|
setToken(lexeme, line, source, &tokens);
|
|
|
|
lexeme = c;
|
|
|
|
setToken(lexeme, line, source, &tokens);
|
|
|
|
state = READY;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
lexeme += c;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case QUOTE:
|
|
|
|
if (c != backslash)
|
|
|
|
{
|
|
|
|
// Allow embedded quotes with escaping
|
|
|
|
if (c == quote && lastc == backslash)
|
2019-11-23 20:27:39 +08:00
|
|
|
{
|
2021-12-25 10:04:45 +08:00
|
|
|
lexeme += c;
|
2019-11-23 20:27:39 +08:00
|
|
|
}
|
2021-12-25 10:04:45 +08:00
|
|
|
else if (c == quote)
|
2019-11-23 20:27:39 +08:00
|
|
|
{
|
2021-12-25 10:04:45 +08:00
|
|
|
lexeme += c;
|
2019-11-23 20:27:39 +08:00
|
|
|
setToken(lexeme, line, source, &tokens);
|
|
|
|
state = READY;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2021-12-25 10:04:45 +08:00
|
|
|
// Backtrack here and allow a backslash normally within the quote
|
|
|
|
if (lastc == backslash)
|
|
|
|
lexeme = lexeme + "\\" + c;
|
|
|
|
else
|
|
|
|
lexeme += c;
|
2019-11-23 20:27:39 +08:00
|
|
|
}
|
2021-12-25 10:04:45 +08:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
case VAR:
|
|
|
|
if (isNewline(c))
|
|
|
|
{
|
|
|
|
setToken(lexeme, line, source, &tokens);
|
|
|
|
lexeme = c;
|
|
|
|
setToken(lexeme, line, source, &tokens);
|
|
|
|
state = READY;
|
|
|
|
}
|
|
|
|
else if (isWhitespace(c))
|
|
|
|
{
|
|
|
|
setToken(lexeme, line, source, &tokens);
|
|
|
|
state = READY;
|
|
|
|
}
|
|
|
|
else if (c == openbrace || c == closebrace || c == colon)
|
|
|
|
{
|
|
|
|
setToken(lexeme, line, source, &tokens);
|
|
|
|
lexeme = c;
|
|
|
|
setToken(lexeme, line, source, &tokens);
|
|
|
|
state = READY;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
lexeme += c;
|
|
|
|
}
|
|
|
|
break;
|
2019-11-23 20:27:39 +08:00
|
|
|
}
|
2021-12-25 10:04:45 +08:00
|
|
|
|
2019-11-23 20:27:39 +08:00
|
|
|
// Separate check for newlines just to track line numbers
|
2021-12-25 10:04:45 +08:00
|
|
|
if (c == cr || (c == lf && lastc != cr))
|
2019-11-23 20:27:39 +08:00
|
|
|
line++;
|
2021-12-25 10:04:45 +08:00
|
|
|
|
2019-11-23 20:27:39 +08:00
|
|
|
++i;
|
|
|
|
}
|
2021-12-25 10:04:45 +08:00
|
|
|
|
2019-11-23 20:27:39 +08:00
|
|
|
// Check for valid exit states
|
2021-12-25 10:04:45 +08:00
|
|
|
if (state == WORD || state == VAR)
|
2019-11-23 20:27:39 +08:00
|
|
|
{
|
2021-12-25 10:04:45 +08:00
|
|
|
if (!lexeme.empty())
|
2019-11-23 20:27:39 +08:00
|
|
|
setToken(lexeme, line, source, &tokens);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2021-12-25 10:04:45 +08:00
|
|
|
if (state == QUOTE)
|
2019-11-23 20:27:39 +08:00
|
|
|
{
|
|
|
|
printf("Exception\n");
|
2021-12-25 10:04:45 +08:00
|
|
|
|
|
|
|
// OGRE_EXCEPT(Exception::ERR_INVALID_STATE,
|
|
|
|
// Ogre::String("no matching \" found for \" at line ") +
|
|
|
|
// Ogre::StringConverter::toString(lastQuote),
|
|
|
|
// "ScriptLexer::tokenize");
|
2019-11-23 20:27:39 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-12-31 12:12:40 +08:00
|
|
|
void PUScriptLexer::setToken(std::string_view lexeme, int line, std::string_view source, PUScriptTokenList* tokens)
|
2021-12-25 10:04:45 +08:00
|
|
|
{
|
2019-11-23 20:27:39 +08:00
|
|
|
|
2021-12-25 10:04:45 +08:00
|
|
|
const char openBracket = '{', closeBracket = '}', colon = ':', quote = '\"', var = '$';
|
2019-11-23 20:27:39 +08:00
|
|
|
|
2021-12-25 10:04:45 +08:00
|
|
|
PUScriptToken* token = new PUScriptToken;
|
2019-11-23 20:27:39 +08:00
|
|
|
|
2021-12-25 10:04:45 +08:00
|
|
|
token->lexeme = lexeme;
|
|
|
|
token->line = line;
|
|
|
|
token->file = source;
|
|
|
|
bool ignore = false;
|
2019-11-23 20:27:39 +08:00
|
|
|
|
2021-12-25 10:04:45 +08:00
|
|
|
// Check the user token map first
|
|
|
|
if (lexeme.size() == 1 && isNewline(lexeme[0]))
|
|
|
|
{
|
|
|
|
token->type = TID_NEWLINE;
|
|
|
|
if (!tokens->empty() && tokens->back()->type == TID_NEWLINE)
|
2019-11-23 20:27:39 +08:00
|
|
|
{
|
2021-12-25 10:04:45 +08:00
|
|
|
ignore = true;
|
|
|
|
delete token;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else if (lexeme.size() == 1 && lexeme[0] == openBracket)
|
|
|
|
token->type = TID_LBRACKET;
|
|
|
|
else if (lexeme.size() == 1 && lexeme[0] == closeBracket)
|
|
|
|
token->type = TID_RBRACKET;
|
|
|
|
else if (lexeme.size() == 1 && lexeme[0] == colon)
|
|
|
|
token->type = TID_COLON;
|
|
|
|
else if (lexeme[0] == var)
|
|
|
|
token->type = TID_VARIABLE;
|
|
|
|
else
|
|
|
|
{
|
|
|
|
// This is either a non-zero length phrase or quoted phrase
|
|
|
|
if (lexeme.size() >= 2 && lexeme[0] == quote && lexeme[lexeme.size() - 1] == quote)
|
|
|
|
{
|
|
|
|
token->type = TID_QUOTE;
|
2019-11-23 20:27:39 +08:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2021-12-25 10:04:45 +08:00
|
|
|
token->type = TID_WORD;
|
2019-11-23 20:27:39 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-12-25 10:04:45 +08:00
|
|
|
if (!ignore)
|
2022-08-09 09:54:53 +08:00
|
|
|
tokens->emplace_back(token);
|
2021-12-25 10:04:45 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
bool PUScriptLexer::isWhitespace(char c) const
|
|
|
|
{
|
|
|
|
return c == ' ' || c == '\r' || c == '\t';
|
|
|
|
}
|
|
|
|
|
|
|
|
bool PUScriptLexer::isNewline(char c) const
|
|
|
|
{
|
|
|
|
|
|
|
|
return c == '\n' || c == '\r';
|
|
|
|
}
|
2019-11-23 20:27:39 +08:00
|
|
|
|
2022-07-11 17:50:21 +08:00
|
|
|
NS_AX_END
|