cmake/Source/LexerParser/cmListFileLexer.in.l

561 lines
14 KiB
Plaintext
Raw Normal View History

%{
2016-10-30 18:24:19 +01:00
/* Distributed under the OSI-approved BSD 3-Clause License. See accompanying
file Copyright.txt or https://cmake.org/licensing for details. */
/*

This file must be translated to C and modified to build everywhere.

Run flex >= 2.6 like this:

  flex --nounistd -DFLEXINT_H --noline -ocmListFileLexer.c cmListFileLexer.in.l

Modify cmListFileLexer.c:
  - remove trailing whitespace: sed -i 's/\s*$//' cmListFileLexer.c
  - remove blank lines at end of file: sed -i '${/^$/d;}' cmListFileLexer.c
  - #include "cmStandardLexer.h" at the top: sed -i '1i#include "cmStandardLexer.h"' cmListFileLexer.c

*/
2017-04-14 19:02:05 +02:00
/* IWYU pragma: no_forward_declare yyguts_t */
2019-11-11 23:01:05 +01:00
#ifdef _WIN32
2017-07-20 19:35:53 +02:00
#include "cmsys/Encoding.h"
2014-08-03 19:52:23 +02:00
#endif
/* Setup the proper cmListFileLexer_yylex declaration.
   YY_EXTRA_TYPE is the type of the per-scanner "extra" pointer; YY_INPUT
   below retrieves it with cmListFileLexer_yyget_extra.  YY_DECL gives the
   generated scan routine a second parameter, "lexer", which is the name
   the rule actions use to reach the lexer state.  */
#define YY_EXTRA_TYPE cmListFileLexer*
#define YY_DECL int cmListFileLexer_yylex (yyscan_t yyscanner, cmListFileLexer* lexer)
#include "cmListFileLexer.h"
/*--------------------------------------------------------------------------*/
struct cmListFileLexer_s
{
cmListFileLexer_Token token;
2017-04-14 19:02:05 +02:00
int bracket;
2014-08-03 19:52:23 +02:00
int comment;
int line;
int column;
int size;
FILE* file;
2014-08-03 19:52:23 +02:00
size_t cr;
char* string_buffer;
char* string_position;
int string_left;
yyscan_t scanner;
};
/* Forward declarations of the helpers defined after the rules section;
   the rule actions and the YY_INPUT replacement call them.  */

/* Replace the current token text with a copy of text[0..length).  */
static void cmListFileLexerSetToken(cmListFileLexer* lexer, const char* text,
                                    int length);
/* Append text[0..length) to the current token text.  */
static void cmListFileLexerAppend(cmListFileLexer* lexer, const char* text,
                                  int length);
/* Fill buffer with up to bufferSize bytes of input; returns the count.  */
static int cmListFileLexerInput(cmListFileLexer* lexer, char* buffer,
                                size_t bufferSize);
/* Create / tear down the flex scanner for the current input.  */
static void cmListFileLexerInit(cmListFileLexer* lexer);
static void cmListFileLexerDestroy(cmListFileLexer* lexer);
/* Replace the lexer input function.  The generated scanner calls this
   macro to refill its buffer; route the request to cmListFileLexerInput
   using the lexer stored as the scanner's "extra" pointer.  The body is
   wrapped in do/while(0) so the macro expands to a single statement.  */
#undef YY_INPUT
#define YY_INPUT(buf, result, max_size)                                       \
  do {                                                                        \
    result = cmListFileLexerInput(cmListFileLexer_yyget_extra(yyscanner),     \
                                  buf, max_size);                             \
  } while (0)
/*--------------------------------------------------------------------------*/
%}
2018-01-26 17:06:56 +01:00
%option prefix="cmListFileLexer_yy"

%option reentrant
%option yylineno
%option noyywrap
%pointer

/* Exclusive start conditions:
   STRING     - inside a double-quoted argument
   BRACKET    - inside a bracket argument or bracket comment
   BRACKETEND - just after a candidate closer of the right length
   COMMENT    - inside a '#' line comment  */
%x STRING
%x BRACKET
%x BRACKETEND
%x COMMENT

/* MAKEVAR:  a make-style reference, "$(" NAME ")".
   UNQUOTED: one element of an unquoted argument: either a single
             character other than space, NUL, tab, CR, LF, parentheses,
             '#', backslash, double quote, '[' and '=', or a backslash
             followed by any character other than NUL and LF.
   LEGACY:   MAKEVAR, UNQUOTED, or a double-quoted run embedded in an
             unquoted argument.  */
MAKEVAR \$\([A-Za-z0-9_]*\)
UNQUOTED ([^ \0\t\r\n\(\)#\\\"[=]|\\[^\0\n])
LEGACY {MAKEVAR}|{UNQUOTED}|\"({MAKEVAR}|{UNQUOTED}|[ \t[=])*\"
%%
2014-08-03 19:52:23 +02:00
<INITIAL,COMMENT>\n {
  /* End of line, which also ends a '#' line comment.  The token is
     recorded at the position of the newline itself, then the position
     moves to the start of the next line.  */
  lexer->token.type = cmListFileLexer_Token_Newline;
  cmListFileLexerSetToken(lexer, yytext, yyleng);
  ++lexer->line;
  lexer->column = 1;
  BEGIN(INITIAL);
  return 1;
}
2014-08-03 19:52:23 +02:00
#?\[=*\[\n? {
  /* Opener of a bracket argument, or of a bracket comment when it is
     preceded by '#'.  A newline directly after the opener is consumed
     here and does not become part of the token text.  No token is
     returned yet; the BRACKET rules accumulate the content.  */
  const char* bracket = yytext;
  lexer->comment = yytext[0] == '#';
  if (lexer->comment) {
    lexer->token.type = cmListFileLexer_Token_CommentBracket;
    bracket += 1;
  } else {
    lexer->token.type = cmListFileLexer_Token_ArgumentBracket;
  }
  /* Start with empty text, positioned at the opener.  */
  cmListFileLexerSetToken(lexer, "", 0);
  /* Distance from the first '[' to the second one, i.e. 1 plus the
     number of '=' characters; a matching closer has the same length.  */
  lexer->bracket = (int)(strchr(bracket+1, '[') - bracket);
  if (yytext[yyleng-1] == '\n') {
    ++lexer->line;
    lexer->column = 1;
  } else {
    lexer->column += yyleng;
  }
  BEGIN(BRACKET);
}
# {
  /* Start of a line comment.  No token is produced for it.  */
  lexer->column += yyleng;
  BEGIN(COMMENT);
}
<COMMENT>[^\0\n]* {
  /* Comment body: skipped, only the column is advanced.  The newline
     that ends the comment is handled by the newline rule.  */
  lexer->column += yyleng;
}
\( {
  /* The token is set before the column advances so that it records the
     position where the token starts.  */
  lexer->token.type = cmListFileLexer_Token_ParenLeft;
  cmListFileLexerSetToken(lexer, yytext, yyleng);
  lexer->column += yyleng;
  return 1;
}
\) {
  lexer->token.type = cmListFileLexer_Token_ParenRight;
  cmListFileLexerSetToken(lexer, yytext, yyleng);
  lexer->column += yyleng;
  return 1;
}
[A-Za-z_][A-Za-z0-9_]* {
  /* A letter or underscore followed by letters, digits, underscores.  */
  lexer->token.type = cmListFileLexer_Token_Identifier;
  cmListFileLexerSetToken(lexer, yytext, yyleng);
  lexer->column += yyleng;
  return 1;
}
2014-08-03 19:52:23 +02:00
<BRACKET>\]=* {
  /* Handle ]]====]=======]*/
  /* A candidate closer is appended to the token tentatively.  Only when
     its length equals that of the opener can the next ']' finish the
     bracket, so only then switch to BRACKETEND.  */
  cmListFileLexerAppend(lexer, yytext, yyleng);
  lexer->column += yyleng;
  if (yyleng == lexer->bracket) {
    BEGIN(BRACKETEND);
  }
}
<BRACKETEND>\] {
  /* The closer is complete.  This final ']' was never appended, so only
     the tentatively appended part has to be removed.  */
  lexer->column += yyleng;
  /* Erase the partial bracket from the token.  */
  lexer->token.length -= lexer->bracket;
  lexer->token.text[lexer->token.length] = 0;
  BEGIN(INITIAL);
  return 1;
}
<BRACKET>([^]\0\n])+ {
  /* Ordinary content up to the next ']', NUL or newline.  */
  cmListFileLexerAppend(lexer, yytext, yyleng);
  lexer->column += yyleng;
}
<BRACKET,BRACKETEND>\n {
  /* Newlines are part of the content; after a candidate closer a
     newline means it was not a closer after all.  */
  cmListFileLexerAppend(lexer, yytext, yyleng);
  ++lexer->line;
  lexer->column = 1;
  BEGIN(BRACKET);
}
<BRACKET,BRACKETEND>[^\0\n] {
  /* Any other single character, including one that follows a candidate
     closer without completing it, is content.  */
  cmListFileLexerAppend(lexer, yytext, yyleng);
  lexer->column += yyleng;
  BEGIN(BRACKET);
}
<BRACKET,BRACKETEND><<EOF>> {
  /* Input ended before the bracket was closed.  */
  lexer->token.type = cmListFileLexer_Token_BadBracket;
  BEGIN(INITIAL);
  return 1;
}
({UNQUOTED}|=|\[=*{UNQUOTED})({UNQUOTED}|[[=])* {
  /* Unquoted argument.  It may start with '=' or with "[=*" provided an
     UNQUOTED element follows, so it cannot be a bracket opener.  */
  lexer->token.type = cmListFileLexer_Token_ArgumentUnquoted;
  cmListFileLexerSetToken(lexer, yytext, yyleng);
  lexer->column += yyleng;
  return 1;
}
({MAKEVAR}|{UNQUOTED}|=|\[=*{LEGACY})({LEGACY}|[[=])* {
  /* Same token type, but additionally accepting the LEGACY elements:
     make-style references and embedded double-quoted runs.  */
  lexer->token.type = cmListFileLexer_Token_ArgumentUnquoted;
  cmListFileLexerSetToken(lexer, yytext, yyleng);
  lexer->column += yyleng;
  return 1;
}
\[ {
  /* A '[' that matched none of the longer rules above is returned as
     an unquoted argument of its own.  */
  lexer->token.type = cmListFileLexer_Token_ArgumentUnquoted;
  cmListFileLexerSetToken(lexer, yytext, yyleng);
  lexer->column += yyleng;
  return 1;
}
\" {
  /* Opening quote: start an empty quoted-argument token.  Nothing is
     returned until the closing quote is seen.  */
  lexer->token.type = cmListFileLexer_Token_ArgumentQuoted;
  cmListFileLexerSetToken(lexer, "", 0);
  lexer->column += yyleng;
  BEGIN(STRING);
}
<STRING>([^\\\0\n\"]|\\[^\0\n])+ {
  /* A run of ordinary characters and backslash pairs.  The backslash
     is kept in the token text.  */
  cmListFileLexerAppend(lexer, yytext, yyleng);
  lexer->column += yyleng;
}
<STRING>\\\n {
  /* Continuation: text is not part of string */
  ++lexer->line;
  lexer->column = 1;
}
<STRING>\n {
  /* An unescaped newline is part of the string.  */
  cmListFileLexerAppend(lexer, yytext, yyleng);
  ++lexer->line;
  lexer->column = 1;
}
<STRING>\" {
  /* Closing quote: the accumulated token is complete.  */
  lexer->column += yyleng;
  BEGIN(INITIAL);
  return 1;
}
<STRING>[^\0\n] {
  /* Any single character not matched above is kept as content.  */
  cmListFileLexerAppend(lexer, yytext, yyleng);
  lexer->column += yyleng;
}
<STRING><<EOF>> {
  /* Input ended before the closing quote.  */
  lexer->token.type = cmListFileLexer_Token_BadString;
  BEGIN(INITIAL);
  return 1;
}
2013-11-03 12:27:13 +02:00
[ \t\r]+ {
  /* A run of spaces, tabs and carriage returns.  */
  lexer->token.type = cmListFileLexer_Token_Space;
  cmListFileLexerSetToken(lexer, yytext, yyleng);
  lexer->column += yyleng;
  return 1;
}
. {
  /* Any character that no earlier rule accepted.  */
  lexer->token.type = cmListFileLexer_Token_BadCharacter;
  cmListFileLexerSetToken(lexer, yytext, yyleng);
  lexer->column += yyleng;
  return 1;
}
<<EOF>> {
  /* End of input: release the token text and report it by returning 0
     instead of 1.  */
  lexer->token.type = cmListFileLexer_Token_None;
  cmListFileLexerSetToken(lexer, 0, 0);
  return 0;
}
%%
/*--------------------------------------------------------------------------*/
/* Replace the current token text with a copy of text[0..length) and stamp
   the token with the lexer's current line and column.

   text may be null, in which case the token buffer is released and the
   token length becomes 0.  Exactly "length" bytes are copied and a
   terminating NUL is added, so the recorded capacity always matches the
   allocation even when the bytes contain a NUL.  If memory cannot be
   obtained the token is left empty (text null, length 0, size 0).  */
static void cmListFileLexerSetToken(cmListFileLexer* lexer, const char* text,
                                    int length)
{
  /* Set the token line and column number.  */
  lexer->token.line = lexer->line;
  lexer->token.column = lexer->column;

  /* Use the same buffer if possible.  */
  if (lexer->token.text) {
    if (text && length < lexer->size) {
      memcpy(lexer->token.text, text, (size_t)length);
      lexer->token.text[length] = 0;
      lexer->token.length = length;
      return;
    }
    free(lexer->token.text);
    lexer->token.text = 0;
    lexer->size = 0;
  }

  /* Need a fresh buffer.  Start from the empty state so a failed
     allocation cannot leave a length or size without a buffer.  */
  lexer->token.length = 0;
  if (text) {
    char* copy = malloc((size_t)length + 1);
    if (copy) {
      memcpy(copy, text, (size_t)length);
      copy[length] = 0;
      lexer->token.text = copy;
      lexer->token.length = length;
      lexer->size = length + 1;
    }
  }
}
/*--------------------------------------------------------------------------*/
/* Append text[0..length) to the current token text, growing the buffer
   when the result (plus terminating NUL) does not fit.

   If the buffer cannot be grown the token is left exactly as it was and
   the appended text is dropped.  */
static void cmListFileLexerAppend(cmListFileLexer* lexer, const char* text,
                                  int length)
{
  int newSize;

  /* Without a buffer there is no existing text to keep.  */
  if (!lexer->token.text) {
    lexer->token.length = 0;
  }

  /* If the appended text will fit in the buffer, do not reallocate.  */
  newSize = lexer->token.length + length + 1;
  if (!lexer->token.text || newSize > lexer->size) {
    /* We need to extend the buffer.  realloc keeps the old contents and,
       on failure, leaves the old buffer valid.  */
    char* temp = realloc(lexer->token.text, (size_t)newSize);
    if (!temp) {
      return;
    }
    lexer->token.text = temp;
    lexer->size = newSize;
  }

  memcpy(lexer->token.text + lexer->token.length, text, (size_t)length);
  lexer->token.length += length;
  lexer->token.text[lexer->token.length] = 0;
}
/*--------------------------------------------------------------------------*/
/* Input callback used by YY_INPUT: copy up to bufferSize bytes of input
   into buffer and return how many were stored (0 at end of input, or
   when lexer is null or has no input).

   When reading from a file, every "\r\n" pair is reduced to "\n".  When
   reading from a string, bytes are copied unchanged.  */
static int cmListFileLexerInput(cmListFileLexer* lexer, char* buffer,
                                size_t bufferSize)
{
  if (lexer) {
    if (lexer->file) {
      /* Convert CRLF -> LF explicitly.  The C FILE "t"ext mode
         does not convert newlines on all platforms.  Move any
         trailing CR to the start of the buffer for the next read.  */
      size_t cr = lexer->cr;
      size_t n;
      /* Re-insert the CR held back by the previous call.  When none was
         held back (cr == 0) this byte is overwritten by fread.  */
      buffer[0] = '\r';
      n = fread(buffer + cr, 1, bufferSize - cr, lexer->file);
      if (n) {
        char* o = buffer;
        const char* i = buffer;
        const char* e;
        n += cr;
        /* Hold back a trailing CR: whether it starts a CRLF pair is only
           known after the next read.  */
        cr = (buffer[n - 1] == '\r') ? 1 : 0;
        e = buffer + n - cr;
        /* Compact in place.  Reading i[1] is safe: the last byte before
           e is followed either by the held-back CR or, when nothing is
           held back, is itself not a CR so i[1] is never evaluated.  */
        while (i != e) {
          if (i[0] == '\r' && i[1] == '\n') {
            ++i;
          }
          *o++ = *i++;
        }
        n = (size_t)(o - buffer);
      } else {
        /* Nothing more was read: hand out the held-back CR, if any.  */
        n = cr;
        cr = 0;
      }
      lexer->cr = cr;
      return (int)n;
    } else if (lexer->string_left) {
      int length = lexer->string_left;
      if ((int)bufferSize < length) {
        length = (int)bufferSize;
      }
      memcpy(buffer, lexer->string_position, (size_t)length);
      lexer->string_position += length;
      lexer->string_left -= length;
      return length;
    }
  }
  return 0;
}
/*--------------------------------------------------------------------------*/
/* Create the flex scanner for the current input and attach the lexer to
   it as the scanner's "extra" pointer.  Does nothing when neither a file
   nor a string has been set.  */
static void cmListFileLexerInit(cmListFileLexer* lexer)
{
  if (lexer->file || lexer->string_buffer) {
    cmListFileLexer_yylex_init(&lexer->scanner);
    cmListFileLexer_yyset_extra(lexer, lexer->scanner);
  }
}
/*--------------------------------------------------------------------------*/
/* Release the token text and, when an input is set, the flex scanner and
   the input itself (closing the file or freeing the string copy).  The
   current line and column are deliberately left untouched.  */
static void cmListFileLexerDestroy(cmListFileLexer* lexer)
{
  cmListFileLexerSetToken(lexer, 0, 0);
  if (lexer->file || lexer->string_buffer) {
    cmListFileLexer_yylex_destroy(lexer->scanner);
    if (lexer->file) {
      fclose(lexer->file);
      lexer->file = 0;
      /* A CR held back from the last read belongs to the file just
         closed; do not carry it over into the next input.  */
      lexer->cr = 0;
    }
    if (lexer->string_buffer) {
      free(lexer->string_buffer);
      lexer->string_buffer = 0;
      lexer->string_left = 0;
      lexer->string_position = 0;
    }
  }
}
/*--------------------------------------------------------------------------*/
2017-04-14 19:02:05 +02:00
/* Allocate a lexer with no input set, positioned at line 1, column 1.
   Returns null when memory cannot be obtained.  Release the result with
   cmListFileLexer_Delete.  */
cmListFileLexer* cmListFileLexer_New(void)
{
  cmListFileLexer* lexer = (cmListFileLexer*)malloc(sizeof(cmListFileLexer));
  if (!lexer) {
    return 0;
  }
  memset(lexer, 0, sizeof(*lexer));
  lexer->line = 1;
  lexer->column = 1;
  return lexer;
}
/*--------------------------------------------------------------------------*/
/* Close any input still open and free the lexer.  A null lexer is
   accepted and ignored, so the result of a failed cmListFileLexer_New
   can be passed here safely.  */
void cmListFileLexer_Delete(cmListFileLexer* lexer)
{
  if (!lexer) {
    return;
  }
  cmListFileLexer_SetFileName(lexer, 0, 0);
  free(lexer);
}
/*--------------------------------------------------------------------------*/
2014-08-03 19:52:23 +02:00
static cmListFileLexer_BOM cmListFileLexer_ReadBOM(FILE* f)
{
unsigned char b[2];
2016-07-09 11:21:54 +02:00
if (fread(b, 1, 2, f) == 2) {
if (b[0] == 0xEF && b[1] == 0xBB) {
if (fread(b, 1, 1, f) == 1 && b[0] == 0xBF) {
2014-08-03 19:52:23 +02:00
return cmListFileLexer_BOM_UTF8;
}
2016-07-09 11:21:54 +02:00
} else if (b[0] == 0xFE && b[1] == 0xFF) {
2014-08-03 19:52:23 +02:00
/* UTF-16 BE */
return cmListFileLexer_BOM_UTF16BE;
2016-07-09 11:21:54 +02:00
} else if (b[0] == 0 && b[1] == 0) {
if (fread(b, 1, 2, f) == 2 && b[0] == 0xFE && b[1] == 0xFF) {
2014-08-03 19:52:23 +02:00
return cmListFileLexer_BOM_UTF32BE;
}
2016-07-09 11:21:54 +02:00
} else if (b[0] == 0xFF && b[1] == 0xFE) {
2014-08-03 19:52:23 +02:00
fpos_t p;
fgetpos(f, &p);
2016-07-09 11:21:54 +02:00
if (fread(b, 1, 2, f) == 2 && b[0] == 0 && b[1] == 0) {
2014-08-03 19:52:23 +02:00
return cmListFileLexer_BOM_UTF32LE;
2016-07-09 11:21:54 +02:00
}
2018-01-26 17:06:56 +01:00
if (fsetpos(f, &p) != 0) {
return cmListFileLexer_BOM_Broken;
}
2014-08-03 19:52:23 +02:00
return cmListFileLexer_BOM_UTF16LE;
}
2016-07-09 11:21:54 +02:00
}
2018-01-26 17:06:56 +01:00
if (fseek(f, 0, SEEK_SET) != 0) {
return cmListFileLexer_BOM_Broken;
}
2014-08-03 19:52:23 +02:00
return cmListFileLexer_BOM_None;
}
/*--------------------------------------------------------------------------*/
/* Make the named file the lexer's input, replacing any previous input.

   name may be null, which just closes the current input.  When bom is
   not null and the file was opened, *bom receives the detected
   byte-order mark; it is not written otherwise.

   Returns 1 on success (including a null name) and 0 when the file
   could not be opened.  */
int cmListFileLexer_SetFileName(cmListFileLexer* lexer, const char* name,
                                cmListFileLexer_BOM* bom)
{
  int result = 1;
  cmListFileLexerDestroy(lexer);
  if (name) {
#ifdef _WIN32
    /* NOTE(review): assumes cmsysEncoding_DupToWide returns null when the
       conversion or its allocation fails - confirm.  Treat that as a
       failure to open rather than passing null to _wfopen.  */
    wchar_t* wname = cmsysEncoding_DupToWide(name);
    lexer->file = wname ? _wfopen(wname, L"rb") : 0;
    free(wname);
#else
    lexer->file = fopen(name, "rb");
#endif
    if (lexer->file) {
      if (bom) {
        *bom = cmListFileLexer_ReadBOM(lexer->file);
      }
    } else {
      result = 0;
    }
  }
  cmListFileLexerInit(lexer);
  return result;
}
/*--------------------------------------------------------------------------*/
/* Make a private copy of text the lexer's input, replacing any previous
   input.  text may be null, which just closes the current input.

   Returns 1 on success (including a null text) and 0 when the copy
   could not be allocated.  */
int cmListFileLexer_SetString(cmListFileLexer* lexer, const char* text)
{
  int result = 1;
  cmListFileLexerDestroy(lexer);
  if (text) {
    int length = (int)strlen(text);
    lexer->string_buffer = (char*)malloc(length + 1);
    if (lexer->string_buffer) {
      strcpy(lexer->string_buffer, text);
      lexer->string_position = lexer->string_buffer;
      lexer->string_left = length;
    } else {
      result = 0;
    }
  }
  cmListFileLexerInit(lexer);
  return result;
}
/*--------------------------------------------------------------------------*/
/* Scan the next token.

   Returns a pointer to the lexer's own token object, which is
   overwritten by later scans.  Returns null when no input is set or
   when the end of the input is reached; in the latter case the input is
   closed as well.  */
cmListFileLexer_Token* cmListFileLexer_Scan(cmListFileLexer* lexer)
{
  if (!lexer->file && !lexer->string_buffer) {
    return 0;
  }
  if (cmListFileLexer_yylex(lexer->scanner, lexer)) {
    return &lexer->token;
  } else {
    /* End of input: release the scanner and the input.  */
    cmListFileLexer_SetFileName(lexer, 0, 0);
    return 0;
  }
}
/*--------------------------------------------------------------------------*/
/* Return the line number the lexer has reached (lines start at 1).  */
long cmListFileLexer_GetCurrentLine(cmListFileLexer* lexer)
{
  return lexer->line;
}
/*--------------------------------------------------------------------------*/
/* Return the column the lexer has reached (columns start at 1).  */
long cmListFileLexer_GetCurrentColumn(cmListFileLexer* lexer)
{
  return lexer->column;
}
/*--------------------------------------------------------------------------*/
/* Return a short human-readable name for a token type.  The lexer
   argument is unused.  A value that is not one of the listed token
   types yields "unknown token".  The returned string is a literal and
   must not be freed.  */
const char* cmListFileLexer_GetTypeAsString(cmListFileLexer* lexer,
                                            cmListFileLexer_Type type)
{
  (void)lexer;
  switch (type) {
    case cmListFileLexer_Token_None:
      return "nothing";
    case cmListFileLexer_Token_Space:
      return "space";
    case cmListFileLexer_Token_Newline:
      return "newline";
    case cmListFileLexer_Token_Identifier:
      return "identifier";
    case cmListFileLexer_Token_ParenLeft:
      return "left paren";
    case cmListFileLexer_Token_ParenRight:
      return "right paren";
    case cmListFileLexer_Token_ArgumentUnquoted:
      return "unquoted argument";
    case cmListFileLexer_Token_ArgumentQuoted:
      return "quoted argument";
    case cmListFileLexer_Token_ArgumentBracket:
      return "bracket argument";
    case cmListFileLexer_Token_CommentBracket:
      return "bracket comment";
    case cmListFileLexer_Token_BadCharacter:
      return "bad character";
    case cmListFileLexer_Token_BadBracket:
      return "unterminated bracket";
    case cmListFileLexer_Token_BadString:
      return "unterminated string";
  }
  return "unknown token";
}