You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
561 lines
14 KiB
561 lines
14 KiB
%{
|
|
/* Distributed under the OSI-approved BSD 3-Clause License. See accompanying
|
|
file Copyright.txt or https://cmake.org/licensing for details. */
|
|
/*
|
|
|
|
This file must be translated to C and modified to build everywhere.
|
|
|
|
Run flex >= 2.6 like this:
|
|
|
|
flex --nounistd -DFLEXINT_H --noline -ocmListFileLexer.c cmListFileLexer.in.l
|
|
|
|
Modify cmListFileLexer.c:
|
|
- remove trailing whitespace: sed -i 's/\s*$//' cmListFileLexer.c
|
|
- remove blank lines at end of file: sed -i '${/^$/d;}' cmListFileLexer.c
|
|
- #include "cmStandardLexer.h" at the top: sed -i '1i#include "cmStandardLexer.h"' cmListFileLexer.c
|
|
|
|
*/
|
|
|
|
/* IWYU pragma: no_forward_declare yyguts_t */
|
|
|
|
#ifdef _WIN32
|
|
#include "cmsys/Encoding.h"
|
|
#endif
|
|
|
|
/* Setup the proper cmListFileLexer_yylex declaration. */
|
|
#define YY_EXTRA_TYPE cmListFileLexer*
|
|
#define YY_DECL int cmListFileLexer_yylex (yyscan_t yyscanner, cmListFileLexer* lexer)
|
|
|
|
#include "cmListFileLexer.h"
|
|
|
|
/*--------------------------------------------------------------------------*/
|
|
struct cmListFileLexer_s
|
|
{
|
|
cmListFileLexer_Token token;
|
|
int bracket;
|
|
int comment;
|
|
int line;
|
|
int column;
|
|
int size;
|
|
FILE* file;
|
|
size_t cr;
|
|
char* string_buffer;
|
|
char* string_position;
|
|
int string_left;
|
|
yyscan_t scanner;
|
|
};
|
|
|
|
static void cmListFileLexerSetToken(cmListFileLexer* lexer, const char* text,
|
|
int length);
|
|
static void cmListFileLexerAppend(cmListFileLexer* lexer, const char* text,
|
|
int length);
|
|
static int cmListFileLexerInput(cmListFileLexer* lexer, char* buffer,
|
|
size_t bufferSize);
|
|
static void cmListFileLexerInit(cmListFileLexer* lexer);
|
|
static void cmListFileLexerDestroy(cmListFileLexer* lexer);
|
|
|
|
/* Replace the lexer input function. */
|
|
#undef YY_INPUT
|
|
#define YY_INPUT(buf, result, max_size) \
|
|
do { result = cmListFileLexerInput(cmListFileLexer_yyget_extra(yyscanner), buf, max_size); } while (0)
|
|
|
|
/*--------------------------------------------------------------------------*/
|
|
%}
|
|
|
|
%option prefix="cmListFileLexer_yy"
|
|
|
|
%option reentrant
|
|
%option yylineno
|
|
%option noyywrap
|
|
%pointer
|
|
%x STRING
|
|
%x BRACKET
|
|
%x BRACKETEND
|
|
%x COMMENT
|
|
|
|
MAKEVAR \$\([A-Za-z0-9_]*\)
|
|
UNQUOTED ([^ \0\t\r\n\(\)#\\\"[=]|\\[^\0\n])
|
|
LEGACY {MAKEVAR}|{UNQUOTED}|\"({MAKEVAR}|{UNQUOTED}|[ \t[=])*\"
|
|
|
|
%%
|
|
|
|
<INITIAL,COMMENT>\n {
|
|
lexer->token.type = cmListFileLexer_Token_Newline;
|
|
cmListFileLexerSetToken(lexer, yytext, yyleng);
|
|
++lexer->line;
|
|
lexer->column = 1;
|
|
BEGIN(INITIAL);
|
|
return 1;
|
|
}
|
|
|
|
#?\[=*\[\n? {
|
|
const char* bracket = yytext;
|
|
lexer->comment = yytext[0] == '#';
|
|
if (lexer->comment) {
|
|
lexer->token.type = cmListFileLexer_Token_CommentBracket;
|
|
bracket += 1;
|
|
} else {
|
|
lexer->token.type = cmListFileLexer_Token_ArgumentBracket;
|
|
}
|
|
cmListFileLexerSetToken(lexer, "", 0);
|
|
lexer->bracket = strchr(bracket+1, '[') - bracket;
|
|
if (yytext[yyleng-1] == '\n') {
|
|
++lexer->line;
|
|
lexer->column = 1;
|
|
} else {
|
|
lexer->column += yyleng;
|
|
}
|
|
BEGIN(BRACKET);
|
|
}
|
|
|
|
# {
|
|
lexer->column += yyleng;
|
|
BEGIN(COMMENT);
|
|
}
|
|
|
|
<COMMENT>[^\0\n]* {
|
|
lexer->column += yyleng;
|
|
}
|
|
|
|
\( {
|
|
lexer->token.type = cmListFileLexer_Token_ParenLeft;
|
|
cmListFileLexerSetToken(lexer, yytext, yyleng);
|
|
lexer->column += yyleng;
|
|
return 1;
|
|
}
|
|
|
|
\) {
|
|
lexer->token.type = cmListFileLexer_Token_ParenRight;
|
|
cmListFileLexerSetToken(lexer, yytext, yyleng);
|
|
lexer->column += yyleng;
|
|
return 1;
|
|
}
|
|
|
|
[A-Za-z_][A-Za-z0-9_]* {
|
|
lexer->token.type = cmListFileLexer_Token_Identifier;
|
|
cmListFileLexerSetToken(lexer, yytext, yyleng);
|
|
lexer->column += yyleng;
|
|
return 1;
|
|
}
|
|
|
|
<BRACKET>\]=* {
|
|
/* Handle ]]====]=======]*/
|
|
cmListFileLexerAppend(lexer, yytext, yyleng);
|
|
lexer->column += yyleng;
|
|
if (yyleng == lexer->bracket) {
|
|
BEGIN(BRACKETEND);
|
|
}
|
|
}
|
|
|
|
<BRACKETEND>\] {
|
|
lexer->column += yyleng;
|
|
/* Erase the partial bracket from the token. */
|
|
lexer->token.length -= lexer->bracket;
|
|
lexer->token.text[lexer->token.length] = 0;
|
|
BEGIN(INITIAL);
|
|
return 1;
|
|
}
|
|
|
|
<BRACKET>([^]\0\n])+ {
|
|
cmListFileLexerAppend(lexer, yytext, yyleng);
|
|
lexer->column += yyleng;
|
|
}
|
|
|
|
<BRACKET,BRACKETEND>\n {
|
|
cmListFileLexerAppend(lexer, yytext, yyleng);
|
|
++lexer->line;
|
|
lexer->column = 1;
|
|
BEGIN(BRACKET);
|
|
}
|
|
|
|
<BRACKET,BRACKETEND>[^\0\n] {
|
|
cmListFileLexerAppend(lexer, yytext, yyleng);
|
|
lexer->column += yyleng;
|
|
BEGIN(BRACKET);
|
|
}
|
|
|
|
<BRACKET,BRACKETEND><<EOF>> {
|
|
lexer->token.type = cmListFileLexer_Token_BadBracket;
|
|
BEGIN(INITIAL);
|
|
return 1;
|
|
}
|
|
|
|
({UNQUOTED}|=|\[=*{UNQUOTED})({UNQUOTED}|[[=])* {
|
|
lexer->token.type = cmListFileLexer_Token_ArgumentUnquoted;
|
|
cmListFileLexerSetToken(lexer, yytext, yyleng);
|
|
lexer->column += yyleng;
|
|
return 1;
|
|
}
|
|
|
|
({MAKEVAR}|{UNQUOTED}|=|\[=*{LEGACY})({LEGACY}|[[=])* {
|
|
lexer->token.type = cmListFileLexer_Token_ArgumentUnquoted;
|
|
cmListFileLexerSetToken(lexer, yytext, yyleng);
|
|
lexer->column += yyleng;
|
|
return 1;
|
|
}
|
|
|
|
\[ {
|
|
lexer->token.type = cmListFileLexer_Token_ArgumentUnquoted;
|
|
cmListFileLexerSetToken(lexer, yytext, yyleng);
|
|
lexer->column += yyleng;
|
|
return 1;
|
|
}
|
|
|
|
\" {
|
|
lexer->token.type = cmListFileLexer_Token_ArgumentQuoted;
|
|
cmListFileLexerSetToken(lexer, "", 0);
|
|
lexer->column += yyleng;
|
|
BEGIN(STRING);
|
|
}
|
|
|
|
<STRING>([^\\\0\n\"]|\\[^\0\n])+ {
|
|
cmListFileLexerAppend(lexer, yytext, yyleng);
|
|
lexer->column += yyleng;
|
|
}
|
|
|
|
<STRING>\\\n {
|
|
/* Continuation: text is not part of string */
|
|
++lexer->line;
|
|
lexer->column = 1;
|
|
}
|
|
|
|
<STRING>\n {
|
|
cmListFileLexerAppend(lexer, yytext, yyleng);
|
|
++lexer->line;
|
|
lexer->column = 1;
|
|
}
|
|
|
|
<STRING>\" {
|
|
lexer->column += yyleng;
|
|
BEGIN(INITIAL);
|
|
return 1;
|
|
}
|
|
|
|
<STRING>[^\0\n] {
|
|
cmListFileLexerAppend(lexer, yytext, yyleng);
|
|
lexer->column += yyleng;
|
|
}
|
|
|
|
<STRING><<EOF>> {
|
|
lexer->token.type = cmListFileLexer_Token_BadString;
|
|
BEGIN(INITIAL);
|
|
return 1;
|
|
}
|
|
|
|
[ \t\r]+ {
|
|
lexer->token.type = cmListFileLexer_Token_Space;
|
|
cmListFileLexerSetToken(lexer, yytext, yyleng);
|
|
lexer->column += yyleng;
|
|
return 1;
|
|
}
|
|
|
|
. {
|
|
lexer->token.type = cmListFileLexer_Token_BadCharacter;
|
|
cmListFileLexerSetToken(lexer, yytext, yyleng);
|
|
lexer->column += yyleng;
|
|
return 1;
|
|
}
|
|
|
|
<<EOF>> {
|
|
lexer->token.type = cmListFileLexer_Token_None;
|
|
cmListFileLexerSetToken(lexer, 0, 0);
|
|
return 0;
|
|
}
|
|
|
|
%%
|
|
|
|
/*--------------------------------------------------------------------------*/
|
|
static void cmListFileLexerSetToken(cmListFileLexer* lexer, const char* text,
|
|
int length)
|
|
{
|
|
/* Set the token line and column number. */
|
|
lexer->token.line = lexer->line;
|
|
lexer->token.column = lexer->column;
|
|
|
|
/* Use the same buffer if possible. */
|
|
if (lexer->token.text) {
|
|
if (text && length < lexer->size) {
|
|
strcpy(lexer->token.text, text);
|
|
lexer->token.length = length;
|
|
return;
|
|
}
|
|
free(lexer->token.text);
|
|
lexer->token.text = 0;
|
|
lexer->size = 0;
|
|
}
|
|
|
|
/* Need to extend the buffer. */
|
|
if (text) {
|
|
lexer->token.text = strdup(text);
|
|
lexer->token.length = length;
|
|
lexer->size = length + 1;
|
|
} else {
|
|
lexer->token.length = 0;
|
|
}
|
|
}
|
|
|
|
/*--------------------------------------------------------------------------*/
|
|
static void cmListFileLexerAppend(cmListFileLexer* lexer, const char* text,
|
|
int length)
|
|
{
|
|
char* temp;
|
|
int newSize;
|
|
|
|
/* If the appended text will fit in the buffer, do not reallocate. */
|
|
newSize = lexer->token.length + length + 1;
|
|
if (lexer->token.text && newSize <= lexer->size) {
|
|
strcpy(lexer->token.text + lexer->token.length, text);
|
|
lexer->token.length += length;
|
|
return;
|
|
}
|
|
|
|
/* We need to extend the buffer. */
|
|
temp = malloc(newSize);
|
|
if (lexer->token.text) {
|
|
memcpy(temp, lexer->token.text, lexer->token.length);
|
|
free(lexer->token.text);
|
|
}
|
|
memcpy(temp + lexer->token.length, text, length);
|
|
temp[lexer->token.length + length] = 0;
|
|
lexer->token.text = temp;
|
|
lexer->token.length += length;
|
|
lexer->size = newSize;
|
|
}
|
|
|
|
/*--------------------------------------------------------------------------*/
|
|
static int cmListFileLexerInput(cmListFileLexer* lexer, char* buffer,
|
|
size_t bufferSize)
|
|
{
|
|
if (lexer) {
|
|
if (lexer->file) {
|
|
/* Convert CRLF -> LF explicitly. The C FILE "t"ext mode
|
|
does not convert newlines on all platforms. Move any
|
|
trailing CR to the start of the buffer for the next read. */
|
|
size_t cr = lexer->cr;
|
|
size_t n;
|
|
buffer[0] = '\r';
|
|
n = fread(buffer + cr, 1, bufferSize - cr, lexer->file);
|
|
if (n) {
|
|
char* o = buffer;
|
|
const char* i = buffer;
|
|
const char* e;
|
|
n += cr;
|
|
cr = (buffer[n - 1] == '\r') ? 1 : 0;
|
|
e = buffer + n - cr;
|
|
while (i != e) {
|
|
if (i[0] == '\r' && i[1] == '\n') {
|
|
++i;
|
|
}
|
|
*o++ = *i++;
|
|
}
|
|
n = o - buffer;
|
|
} else {
|
|
n = cr;
|
|
cr = 0;
|
|
}
|
|
lexer->cr = cr;
|
|
return n;
|
|
} else if (lexer->string_left) {
|
|
int length = lexer->string_left;
|
|
if ((int)bufferSize < length) {
|
|
length = (int)bufferSize;
|
|
}
|
|
memcpy(buffer, lexer->string_position, length);
|
|
lexer->string_position += length;
|
|
lexer->string_left -= length;
|
|
return length;
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/*--------------------------------------------------------------------------*/
|
|
static void cmListFileLexerInit(cmListFileLexer* lexer)
|
|
{
|
|
if (lexer->file || lexer->string_buffer) {
|
|
cmListFileLexer_yylex_init(&lexer->scanner);
|
|
cmListFileLexer_yyset_extra(lexer, lexer->scanner);
|
|
}
|
|
}
|
|
|
|
/*--------------------------------------------------------------------------*/
|
|
static void cmListFileLexerDestroy(cmListFileLexer* lexer)
|
|
{
|
|
cmListFileLexerSetToken(lexer, 0, 0);
|
|
if (lexer->file || lexer->string_buffer) {
|
|
cmListFileLexer_yylex_destroy(lexer->scanner);
|
|
if (lexer->file) {
|
|
fclose(lexer->file);
|
|
lexer->file = 0;
|
|
}
|
|
if (lexer->string_buffer) {
|
|
free(lexer->string_buffer);
|
|
lexer->string_buffer = 0;
|
|
lexer->string_left = 0;
|
|
lexer->string_position = 0;
|
|
}
|
|
}
|
|
}
|
|
|
|
/*--------------------------------------------------------------------------*/
|
|
cmListFileLexer* cmListFileLexer_New(void)
|
|
{
|
|
cmListFileLexer* lexer = (cmListFileLexer*)malloc(sizeof(cmListFileLexer));
|
|
if (!lexer) {
|
|
return 0;
|
|
}
|
|
memset(lexer, 0, sizeof(*lexer));
|
|
lexer->line = 1;
|
|
lexer->column = 1;
|
|
return lexer;
|
|
}
|
|
|
|
/*--------------------------------------------------------------------------*/
|
|
void cmListFileLexer_Delete(cmListFileLexer* lexer)
|
|
{
|
|
cmListFileLexer_SetFileName(lexer, 0, 0);
|
|
free(lexer);
|
|
}
|
|
|
|
/*--------------------------------------------------------------------------*/
|
|
static cmListFileLexer_BOM cmListFileLexer_ReadBOM(FILE* f)
|
|
{
|
|
unsigned char b[2];
|
|
if (fread(b, 1, 2, f) == 2) {
|
|
if (b[0] == 0xEF && b[1] == 0xBB) {
|
|
if (fread(b, 1, 1, f) == 1 && b[0] == 0xBF) {
|
|
return cmListFileLexer_BOM_UTF8;
|
|
}
|
|
} else if (b[0] == 0xFE && b[1] == 0xFF) {
|
|
/* UTF-16 BE */
|
|
return cmListFileLexer_BOM_UTF16BE;
|
|
} else if (b[0] == 0 && b[1] == 0) {
|
|
if (fread(b, 1, 2, f) == 2 && b[0] == 0xFE && b[1] == 0xFF) {
|
|
return cmListFileLexer_BOM_UTF32BE;
|
|
}
|
|
} else if (b[0] == 0xFF && b[1] == 0xFE) {
|
|
fpos_t p;
|
|
fgetpos(f, &p);
|
|
if (fread(b, 1, 2, f) == 2 && b[0] == 0 && b[1] == 0) {
|
|
return cmListFileLexer_BOM_UTF32LE;
|
|
}
|
|
if (fsetpos(f, &p) != 0) {
|
|
return cmListFileLexer_BOM_Broken;
|
|
}
|
|
return cmListFileLexer_BOM_UTF16LE;
|
|
}
|
|
}
|
|
if (fseek(f, 0, SEEK_SET) != 0) {
|
|
return cmListFileLexer_BOM_Broken;
|
|
}
|
|
return cmListFileLexer_BOM_None;
|
|
}
|
|
|
|
/*--------------------------------------------------------------------------*/
|
|
int cmListFileLexer_SetFileName(cmListFileLexer* lexer, const char* name,
|
|
cmListFileLexer_BOM* bom)
|
|
{
|
|
int result = 1;
|
|
cmListFileLexerDestroy(lexer);
|
|
if (name) {
|
|
#ifdef _WIN32
|
|
wchar_t* wname = cmsysEncoding_DupToWide(name);
|
|
lexer->file = _wfopen(wname, L"rb");
|
|
free(wname);
|
|
#else
|
|
lexer->file = fopen(name, "rb");
|
|
#endif
|
|
if (lexer->file) {
|
|
if (bom) {
|
|
*bom = cmListFileLexer_ReadBOM(lexer->file);
|
|
}
|
|
} else {
|
|
result = 0;
|
|
}
|
|
}
|
|
cmListFileLexerInit(lexer);
|
|
return result;
|
|
}
|
|
|
|
/*--------------------------------------------------------------------------*/
|
|
int cmListFileLexer_SetString(cmListFileLexer* lexer, const char* text)
|
|
{
|
|
int result = 1;
|
|
cmListFileLexerDestroy(lexer);
|
|
if (text) {
|
|
int length = (int)strlen(text);
|
|
lexer->string_buffer = (char*)malloc(length + 1);
|
|
if (lexer->string_buffer) {
|
|
strcpy(lexer->string_buffer, text);
|
|
lexer->string_position = lexer->string_buffer;
|
|
lexer->string_left = length;
|
|
} else {
|
|
result = 0;
|
|
}
|
|
}
|
|
cmListFileLexerInit(lexer);
|
|
return result;
|
|
}
|
|
|
|
/*--------------------------------------------------------------------------*/
|
|
cmListFileLexer_Token* cmListFileLexer_Scan(cmListFileLexer* lexer)
|
|
{
|
|
if (!lexer->file && !lexer->string_buffer) {
|
|
return 0;
|
|
}
|
|
if (cmListFileLexer_yylex(lexer->scanner, lexer)) {
|
|
return &lexer->token;
|
|
} else {
|
|
cmListFileLexer_SetFileName(lexer, 0, 0);
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
/*--------------------------------------------------------------------------*/
|
|
long cmListFileLexer_GetCurrentLine(cmListFileLexer* lexer)
|
|
{
|
|
return lexer->line;
|
|
}
|
|
|
|
/*--------------------------------------------------------------------------*/
|
|
long cmListFileLexer_GetCurrentColumn(cmListFileLexer* lexer)
|
|
{
|
|
return lexer->column;
|
|
}
|
|
|
|
/*--------------------------------------------------------------------------*/
|
|
const char* cmListFileLexer_GetTypeAsString(cmListFileLexer* lexer,
|
|
cmListFileLexer_Type type)
|
|
{
|
|
(void)lexer;
|
|
switch (type) {
|
|
case cmListFileLexer_Token_None:
|
|
return "nothing";
|
|
case cmListFileLexer_Token_Space:
|
|
return "space";
|
|
case cmListFileLexer_Token_Newline:
|
|
return "newline";
|
|
case cmListFileLexer_Token_Identifier:
|
|
return "identifier";
|
|
case cmListFileLexer_Token_ParenLeft:
|
|
return "left paren";
|
|
case cmListFileLexer_Token_ParenRight:
|
|
return "right paren";
|
|
case cmListFileLexer_Token_ArgumentUnquoted:
|
|
return "unquoted argument";
|
|
case cmListFileLexer_Token_ArgumentQuoted:
|
|
return "quoted argument";
|
|
case cmListFileLexer_Token_ArgumentBracket:
|
|
return "bracket argument";
|
|
case cmListFileLexer_Token_CommentBracket:
|
|
return "bracket comment";
|
|
case cmListFileLexer_Token_BadCharacter:
|
|
return "bad character";
|
|
case cmListFileLexer_Token_BadBracket:
|
|
return "unterminated bracket";
|
|
case cmListFileLexer_Token_BadString:
|
|
return "unterminated string";
|
|
}
|
|
return "unknown token";
|
|
}
|