From e45e455b9b977f425d14ebf54f8990a58c294b65 Mon Sep 17 00:00:00 2001 From: "zik.saleeba" Date: Tue, 27 Jul 2010 10:35:54 +0000 Subject: [PATCH] Now handling multi-line string constants correctly. Also handling either LF newlines or CR/LF newlines in multi-line string constants correctly. Changed handling of multi-line comments to use the new, simpler Lexer->EmitExtraNewlines field. Made FANCY_ERROR_REPORTING standard to reduce code obfuscation. Fixed issue 98. git-svn-id: http://picoc.googlecode.com/svn/trunk@473 21eae674-98b7-11dd-bd71-f92a316d2d60 --- lex.c | 69 +++++++++++++++++++++--------------------------------- parse.c | 2 -- picoc.h | 10 +++----- platform.c | 11 --------- platform.h | 3 --- 5 files changed, 30 insertions(+), 65 deletions(-) diff --git a/lex.c b/lex.c index 82b5c47..4d3fe9e 100644 --- a/lex.c +++ b/lex.c @@ -19,15 +19,9 @@ #define NEXTIS3PLUS(c,x,d,y,e,z,a) { if (NextChar == (c)) { LEXER_INC(Lexer); GotToken = (x); } else if (NextChar == (d)) { if (Lexer->Pos[1] == (e)) { LEXER_INCN(Lexer, 2); GotToken = (z); } else { LEXER_INC(Lexer); GotToken = (y); } } else GotToken = (a); } #define NEXTISEXACTLY3(c,d,y,z) { if (NextChar == (c) && Lexer->Pos[1] == (d)) { LEXER_INCN(Lexer, 2); GotToken = (y); } else GotToken = (z); } -#ifdef FANCY_ERROR_REPORTING #define LEXER_INC(l) ( (l)->Pos++, (l)->CharacterPos++ ) #define LEXER_INCN(l, n) ( (l)->Pos+=(n), (l)->CharacterPos+=(n) ) #define TOKEN_DATA_OFFSET 2 -#else -#define LEXER_INC(l) (l)->Pos++ -#define LEXER_INCN(l, n) (l)->Pos+=(n) -#define TOKEN_DATA_OFFSET 1 -#endif #define MAX_CHAR_VALUE 255 /* maximum value which can be represented by a "char" data type */ @@ -260,8 +254,12 @@ unsigned char LexUnEscapeCharacter(const char **From, const char *End) unsigned char ThisChar; while ( *From != End && **From == '\\' && - &(*From)[1] != End && (*From)[1] == '\n') - (*From) += 2; /* skip escaped end of lines */ + &(*From)[1] != End && (*From)[1] == '\n' ) + (*From) += 2; /* skip escaped end of lines 
with LF line termination */ + + while ( *From != End && **From == '\\' && + &(*From)[1] != End && &(*From)[2] != End && (*From)[1] == '\r' && (*From)[2] == '\n') + (*From) += 3; /* skip escaped end of lines with CR/LF line termination */ if (*From == End) return '\\'; @@ -310,7 +308,20 @@ enum LexToken LexGetStringConstant(struct LexState *Lexer, struct Value *Value, { /* find the end */ if (Escape) + { + if (*Lexer->Pos == '\r' && Lexer->Pos+1 != Lexer->End) + Lexer->Pos++; + + if (*Lexer->Pos == '\n' && Lexer->Pos+1 != Lexer->End) + { + Lexer->Line++; + Lexer->Pos++; + Lexer->CharacterPos = 0; + Lexer->EmitExtraNewlines++; + } + Escape = FALSE; + } else if (*Lexer->Pos == '\\') Escape = TRUE; @@ -368,14 +379,9 @@ void LexSkipComment(struct LexState *Lexer, char NextChar, enum LexToken *Return while (Lexer->Pos != Lexer->End && (*(Lexer->Pos-1) != '*' || *Lexer->Pos != '/')) { if (*Lexer->Pos == '\n') - { - LEXER_INC(Lexer); - Lexer->Mode = LexModeMultiLineComment; - *ReturnToken = TokenEndOfLine; - return; - } - else - LEXER_INC(Lexer); + Lexer->EmitExtraNewlines++; + + LEXER_INC(Lexer); } if (Lexer->Pos != Lexer->End) @@ -398,12 +404,11 @@ enum LexToken LexScanGetToken(struct LexState *Lexer, struct Value **Value) char NextChar; enum LexToken GotToken = TokenNone; - /* handle the end of multi-line comments */ - if (Lexer->Mode == LexModeMultiLineComment) + /* handle cases like multi-line comments or string constants which mess up the line count */ + if (Lexer->EmitExtraNewlines > 0) { - LexSkipComment(Lexer, '*', &GotToken); - if (GotToken != TokenNone) - return GotToken; + Lexer->EmitExtraNewlines--; + return TokenEndOfLine; } /* scan for a token */ @@ -417,9 +422,7 @@ enum LexToken LexScanGetToken(struct LexState *Lexer, struct Value **Value) Lexer->Line++; Lexer->Pos++; Lexer->Mode = LexModeNormal; -#ifdef FANCY_ERROR_REPORTING Lexer->CharacterPos = 0; -#endif return TokenEndOfLine; } else if (Lexer->Mode == LexModeHashDefine || Lexer->Mode == 
LexModeHashDefineSpace) @@ -499,16 +502,10 @@ void *LexTokenise(struct LexState *Lexer, int *TokenLen) struct Value *GotValue; int MemUsed = 0; int ValueSize; -#ifdef FANCY_ERROR_REPORTING int ReserveSpace = (Lexer->End - Lexer->Pos) * 4 + 16; -#else - int ReserveSpace = (Lexer->End - Lexer->Pos) * 3 + 16; -#endif void *TokenSpace = HeapAllocStack(ReserveSpace); char *TokenPos = (char *)TokenSpace; -#ifdef FANCY_ERROR_REPORTING int LastCharacterPos = 0; -#endif if (TokenSpace == NULL) LexFail(Lexer, "out of memory"); @@ -525,11 +522,9 @@ void *LexTokenise(struct LexState *Lexer, int *TokenLen) TokenPos++; MemUsed++; -#ifdef FANCY_ERROR_REPORTING *(unsigned char *)TokenPos = (unsigned char)LastCharacterPos; TokenPos++; MemUsed++; -#endif ValueSize = LexTokenSize(Token); if (ValueSize > 0) @@ -540,9 +535,7 @@ void *LexTokenise(struct LexState *Lexer, int *TokenLen) MemUsed += ValueSize; } -#ifdef FANCY_ERROR_REPORTING LastCharacterPos = Lexer->CharacterPos; -#endif } while (Token != TokenEOF); @@ -578,10 +571,9 @@ void *LexAnalyse(const char *FileName, const char *Source, int SourceLen, int *T Lexer.Line = 1; Lexer.FileName = FileName; Lexer.Mode = LexModeNormal; -#ifdef FANCY_ERROR_REPORTING + Lexer.EmitExtraNewlines = 0; Lexer.CharacterPos = 1; Lexer.SourceText = Source; -#endif return LexTokenise(&Lexer, TokenLen); } @@ -596,10 +588,8 @@ void LexInitParser(struct ParseState *Parser, const char *SourceText, void *Toke Parser->SearchLabel = 0; Parser->HashIfLevel = 0; Parser->HashIfEvaluateToLevel = 0; -#ifdef FANCY_ERROR_REPORTING Parser->CharacterPos = 0; Parser->SourceText = SourceText; -#endif } /* get the next token, without pre-processing */ @@ -657,9 +647,7 @@ enum LexToken LexGetRawToken(struct ParseState *Parser, struct Value **Value, in /* start a new list */ InteractiveHead = LineNode; Parser->Line = 1; -#ifdef FANCY_ERROR_REPORTING Parser->CharacterPos = 0; -#endif } else InteractiveTail->Next = LineNode; @@ -688,10 +676,7 @@ enum LexToken 
LexGetRawToken(struct ParseState *Parser, struct Value **Value, in } } while ((Parser->FileName == StrEmpty && Token == TokenEOF) || Token == TokenEndOfLine); -#ifdef FANCY_ERROR_REPORTING Parser->CharacterPos = *((unsigned char *)Parser->Pos + 1); -#endif - ValueSize = LexTokenSize(Token); if (ValueSize > 0) { diff --git a/parse.c b/parse.c index 03bf89d..56535a3 100644 --- a/parse.c +++ b/parse.c @@ -315,9 +315,7 @@ void ParserCopyPos(struct ParseState *To, struct ParseState *From) To->Line = From->Line; To->HashIfLevel = From->HashIfLevel; To->HashIfEvaluateToLevel = From->HashIfEvaluateToLevel; -#ifdef FANCY_ERROR_REPORTING To->CharacterPos = From->CharacterPos; -#endif } /* parse a "for" statement */ diff --git a/picoc.h b/picoc.h index fbb275d..ef43fd3 100644 --- a/picoc.h +++ b/picoc.h @@ -109,10 +109,8 @@ struct ParseState int SearchLabel; /* what case label we're searching for */ int HashIfLevel; int HashIfEvaluateToLevel; -#ifdef FANCY_ERROR_REPORTING int CharacterPos; const char *SourceText; -#endif }; /* values */ @@ -249,8 +247,7 @@ enum LexMode LexModeHashInclude, LexModeHashDefine, LexModeHashDefineSpace, - LexModeHashDefineSpaceIdent, - LexModeMultiLineComment + LexModeHashDefineSpaceIdent }; struct LexState @@ -259,11 +256,10 @@ struct LexState const char *End; const char *FileName; int Line; - enum LexMode Mode; -#ifdef FANCY_ERROR_REPORTING int CharacterPos; const char *SourceText; -#endif + enum LexMode Mode; + int EmitExtraNewlines; }; /* library function definition */ diff --git a/platform.c b/platform.c index b5ef60c..b4b26f9 100644 --- a/platform.c +++ b/platform.c @@ -1,6 +1,5 @@ #include "picoc.h" -#ifdef FANCY_ERROR_REPORTING void PrintSourceTextErrorLine(const char *FileName, const char *SourceText, int Line, int CharacterPos) { int LineCount; @@ -40,17 +39,12 @@ void PrintSourceTextErrorLine(const char *FileName, const char *SourceText, int PlatformPrintf("^\n%s:%d: ", FileName, Line, CharacterPos); } -#endif /* display the source line 
and line number to identify an error */ void PlatformErrorPrefix(struct ParseState *Parser) { if (Parser != NULL) -#ifdef FANCY_ERROR_REPORTING PrintSourceTextErrorLine(Parser->FileName, Parser->SourceText, Parser->Line, Parser->CharacterPos); -#else - PlatformPrintf("%s:%d: ", Parser->FileName, Parser->Line); -#endif } /* exit with a message */ @@ -88,12 +82,7 @@ void LexFail(struct LexState *Lexer, const char *Message, ...) { va_list Args; -#ifdef FANCY_ERROR_REPORTING PrintSourceTextErrorLine(Lexer->FileName, Lexer->SourceText, Lexer->Line, Lexer->CharacterPos); -#else - PlatformPrintf("%s:%d: ", Lexer->FileName, Lexer->Line); -#endif - va_start(Args, Message); PlatformVPrintf(Message, Args); va_end(Args); diff --git a/platform.h b/platform.h index 4310cd2..3bd8c30 100644 --- a/platform.h +++ b/platform.h @@ -26,8 +26,6 @@ #define LOCAL_TABLE_SIZE 11 /* size of local variable table (can expand) */ #define STRUCT_TABLE_SIZE 11 /* size of struct/union member table (can expand) */ -#define FANCY_ERROR_REPORTING /* optional feature - gives more detailed error messages but uses more memory */ - #define INTERACTIVE_PROMPT_START "starting picoc\n" #define INTERACTIVE_PROMPT_STATEMENT "picoc> " #define INTERACTIVE_PROMPT_LINE " > " @@ -95,7 +93,6 @@ extern jmp_buf ExitBuf; # define assert(x) # define BUILTIN_MINI_STDLIB # undef BIG_ENDIAN -# undef FANCY_ERROR_REPORTING # else # ifdef SURVEYOR_HOST