#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <math.h>
#include <string.h>
#include <stdarg.h>

#include "picoc.h"

#define isCidstart(c) (isalpha(c) || (c)=='_' || (c)=='#')
#define isCident(c) (isalnum(c) || (c)=='_')

#define NEXTIS(c,x,y) { if (NextChar == (c)) { Lexer->Pos++; GotToken = (x); } else GotToken = (y); }
#define NEXTIS3(c,x,d,y,z) { if (NextChar == (c)) { Lexer->Pos++; GotToken = (x); } else NEXTIS(d,y,z) }
#define NEXTIS4(c,x,d,y,e,z,a) { if (NextChar == (c)) { Lexer->Pos++; GotToken = (x); } else NEXTIS3(d,y,e,z,a) }
#define NEXTISEXACTLY3(c,d,y,z) { if (NextChar == (c) && Lexer->Pos[1] == (d)) { Lexer->Pos += 2; GotToken = (y); } else GotToken = (z); }

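/* The NEXTIS* macros implement one- and two-character lookahead for multi-character
   operators: each one checks the character after the current one, consumes it if it
   matches, and otherwise falls back to the next alternative. For example, the '-' case
   in LexScanGetToken() below expands NEXTIS4 so that "-=" gives TokenSubtractAssign,
   "->" gives TokenArrow, "--" gives TokenDecrement and a lone '-' gives TokenMinus. */
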
static union AnyValue LexAnyValue;
static struct Value LexValue = { TypeVoid, &LexAnyValue, FALSE, FALSE };

struct ReservedWord
{
    const char *Word;
    enum LexToken Token;
    const char *SharedWord;     /* word stored in shared string space */
};

static struct ReservedWord ReservedWords[] =
{
    { "#define", TokenHashDefine, NULL },
    { "#include", TokenHashInclude, NULL },
    { "break", TokenBreak, NULL },
    { "case", TokenCase, NULL },
    { "char", TokenCharType, NULL },
    { "continue", TokenContinue, NULL },
    { "default", TokenDefault, NULL },
    { "do", TokenDo, NULL },
    { "double", TokenDoubleType, NULL },
    { "else", TokenElse, NULL },
    { "enum", TokenEnumType, NULL },
    { "float", TokenFloatType, NULL },
    { "for", TokenFor, NULL },
    { "if", TokenIf, NULL },
    { "int", TokenIntType, NULL },
    { "long", TokenLongType, NULL },
    { "return", TokenReturn, NULL },
    { "signed", TokenSignedType, NULL },
    { "short", TokenShortType, NULL },
    { "struct", TokenStructType, NULL },
    { "switch", TokenSwitch, NULL },
    { "typedef", TokenTypedef, NULL },
    { "union", TokenUnionType, NULL },
    { "unsigned", TokenUnsignedType, NULL },
    { "void", TokenVoidType, NULL },
    { "while", TokenWhile, NULL }
};

struct LexState
{
    const char *Pos;
    const char *End;
    int Line;
    const char *FileName;
};

/* initialise the lexer */
void LexInit()
{
    int Count;

    for (Count = 0; Count < sizeof(ReservedWords) / sizeof(struct ReservedWord); Count++)
        ReservedWords[Count].SharedWord = StrRegister(ReservedWords[Count].Word);
}

/* exit with a message */
void LexFail(struct LexState *Lexer, const char *Message, ...)
{
    va_list Args;

    printf("%s:%d: ", Lexer->FileName, Lexer->Line);
    va_start(Args, Message);
    vprintf(Message, Args);
    printf("\n");
    exit(1);
}

/* check if a word is a reserved word - used while scanning */
enum LexToken LexCheckReservedWord(const char *Word)
{
    int Count;

    for (Count = 0; Count < sizeof(ReservedWords) / sizeof(struct ReservedWord); Count++)
    {
        if (Word == ReservedWords[Count].SharedWord)
            return ReservedWords[Count].Token;
    }

    return TokenNone;
}

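/* Note the comparison above is by pointer, not strcmp(): it relies on the shared string
   space interning strings, so the words registered by LexInit() via StrRegister() and the
   identifiers registered by LexGetWord() via StrRegister2() end up at the same address
   whenever their text is identical. */
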
/* get a numeric constant - used while scanning */
enum LexToken LexGetNumber(struct LexState *Lexer, struct Value *Value)
{
    int Result = 0;
    double FPResult;
    double FPDiv;

    for (; Lexer->Pos != Lexer->End && isdigit(*Lexer->Pos); Lexer->Pos++)
        Result = Result * 10 + (*Lexer->Pos - '0');

    Value->Typ = &IntType;
    Value->Val->Integer = Result;
    if (Lexer->Pos == Lexer->End || *Lexer->Pos != '.')
        return TokenIntegerConstant;

    Value->Typ = &FPType;
    Lexer->Pos++;
    for (FPDiv = 0.1, FPResult = (double)Result; Lexer->Pos != Lexer->End && isdigit(*Lexer->Pos); Lexer->Pos++, FPDiv /= 10.0)
        FPResult += (*Lexer->Pos - '0') * FPDiv;

    if (Lexer->Pos != Lexer->End && (*Lexer->Pos == 'e' || *Lexer->Pos == 'E'))
    {
        Lexer->Pos++;
        for (Result = 0; Lexer->Pos != Lexer->End && isdigit(*Lexer->Pos); Lexer->Pos++)
            Result = Result * 10 + (*Lexer->Pos - '0');

        FPResult *= pow(10.0, (double)Result);
    }

    Value->Val->FP = FPResult;      /* store the computed floating point result */
    return TokenFPConstant;
}

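/* Only plain decimal constants are handled here: an integer part, an optional fractional
   part after '.' and an optional decimal exponent after 'e' or 'E'. Hex and octal
   prefixes, exponent signs and type suffixes are not recognised. */
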
/* get a reserved word or identifier - used while scanning */
enum LexToken LexGetWord(struct LexState *Lexer, struct Value *Value)
{
    const char *Pos = Lexer->Pos + 1;
    enum LexToken Token;

    while (Pos != Lexer->End && isCident(*Pos))
        Pos++;

    Value->Typ = &StringType;
    Value->Val->String = (char *)StrRegister2(Lexer->Pos, Pos - Lexer->Pos);
    Lexer->Pos = Pos;

    Token = LexCheckReservedWord(Value->Val->String);
    if (Token != TokenNone)
        return Token;

    return TokenIdentifier;
}

/* unescape a character from a string or character constant */
unsigned char LexUnEscapeCharacter(const char **From, const char *End)
{
    unsigned char ThisChar;

    while ( *From != End && **From == '\\' &&
            &(*From)[1] != End && (*From)[1] == '\n')
        (*From) += 2;       /* skip escaped end of lines */

    if (*From == End)
        return '\\';

    if (**From == '\\')
    { /* it's escaped */
        (*From)++;
        if (*From == End)
            return '\\';

        ThisChar = *(*From)++;
        switch (ThisChar)
        {
            case '\\': return '\\';
            case '\'': return '\'';
            case '"': return '"';
            case 'a': return '\a';
            case 'b': return '\b';
            case 'f': return '\f';
            case 'n': return '\n';
            case 'r': return '\r';
            case 't': return '\t';
            case 'v': return '\v';
            /* case '0': XXX - implement octal character constants */
            /* case 'x': XXX - implement hex character constants */
            default: return ThisChar;
        }
    }
    else
        return *(*From)++;
}

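/* LexUnEscapeCharacter() advances the caller's pointer past everything it consumes,
   including backslash-newline line continuations, and returns a literal backslash if
   the input ends in the middle of an escape sequence. */
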
/* get a string constant - used while scanning */
enum LexToken LexGetStringConstant(struct LexState *Lexer, struct Value *Value)
{
    int Escape = FALSE;
    const char *StartPos = Lexer->Pos;
    const char *EndPos;
    char *EscBuf;
    char *EscBufPos;

    Value->Typ = &StringType;
    while (Lexer->Pos != Lexer->End && (*Lexer->Pos != '"' || Escape))
    { /* find the end */
        if (Escape)
            Escape = FALSE;
        else if (*Lexer->Pos == '\\')
            Escape = TRUE;

        Lexer->Pos++;
    }
    EndPos = Lexer->Pos;

    EscBuf = HeapAllocStack(EndPos - StartPos);
    for (EscBufPos = EscBuf, Lexer->Pos = StartPos; Lexer->Pos != EndPos;)
        *EscBufPos++ = LexUnEscapeCharacter(&Lexer->Pos, EndPos);

    Value->Val->String = (char *)StrRegister2(EscBuf, EscBufPos - EscBuf);
    HeapPopStack(EscBuf, EndPos - StartPos);
    if (Lexer->Pos != Lexer->End && *Lexer->Pos == '"')
        Lexer->Pos++;

    return TokenStringConstant;
}

/* get a character constant - used while scanning */
enum LexToken LexGetCharacterConstant(struct LexState *Lexer, struct Value *Value)
{
    Value->Typ = &IntType;
    Lexer->Pos++;
    Value->Val->Integer = LexUnEscapeCharacter(&Lexer->Pos, Lexer->End);
    if (Lexer->Pos == Lexer->End || *Lexer->Pos != '\'')
        LexFail(Lexer, "expected \"'\"");

    Lexer->Pos++;
    return TokenCharacterConstant;
}

/* skip a comment - used while scanning */
void LexSkipComment(struct LexState *Lexer, char NextChar)
{
    Lexer->Pos++;
    if (NextChar == '*')
    { /* conventional C comment */
        while (Lexer->Pos != Lexer->End && (*(Lexer->Pos-1) != '*' || *Lexer->Pos != '/'))
            Lexer->Pos++;

        if (Lexer->Pos != Lexer->End)
            Lexer->Pos++;
    }
    else
    { /* C++ style comment */
        while (Lexer->Pos != Lexer->End && *Lexer->Pos != '\n')
            Lexer->Pos++;
    }
}

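/* Note that block comments don't nest, and the end-of-comment scan starts at the
   character immediately after the opening star, so a slash that directly follows the
   opener is treated as terminating an empty comment. */
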
/* get a single token from the source - used while scanning */
enum LexToken LexScanGetToken(struct LexState *Lexer, struct Value **Value)
{
    char ThisChar;
    char NextChar;
    enum LexToken GotToken = TokenNone;

    do
    {
        *Value = &LexValue;
        while (Lexer->Pos != Lexer->End && isspace(*Lexer->Pos))
        {
            if (*Lexer->Pos == '\n')
            {
                Lexer->Line++;
                Lexer->Pos++;
                return TokenEndOfLine;
            }

            Lexer->Pos++;
        }

        if (Lexer->Pos == Lexer->End)
        { /* end of input */
            if (Lexer->FileName == StrEmpty)
            { /* get interactive input */
                char LineBuffer[LINEBUFFER_MAX];
                if (fgets(&LineBuffer[0], LINEBUFFER_MAX, stdin) == NULL)
                    return TokenEOF;

                /* XXX - finish this */
            }
            else
                return TokenEOF;
        }

        ThisChar = *Lexer->Pos;
        if (isCidstart(ThisChar))
            return LexGetWord(Lexer, *Value);

        if (isdigit(ThisChar))
            return LexGetNumber(Lexer, *Value);

        NextChar = (Lexer->Pos+1 != Lexer->End) ? *(Lexer->Pos+1) : 0;
        Lexer->Pos++;
        switch (ThisChar)
        {
            case '"': GotToken = LexGetStringConstant(Lexer, *Value); break;
            case '\'': GotToken = LexGetCharacterConstant(Lexer, *Value); break;
            case '(': GotToken = TokenOpenBracket; break;
            case ')': GotToken = TokenCloseBracket; break;
            case '=': NEXTIS('=', TokenEquality, TokenAssign); break;
            case '+': NEXTIS3('=', TokenAddAssign, '+', TokenIncrement, TokenPlus); break;
            case '-': NEXTIS4('=', TokenSubtractAssign, '>', TokenArrow, '-', TokenDecrement, TokenMinus); break;
            case '*': GotToken = TokenAsterisk; break;
            case '/': if (NextChar == '/' || NextChar == '*') LexSkipComment(Lexer, NextChar); else GotToken = TokenSlash; break;
            case '<': NEXTIS('=', TokenLessEqual, TokenLessThan); break;
            case '>': NEXTIS('=', TokenGreaterEqual, TokenGreaterThan); break;
            case ';': GotToken = TokenSemicolon; break;
            case '&': NEXTIS('&', TokenLogicalAnd, TokenAmpersand); break;
            case '|': NEXTIS('|', TokenLogicalOr, TokenArithmeticOr); break;
            case '{': GotToken = TokenLeftBrace; break;
            case '}': GotToken = TokenRightBrace; break;
            case '[': GotToken = TokenLeftSquareBracket; break;
            case ']': GotToken = TokenRightSquareBracket; break;
            case '!': GotToken = TokenUnaryNot; break;
            case '^': GotToken = TokenArithmeticExor; break;
            case '~': GotToken = TokenUnaryExor; break;
            case ',': GotToken = TokenComma; break;
            case '.': NEXTISEXACTLY3('.', '.', TokenEllipsis, TokenDot); break;
            case ':': GotToken = TokenColon; break;
            default: LexFail(Lexer, "illegal character '%c'", ThisChar); break;
        }
    } while (GotToken == TokenNone);

    return GotToken;
}

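/* Newlines are reported as explicit TokenEndOfLine tokens rather than being swallowed
   here; LexGetToken() below skips them when reading the tokenised stream and uses them
   to keep the parser's line count in step with the source. */
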
/* what size value goes with each token */
int LexTokenSize(enum LexToken Token)
{
    switch (Token)
    {
        case TokenIdentifier: case TokenStringConstant: return sizeof(char *);
        case TokenIntegerConstant: case TokenCharacterConstant: return sizeof(int);
        case TokenFPConstant: return sizeof(double);
        default: return 0;
    }
}

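/* Each token is stored in the token stream as a one-byte token id, optionally followed
   by a raw value of the size given above. LexTokenise() writes the stream in this format
   and LexGetToken() reads it back using the same size table. */
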
/* produce tokens from the lexer and return a heap buffer with the result - used for scanning */
void *LexTokenise(struct LexState *Lexer)
{
    enum LexToken Token;
    void *HeapMem;
    struct Value *GotValue;
    int MemUsed = 0;
    int ValueSize;
    int ReserveSpace = (Lexer->End - Lexer->Pos) * 3 + 1;
    unsigned char *TokenSpace = HeapAllocStack(ReserveSpace);
    unsigned char *TokenPos = TokenSpace;
    if (TokenSpace == NULL)
        LexFail(Lexer, "out of memory");

    do
    { /* store the token at the end of the stack area */
        Token = LexScanGetToken(Lexer, &GotValue);
#ifdef DEBUG_LEXER
        printf("Token: %02x\n", Token);
#endif
        *TokenPos = (unsigned char)Token;
        TokenPos++;
        MemUsed++;

        ValueSize = LexTokenSize(Token);
        if (ValueSize > 0)
        { /* store a value as well */
            memcpy(TokenPos, GotValue->Val, ValueSize);
            TokenPos += ValueSize;
            MemUsed += ValueSize;
        }

    } while (Token != TokenEOF);

    HeapMem = HeapAlloc(MemUsed);
    if (HeapMem == NULL)
        LexFail(Lexer, "out of memory");

    memcpy(HeapMem, TokenSpace, MemUsed);
    HeapPopStack(TokenSpace, ReserveSpace);
#ifdef DEBUG_LEXER
    {
        int Count;
        for (Count = 0; Count < MemUsed; Count++)
            printf("%02x ", ((unsigned char *)HeapMem)[Count]);
        printf("\n");
    }
#endif

    return HeapMem;
}

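/* The token stream is built in a temporary stack area first because its final size isn't
   known until the whole source has been scanned; it is then copied into an exactly-sized
   heap buffer and the over-sized stack reservation is released. */
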
/* lexically analyse some source text */
void *LexAnalyse(const char *FileName, const char *Source, int SourceLen)
{
    struct LexState Lexer;

    Lexer.Pos = Source;
    Lexer.End = Source + SourceLen;
    Lexer.Line = 1;
    Lexer.FileName = FileName;
    return LexTokenise(&Lexer);
}

/* prepare to parse a pre-tokenised buffer */
void LexInitParser(struct ParseState *Parser, void *TokenSource, const char *FileName, int Line, int RunIt)
{
    Parser->Pos = TokenSource;
    Parser->Line = Line;
    Parser->FileName = FileName;
    Parser->Mode = RunIt ? RunModeRun : RunModeSkip;
    Parser->SearchLabel = 0;
}

/* get the next token given a parser state */
enum LexToken LexGetToken(struct ParseState *Parser, struct Value **Value, int IncPos)
{
    enum LexToken Token;
    int ValueSize;

    while ((enum LexToken)*(unsigned char *)Parser->Pos == TokenEndOfLine)
    { /* skip leading newlines */
        Parser->Line++;
        Parser->Pos++;
    }

    Token = (enum LexToken)*(unsigned char *)Parser->Pos;
    ValueSize = LexTokenSize(Token);
    if (ValueSize > 0)
    { /* this token requires a value - unpack it */
        if (Value != NULL)
        {
            switch (Token)
            {
                case TokenStringConstant: case TokenIdentifier: LexValue.Typ = &StringType; break;
                case TokenIntegerConstant: LexValue.Typ = &IntType; break;
                case TokenCharacterConstant: LexValue.Typ = &CharType; break;
                case TokenFPConstant: LexValue.Typ = &FPType; break;
                default: break;
            }

            memcpy(LexValue.Val, Parser->Pos+1, ValueSize);
            LexValue.ValOnHeap = FALSE;
            LexValue.ValOnStack = FALSE;
            LexValue.IsLValue = FALSE;
            *Value = &LexValue;
        }

        if (IncPos)
            Parser->Pos += ValueSize + 1;
    }
    else
    {
        if (IncPos && Token != TokenEOF)
            Parser->Pos++;
    }

#ifdef DEBUG_LEXER
    printf("Got token=%02x inc=%d\n", Token, IncPos);
#endif
    return Token;
}

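/* Illustrative example of how the pieces above fit together (a sketch only, not part of
 * the original source - it assumes the ParseState/Value definitions from picoc.h and an
 * already-initialised shared string space):
 *
 *     const char *Source = "int x = 3;";
 *     struct ParseState Parser;
 *     struct Value *LexedValue;
 *     void *Tokens;
 *     enum LexToken Tok;
 *
 *     LexInit();
 *     Tokens = LexAnalyse("example.c", Source, strlen(Source));
 *     LexInitParser(&Parser, Tokens, "example.c", 1, TRUE);
 *     do {
 *         Tok = LexGetToken(&Parser, &LexedValue, TRUE);
 *     } while (Tok != TokenEOF);
 */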