2008-10-14 07:46:42 -04:00
|
|
|
#include <stdlib.h>
|
2008-10-13 06:53:25 -04:00
|
|
|
#include <ctype.h>
|
|
|
|
|
2008-10-12 20:53:28 -04:00
|
|
|
#include "picoc.h"
|
|
|
|
|
2008-10-13 06:53:25 -04:00
|
|
|
|
|
|
|
/*
 * Character classification for C identifiers.
 *   isCidstart(c) - non-zero if c may begin an identifier (letter or '_').
 *   isCident(c)   - non-zero if c may continue an identifier (letter, digit or '_').
 * The argument is converted to unsigned char before it reaches <ctype.h>,
 * because passing a negative plain-char value to isalpha()/isalnum() is
 * undefined behaviour. Note that c is evaluated twice.
 */
#define isCidstart(c) (isalpha((unsigned char)(c)) || (c) == '_')
#define isCident(c) (isalnum((unsigned char)(c)) || (c) == '_')
|
2008-10-13 06:53:25 -04:00
|
|
|
|
2008-10-12 20:53:28 -04:00
|
|
|
/* Advance the lexer by one character. Expands to a post-increment of
 * Lexer->Pos, so a pointer named `Lexer` must be in scope at the use site. */
#define LEXINC Lexer->Pos++
|
|
|
|
/* Two-way operator lookahead. Expands to a block that always returns from the
 * enclosing function: if NextChar equals c, consume it (LEXINC) and return x,
 * otherwise return y. Requires `NextChar` and `Lexer` in scope at the use site. */
#define NEXTIS(c,x,y) { if (NextChar == (c)) { LEXINC; return (x); } else return (y); }
|
|
|
|
/* Three-way operator lookahead: if NextChar equals c, consume it and return x;
 * otherwise defer to NEXTIS(d,y,z). Always returns from the enclosing function.
 * Requires `NextChar` and `Lexer` in scope at the use site. */
#define NEXTIS3(c,x,d,y,z) { if (NextChar == (c)) { LEXINC; return (x); } else NEXTIS(d,y,z) }
|
|
|
|
/* Four-way operator lookahead: if NextChar equals c, consume it and return x;
 * otherwise defer to NEXTIS3(d,y,e,z,a). Always returns from the enclosing
 * function. Requires `NextChar` and `Lexer` in scope at the use site. */
#define NEXTIS4(c,x,d,y,e,z,a) { if (NextChar == (c)) { LEXINC; return (x); } else NEXTIS3(d,y,e,z,a) }
|
|
|
|
|
2008-10-13 06:53:25 -04:00
|
|
|
|
2008-10-12 20:53:28 -04:00
|
|
|
struct ReservedWord
|
|
|
|
{
|
|
|
|
const char *Word;
|
|
|
|
enum LexToken Token;
|
|
|
|
};
|
|
|
|
|
2008-10-13 06:53:25 -04:00
|
|
|
static struct ReservedWord ReservedWords[] =
|
2008-10-12 20:53:28 -04:00
|
|
|
{
|
|
|
|
{ "char", TokenCharType },
|
|
|
|
{ "do", TokenDo },
|
|
|
|
{ "else", TokenElse },
|
|
|
|
{ "for", TokenFor },
|
|
|
|
{ "if", TokenIf },
|
|
|
|
{ "int", TokenIntType },
|
|
|
|
{ "while", TokenWhile },
|
|
|
|
{ "void", TokenVoidType }
|
|
|
|
};
|
|
|
|
|
2008-10-13 06:53:25 -04:00
|
|
|
void LexInit(struct LexState *Lexer, const Str *Source, const Str *FileName, int Line)
|
2008-10-12 20:53:28 -04:00
|
|
|
{
|
|
|
|
Lexer->Pos = Source->Str;
|
|
|
|
Lexer->End = Source->Str + Source->Len;
|
|
|
|
Lexer->Line = Line;
|
2008-10-13 06:53:25 -04:00
|
|
|
Lexer->FileName = FileName;
|
2008-10-12 20:53:28 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
enum LexToken LexCheckReservedWord(const Str *Word)
|
|
|
|
{
|
|
|
|
int Count;
|
|
|
|
|
|
|
|
for (Count = 0; Count < sizeof(ReservedWords) / sizeof(struct ReservedWord); Count++)
|
|
|
|
{
|
2008-10-13 06:53:25 -04:00
|
|
|
if (StrEqualC(Word, ReservedWords[Count].Word))
|
|
|
|
return ReservedWords[Count].Token;
|
2008-10-12 20:53:28 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
return TokenNone;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
enum LexToken LexGetNumber(struct LexState *Lexer)
|
|
|
|
{
|
2008-10-14 07:46:42 -04:00
|
|
|
Lexer->Value.Integer = strtol(Lexer->Pos, &Lexer->Pos, 10);
|
2008-10-13 06:53:25 -04:00
|
|
|
return TokenIntegerConstant;
|
2008-10-12 20:53:28 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
enum LexToken LexGetWord(struct LexState *Lexer)
|
|
|
|
{
|
2008-10-14 07:46:42 -04:00
|
|
|
const char *Pos = Lexer->Pos + 1;
|
|
|
|
enum LexToken Token;
|
|
|
|
|
|
|
|
while (Lexer->Pos != Lexer->End && isCident(*Pos))
|
|
|
|
Pos++;
|
|
|
|
|
|
|
|
Lexer->Value.String.Str = Lexer->Pos;
|
|
|
|
Lexer->Value.String.Len = Pos - Lexer->Pos;
|
|
|
|
Lexer->Pos = Pos;
|
|
|
|
|
|
|
|
Token = LexCheckReservedWord(&Lexer->Value.String);
|
|
|
|
if (Token != TokenNone)
|
|
|
|
return Token;
|
|
|
|
|
2008-10-13 06:53:25 -04:00
|
|
|
return TokenIdentifier;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
enum LexToken LexGetStringConstant(struct LexState *Lexer)
|
|
|
|
{
|
|
|
|
// XXX
|
|
|
|
return TokenStringConstant;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
enum LexToken LexGetCharacterConstant(struct LexState *Lexer)
|
|
|
|
{
|
2008-10-14 07:46:42 -04:00
|
|
|
Lexer->Value.Integer = Lexer->Pos[1];
|
|
|
|
if (Lexer->Pos[2] != '\'')
|
|
|
|
ProgramError(Lexer->FileName, Lexer->Line, "illegal character '%c'", Lexer->Pos[2]);
|
|
|
|
|
|
|
|
Lexer->Pos += 3;
|
2008-10-13 06:53:25 -04:00
|
|
|
return TokenCharacterConstant;
|
2008-10-12 20:53:28 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
enum LexToken LexGetToken(struct LexState *Lexer)
|
|
|
|
{
|
|
|
|
char ThisChar;
|
|
|
|
char NextChar;
|
|
|
|
|
|
|
|
while (Lexer->Pos != Lexer->End && isspace(*Lexer->Pos))
|
|
|
|
Lexer->Pos++;
|
|
|
|
|
|
|
|
if (Lexer->Pos == Lexer->End)
|
|
|
|
return TokenEOF;
|
|
|
|
|
|
|
|
ThisChar = *Lexer->Pos;
|
|
|
|
if (isCidstart(ThisChar))
|
|
|
|
return LexGetWord(Lexer);
|
|
|
|
|
|
|
|
if (isdigit(ThisChar))
|
|
|
|
return LexGetNumber(Lexer);
|
|
|
|
|
|
|
|
NextChar = (Lexer->Pos+1 != Lexer->End) ? *(Lexer->Pos+1) : 0;
|
|
|
|
LEXINC;
|
|
|
|
switch (ThisChar)
|
|
|
|
{
|
|
|
|
case '"': return LexGetStringConstant(Lexer);
|
|
|
|
case '\'': return LexGetCharacterConstant(Lexer);
|
|
|
|
case '(': return TokenOpenBracket;
|
|
|
|
case ')': return TokenCloseBracket;
|
2008-10-13 06:53:25 -04:00
|
|
|
case '=': NEXTIS('=', TokenEquality, TokenAssign);
|
2008-10-12 20:53:28 -04:00
|
|
|
case '+': NEXTIS3('=', TokenAddAssign, '+', TokenIncrement, TokenPlus);
|
|
|
|
case '-': NEXTIS4('=', TokenSubtractAssign, '>', TokenArrow, '-', TokenDecrement, TokenMinus);
|
|
|
|
case '*': return TokenAsterisk;
|
|
|
|
case '/': return TokenSlash;
|
2008-10-13 06:53:25 -04:00
|
|
|
case '<': NEXTIS('=', TokenLessEqual, TokenLessThan);
|
|
|
|
case '>': NEXTIS('=', TokenGreaterEqual, TokenGreaterThan);
|
|
|
|
case ';': return TokenSemicolon;
|
2008-10-12 20:53:28 -04:00
|
|
|
case '&': NEXTIS('&', TokenLogicalAnd, TokenAmpersand);
|
|
|
|
case '|': NEXTIS('|', TokenLogicalOr, TokenArithmeticOr);
|
|
|
|
case '{': return TokenLeftBrace;
|
|
|
|
case '}': return TokenRightBrace;
|
|
|
|
case '[': return TokenLeftAngleBracket;
|
|
|
|
case ']': return TokenRightAngleBracket;
|
|
|
|
case '^': return TokenArithmeticExor;
|
|
|
|
case '~': return TokenUnaryExor;
|
|
|
|
case ',': return TokenComma;
|
|
|
|
case '.': return TokenDot;
|
|
|
|
}
|
|
|
|
|
|
|
|
ProgramError(Lexer->FileName, Lexer->Line, "illegal character '%c'", ThisChar);
|
|
|
|
return TokenEOF;
|
|
|
|
}
|
|
|
|
|