Lexer now supports interactive input.

Lexer now pre-scans the source and returns a buffer of tokens.
Heap can provide unallocated memory for a temporary buffer.

git-svn-id: http://picoc.googlecode.com/svn/trunk@46 21eae674-98b7-11dd-bd71-f92a316d2d60
This commit is contained in:
zik.saleeba 2009-02-02 04:53:45 +00:00
parent 8bd1c18d24
commit 0c0ac9b332
4 changed files with 175 additions and 108 deletions

13
heap.c
View file

@ -6,12 +6,6 @@
#define FREELIST_BUCKETS 8 /* freelists for 4, 8, 12 ... 32 byte allocs */ #define FREELIST_BUCKETS 8 /* freelists for 4, 8, 12 ... 32 byte allocs */
#define SPLIT_MEM_THRESHOLD 16 /* don't split memory which is close in size */ #define SPLIT_MEM_THRESHOLD 16 /* don't split memory which is close in size */
struct AllocNode
{
int Size;
struct AllocNode *NextFree;
};
static unsigned char HeapMemory[HEAP_SIZE]; /* all memory - stack and heap */ static unsigned char HeapMemory[HEAP_SIZE]; /* all memory - stack and heap */
static void *StackFrame = &HeapMemory; /* the current stack frame */ static void *StackFrame = &HeapMemory; /* the current stack frame */
static void *StackTop = &HeapMemory; /* the top of the stack */ static void *StackTop = &HeapMemory; /* the top of the stack */
@ -61,6 +55,13 @@ int HeapPopStack(void *Addr, int Size)
return TRUE; return TRUE;
} }
/*
 * get all the free space from the top of the stack - only suitable for
 * temporary work.
 *
 * MemAvailable receives the number of bytes between the current top of
 * the stack and the end of HeapMemory. The return value is the address of
 * the first of those bytes (the current StackTop). Nothing is reserved: the
 * stack top is not moved, so the space is only valid until the next stack
 * allocation.
 *
 * The distance is measured upward from StackTop. Measuring from the start
 * of HeapMemory up to StackTop would give the bytes already in use by the
 * stack, which is zero on a fresh stack.
 *
 * NOTE(review): if the heap allocator hands out blocks from the top of
 * HeapMemory downward, the limit here should be the lowest heap address in
 * use rather than the end of the array - confirm against HeapAlloc.
 */
void *HeapStackGetFreeSpace(int *MemAvailable)
{
    /* byte pointers: arithmetic on void * is a compiler extension */
    unsigned char *ArenaEnd = (unsigned char *)HeapMemory + sizeof(HeapMemory);

    *MemAvailable = (int)(ArenaEnd - (unsigned char *)StackTop);
    return StackTop;
}
/* push a new stack frame on to the stack */ /* push a new stack frame on to the stack */
void HeapPushStackFrame() void HeapPushStackFrame()
{ {

197
lex.c
View file

@ -11,9 +11,11 @@
#define isCidstart(c) (isalpha(c) || (c)=='_' || (c)=='#') #define isCidstart(c) (isalpha(c) || (c)=='_' || (c)=='#')
#define isCident(c) (isalnum(c) || (c)=='_') #define isCident(c) (isalnum(c) || (c)=='_')
#define NEXTIS(c,x,y) { if (NextChar == (c)) { Lexer->Pos++; return (x); } else return (y); } #define ISVALUETOKEN(t) ((t) >= TokenIdentifier && (t) <= TokenCharacterConstant)
#define NEXTIS3(c,x,d,y,z) { if (NextChar == (c)) { Lexer->Pos++; return (x); } else NEXTIS(d,y,z) }
#define NEXTIS4(c,x,d,y,e,z,a) { if (NextChar == (c)) { Lexer->Pos++; return (x); } else NEXTIS3(d,y,e,z,a) } #define NEXTIS(c,x,y) { if (NextChar == (c)) { Lexer->Pos++; GotToken = (x); } else GotToken = (y); }
#define NEXTIS3(c,x,d,y,z) { if (NextChar == (c)) { Lexer->Pos++; GotToken = (x); } else NEXTIS(d,y,z) }
#define NEXTIS4(c,x,d,y,e,z,a) { if (NextChar == (c)) { Lexer->Pos++; GotToken = (x); } else NEXTIS3(d,y,e,z,a) }
static union AnyValue LexAnyValue; static union AnyValue LexAnyValue;
static struct Value LexValue = { TypeVoid, &LexAnyValue, FALSE, FALSE }; static struct Value LexValue = { TypeVoid, &LexAnyValue, FALSE, FALSE };
@ -22,35 +24,36 @@ struct ReservedWord
{ {
const char *Word; const char *Word;
enum LexToken Token; enum LexToken Token;
const char *SharedWord; /* word stored in shared string space */
}; };
static struct ReservedWord ReservedWords[] = static struct ReservedWord ReservedWords[] =
{ {
{ "#define", TokenHashDefine }, { "#define", TokenHashDefine, NULL },
{ "#include", TokenHashInclude }, { "#include", TokenHashInclude, NULL },
{ "break", TokenBreak }, { "break", TokenBreak, NULL },
{ "case", TokenCase }, { "case", TokenCase, NULL },
{ "char", TokenCharType }, { "char", TokenCharType, NULL },
{ "default", TokenDefault }, { "default", TokenDefault, NULL },
{ "do", TokenDo }, { "do", TokenDo, NULL },
{ "double", TokenDoubleType }, { "double", TokenDoubleType, NULL },
{ "else", TokenElse }, { "else", TokenElse, NULL },
{ "enum", TokenEnumType }, { "enum", TokenEnumType, NULL },
{ "float", TokenFloatType }, { "float", TokenFloatType, NULL },
{ "for", TokenFor }, { "for", TokenFor, NULL },
{ "if", TokenIf }, { "if", TokenIf, NULL },
{ "int", TokenIntType }, { "int", TokenIntType, NULL },
{ "long", TokenLongType }, { "long", TokenLongType, NULL },
{ "return", TokenReturn }, { "return", TokenReturn, NULL },
{ "signed", TokenSignedType }, { "signed", TokenSignedType, NULL },
{ "short", TokenShortType }, { "short", TokenShortType, NULL },
{ "struct", TokenStructType }, { "struct", TokenStructType, NULL },
{ "switch", TokenSwitch }, { "switch", TokenSwitch, NULL },
{ "typedef", TokenTypedef }, { "typedef", TokenTypedef, NULL },
{ "union", TokenUnionType }, { "union", TokenUnionType, NULL },
{ "unsigned", TokenUnsignedType }, { "unsigned", TokenUnsignedType, NULL },
{ "void", TokenVoidType }, { "void", TokenVoidType, NULL },
{ "while", TokenWhile } { "while", TokenWhile, NULL }
}; };
struct LexState struct LexState
@ -61,14 +64,25 @@ struct LexState
const char *FileName; const char *FileName;
}; };
void LexInit(struct ParseState *Parser, const char *Source, int SourceLen, const char *FileName, int Line)
/* initialise the lexer */
void LexInit()
{ {
Parser->Pos = Source; int Count;
Parser->End = Source + SourceLen;
for (Count = 0; Count < sizeof(ReservedWords) / sizeof(struct ReservedWord); Count++)
ReservedWords[Count].SharedWord = StrRegister(ReservedWords[Count].Word);
}
/* prepare to parse a pre-tokenised buffer */
void LexInitParser(struct ParseState *Parser, void *TokenSource, int TokenSourceLen, const char *FileName, int Line)
{
Parser->Pos = TokenSource;
Parser->Line = Line; Parser->Line = Line;
Parser->FileName = FileName; Parser->FileName = FileName;
} }
/* exit with a message */
void LexFail(struct LexState *Lexer, const char *Message, ...) void LexFail(struct LexState *Lexer, const char *Message, ...)
{ {
va_list Args; va_list Args;
@ -80,19 +94,21 @@ void LexFail(struct LexState *Lexer, const char *Message, ...)
exit(1); exit(1);
} }
/* check if a word is a reserved word - used while scanning */
enum LexToken LexCheckReservedWord(const char *Word) enum LexToken LexCheckReservedWord(const char *Word)
{ {
int Count; int Count;
for (Count = 0; Count < sizeof(ReservedWords) / sizeof(struct ReservedWord); Count++) for (Count = 0; Count < sizeof(ReservedWords) / sizeof(struct ReservedWord); Count++)
{ {
if (strcmp(Word, ReservedWords[Count].Word) == 0) if (Word == ReservedWords[Count].SharedWord)
return ReservedWords[Count].Token; return ReservedWords[Count].Token;
} }
return TokenNone; return TokenNone;
} }
/* get a numeric constant - used while scanning */
enum LexToken LexGetNumber(struct LexState *Lexer, struct Value *Value) enum LexToken LexGetNumber(struct LexState *Lexer, struct Value *Value)
{ {
int Result = 0; int Result = 0;
@ -124,6 +140,7 @@ enum LexToken LexGetNumber(struct LexState *Lexer, struct Value *Value)
return TokenFPConstant; return TokenFPConstant;
} }
/* get a reserved word or identifier - used while scanning */
enum LexToken LexGetWord(struct LexState *Lexer, struct Value *Value) enum LexToken LexGetWord(struct LexState *Lexer, struct Value *Value)
{ {
const char *Pos = Lexer->Pos + 1; const char *Pos = Lexer->Pos + 1;
@ -143,6 +160,7 @@ enum LexToken LexGetWord(struct LexState *Lexer, struct Value *Value)
return TokenIdentifier; return TokenIdentifier;
} }
/* get a string constant - used while scanning */
enum LexToken LexGetStringConstant(struct LexState *Lexer, struct Value *Value) enum LexToken LexGetStringConstant(struct LexState *Lexer, struct Value *Value)
{ {
int Escape = FALSE; int Escape = FALSE;
@ -166,6 +184,7 @@ enum LexToken LexGetStringConstant(struct LexState *Lexer, struct Value *Value)
return TokenStringConstant; return TokenStringConstant;
} }
/* get a character constant - used while scanning */
enum LexToken LexGetCharacterConstant(struct LexState *Lexer, struct Value *Value) enum LexToken LexGetCharacterConstant(struct LexState *Lexer, struct Value *Value)
{ {
Value->Typ = &IntType; Value->Typ = &IntType;
@ -177,7 +196,8 @@ enum LexToken LexGetCharacterConstant(struct LexState *Lexer, struct Value *Valu
return TokenCharacterConstant; return TokenCharacterConstant;
} }
enum LexToken LexGetComment(struct LexState *Lexer, char NextChar, struct Value *Value) /* skip a comment - used while scanning */
void LexSkipComment(struct LexState *Lexer, char NextChar)
{ {
Lexer->Pos++; Lexer->Pos++;
if (NextChar == '*') if (NextChar == '*')
@ -193,15 +213,17 @@ enum LexToken LexGetComment(struct LexState *Lexer, char NextChar, struct Value
while (Lexer->Pos != Lexer->End && *Lexer->Pos != '\n') while (Lexer->Pos != Lexer->End && *Lexer->Pos != '\n')
Lexer->Pos++; Lexer->Pos++;
} }
return LexGetToken(Lexer, Value);
} }
enum LexToken LexGetTokenToStack(struct LexState *Lexer, struct Value **Value) /* get a single token from the source - used while scanning */
enum LexToken LexScanGetToken(struct LexState *Lexer, struct Value **Value)
{ {
char ThisChar; char ThisChar;
char NextChar; char NextChar;
enum LexToken GotToken = TokenNone;
do
{
if (Lexer->Pos == Lexer->End) if (Lexer->Pos == Lexer->End)
{ {
char LineBuffer[LINEBUFFER_MAX]; char LineBuffer[LINEBUFFER_MAX];
@ -229,60 +251,95 @@ enum LexToken LexGetTokenToStack(struct LexState *Lexer, struct Value **Value)
Lexer->Pos++; Lexer->Pos++;
switch (ThisChar) switch (ThisChar)
{ {
case '"': return LexGetStringConstant(Lexer, *Value); case '"': GotToken = LexGetStringConstant(Lexer, *Value);
case '\'': return LexGetCharacterConstant(Lexer, *Value); case '\'': GotToken = LexGetCharacterConstant(Lexer, *Value);
case '(': return TokenOpenBracket; case '(': GotToken = TokenOpenBracket;
case ')': return TokenCloseBracket; case ')': GotToken = TokenCloseBracket;
case '=': NEXTIS('=', TokenEquality, TokenAssign); case '=': NEXTIS('=', TokenEquality, TokenAssign);
case '+': NEXTIS3('=', TokenAddAssign, '+', TokenIncrement, TokenPlus); case '+': NEXTIS3('=', TokenAddAssign, '+', TokenIncrement, TokenPlus);
case '-': NEXTIS4('=', TokenSubtractAssign, '>', TokenArrow, '-', TokenDecrement, TokenMinus); case '-': NEXTIS4('=', TokenSubtractAssign, '>', TokenArrow, '-', TokenDecrement, TokenMinus);
case '*': return TokenAsterisk; case '*': GotToken = TokenAsterisk;
case '/': if (NextChar == '/' || NextChar == '*') return LexGetComment(Lexer, NextChar, *Value); else return TokenSlash; case '/': if (NextChar == '/' || NextChar == '*') LexSkipComment(Lexer, NextChar); else GotToken = TokenSlash;
case '<': NEXTIS('=', TokenLessEqual, TokenLessThan); case '<': NEXTIS('=', TokenLessEqual, TokenLessThan);
case '>': NEXTIS('=', TokenGreaterEqual, TokenGreaterThan); case '>': NEXTIS('=', TokenGreaterEqual, TokenGreaterThan);
case ';': return TokenSemicolon; case ';': GotToken = TokenSemicolon;
case '&': NEXTIS('&', TokenLogicalAnd, TokenAmpersand); case '&': NEXTIS('&', TokenLogicalAnd, TokenAmpersand);
case '|': NEXTIS('|', TokenLogicalOr, TokenArithmeticOr); case '|': NEXTIS('|', TokenLogicalOr, TokenArithmeticOr);
case '{': return TokenLeftBrace; case '{': GotToken = TokenLeftBrace;
case '}': return TokenRightBrace; case '}': GotToken = TokenRightBrace;
case '[': return TokenLeftSquareBracket; case '[': GotToken = TokenLeftSquareBracket;
case ']': return TokenRightSquareBracket; case ']': GotToken = TokenRightSquareBracket;
case '!': return TokenUnaryNot; case '!': GotToken = TokenUnaryNot;
case '^': return TokenArithmeticExor; case '^': GotToken = TokenArithmeticExor;
case '~': return TokenUnaryExor; case '~': GotToken = TokenUnaryExor;
case ',': return TokenComma; case ',': GotToken = TokenComma;
case '.': return TokenDot; case '.': GotToken = TokenDot;
default: LexFail(Lexer, "illegal character '%c'", ThisChar);
}
} while (GotToken == TokenNone);
return GotToken;
} }
LexFail(Lexer, "illegal character '%c'", ThisChar); /* produce tokens from the lexer and return a heap buffer with the result - used for scanning */
return TokenEOF; void *LexTokeniseToHeap(struct LexState *Lexer)
}
void LexTokeniseToStack(struct LexState *Lexer, struct Value **Value)
{ {
XXX - finish this enum LexToken Token;
void *HeapMem;
struct Value *GotValue;
int MemAvailable;
int MemUsed;
void *TokenSpace = HeapStackGetFreeSpace(&MemAvailable);
do
{ /* store the token at the end of the stack area */
Token = LexScanGetToken(Lexer, &GotValue);
*(char *)TokenSpace = Token;
TokenSpace++;
MemUsed++;
if (ISVALUETOKEN(Token))
{ /* store a value as well */
int ValueSize = sizeof(struct Value) + GotValue->Typ->Sizeof;
if (MemAvailable - MemUsed <= ValueSize)
LexFail(Lexer, "out of memory while lexing");
memcpy(TokenSpace, GotValue, ValueSize);
TokenSpace += ValueSize;
MemUsed += ValueSize;
} }
if (MemAvailable <= MemUsed)
LexFail(Lexer, "out of memory while lexing");
} while (Token != TokenEOF);
if (MemAvailable < MemUsed*2 + sizeof(struct AllocNode)) /* need memory for stack copy + heap copy */
LexFail(Lexer, "out of memory while lexing");
HeapMem = HeapAlloc(MemUsed);
memcpy(HeapMem, HeapStackGetFreeSpace(&MemAvailable), MemUsed);
return HeapMem;
}
/* get the next token given a parser state */
enum LexToken LexGetToken(struct ParseState *Parser, struct Value **Value, int IncPos) enum LexToken LexGetToken(struct ParseState *Parser, struct Value **Value, int IncPos)
{ {
enum LexToken; enum LexToken Token;
while (Parser->Pos != Parser->End && (enum LexToken)*(unsigned char *)Parser->Pos == TokenEndOfLine) while ((enum LexToken)*(unsigned char *)Parser->Pos == TokenEndOfLine)
{ /* skip leading newlines */ { /* skip leading newlines */
Pos->Line++; Parser->Line++;
Pos++; Parser->Pos++;
} }
if (Parser->Pos == Parser->End) Token = (enum LexToken)*(unsigned char *)Parser->Pos;
return TokenEOF; if (ISVALUETOKEN(Token))
LexToken = (enum LexToken)*(unsigned char *)Parser->Pos;
if (LexToken >= TokenIdentifier && LexToken <= TokenCharacterConstant)
{ /* this token requires a value */ { /* this token requires a value */
int ValueLen = sizeof(struct Value) + ((struct Value *)Parser->Pos)->Typ->Sizeof; int ValueLen = sizeof(struct Value) + ((struct Value *)Parser->Pos)->Typ->Sizeof;
if (Value != NULL) if (Value != NULL)
{ /* copy the value out (aligns it in the process) */ { /* copy the value out (aligns it in the process) */
memcpy(LexValue, (struct Value *)Parser->Pos, ValueLen); memcpy(&LexValue, (struct Value *)Parser->Pos, ValueLen);
*Value = &LexValue; *Value = &LexValue;
} }
@ -291,10 +348,10 @@ enum LexToken LexGetToken(struct ParseState *Parser, struct Value **Value, int I
} }
else else
{ {
if (IncPos) if (IncPos && Token != TokenEndOfLine)
Parser->Pos++; Parser->Pos++;
} }
return LexToken; return Token;
} }

View file

@ -65,6 +65,7 @@ int main(int argc, char **argv)
HeapInit(); HeapInit();
StrInit(); StrInit();
LexInit();
ParseInit(); ParseInit();
ScanFile(argv[1]); ScanFile(argv[1]);

12
picoc.h
View file

@ -62,11 +62,17 @@ enum LexToken
TokenHashDefine, TokenHashInclude TokenHashDefine, TokenHashInclude
}; };
/* used in dynamic memory allocation */
/* NOTE(review): lex.c adds sizeof(struct AllocNode) to its memory estimate
 * before calling HeapAlloc, so this looks like a per-allocation header -
 * confirm against heap.c */
struct AllocNode
{
int Size; /* presumably the size of the block in bytes - TODO confirm whether it includes this header */
struct AllocNode *NextFree; /* link to another AllocNode; presumably the next entry on a free list - confirm */
};
/* parser state - has all this detail so we can parse nested files */ /* parser state - has all this detail so we can parse nested files */
struct ParseState struct ParseState
{ {
const void *Pos; const void *Pos;
const void *End;
int Line; int Line;
const char *FileName; const char *FileName;
}; };
@ -207,7 +213,8 @@ int TableGet(struct Table *Tbl, const char *Key, struct Value **Val);
const char *TableSetKey(struct Table *Tbl, const char *Ident, int IdentLen); const char *TableSetKey(struct Table *Tbl, const char *Ident, int IdentLen);
/* lex.c */ /* lex.c */
void LexInit(struct ParseState *Parser, const char *Source, int SourceLen, const char *FileName, int Line); void LexInit();
void LexInitParser(struct ParseState *Parser, void *TokenSource, int TokenSourceLen, const char *FileName, int Line);
enum LexToken LexGetToken(struct ParseState *Parser, struct Value **Value, int IncPos); enum LexToken LexGetToken(struct ParseState *Parser, struct Value **Value, int IncPos);
void LexToEndOfLine(struct ParseState *Parser); void LexToEndOfLine(struct ParseState *Parser);
@ -231,6 +238,7 @@ void IntrinsicInit(struct Table *GlobalTable);
void HeapInit(); void HeapInit();
void *HeapAllocStack(int Size); void *HeapAllocStack(int Size);
int HeapPopStack(void *Addr, int Size); int HeapPopStack(void *Addr, int Size);
void *HeapStackGetFreeSpace(int *MemAvailable);
void HeapPushStackFrame(); void HeapPushStackFrame();
int HeapPopStackFrame(); int HeapPopStackFrame();
void *HeapAlloc(int Size); void *HeapAlloc(int Size);