From 86af5318daa90212e53bb473c6f84ef97b453e63 Mon Sep 17 00:00:00 2001 From: "zik.saleeba" Date: Sun, 1 Feb 2009 11:31:18 +0000 Subject: [PATCH] Bulko lexer change for more efficient pre-scanned tokens. Removed Str type - replaced with standard C strings. Added hashed string tables for efficient string storage. git-svn-id: http://picoc.googlecode.com/svn/trunk@43 21eae674-98b7-11dd-bd71-f92a316d2d60 --- Makefile | 2 +- heap.c | 8 +- intrinsic.c | 4 +- lex.c | 171 ++++++++++++----------- parse.c | 386 ++++++++++++++++++++++++++++------------------------ picoc.c | 62 ++++----- picoc.h | 127 ++++++++--------- str.c | 37 ++--- table.c | 48 ++++--- variable.c | 42 +++--- 10 files changed, 463 insertions(+), 424 deletions(-) diff --git a/Makefile b/Makefile index 9b3d3b4..db40e4f 100644 --- a/Makefile +++ b/Makefile @@ -3,7 +3,7 @@ CFLAGS=-Wall -g LIBS=-lm TARGET = picoc -SRCS = picoc.c table.c str.c parse.c lex.c intrinsic.c heap.c type.c variable.c +SRCS = picoc.c table.c lex.c parse.c intrinsic.c heap.c type.c variable.c str.c OBJS := $(SRCS:%.c=%.o) all: $(TARGET) diff --git a/heap.c b/heap.c index c94facb..fefa371 100644 --- a/heap.c +++ b/heap.c @@ -88,7 +88,10 @@ void *HeapAlloc(int Size) struct AllocNode *NewMem = NULL; struct AllocNode **FreeNode; int AllocSize = MEM_ALIGN(Size) + sizeof(NewMem->Size); - int Bucket = AllocSize >> 2; + int Bucket = AllocSize >> 2; + + if (Size == 0) + return NULL; if (Bucket < FREELIST_BUCKETS && FreeListBucket[Bucket] != NULL) { /* try to allocate from a freelist bucket first */ @@ -137,6 +140,9 @@ void HeapFree(void *Mem) struct AllocNode *MemNode = (struct AllocNode *)(Mem-sizeof(int)); int Bucket = MemNode->Size >> 2; + if (Mem == NULL) + return; + if ((void *)MemNode == HeapBottom) { /* pop it off the bottom of the heap, reducing the heap size */ HeapBottom += sizeof(int) + MemNode->Size; diff --git a/intrinsic.c b/intrinsic.c index 8cbe8e3..40b48e8 100644 --- a/intrinsic.c +++ b/intrinsic.c @@ -44,9 +44,7 @@ void IntrinsicInit(struct Table *GlobalTable) for (Count = 0; Count < sizeof(Intrinsics) / sizeof(struct IntrinsicFunction); Count++) { - Source.Str = Intrinsics[Count].Prototype; - Source.Len = strlen(Source.Str); - LexInit(&Lexer, &Source, &IntrinsicFilename, Count+1); + LexInit(&Lexer, Intrinsics[Count].Prototype, strlen(Source.Str), &IntrinsicFilename, Count+1); TypeParse(&Lexer, &Typ, &Identifier); IntrinsicReferenceNo[Count] = -1 - Count; IntrinsicValue[Count].Typ = &FunctionType; diff --git a/lex.c b/lex.c index 713e0b0..9809860 100644 --- a/lex.c +++ b/lex.c @@ -1,5 +1,9 @@ +#include +#include #include #include +#include +#include #include "picoc.h" @@ -49,28 +53,47 @@ static struct ReservedWord ReservedWords[] = { "while", TokenWhile } }; -void LexInit(struct LexState *Lexer, const Str *Source, const Str *FileName, int Line) +struct LexState { - Lexer->Pos = Source->Str; - Lexer->End = Source->Str + Source->Len; - Lexer->Line = Line; - Lexer->FileName = FileName; + const char *Pos; + const char *End; + int Line; + const char *FileName; +}; + +void LexInit(struct ParseState *Parser, const char *Source, int SourceLen, const char *FileName, int Line) +{ + Parser->Pos = Source; + Parser->End = Source + SourceLen; + Parser->Line = Line; + Parser->FileName = FileName; } -enum LexToken LexCheckReservedWord(const Str *Word) +void LexFail(struct LexState *Lexer, const char *Message, ...) +{ + va_list Args; + + printf("%s:%d: ", Lexer->FileName, Lexer->Line); + va_start(Args, Message); + vprintf(Message, Args); + printf("\n"); + exit(1); +} + +enum LexToken LexCheckReservedWord(const char *Word) { int Count; for (Count = 0; Count < sizeof(ReservedWords) / sizeof(struct ReservedWord); Count++) { - if (StrEqualC(Word, ReservedWords[Count].Word)) + if (strcmp(Word, ReservedWords[Count].Word) == 0) return ReservedWords[Count].Token; } return TokenNone; } -enum LexToken LexGetNumber(struct LexState *Lexer, struct Value **Value) +enum LexToken LexGetNumber(struct LexState *Lexer, struct Value *Value) { int Result = 0; double FPResult; @@ -79,12 +102,12 @@ enum LexToken LexGetNumber(struct LexState *Lexer, struct Value **Value) for (; Lexer->Pos != Lexer->End && isdigit(*Lexer->Pos); Lexer->Pos++) Result = Result * 10 + (*Lexer->Pos - '0'); - (*Value)->Typ = &IntType; - (*Value)->Val->Integer = Result; + Value->Typ = &IntType; + Value->Val->Integer = Result; if (Lexer->Pos == Lexer->End || *Lexer->Pos != '.') return TokenIntegerConstant; - (*Value)->Typ = &FPType; + Value->Typ = &FPType; Lexer->Pos++; for (FPDiv = 0.1, FPResult = (double)Result; Lexer->Pos != Lexer->End && isdigit(*Lexer->Pos); Lexer->Pos++, FPDiv /= 10.0) FPResult += (*Lexer->Pos - '0') * FPDiv; @@ -101,7 +124,7 @@ enum LexToken LexGetNumber(struct LexState *Lexer, struct Value **Value) return TokenFPConstant; } -enum LexToken LexGetWord(struct LexState *Lexer, struct Value **Value) +enum LexToken LexGetWord(struct LexState *Lexer, struct Value *Value) { const char *Pos = Lexer->Pos + 1; enum LexToken Token; @@ -109,24 +132,24 @@ enum LexToken LexGetWord(struct LexState *Lexer, struct Value **Value) while (Lexer->Pos != Lexer->End && isCident(*Pos)) Pos++; - (*Value)->Typ = &StringType; - (*Value)->Val->String.Str = Lexer->Pos; - (*Value)->Val->String.Len = Pos - Lexer->Pos; + Value->Typ = &StringType; + Value->Val->String = (char *)StrRegister2(Lexer->Pos, Pos - Lexer->Pos); Lexer->Pos = Pos; - Token = LexCheckReservedWord(&(*Value)->Val->String); + Token = LexCheckReservedWord(Value->Val->String); if (Token != TokenNone) return Token; return TokenIdentifier; } -enum LexToken LexGetStringConstant(struct LexState *Lexer, struct Value **Value) +enum LexToken LexGetStringConstant(struct LexState *Lexer, struct Value *Value) { int Escape = FALSE; + const char *StartPos = Lexer->Pos; - (*Value)->Typ = &StringType; - (*Value)->Val->String.Str = Lexer->Pos; + // XXX - do escaping here + Value->Typ = &StringType; while (Lexer->Pos != Lexer->End && (*Lexer->Pos != '"' || Escape)) { if (Escape) @@ -136,25 +159,25 @@ enum LexToken LexGetStringConstant(struct LexState *Lexer, struct Value **Value) Lexer->Pos++; } - (*Value)->Val->String.Len = Lexer->Pos - (*Value)->Val->String.Str; + Value->Val->String = (char *)StrRegister2(StartPos, Lexer->Pos - StartPos); if (*Lexer->Pos == '"') Lexer->Pos++; return TokenStringConstant; } -enum LexToken LexGetCharacterConstant(struct LexState *Lexer, struct Value **Value) +enum LexToken LexGetCharacterConstant(struct LexState *Lexer, struct Value *Value) { - (*Value)->Typ = &IntType; - (*Value)->Val->Integer = Lexer->Pos[1]; + Value->Typ = &IntType; + Value->Val->Integer = Lexer->Pos[1]; if (Lexer->Pos[2] != '\'') - ProgramFail(Lexer, "illegal character '%c'", Lexer->Pos[2]); + LexFail(Lexer, "illegal character '%c'", Lexer->Pos[2]); Lexer->Pos += 3; return TokenCharacterConstant; } -enum LexToken LexGetComment(struct LexState *Lexer, char NextChar, struct Value **Value) +enum LexToken LexGetComment(struct LexState *Lexer, char NextChar, struct Value *Value) { Lexer->Pos++; if (NextChar == '*') @@ -174,11 +197,18 @@ enum LexToken LexGetComment(struct LexState *Lexer, char NextChar, struct Value return LexGetToken(Lexer, Value); } -enum LexToken LexGetTokenUncached(struct LexState *Lexer, struct Value **Value) +enum LexToken LexGetTokenToStack(struct LexState *Lexer, struct Value **Value) { char ThisChar; char NextChar; + if (Lexer->Pos == Lexer->End) + { + char LineBuffer[LINEBUFFER_MAX]; + if (fgets(&LineBuffer[0], LINEBUFFER_MAX, stdin) == NULL) + return TokenEOF; + } + *Value = &LexValue; while (Lexer->Pos != Lexer->End && isspace(*Lexer->Pos)) { @@ -188,29 +218,26 @@ enum LexToken LexGetTokenUncached(struct LexState *Lexer, struct Value **Value) Lexer->Pos++; } - if (Lexer->Pos == Lexer->End) - return TokenEOF; - ThisChar = *Lexer->Pos; if (isCidstart(ThisChar)) - return LexGetWord(Lexer, Value); + return LexGetWord(Lexer, *Value); if (isdigit(ThisChar)) - return LexGetNumber(Lexer, Value); + return LexGetNumber(Lexer, *Value); NextChar = (Lexer->Pos+1 != Lexer->End) ? *(Lexer->Pos+1) : 0; Lexer->Pos++; switch (ThisChar) { - case '"': return LexGetStringConstant(Lexer, Value); - case '\'': return LexGetCharacterConstant(Lexer, Value); + case '"': return LexGetStringConstant(Lexer, *Value); + case '\'': return LexGetCharacterConstant(Lexer, *Value); case '(': return TokenOpenBracket; case ')': return TokenCloseBracket; case '=': NEXTIS('=', TokenEquality, TokenAssign); case '+': NEXTIS3('=', TokenAddAssign, '+', TokenIncrement, TokenPlus); case '-': NEXTIS4('=', TokenSubtractAssign, '>', TokenArrow, '-', TokenDecrement, TokenMinus); case '*': return TokenAsterisk; - case '/': if (NextChar == '/' || NextChar == '*') return LexGetComment(Lexer, NextChar, Value); else return TokenSlash; + case '/': if (NextChar == '/' || NextChar == '*') return LexGetComment(Lexer, NextChar, *Value); else return TokenSlash; case '<': NEXTIS('=', TokenLessEqual, TokenLessThan); case '>': NEXTIS('=', TokenGreaterEqual, TokenGreaterThan); case ';': return TokenSemicolon; @@ -227,59 +254,47 @@ enum LexToken LexGetTokenUncached(struct LexState *Lexer, struct Value **Value) case '.': return TokenDot; } - ProgramFail(Lexer, "illegal character '%c'", ThisChar); + LexFail(Lexer, "illegal character '%c'", ThisChar); return TokenEOF; } -enum LexToken LexGetToken(struct LexState *Lexer, struct Value **Value) +void LexTokeniseToStack(struct LexState *Lexer, struct Value **Value) { - static const char *CachedPos = NULL; - static union AnyValue CachedAnyValue; - static struct Value CachedValue; - static struct LexState CachedLexer; - static enum LexToken CachedToken; + XXX - finish this +} + +enum LexToken LexGetToken(struct ParseState *Parser, struct Value **Value, int IncPos) +{ + enum LexToken; + + while (Parser->Pos != Parser->End && (enum LexToken)*(unsigned char *)Parser->Pos == TokenEndOfLine) + { /* skip leading newlines */ + Pos->Line++; + Pos++; + } - if (Lexer->Pos == CachedPos) - { - *Value = &CachedValue; - CachedValue.Val = &CachedAnyValue; - *Lexer = CachedLexer; + if (Parser->Pos == Parser->End) + return TokenEOF; + + LexToken = (enum LexToken)*(unsigned char *)Parser->Pos; + if (LexToken >= TokenIdentifier && LexToken <= TokenCharacterConstant) + { /* this token requires a value */ + int ValueLen = sizeof(struct Value) + ((struct Value *)Parser->Pos)->Typ->Sizeof; + if (Value != NULL) + { /* copy the value out (aligns it in the process) */ + memcpy(LexValue, (struct Value *)Parser->Pos, ValueLen); + *Value = &LexValue; + } + + if (IncPos) + Parser->Pos += ValueLen + 1; } else { - CachedPos = Lexer->Pos; - CachedToken = LexGetTokenUncached(Lexer, Value); - CachedLexer = *Lexer; - CachedValue = **Value; - CachedAnyValue = *(*Value)->Val; + if (IncPos) + Parser->Pos++; } - return CachedToken; + return LexToken; } -enum LexToken LexGetPlainToken(struct LexState *Lexer) -{ - struct Value *Value; - return LexGetToken(Lexer, &Value); -} - -/* look at the next token without changing the lexer state */ -enum LexToken LexPeekToken(struct LexState *Lexer, struct Value **Value) -{ - struct LexState LocalState = *Lexer; - return LexGetToken(&LocalState, Value); -} - -enum LexToken LexPeekPlainToken(struct LexState *Lexer) -{ - struct LexState LocalState = *Lexer; - struct Value *Value; - return LexGetToken(&LocalState, &Value); -} - -/* skip everything up to the end of the line */ -void LexToEndOfLine(struct LexState *Lexer) -{ - while (Lexer->Pos != Lexer->End && *Lexer->Pos != '\n') - Lexer->Pos++; -} diff --git a/parse.c b/parse.c index c0cc4a8..dd6feb0 100644 --- a/parse.c +++ b/parse.c @@ -8,25 +8,27 @@ int ParameterUsed = 0; struct Value *ReturnValue; /* local prototypes */ -int ParseIntExpression(struct LexState *Lexer, int RunIt); -int ParseStatement(struct LexState *Lexer, int RunIt); -int ParseArguments(struct LexState *Lexer, int RunIt); +int ParseIntExpression(struct ParseState *Parser, int RunIt); +int ParseStatement(struct ParseState *Parser, int RunIt); +int ParseArguments(struct ParseState *Parser, int RunIt); /* initialise the parser */ void ParseInit() { + StrInit(); VariableInit(); IntrinsicInit(&GlobalTable); TypeInit(); } /* parse a parameter list, defining parameters as local variables in the current scope */ -void ParseParameterList(struct LexState *CallLexer, struct LexState *FuncLexer, int RunIt) +void ParseParameterList(struct ParseState *CallLexer, struct FuncDef *Func, int RunIt) { + XXX - fix this struct ValueType *Typ; Str Identifier; - enum LexToken Token = LexGetPlainToken(FuncLexer); /* open bracket */ + enum LexToken Token = LexGetToken(FuncLexer, NULL, TRUE); /* open bracket */ int ParamCount; for (ParamCount = 0; ParamCount < ParameterUsed; ParamCount++) @@ -43,7 +45,7 @@ void ParseParameterList(struct LexState *CallLexer, struct LexState *FuncLexer, } } - Token = LexGetPlainToken(FuncLexer); + Token = LexGetToken(FuncLexer, NULL, TRUE); if (ParamCount < ParameterUsed-1 && Token != TokenComma) ProgramFail(FuncLexer, "comma expected"); } @@ -52,96 +54,97 @@ void ParseParameterList(struct LexState *CallLexer, struct LexState *FuncLexer, ProgramFail(FuncLexer, "')' expected"); if (ParameterUsed == 0) - Token = LexGetPlainToken(FuncLexer); + Token = LexGetToken(FuncLexer, NULL, TRUE); if (Token != TokenCloseBracket) ProgramFail(CallLexer, "wrong number of arguments"); } /* do a function call */ -void ParseFunctionCall(struct LexState *Lexer, struct Value **Result, int ResultOnHeap, Str *FuncName, int RunIt) +void ParseFunctionCall(struct ParseState *Parser, struct Value **Result, int ResultOnHeap, Str *FuncName, int RunIt) { - enum LexToken Token = LexGetPlainToken(Lexer); /* open bracket */ + XXX - fix this + enum LexToken Token = LexGetToken(Parser, NULL, TRUE); /* open bracket */ /* parse arguments */ ParameterUsed = 0; do { - if (ParseExpression(Lexer, &Parameter[ParameterUsed], FALSE, RunIt)) + if (ParseExpression(Parser, &Parameter[ParameterUsed], FALSE, RunIt)) { if (RunIt && ParameterUsed >= PARAMETER_MAX) - ProgramFail(Lexer, "too many arguments"); + ProgramFail(Parser, "too many arguments"); ParameterUsed++; - Token = LexGetPlainToken(Lexer); + Token = LexGetToken(Parser, NULL, TRUE); if (Token != TokenComma && Token != TokenCloseBracket) - ProgramFail(Lexer, "comma expected"); + ProgramFail(Parser, "comma expected"); } else { - Token = LexGetPlainToken(Lexer); + Token = LexGetToken(Parser, NULL, TRUE); if (!TokenCloseBracket) - ProgramFail(Lexer, "bad argument"); + ProgramFail(Parser, "bad argument"); } } while (Token != TokenCloseBracket); if (RunIt) { - struct LexState FuncLexer; + struct ParseState FuncLexer; struct ValueType *ReturnType; struct Value *FuncValue; Str FuncName; int Count; - VariableGet(Lexer, &FuncName, &FuncValue); + VariableGet(Parser, &FuncName, &FuncValue); if ((*Result)->Typ->Base != TypeFunction) - ProgramFail(Lexer, "not a function - can't call"); + ProgramFail(Parser, "not a function - can't call"); - VariableStackFrameAdd(Lexer); - if (FuncValue->Val->Lexer.Line >= 0) - FuncLexer = FuncValue->Val->Lexer; + VariableStackFrameAdd(Parser); + if (FuncValue->Val->Parser.Line >= 0) + FuncLexer = FuncValue->Val->Parser; else - IntrinsicGetLexer(&FuncLexer, FuncValue->Val->Lexer.Line); + IntrinsicGetLexer(&FuncLexer, FuncValue->Val->Parser.Line); TypeParse(&FuncLexer, &ReturnType, &FuncName); /* get the return type */ - *Result = VariableAllocValueFromType(Lexer, ReturnType, ResultOnHeap); - ParseParameterList(Lexer, &FuncLexer, TRUE); /* parameters */ - if (FuncValue->Val->Lexer.Line >= 0) + *Result = VariableAllocValueFromType(Parser, ReturnType, ResultOnHeap); + ParseParameterList(Parser, &FuncLexer, TRUE); /* parameters */ + if (FuncValue->Val->Parser.Line >= 0) { /* run a user-defined function */ - if (LexPeekPlainToken(&FuncLexer) != TokenLeftBrace || !ParseStatement(&FuncLexer, TRUE)) + if (LexGetToken(&FuncLexer, NULL, FALSE) != TokenLeftBrace || !ParseStatement(&FuncLexer, TRUE)) ProgramFail(&FuncLexer, "function body expected"); if (ReturnType != (*Result)->Typ) ProgramFail(&FuncLexer, "bad return value"); } else - IntrinsicCall(Lexer, *Result, ReturnType, (*Result)->Val->Lexer.Line); + IntrinsicCall(Parser, *Result, ReturnType, (*Result)->Val->Parser.Line); - VariableStackFramePop(Lexer); + VariableStackFramePop(Parser); for (Count = ParameterUsed-1; Count >= 0; Count--) /* free stack space used by parameters */ - VariableStackPop(Lexer, Parameter[Count]); + VariableStackPop(Parser, Parameter[Count]); } } /* parse a single value */ -int ParseValue(struct LexState *Lexer, struct Value **Result, int ResultOnHeap, int RunIt) +int ParseValue(struct ParseState *Parser, struct Value **Result, int ResultOnHeap, int RunIt) { - struct LexState PreState = *Lexer; + struct ParseState PreState = *Parser; struct Value *LexValue; int IntValue; - enum LexToken Token = LexGetToken(Lexer, &LexValue); + enum LexToken Token = LexGetToken(Parser, &LexValue, TRUE); switch (Token) { case TokenIntegerConstant: case TokenCharacterConstant: case TokenFPConstant: case TokenStringConstant: - *Result = VariableAllocValueAndCopy(Lexer, LexValue, ResultOnHeap); + *Result = VariableAllocValueAndCopy(Parser, LexValue, ResultOnHeap); break; case TokenMinus: case TokenUnaryExor: case TokenUnaryNot: - IntValue = ParseIntExpression(Lexer, RunIt); + IntValue = ParseIntExpression(Parser, RunIt); if (RunIt) { - *Result = VariableAllocValueFromType(Lexer, &IntType, ResultOnHeap); + *Result = VariableAllocValueFromType(Parser, &IntType, ResultOnHeap); switch(Token) { case TokenMinus: (*Result)->Val->Integer = -IntValue; break; @@ -153,73 +156,73 @@ int ParseValue(struct LexState *Lexer, struct Value **Result, int ResultOnHeap, break; case TokenOpenBracket: - if (!ParseExpression(Lexer, Result, ResultOnHeap, RunIt)) - ProgramFail(Lexer, "invalid expression"); + if (!ParseExpression(Parser, Result, ResultOnHeap, RunIt)) + ProgramFail(Parser, "invalid expression"); - if (LexGetPlainToken(Lexer) != TokenCloseBracket) - ProgramFail(Lexer, "')' expected"); + if (LexGetToken(Parser, NULL, TRUE) != TokenCloseBracket) + ProgramFail(Parser, "')' expected"); break; case TokenAsterisk: case TokenAmpersand: - ProgramFail(Lexer, "not implemented"); + ProgramFail(Parser, "not implemented"); case TokenIdentifier: - if (LexPeekPlainToken(Lexer) == TokenOpenBracket) - ParseFunctionCall(Lexer, Result, ResultOnHeap, &LexValue->Val->String, RunIt); + if (LexGetToken(Parser, NULL, FALSE) == TokenOpenBracket) + ParseFunctionCall(Parser, Result, ResultOnHeap, &LexValue->Val->String, RunIt); else { if (RunIt) { struct Value *IdentValue; - VariableGet(Lexer, &LexValue->Val->String, &IdentValue); + VariableGet(Parser, &LexValue->Val->String, &IdentValue); if (IdentValue->Typ->Base == TypeMacro) { - struct LexState MacroLexer = IdentValue->Val->Lexer; + struct ParseState MacroLexer = IdentValue->Val->Parser; if (!ParseExpression(&MacroLexer, Result, ResultOnHeap, TRUE)) ProgramFail(&MacroLexer, "expression expected"); } else if (!ISVALUETYPE(IdentValue->Typ)) - ProgramFail(Lexer, "bad variable type"); + ProgramFail(Parser, "bad variable type"); } } break; default: - *Lexer = PreState; + *Parser = PreState; return FALSE; } return TRUE; } -struct Value *ParsePushFP(struct LexState *Lexer, int ResultOnHeap, double NewFP) +struct Value *ParsePushFP(struct ParseState *Parser, int ResultOnHeap, double NewFP) { - struct Value *Val = VariableAllocValueFromType(Lexer, &FPType, ResultOnHeap); + struct Value *Val = VariableAllocValueFromType(Parser, &FPType, ResultOnHeap); Val->Val->FP = NewFP; return Val; } -struct Value *ParsePushInt(struct LexState *Lexer, int ResultOnHeap, int NewInt) +struct Value *ParsePushInt(struct ParseState *Parser, int ResultOnHeap, int NewInt) { - struct Value *Val = VariableAllocValueFromType(Lexer, &IntType, ResultOnHeap); + struct Value *Val = VariableAllocValueFromType(Parser, &IntType, ResultOnHeap); Val->Val->Integer = NewInt; return Val; } /* parse an expression. operator precedence is not supported */ -int ParseExpression(struct LexState *Lexer, struct Value **Result, int ResultOnHeap, int RunIt) +int ParseExpression(struct ParseState *Parser, struct Value **Result, int ResultOnHeap, int RunIt) { struct Value *CurrentValue; struct Value *TotalValue; - if (!ParseValue(Lexer, &TotalValue, ResultOnHeap, RunIt)) + if (!ParseValue(Parser, &TotalValue, ResultOnHeap, RunIt)) return FALSE; while (TRUE) { - enum LexToken Token = LexPeekPlainToken(Lexer); + enum LexToken Token = LexGetToken(Parser, NULL, FALSE); switch (Token) { case TokenPlus: case TokenMinus: case TokenAsterisk: case TokenSlash: @@ -227,18 +230,18 @@ int ParseExpression(struct LexState *Lexer, struct Value **Result, int ResultOnH case TokenLessEqual: case TokenGreaterEqual: case TokenLogicalAnd: case TokenLogicalOr: case TokenAmpersand: case TokenArithmeticOr: case TokenArithmeticExor: case TokenDot: - LexGetPlainToken(Lexer); + LexGetToken(Parser, NULL, TRUE); break; case TokenAssign: case TokenAddAssign: case TokenSubtractAssign: - LexGetPlainToken(Lexer); - if (!ParseExpression(Lexer, &CurrentValue, ResultOnHeap, RunIt)) - ProgramFail(Lexer, "expression expected"); + LexGetToken(Parser, NULL, TRUE); + if (!ParseExpression(Parser, &CurrentValue, ResultOnHeap, RunIt)) + ProgramFail(Parser, "expression expected"); if (RunIt) { if (CurrentValue->Typ->Base != TypeInt || TotalValue->Typ->Base != TypeInt) - ProgramFail(Lexer, "can't assign"); + ProgramFail(Parser, "can't assign"); switch (Token) { @@ -246,7 +249,7 @@ int ParseExpression(struct LexState *Lexer, struct Value **Result, int ResultOnH case TokenSubtractAssign: TotalValue->Val->Integer -= CurrentValue->Val->Integer; break; default: TotalValue->Val->Integer = CurrentValue->Val->Integer; break; } - VariableStackPop(Lexer, CurrentValue); + VariableStackPop(Parser, CurrentValue); } // fallthrough @@ -256,7 +259,7 @@ int ParseExpression(struct LexState *Lexer, struct Value **Result, int ResultOnH return TRUE; } - if (!ParseValue(Lexer, &CurrentValue, ResultOnHeap, RunIt)) + if (!ParseValue(Parser, &CurrentValue, ResultOnHeap, RunIt)) return FALSE; if (RunIt) @@ -272,39 +275,39 @@ int ParseExpression(struct LexState *Lexer, struct Value **Result, int ResultOnH else if (CurrentValue->Typ->Base == TypeFP) FPCurrent = CurrentValue->Val->FP; else - ProgramFail(Lexer, "bad type for operator"); + ProgramFail(Parser, "bad type for operator"); if (TotalValue->Typ->Base == TypeInt) FPTotal = (double)TotalValue->Val->Integer; else if (TotalValue->Typ->Base == TypeFP) FPTotal = TotalValue->Val->FP; else - ProgramFail(Lexer, "bad type for operator"); + ProgramFail(Parser, "bad type for operator"); } - VariableStackPop(Lexer, CurrentValue); - VariableStackPop(Lexer, TotalValue); + VariableStackPop(Parser, CurrentValue); + VariableStackPop(Parser, TotalValue); switch (Token) { - case TokenPlus: TotalValue = ParsePushFP(Lexer, ResultOnHeap, FPTotal + FPCurrent); break; - case TokenMinus: TotalValue = ParsePushFP(Lexer, ResultOnHeap, FPTotal - FPCurrent); break; - case TokenAsterisk: TotalValue = ParsePushFP(Lexer, ResultOnHeap, FPTotal * FPCurrent); break; - case TokenSlash: TotalValue = ParsePushFP(Lexer, ResultOnHeap, FPTotal / FPCurrent); break; - case TokenEquality: TotalValue = ParsePushInt(Lexer, ResultOnHeap, FPTotal == FPCurrent); break; - case TokenLessThan: TotalValue = ParsePushInt(Lexer, ResultOnHeap, FPTotal < FPCurrent); break; - case TokenGreaterThan: TotalValue = ParsePushInt(Lexer, ResultOnHeap, FPTotal > FPCurrent); break; - case TokenLessEqual: TotalValue = ParsePushInt(Lexer, ResultOnHeap, FPTotal <= FPCurrent); break; - case TokenGreaterEqual: TotalValue = ParsePushInt(Lexer, ResultOnHeap, FPTotal >= FPCurrent); break; - case TokenLogicalAnd: case TokenLogicalOr: case TokenAmpersand: case TokenArithmeticOr: case TokenArithmeticExor: ProgramFail(Lexer, "bad type for operator"); break; - case TokenDot: ProgramFail(Lexer, "operator not supported"); break; + case TokenPlus: TotalValue = ParsePushFP(Parser, ResultOnHeap, FPTotal + FPCurrent); break; + case TokenMinus: TotalValue = ParsePushFP(Parser, ResultOnHeap, FPTotal - FPCurrent); break; + case TokenAsterisk: TotalValue = ParsePushFP(Parser, ResultOnHeap, FPTotal * FPCurrent); break; + case TokenSlash: TotalValue = ParsePushFP(Parser, ResultOnHeap, FPTotal / FPCurrent); break; + case TokenEquality: TotalValue = ParsePushInt(Parser, ResultOnHeap, FPTotal == FPCurrent); break; + case TokenLessThan: TotalValue = ParsePushInt(Parser, ResultOnHeap, FPTotal < FPCurrent); break; + case TokenGreaterThan: TotalValue = ParsePushInt(Parser, ResultOnHeap, FPTotal > FPCurrent); break; + case TokenLessEqual: TotalValue = ParsePushInt(Parser, ResultOnHeap, FPTotal <= FPCurrent); break; + case TokenGreaterEqual: TotalValue = ParsePushInt(Parser, ResultOnHeap, FPTotal >= FPCurrent); break; + case TokenLogicalAnd: case TokenLogicalOr: case TokenAmpersand: case TokenArithmeticOr: case TokenArithmeticExor: ProgramFail(Parser, "bad type for operator"); break; + case TokenDot: ProgramFail(Parser, "operator not supported"); break; default: break; } } else { if (CurrentValue->Typ->Base != TypeInt || TotalValue->Typ->Base != TypeInt) - ProgramFail(Lexer, "bad operand types"); + ProgramFail(Parser, "bad operand types"); /* integer arithmetic */ switch (Token) @@ -323,11 +326,11 @@ int ParseExpression(struct LexState *Lexer, struct Value **Result, int ResultOnH case TokenAmpersand: TotalValue->Val->Integer = TotalValue->Val->Integer & CurrentValue->Val->Integer; break; case TokenArithmeticOr: TotalValue->Val->Integer = TotalValue->Val->Integer | CurrentValue->Val->Integer; break; case TokenArithmeticExor: TotalValue->Val->Integer = TotalValue->Val->Integer ^ CurrentValue->Val->Integer; break; - case TokenDot: ProgramFail(Lexer, "operator not supported"); break; + case TokenDot: ProgramFail(Parser, "operator not supported"); break; default: break; } } - VariableStackPop(Lexer, CurrentValue); + VariableStackPop(Parser, CurrentValue); *Result = TotalValue; } } @@ -336,123 +339,157 @@ int ParseExpression(struct LexState *Lexer, struct Value **Result, int ResultOnH } /* parse an expression. operator precedence is not supported */ -int ParseIntExpression(struct LexState *Lexer, int RunIt) +int ParseIntExpression(struct ParseState *Parser, int RunIt) { struct Value *Val; int Result = 0; - if (!ParseExpression(Lexer, &Val, FALSE, RunIt)) - ProgramFail(Lexer, "expression expected"); + if (!ParseExpression(Parser, &Val, FALSE, RunIt)) + ProgramFail(Parser, "expression expected"); if (RunIt) { if (Val->Typ->Base != TypeInt) - ProgramFail(Lexer, "integer value expected"); + ProgramFail(Parser, "integer value expected"); Result = Val->Val->Integer; - VariableStackPop(Lexer, Val); + VariableStackPop(Parser, Val); } return Result; } /* parse a function definition and store it for later */ -void ParseFunctionDefinition(struct LexState *Lexer, Str *Identifier, struct LexState *PreState) +void ParseFunctionDefinition(struct ParseState *Parser, struct ValueType *ReturnType, Str *Identifier) { - struct Value *FuncValue = VariableAllocValueAndData(Lexer, sizeof(struct LexState), TRUE); + struct ValueType *ParamTyp; + Str Identifier; + enum LexToken Token; + struct Value *FuncValue; + struct ParseState ParamParser; + int ParamCount = 0; - FuncValue->Val->Lexer = *PreState; - LexGetPlainToken(Lexer); - if (LexGetPlainToken(Lexer) != TokenCloseBracket || LexPeekPlainToken(Lexer) != TokenLeftBrace) - ProgramFail(Lexer, "bad function definition"); + LexGetToken(Parser, NULL, TRUE); /* open bracket */ + ParamParser = *Parser; + Token = LexGetToken(Parser, NULL, TRUE); + if (Token != TokenCloseBracket && Token != TokenEOF) + { /* count the number of parameters */ + ParamCount++; + while ((Token = LexGetToken(Parser, NULL, TRUE)) != TokenCloseBracket && Token != TokenEOF) + { + if (Token == TokenComma) + ParamCount++; + } + } - if (!ParseStatement(Lexer, FALSE)) - ProgramFail(Lexer, "function definition expected"); - - FuncValue->Val->Lexer.End = Lexer->Pos; + FuncValue = VariableAllocValueAndData(Parser, sizeof(struct FuncDef) + sizeof(struct ValueType *) * ParamCount + sizeof(Str *) * ParamCount, TRUE); FuncValue->Typ = &FunctionType; + FuncValue->Val->FuncDef.ReturnType = ReturnType; + FuncValue->Val->FuncDef.NumParams = ParamCount; + FuncValue->Val->FuncDef.ParamType = (void *)FuncValue->Val + sizeof(struct FuncDef); + FuncValue->Val->FuncDef.ParamName = (void *)FuncValue->Val->FuncDef.ParamType + sizeof(struct ValueType *) * ParamCount; + FuncValue->Val->FuncDef.Body = *Parser; + + for (ParamCount = 0; ParamCount < FuncValue->Val->FuncDef.NumParams; ParamCount++) + { /* harvest the parameters into the function definition */ + TypeParse(ParamParser, &Typ, &Identifier); + FuncValue->Val->FuncDef.ParamType[ParamCount] = Typ; + FuncValue->Val->FuncDef.ParamName[ParamCount] = Typ; + + Token = LexGetToken(ParamParser, NULL, TRUE); + if (Token != TokenComma) + ProgramFail(ParamParser, "comma expected"); + } + + if (LexGetToken(Parser, NULL, FALSE) != TokenLeftBrace) + ProgramFail(Parser, "bad function definition"); + + if (!ParseStatement(Parser, FALSE)) + ProgramFail(Parser, "function definition expected"); + + FuncValue->Val->FuncDef.Body.End = Parser->Pos; if (!TableSet(&GlobalTable, Identifier, FuncValue)) - ProgramFail(Lexer, "'%S' is already defined", Identifier); + ProgramFail(Parser, "'%S' is already defined", Identifier); } /* parse a #define macro definition and store it for later */ -void ParseMacroDefinition(struct LexState *Lexer) +void ParseMacroDefinition(struct ParseState *Parser) { + XXX - fix this struct Value *MacroName; - struct Value *MacroValue = VariableAllocValueAndData(Lexer, sizeof(struct LexState), TRUE); + struct Value *MacroValue = VariableAllocValueAndData(Parser, sizeof(struct ParseState), TRUE); - if (LexGetToken(Lexer, &MacroName) != TokenIdentifier) - ProgramFail(Lexer, "identifier expected"); + if (LexGetToken(Parser, &MacroName, TRUE) != TokenIdentifier) + ProgramFail(Parser, "identifier expected"); - MacroValue->Val->Lexer = *Lexer; - LexToEndOfLine(Lexer); - MacroValue->Val->Lexer.End = Lexer->Pos; + MacroValue->Val->Parser = *Parser; + MacroValue->Val->Parser.End = Parser->Pos; MacroValue->Typ = &MacroType; if (!TableSet(&GlobalTable, &MacroName->Val->String, MacroValue)) - ProgramFail(Lexer, "'%S' is already defined", &MacroName->Val->String); + ProgramFail(Parser, "'%S' is already defined", &MacroName->Val->String); } -void ParseFor(struct LexState *Lexer, int RunIt) +void ParseFor(struct ParseState *Parser, int RunIt) { int Condition; - struct LexState PreConditional; - struct LexState PreIncrement; - struct LexState PreStatement; - struct LexState After; + struct ParseState PreConditional; + struct ParseState PreIncrement; + struct ParseState PreStatement; + struct ParseState After; - if (LexGetPlainToken(Lexer) != TokenOpenBracket) - ProgramFail(Lexer, "'(' expected"); + if (LexGetToken(Parser, NULL, TRUE) != TokenOpenBracket) + ProgramFail(Parser, "'(' expected"); - if (!ParseStatement(Lexer, RunIt)) - ProgramFail(Lexer, "statement expected"); + if (!ParseStatement(Parser, RunIt)) + ProgramFail(Parser, "statement expected"); - PreConditional = *Lexer; - Condition = ParseIntExpression(Lexer, RunIt); + PreConditional = *Parser; + Condition = ParseIntExpression(Parser, RunIt); - if (LexGetPlainToken(Lexer) != TokenSemicolon) - ProgramFail(Lexer, "';' expected"); + if (LexGetToken(Parser, NULL, TRUE) != TokenSemicolon) + ProgramFail(Parser, "';' expected"); - PreIncrement = *Lexer; - ParseStatement(Lexer, FALSE); + PreIncrement = *Parser; + ParseStatement(Parser, FALSE); - if (LexGetPlainToken(Lexer) != TokenCloseBracket) - ProgramFail(Lexer, "')' expected"); + if (LexGetToken(Parser, NULL, TRUE) != TokenCloseBracket) + ProgramFail(Parser, "')' expected"); - PreStatement = *Lexer; - if (!ParseStatement(Lexer, RunIt && Condition)) - ProgramFail(Lexer, "statement expected"); + PreStatement = *Parser; + if (!ParseStatement(Parser, RunIt && Condition)) + ProgramFail(Parser, "statement expected"); - After = *Lexer; + After = *Parser; while (Condition && RunIt) { - *Lexer = PreIncrement; - ParseStatement(Lexer, TRUE); + *Parser = PreIncrement; + ParseStatement(Parser, TRUE); - *Lexer = PreConditional; - Condition = ParseIntExpression(Lexer, RunIt); + *Parser = PreConditional; + Condition = ParseIntExpression(Parser, RunIt); if (Condition) { - *Lexer = PreStatement; - ParseStatement(Lexer, TRUE); + *Parser = PreStatement; + ParseStatement(Parser, TRUE); } } - *Lexer = After; + *Parser = After; } /* parse a statement */ -int ParseStatement(struct LexState *Lexer, int RunIt) +int ParseStatement(struct ParseState *Parser, int RunIt) { struct Value *CValue; int Condition; - struct LexState PreState = *Lexer; + struct ParseState PreState = *Parser; Str Identifier; struct ValueType *Typ; - enum LexToken Token = LexGetPlainToken(Lexer); + enum LexToken Token = LexGetToken(Parser, NULL, TRUE); switch (Token) { @@ -460,43 +497,43 @@ int ParseStatement(struct LexState *Lexer, int RunIt) return FALSE; case TokenIdentifier: - *Lexer = PreState; - ParseExpression(Lexer, &CValue, FALSE, RunIt); - if (RunIt) VariableStackPop(Lexer, CValue); + *Parser = PreState; + ParseExpression(Parser, &CValue, FALSE, RunIt); + if (RunIt) VariableStackPop(Parser, CValue); break; case TokenLeftBrace: - while (ParseStatement(Lexer, RunIt)) + while (ParseStatement(Parser, RunIt)) {} - if (LexGetPlainToken(Lexer) != TokenRightBrace) - ProgramFail(Lexer, "'}' expected"); + if (LexGetToken(Parser, NULL, TRUE) != TokenRightBrace) + ProgramFail(Parser, "'}' expected"); break; case TokenIf: - Condition = ParseIntExpression(Lexer, RunIt); + Condition = ParseIntExpression(Parser, RunIt); - if (!ParseStatement(Lexer, RunIt && Condition)) - ProgramFail(Lexer, "statement expected"); + if (!ParseStatement(Parser, RunIt && Condition)) + ProgramFail(Parser, "statement expected"); - if (LexPeekPlainToken(Lexer) == TokenElse) + if (LexGetToken(Parser, NULL, FALSE) == TokenElse) { - LexGetPlainToken(Lexer); - if (!ParseStatement(Lexer, RunIt && !Condition)) - ProgramFail(Lexer, "statement expected"); + LexGetToken(Parser, NULL, TRUE); + if (!ParseStatement(Parser, RunIt && !Condition)) + ProgramFail(Parser, "statement expected"); } break; case TokenWhile: { - struct LexState PreConditional = *Lexer; + struct ParseState PreConditional = *Parser; do { - *Lexer = PreConditional; - Condition = ParseIntExpression(Lexer, RunIt); + *Parser = PreConditional; + Condition = ParseIntExpression(Parser, RunIt); - if (!ParseStatement(Lexer, RunIt && Condition)) - ProgramFail(Lexer, "statement expected"); + if (!ParseStatement(Parser, RunIt && Condition)) + ProgramFail(Parser, "statement expected"); } while (RunIt && Condition); } @@ -504,21 +541,21 @@ int ParseStatement(struct LexState *Lexer, int RunIt) case TokenDo: { - struct LexState PreStatement = *Lexer; + struct ParseState PreStatement = *Parser; do { - *Lexer = PreStatement; - if (!ParseStatement(Lexer, RunIt)) - ProgramFail(Lexer, "statement expected"); + *Parser = PreStatement; + if (!ParseStatement(Parser, RunIt)) + ProgramFail(Parser, "statement expected"); - Condition = ParseIntExpression(Lexer, RunIt); + Condition = ParseIntExpression(Parser, RunIt); } while (Condition && RunIt); } break; case TokenFor: - ParseFor(Lexer, RunIt); + ParseFor(Parser, RunIt); break; case TokenSemicolon: break; @@ -528,30 +565,29 @@ int ParseStatement(struct LexState *Lexer, int RunIt) case TokenFloatType: case TokenDoubleType: case TokenVoidType: - *Lexer = PreState; - TypeParse(Lexer, &Typ, &Identifier); + *Parser = PreState; + TypeParse(Parser, &Typ, &Identifier); if (Identifier.Len == 0) - ProgramFail(Lexer, "identifier expected"); + ProgramFail(Parser, "identifier expected"); /* handle function definitions */ - if (LexPeekPlainToken(Lexer) == TokenOpenBracket) - ParseFunctionDefinition(Lexer, &Identifier, &PreState); + if (LexGetToken(Parser, NULL, FALSE) == TokenOpenBracket) + ParseFunctionDefinition(Parser, &Typ, &Identifier); else - VariableDefine(Lexer, &Identifier, VariableAllocValueFromType(Lexer, Typ, FALSE)); + VariableDefine(Parser, &Identifier, VariableAllocValueFromType(Parser, Typ, FALSE)); break; case TokenHashDefine: - ParseMacroDefinition(Lexer); + ParseMacroDefinition(Parser); break; case TokenHashInclude: { struct Value *LexerValue; - if (LexGetToken(Lexer, &LexerValue) != TokenStringConstant) - ProgramFail(Lexer, "\"filename.h\" expected"); + if (LexGetToken(Parser, &LexerValue, TRUE) != TokenStringConstant) + ProgramFail(Parser, "\"filename.h\" expected"); ScanFile(&LexerValue->Val->String); - LexToEndOfLine(Lexer); break; } @@ -560,11 +596,11 @@ int ParseStatement(struct LexState *Lexer, int RunIt) case TokenBreak: case TokenReturn: case TokenDefault: - ProgramFail(Lexer, "not implemented yet"); + ProgramFail(Parser, "not implemented yet"); break; default: - *Lexer = PreState; + *Parser = PreState; return FALSE; } @@ -572,15 +608,15 @@ int ParseStatement(struct LexState *Lexer, int RunIt) } /* quick scan a source file for definitions */ -void Parse(const Str *FileName, const Str *Source, int RunIt) +void Parse(const Str *FileName, const Str *Source, int SourceLen, int RunIt) { - struct LexState Lexer; + struct ParseState Parser; - LexInit(&Lexer, Source, FileName, 1); + LexInit(&Parser, Source, SourceLen, FileName, 1); - while (ParseStatement(&Lexer, RunIt)) + while (ParseStatement(&Parser, RunIt)) {} - if (Lexer.Pos != Lexer.End) - ProgramFail(&Lexer, "parse error"); + if (Parser.Pos != Parser.End) + ProgramFail(&Parser, "parse error"); } diff --git a/picoc.c b/picoc.c index eed23c6..f7f1c96 100644 --- a/picoc.c +++ b/picoc.c @@ -9,79 +9,63 @@ /* all platform-dependent code is in this file */ -void Fail(const char *Message, ...) -{ - va_list Args; - - va_start(Args, Message); - vStrPrintf(Message, Args); - exit(1); -} - -void ProgramFail(struct LexState *Lexer, const char *Message, ...) +void ProgramFail(struct ParseState *Parser, const char *Message, ...) { va_list Args; - if (Lexer != NULL) - StrPrintf("%S:%d: ", Lexer->FileName, Lexer->Line); + if (Parser != NULL) + printf("%s:%d: ", Parser->FileName, Parser->Line); va_start(Args, Message); - vStrPrintf(Message, Args); - StrPrintf("\n"); + vprintf(Message, Args); + printf("\n"); exit(1); } -/* read a file into memory. this is the only function using malloc(). - * do it differently for embedded devices without malloc */ -Str ReadFile(const Str *FileName) +/* read a file into memory */ +char *ReadFile(const char *FileName) { struct stat FileInfo; char *ReadText; - Str Text; FILE *InFile; - char CFileName[PATH_MAX]; - StrToC(CFileName, PATH_MAX, FileName); + if (stat(FileName, &FileInfo)) + ProgramFail(NULL, "can't read file %s\n", FileName); - if (stat(CFileName, &FileInfo)) - Fail("can't read file %s\n", CFileName); - - ReadText = malloc(FileInfo.st_size); + ReadText = HeapAlloc(FileInfo.st_size); if (ReadText == NULL) - Fail("out of memory\n"); + ProgramFail(NULL, "out of memory\n"); - InFile = fopen(CFileName, "r"); + InFile = fopen(FileName, "r"); if (InFile == NULL) - Fail("can't read file %s\n", CFileName); + ProgramFail(NULL, "can't read file %s\n", FileName); if (fread(ReadText, 1, FileInfo.st_size, InFile) != FileInfo.st_size) - Fail("can't read file %s\n", CFileName); + ProgramFail(NULL, "can't read file %s\n", FileName); - Text.Str = ReadText; - Text.Len = FileInfo.st_size; fclose(InFile); - return Text; + return ReadText; } /* read and scan a file for definitions */ -void ScanFile(const Str *FileName) +void ScanFile(const char *FileName) { - Str SourceStr = ReadFile(FileName); - Parse(FileName, &SourceStr, TRUE); + char *SourceStr = ReadFile(FileName); + Parse(FileName, SourceStr, TRUE); + HeapFree(SourceStr); } int main(int argc, char **argv) { - Str FileName; - if (argc < 2) - Fail("Format: picoc ...\n"); + ProgramFail(NULL, "Format: picoc ...\n"); + HeapInit(); + StrInit(); ParseInit(); - StrFromC(&FileName, argv[1]); - ScanFile(&FileName); + ScanFile(argv[1]); return 0; } diff --git a/picoc.h b/picoc.h index 6f9e289..6141960 100644 --- a/picoc.h +++ b/picoc.h @@ -5,15 +5,16 @@ /* configurable options */ #define HEAP_SIZE 2048 /* space for the heap and the stack */ -#define GLOBAL_TABLE_SIZE 397 /* global variable table */ -#define FUNCTION_STORE_MAX 200 /* maximum number of used-defined functions and macros */ -#define STACK_MAX 10 /* maximum function call stack depth */ -#define PARAMETER_MAX 10 /* maximum number of parameters to a function */ -#define LOCAL_TABLE_SIZE 11 /* maximum number of local variables */ -#define STRUCT_TABLE_SIZE 11 /* maximum number of struct/union members */ #define LARGE_INT_POWER_OF_TEN 1000000000 /* the largest power of ten which fits in an int on this architecture */ #define ARCH_ALIGN_WORDSIZE sizeof(int) /* memory alignment boundary on this architecture */ +#define GLOBAL_TABLE_SIZE 397 /* global variable table */ +#define STRING_TABLE_SIZE 97 /* shared string table size */ +#define PARAMETER_MAX 10 /* maximum number of parameters to a function */ +#define LINEBUFFER_MAX 256 /* maximum number of characters on a line */ +#define LOCAL_TABLE_SIZE 11 /* size of local variable table (can expand) */ +#define STRUCT_TABLE_SIZE 11 /* size of struct/union member table (can expand) */ + /* handy definitions */ #ifndef TRUE #define TRUE 1 @@ -61,28 +62,13 @@ enum LexToken TokenHashDefine, TokenHashInclude }; -/* string type so we can use source file strings */ -typedef struct _Str -{ - int Len; - const char *Str; -} Str; - -/* lexer state - so we can lex nested files */ -struct LexState +/* parser state - has all this detail so we can parse nested files */ +struct ParseState { + const void *Pos; + const void *End; int Line; - const char *Pos; - const char *End; - const Str *FileName; -}; - -/* function definition - really just where it is in the source file */ -struct FuncDef -{ - Str Source; - Str FileName; - int StartLine; + const char *FileName; }; /* values */ @@ -103,18 +89,30 @@ enum BaseType TypeType /* a type (eg. typedef) */ }; +/* data type */ struct ValueType { enum BaseType Base; /* what kind of type this is */ int ArraySize; /* the size of an array type */ int Sizeof; /* the storage required */ - Str Identifier; /* the name of a struct or union */ + const char *Identifier; /* the name of a struct or union */ struct ValueType *FromType; /* the type we're derived from (or NULL) */ struct ValueType *DerivedTypeList; /* first in a list of types derived from this one */ struct ValueType *Next; /* next item in the derived type list */ struct Table *Members; /* members of a struct, union or enum */ }; +/* function definition */ +struct FuncDef +{ + struct ValueType *ReturnType; /* the return value type */ + int NumParams; /* the number of parameters */ + struct Typ *ParamType; /* array of parameter types */ + const char **ParamName; /* array of parameter names */ + void (*Intrinsic)(); /* intrinsic call address or NULL */ + struct ParseState Body; /* lexical tokens of the function body if not intrinsic */ +}; + struct ArrayValue { unsigned int Size; /* the number of elements in the array */ @@ -136,11 +134,12 @@ union AnyValue short ShortInteger; int Integer; double FP; - Str String; + char *String; struct ArrayValue Array; struct PointerValue Pointer; - struct LexState Lexer; + struct ParseState Parser; struct ValueType *Typ; + struct FuncDef FuncDef; }; struct Value @@ -154,7 +153,7 @@ struct Value /* hash table data structure */ struct TableEntry { - Str Key; + const char *Key; struct Value *Val; struct TableEntry *Next; }; @@ -169,7 +168,7 @@ struct Table /* stack frame for function calls */ struct StackFrame { - struct LexState ReturnLex; /* how we got here */ + struct ParseState ReturnParser; /* how we got here */ struct Table LocalTable; /* the local variables and parameters */ struct TableEntry *LocalHashTable[LOCAL_TABLE_SIZE]; struct StackFrame *PreviousStackFrame; /* the next lower stack frame */ @@ -188,49 +187,38 @@ extern struct ValueType FPType; extern struct ValueType VoidType; extern struct ValueType FunctionType; extern struct ValueType MacroType; -extern Str StrEmpty; - -/* str.c */ -void StrToC(char *Dest, int DestSize, const Str *Source); -void StrFromC(Str *Dest, const char *Source); -int StrEqual(const Str *Str1, const Str *Str2); -int StrEqualC(const Str *Str1, const char *Str2); -void StrPrintf(const char *Format, ...); -void vStrPrintf(const char *Format, va_list Args); /* picoc.c */ -void Fail(const char *Message, ...); -void ProgramFail(struct LexState *Lexer, const char *Message, ...); -void ScanFile(const Str *FileName); +void ProgramFail(struct ParseState *Parser, const char *Message, ...); +void ScanFile(const char *FileName); /* table.c */ void TableInit(struct Table *Tbl, struct TableEntry **HashTable, int Size, int OnHeap); -int TableSet(struct Table *Tbl, const Str *Key, struct Value *Val); -int TableGet(struct Table *Tbl, const Str *Key, struct Value **Val); +int TableSet(struct Table *Tbl, const char *Key, struct Value *Val); +int TableGet(struct Table *Tbl, const char *Key, struct Value **Val); +const char *TableSetKey(struct Table *Tbl, const char *Ident, int IdentLen); /* lex.c */ -void LexInit(struct LexState *Lexer, const Str *Source, const Str *FileName, int Line); -enum LexToken LexGetToken(struct LexState *Lexer, struct Value **Value); -enum LexToken LexGetPlainToken(struct LexState *Lexer); -enum LexToken LexPeekToken(struct LexState *Lexer, struct Value **Value); -enum LexToken LexPeekPlainToken(struct LexState *Lexer); -void LexToEndOfLine(struct LexState *Lexer); +void LexInit(struct ParseState *Parser, const char *Source, int SourceLen, const char *FileName, int Line); +enum LexToken LexGetToken(struct ParseState *Parser, struct Value **Value, int IncPos); +void LexToEndOfLine(struct ParseState *Parser); /* parse.c */ void ParseInit(void); -int ParseExpression(struct LexState *Lexer, struct Value **Result, int ResultOnHeap, int RunIt); -int ParseIntExpression(struct LexState *Lexer, int RunIt); -void Parse(const Str *FileName, const Str *Source, int RunIt); +int ParseExpression(struct ParseState *Parser, struct Value **Result, int ResultOnHeap, int RunIt); +int ParseIntExpression(struct ParseState *Parser, int RunIt); +int ParseStatement(struct ParseState *Parser, int RunIt); +void Parse(const char *FileName, const char *Source, int SourceLen, int RunIt); /* type.c */ void TypeInit(); int TypeSizeof(struct ValueType *Typ); -void TypeParse(struct LexState *Lexer, struct ValueType **Typ, Str *Identifier); +void TypeParse(struct ParseState *Parser, struct ValueType **Typ, const char **Identifier); /* intrinsic.c */ void IntrinsicInit(struct Table *GlobalTable); -void IntrinsicGetLexer(struct LexState *Lexer, int IntrinsicId); -void IntrinsicCall(struct LexState *Lexer, struct Value *Result, struct ValueType *ReturnType, int IntrinsicId); +void IntrinsicGetLexer(struct ParseState *Parser, int IntrinsicId); +void IntrinsicCall(struct ParseState *Parser, struct Value *Result, struct ValueType *ReturnType, int IntrinsicId); /* heap.c */ void HeapInit(); @@ -243,15 +231,20 @@ void HeapFree(void *Mem); /* variable.c */ void VariableInit(); -void *VariableAlloc(struct LexState *Lexer, int Size, int OnHeap); -void VariableStackPop(struct LexState *Lexer, struct Value *Var); -struct Value *VariableAllocValueAndData(struct LexState *Lexer, int DataSize, int OnHeap); -struct Value *VariableAllocValueAndCopy(struct LexState *Lexer, struct Value *FromValue, int OnHeap); -struct Value *VariableAllocValueFromType(struct LexState *Lexer, struct ValueType *Typ, int OnHeap); -void VariableDefine(struct LexState *Lexer, const Str *Ident, struct Value *InitValue); -int VariableDefined(Str *Ident); -void VariableGet(struct LexState *Lexer, Str *Ident, struct Value **LVal); -void VariableStackFrameAdd(struct LexState *Lexer); -void VariableStackFramePop(struct LexState *Lexer); +void *VariableAlloc(struct ParseState *Parser, int Size, int OnHeap); +void VariableStackPop(struct ParseState *Parser, struct Value *Var); +struct Value *VariableAllocValueAndData(struct ParseState *Parser, int DataSize, int OnHeap); +struct Value *VariableAllocValueAndCopy(struct ParseState *Parser, struct Value *FromValue, int OnHeap); +struct Value *VariableAllocValueFromType(struct ParseState *Parser, struct ValueType *Typ, int OnHeap); +void VariableDefine(struct ParseState *Parser, const char *Ident, struct Value *InitValue); +int VariableDefined(const char *Ident); +void VariableGet(struct ParseState *Parser, const char *Ident, struct Value **LVal); +void VariableStackFrameAdd(struct ParseState *Parser); +void VariableStackFramePop(struct ParseState *Parser); + +/* str.c */ +void StrInit(); +const char *StrRegister(const char *Str); +const char *StrRegister2(const char *Str, int Len); #endif /* PICOC_H */ diff --git a/str.c b/str.c index 754ccef..443a633 100644 --- a/str.c +++ b/str.c @@ -1,41 +1,29 @@ +/* maintains a shared string table so we don't have to worry about string allocation */ #include #include #include #include #include - #include "picoc.h" -Str StrEmpty = { 0, "" }; +struct Table StringTable; +struct TableEntry *StringHashTable[STRING_TABLE_SIZE]; -/* convert a Str to a C string */ -void StrToC(char *Dest, int DestSize, const Str *Source) +/* initialise the shared string system */ +void StrInit() { - int CopyLen = min(DestSize-1, Source->Len); - memcpy(Dest, Source->Str, CopyLen); - Dest[CopyLen] = '\0'; + TableInit(&StringTable, &StringHashTable[0], STRING_TABLE_SIZE, TRUE); } -/* convert a C string to a Str */ -void StrFromC(Str *Dest, const char *Source) +/* register a string in the shared string store */ +const char *StrRegister2(const char *Str, int Len) { - Dest->Str = Source; - Dest->Len = strlen(Source); + return TableSetKey(&StringTable, Str, Len); } -/* compare two Strs for equality */ -int StrEqual(const Str *Str1, const Str *Str2) +const char *StrRegister(const char *Str) { - if (Str1->Len != Str2->Len) - return FALSE; - - return memcmp(Str1->Str, Str2->Str, Str1->Len) == 0; -} - -/* compare a Str to a C string */ -int StrEqualC(const Str *Str1, const char *Str2) -{ - return strncmp(Str1->Str, Str2, Str1->Len) == 0 && Str2[Str1->Len] == '\0'; + return StrRegister2(Str, strlen(Str)); } /* print an integer to a stream without using printf/sprintf */ @@ -93,6 +81,7 @@ void StrPrintFP(double Num, FILE *Stream) } } +#if 0 /* Str version of printf */ void StrPrintf(const char *Format, ...) { @@ -128,3 +117,5 @@ void vStrPrintf(const char *Format, va_list Args) putchar(*FPos); } } +#endif + diff --git a/table.c b/table.c index dc84bb7..e6096a5 100644 --- a/table.c +++ b/table.c @@ -2,12 +2,12 @@ #include "picoc.h" /* quick hash function */ -static unsigned int TableHash(const Str *Key) +static unsigned int TableHash(const char *Key, int KeyLen) { - if (Key->Len == 0) + if (KeyLen == 0) return 0; else - return ((*Key->Str << 24) | (Key->Str[Key->Len-1] << 16) | (Key->Str[Key->Len >> 1] << 8)) ^ Key->Len; + return ((*Key << 24) | (Key[KeyLen-1] << 16) | (Key[KeyLen >> 1] << 8)) ^ KeyLen; } /* initialise a table */ @@ -20,14 +20,14 @@ void TableInit(struct Table *Tbl, struct TableEntry **HashTable, int Size, int O } /* check a hash table entry for a key */ -static int TableSearch(struct Table *Tbl, const Str *Key, int *AddAt) +static int TableSearch(struct Table *Tbl, const char *Key, int Len, int *AddAt) { struct TableEntry *Entry; - int HashValue = TableHash(Key) % Tbl->Size;; + int HashValue = TableHash(Key, Len) % Tbl->Size; for (Entry = Tbl->HashTable[HashValue]; Entry != NULL; Entry = Entry->Next) { - if (StrEqual(&Entry->Key, Key)) + if (strncmp(Entry->Key, Key, Len) == 0 && Entry->Key[Len] == '\0') return HashValue; /* found */ } @@ -36,17 +36,16 @@ static int TableSearch(struct Table *Tbl, const Str *Key, int *AddAt) } /* set an identifier to a value. returns FALSE if it already exists */ -int TableSet(struct Table *Tbl, const Str *Key, struct Value *Val) +int TableSet(struct Table *Tbl, const char *Key, struct Value *Val) { - int HashPos; int AddAt; + int KeyLen = strlen(Key); + int HashPos = TableSearch(Tbl, Key, KeyLen, &AddAt); - HashPos = TableSearch(Tbl, Key, &AddAt); - if (HashPos == -1) { /* add it to the table */ struct TableEntry *NewEntry = VariableAlloc(NULL, sizeof(struct TableEntry), Tbl->OnHeap); - NewEntry->Key = *Key; + NewEntry->Key = Key; NewEntry->Val = Val; NewEntry->Next = Tbl->HashTable[AddAt]; Tbl->HashTable[AddAt] = NewEntry; @@ -57,16 +56,33 @@ int TableSet(struct Table *Tbl, const Str *Key, struct Value *Val) } /* find a value in a table. returns FALSE if not found */ -int TableGet(struct Table *Tbl, const Str *Key, struct Value **Val) +int TableGet(struct Table *Tbl, const char *Key, struct Value **Val) { - int HashPos; int AddAt; - - HashPos = TableSearch(Tbl, Key, &AddAt); - + int HashPos = TableSearch(Tbl, Key, strlen(Key), &AddAt); if (HashPos == -1) return FALSE; *Val = Tbl->HashTable[HashPos]->Val; return TRUE; } + +/* set an identifier and return the identifier. share if possible */ +const char *TableSetKey(struct Table *Tbl, const char *Ident, int IdentLen) +{ + int AddAt; + int HashPos = TableSearch(Tbl, Ident, IdentLen, &AddAt); + + if (HashPos != -1) + return Tbl->HashTable[HashPos]->Key; + else + { /* add it to the table */ + struct TableEntry *NewEntry = HeapAlloc(sizeof(struct TableEntry) + IdentLen + 1); + NewEntry->Key = (void *)NewEntry + sizeof(struct TableEntry); + strncpy((char *)NewEntry->Key, Ident, IdentLen); + NewEntry->Val = NULL; + NewEntry->Next = Tbl->HashTable[AddAt]; + Tbl->HashTable[AddAt] = NewEntry; + return NewEntry->Key; + } +} diff --git a/variable.c b/variable.c index b708747..b5f60ea 100644 --- a/variable.c +++ b/variable.c @@ -18,7 +18,7 @@ void VariableInit() } /* allocate some memory, either on the heap or the stack and check if we've run out */ -void *VariableAlloc(struct LexState *Lexer, int Size, int OnHeap) +void *VariableAlloc(struct ParseState *Parser, int Size, int OnHeap) { void *NewValue; @@ -28,15 +28,15 @@ void *VariableAlloc(struct LexState *Lexer, int Size, int OnHeap) NewValue = HeapAllocStack(Size); if (NewValue == NULL) - ProgramFail(Lexer, "out of memory"); + ProgramFail(Parser, "out of memory"); return NewValue; } /* allocate a value either on the heap or the stack using space dependent on what type we want */ -struct Value *VariableAllocValueAndData(struct LexState *Lexer, int DataSize, int OnHeap) +struct Value *VariableAllocValueAndData(struct ParseState *Parser, int DataSize, int OnHeap) { - struct Value *NewValue = VariableAlloc(Lexer, DataSize, OnHeap); + struct Value *NewValue = VariableAlloc(Parser, DataSize, OnHeap); NewValue->Val = (union AnyValue *)((void *)NewValue + sizeof(struct Value)); NewValue->ValOnHeap = OnHeap; NewValue->ValOnStack = !OnHeap; @@ -45,31 +45,31 @@ struct Value *VariableAllocValueAndData(struct LexState *Lexer, int DataSize, in } /* allocate a value given its type */ -struct Value *VariableAllocValueFromType(struct LexState *Lexer, struct ValueType *Typ, int OnHeap) +struct Value *VariableAllocValueFromType(struct ParseState *Parser, struct ValueType *Typ, int OnHeap) { - struct Value *NewValue = VariableAllocValueAndData(Lexer, Typ->Sizeof, OnHeap); + struct Value *NewValue = VariableAllocValueAndData(Parser, Typ->Sizeof, OnHeap); NewValue->Typ = Typ; return NewValue; } /* allocate a value either on the heap or the stack and copy its value */ -struct Value *VariableAllocValueAndCopy(struct LexState *Lexer, struct Value *FromValue, int OnHeap) +struct Value *VariableAllocValueAndCopy(struct ParseState *Parser, struct Value *FromValue, int OnHeap) { - struct Value *NewValue = VariableAllocValueAndData(Lexer, FromValue->Typ->Sizeof, OnHeap); + struct Value *NewValue = VariableAllocValueAndData(Parser, FromValue->Typ->Sizeof, OnHeap); NewValue->Typ = FromValue->Typ; memcpy(NewValue->Val, FromValue->Val, FromValue->Typ->Sizeof); return NewValue; } /* define a variable */ -void VariableDefine(struct LexState *Lexer, const Str *Ident, struct Value *InitValue) +void VariableDefine(struct ParseState *Parser, const char *Ident, struct Value *InitValue) { - if (!TableSet((TopStackFrame == NULL) ? &GlobalTable : &TopStackFrame->LocalTable, Ident, VariableAllocValueAndCopy(Lexer, InitValue, TopStackFrame == NULL))) - ProgramFail(Lexer, "'%S' is already defined", Ident); + if (!TableSet((TopStackFrame == NULL) ? &GlobalTable : &TopStackFrame->LocalTable, Ident, VariableAllocValueAndCopy(Parser, InitValue, TopStackFrame == NULL))) + ProgramFail(Parser, "'%S' is already defined", Ident); } /* check if a variable with a given name is defined */ -int VariableDefined(Str *Ident) +int VariableDefined(const char *Ident) { struct Value *FoundValue; @@ -83,17 +83,17 @@ int VariableDefined(Str *Ident) } /* get the value of a variable. must be defined */ -void VariableGet(struct LexState *Lexer, Str *Ident, struct Value **LVal) +void VariableGet(struct ParseState *Parser, const char *Ident, struct Value **LVal) { if (TopStackFrame == NULL || !TableGet(&TopStackFrame->LocalTable, Ident, LVal)) { if (!TableGet(&GlobalTable, Ident, LVal)) - ProgramFail(Lexer, "'%S' is undefined", Ident); + ProgramFail(Parser, "'%S' is undefined", Ident); } } /* free and/or pop the top value off the stack. Var must be the top value on the stack! */ -void VariableStackPop(struct LexState *Lexer, struct Value *Var) +void VariableStackPop(struct ParseState *Parser, struct Value *Var) { int Success; @@ -108,29 +108,29 @@ void VariableStackPop(struct LexState *Lexer, struct Value *Var) Success = HeapPopStack(Var, sizeof(struct Value)); /* value isn't our problem */ if (!Success) - ProgramFail(Lexer, "stack underrun"); + ProgramFail(Parser, "stack underrun"); } /* add a stack frame when doing a function call */ -void VariableStackFrameAdd(struct LexState *Lexer) +void VariableStackFrameAdd(struct ParseState *Parser) { struct StackFrame *NewFrame; HeapPushStackFrame(); NewFrame = HeapAllocStack(sizeof(struct StackFrame)); - NewFrame->ReturnLex = *Lexer; + NewFrame->ReturnParser = *Parser; TableInit(&NewFrame->LocalTable, &NewFrame->LocalHashTable[0], LOCAL_TABLE_SIZE, FALSE); NewFrame->PreviousStackFrame = TopStackFrame; TopStackFrame = NewFrame; } /* remove a stack frame */ -void VariableStackFramePop(struct LexState *Lexer) +void VariableStackFramePop(struct ParseState *Parser) { if (TopStackFrame == NULL) - ProgramFail(Lexer, "stack is empty - can't go back"); + ProgramFail(Parser, "stack is empty - can't go back"); TopStackFrame = TopStackFrame->PreviousStackFrame; - *Lexer = TopStackFrame->ReturnLex; + *Parser = TopStackFrame->ReturnParser; HeapPopStackFrame(); }