Bulko lexer change for more efficient pre-scanned tokens.

Removed Str type - replaced with standard C strings.
Added hashed string tables for efficient string storage.


git-svn-id: http://picoc.googlecode.com/svn/trunk@43 21eae674-98b7-11dd-bd71-f92a316d2d60
This commit is contained in:
zik.saleeba 2009-02-01 11:31:18 +00:00
parent 921ccdf17d
commit 86af5318da
10 changed files with 463 additions and 424 deletions

View file

@ -3,7 +3,7 @@ CFLAGS=-Wall -g
LIBS=-lm
TARGET = picoc
SRCS = picoc.c table.c str.c parse.c lex.c intrinsic.c heap.c type.c variable.c
SRCS = picoc.c table.c lex.c parse.c intrinsic.c heap.c type.c variable.c str.c
OBJS := $(SRCS:%.c=%.o)
all: $(TARGET)

8
heap.c
View file

@ -88,7 +88,10 @@ void *HeapAlloc(int Size)
struct AllocNode *NewMem = NULL;
struct AllocNode **FreeNode;
int AllocSize = MEM_ALIGN(Size) + sizeof(NewMem->Size);
int Bucket = AllocSize >> 2;
int Bucket = AllocSize >> 2;
if (Size == 0)
return NULL;
if (Bucket < FREELIST_BUCKETS && FreeListBucket[Bucket] != NULL)
{ /* try to allocate from a freelist bucket first */
@ -137,6 +140,9 @@ void HeapFree(void *Mem)
struct AllocNode *MemNode = (struct AllocNode *)(Mem-sizeof(int));
int Bucket = MemNode->Size >> 2;
if (Mem == NULL)
return;
if ((void *)MemNode == HeapBottom)
{ /* pop it off the bottom of the heap, reducing the heap size */
HeapBottom += sizeof(int) + MemNode->Size;

View file

@ -44,9 +44,7 @@ void IntrinsicInit(struct Table *GlobalTable)
for (Count = 0; Count < sizeof(Intrinsics) / sizeof(struct IntrinsicFunction); Count++)
{
Source.Str = Intrinsics[Count].Prototype;
Source.Len = strlen(Source.Str);
LexInit(&Lexer, &Source, &IntrinsicFilename, Count+1);
LexInit(&Lexer, Intrinsics[Count].Prototype, strlen(Source.Str), &IntrinsicFilename, Count+1);
TypeParse(&Lexer, &Typ, &Identifier);
IntrinsicReferenceNo[Count] = -1 - Count;
IntrinsicValue[Count].Typ = &FunctionType;

171
lex.c
View file

@ -1,5 +1,9 @@
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <math.h>
#include <string.h>
#include <stdarg.h>
#include "picoc.h"
@ -49,28 +53,47 @@ static struct ReservedWord ReservedWords[] =
{ "while", TokenWhile }
};
void LexInit(struct LexState *Lexer, const Str *Source, const Str *FileName, int Line)
struct LexState
{
Lexer->Pos = Source->Str;
Lexer->End = Source->Str + Source->Len;
Lexer->Line = Line;
Lexer->FileName = FileName;
const char *Pos;
const char *End;
int Line;
const char *FileName;
};
/*
 * Point a parse state at a buffer of source text.
 *
 * Parser    - state to initialise
 * Source    - first character of the text
 * SourceLen - number of characters in the text; End is set to Source + SourceLen
 * FileName  - name stored in the state alongside the position
 * Line      - line number stored in the state as the starting line
 */
void LexInit(struct ParseState *Parser, const char *Source, int SourceLen, const char *FileName, int Line)
{
    const char *SourceEnd = Source + SourceLen;   /* one past the last character */

    Parser->FileName = FileName;
    Parser->Line = Line;
    Parser->Pos = Source;
    Parser->End = SourceEnd;
}
enum LexToken LexCheckReservedWord(const Str *Word)
/*
 * Report a fatal lexer error and terminate the program.
 *
 * Writes "<FileName>:<Line>: " taken from Lexer to stdout, followed by the
 * printf-style Message expanded with the variadic arguments and a newline,
 * then calls exit(1). This function does not return.
 */
void LexFail(struct LexState *Lexer, const char *Message, ...)
{
    va_list Args;

    printf("%s:%d: ", Lexer->FileName, Lexer->Line);
    va_start(Args, Message);
    vprintf(Message, Args);
    va_end(Args);   /* each va_start must be matched by va_end in the same function */
    printf("\n");
    exit(1);
}
enum LexToken LexCheckReservedWord(const char *Word)
{
int Count;
for (Count = 0; Count < sizeof(ReservedWords) / sizeof(struct ReservedWord); Count++)
{
if (StrEqualC(Word, ReservedWords[Count].Word))
if (strcmp(Word, ReservedWords[Count].Word) == 0)
return ReservedWords[Count].Token;
}
return TokenNone;
}
enum LexToken LexGetNumber(struct LexState *Lexer, struct Value **Value)
enum LexToken LexGetNumber(struct LexState *Lexer, struct Value *Value)
{
int Result = 0;
double FPResult;
@ -79,12 +102,12 @@ enum LexToken LexGetNumber(struct LexState *Lexer, struct Value **Value)
for (; Lexer->Pos != Lexer->End && isdigit(*Lexer->Pos); Lexer->Pos++)
Result = Result * 10 + (*Lexer->Pos - '0');
(*Value)->Typ = &IntType;
(*Value)->Val->Integer = Result;
Value->Typ = &IntType;
Value->Val->Integer = Result;
if (Lexer->Pos == Lexer->End || *Lexer->Pos != '.')
return TokenIntegerConstant;
(*Value)->Typ = &FPType;
Value->Typ = &FPType;
Lexer->Pos++;
for (FPDiv = 0.1, FPResult = (double)Result; Lexer->Pos != Lexer->End && isdigit(*Lexer->Pos); Lexer->Pos++, FPDiv /= 10.0)
FPResult += (*Lexer->Pos - '0') * FPDiv;
@ -101,7 +124,7 @@ enum LexToken LexGetNumber(struct LexState *Lexer, struct Value **Value)
return TokenFPConstant;
}
enum LexToken LexGetWord(struct LexState *Lexer, struct Value **Value)
enum LexToken LexGetWord(struct LexState *Lexer, struct Value *Value)
{
const char *Pos = Lexer->Pos + 1;
enum LexToken Token;
@ -109,24 +132,24 @@ enum LexToken LexGetWord(struct LexState *Lexer, struct Value **Value)
while (Lexer->Pos != Lexer->End && isCident(*Pos))
Pos++;
(*Value)->Typ = &StringType;
(*Value)->Val->String.Str = Lexer->Pos;
(*Value)->Val->String.Len = Pos - Lexer->Pos;
Value->Typ = &StringType;
Value->Val->String = (char *)StrRegister2(Lexer->Pos, Pos - Lexer->Pos);
Lexer->Pos = Pos;
Token = LexCheckReservedWord(&(*Value)->Val->String);
Token = LexCheckReservedWord(Value->Val->String);
if (Token != TokenNone)
return Token;
return TokenIdentifier;
}
enum LexToken LexGetStringConstant(struct LexState *Lexer, struct Value **Value)
enum LexToken LexGetStringConstant(struct LexState *Lexer, struct Value *Value)
{
int Escape = FALSE;
const char *StartPos = Lexer->Pos;
(*Value)->Typ = &StringType;
(*Value)->Val->String.Str = Lexer->Pos;
// XXX - do escaping here
Value->Typ = &StringType;
while (Lexer->Pos != Lexer->End && (*Lexer->Pos != '"' || Escape))
{
if (Escape)
@ -136,25 +159,25 @@ enum LexToken LexGetStringConstant(struct LexState *Lexer, struct Value **Value)
Lexer->Pos++;
}
(*Value)->Val->String.Len = Lexer->Pos - (*Value)->Val->String.Str;
Value->Val->String = (char *)StrRegister2(StartPos, Lexer->Pos - StartPos);
if (*Lexer->Pos == '"')
Lexer->Pos++;
return TokenStringConstant;
}
enum LexToken LexGetCharacterConstant(struct LexState *Lexer, struct Value **Value)
enum LexToken LexGetCharacterConstant(struct LexState *Lexer, struct Value *Value)
{
(*Value)->Typ = &IntType;
(*Value)->Val->Integer = Lexer->Pos[1];
Value->Typ = &IntType;
Value->Val->Integer = Lexer->Pos[1];
if (Lexer->Pos[2] != '\'')
ProgramFail(Lexer, "illegal character '%c'", Lexer->Pos[2]);
LexFail(Lexer, "illegal character '%c'", Lexer->Pos[2]);
Lexer->Pos += 3;
return TokenCharacterConstant;
}
enum LexToken LexGetComment(struct LexState *Lexer, char NextChar, struct Value **Value)
enum LexToken LexGetComment(struct LexState *Lexer, char NextChar, struct Value *Value)
{
Lexer->Pos++;
if (NextChar == '*')
@ -174,11 +197,18 @@ enum LexToken LexGetComment(struct LexState *Lexer, char NextChar, struct Value
return LexGetToken(Lexer, Value);
}
enum LexToken LexGetTokenUncached(struct LexState *Lexer, struct Value **Value)
enum LexToken LexGetTokenToStack(struct LexState *Lexer, struct Value **Value)
{
char ThisChar;
char NextChar;
if (Lexer->Pos == Lexer->End)
{
char LineBuffer[LINEBUFFER_MAX];
if (fgets(&LineBuffer[0], LINEBUFFER_MAX, stdin) == NULL)
return TokenEOF;
}
*Value = &LexValue;
while (Lexer->Pos != Lexer->End && isspace(*Lexer->Pos))
{
@ -188,29 +218,26 @@ enum LexToken LexGetTokenUncached(struct LexState *Lexer, struct Value **Value)
Lexer->Pos++;
}
if (Lexer->Pos == Lexer->End)
return TokenEOF;
ThisChar = *Lexer->Pos;
if (isCidstart(ThisChar))
return LexGetWord(Lexer, Value);
return LexGetWord(Lexer, *Value);
if (isdigit(ThisChar))
return LexGetNumber(Lexer, Value);
return LexGetNumber(Lexer, *Value);
NextChar = (Lexer->Pos+1 != Lexer->End) ? *(Lexer->Pos+1) : 0;
Lexer->Pos++;
switch (ThisChar)
{
case '"': return LexGetStringConstant(Lexer, Value);
case '\'': return LexGetCharacterConstant(Lexer, Value);
case '"': return LexGetStringConstant(Lexer, *Value);
case '\'': return LexGetCharacterConstant(Lexer, *Value);
case '(': return TokenOpenBracket;
case ')': return TokenCloseBracket;
case '=': NEXTIS('=', TokenEquality, TokenAssign);
case '+': NEXTIS3('=', TokenAddAssign, '+', TokenIncrement, TokenPlus);
case '-': NEXTIS4('=', TokenSubtractAssign, '>', TokenArrow, '-', TokenDecrement, TokenMinus);
case '*': return TokenAsterisk;
case '/': if (NextChar == '/' || NextChar == '*') return LexGetComment(Lexer, NextChar, Value); else return TokenSlash;
case '/': if (NextChar == '/' || NextChar == '*') return LexGetComment(Lexer, NextChar, *Value); else return TokenSlash;
case '<': NEXTIS('=', TokenLessEqual, TokenLessThan);
case '>': NEXTIS('=', TokenGreaterEqual, TokenGreaterThan);
case ';': return TokenSemicolon;
@ -227,59 +254,47 @@ enum LexToken LexGetTokenUncached(struct LexState *Lexer, struct Value **Value)
case '.': return TokenDot;
}
ProgramFail(Lexer, "illegal character '%c'", ThisChar);
LexFail(Lexer, "illegal character '%c'", ThisChar);
return TokenEOF;
}
enum LexToken LexGetToken(struct LexState *Lexer, struct Value **Value)
void LexTokeniseToStack(struct LexState *Lexer, struct Value **Value)
{
static const char *CachedPos = NULL;
static union AnyValue CachedAnyValue;
static struct Value CachedValue;
static struct LexState CachedLexer;
static enum LexToken CachedToken;
XXX - finish this
}
enum LexToken LexGetToken(struct ParseState *Parser, struct Value **Value, int IncPos)
{
enum LexToken;
while (Parser->Pos != Parser->End && (enum LexToken)*(unsigned char *)Parser->Pos == TokenEndOfLine)
{ /* skip leading newlines */
Pos->Line++;
Pos++;
}
if (Lexer->Pos == CachedPos)
{
*Value = &CachedValue;
CachedValue.Val = &CachedAnyValue;
*Lexer = CachedLexer;
if (Parser->Pos == Parser->End)
return TokenEOF;
LexToken = (enum LexToken)*(unsigned char *)Parser->Pos;
if (LexToken >= TokenIdentifier && LexToken <= TokenCharacterConstant)
{ /* this token requires a value */
int ValueLen = sizeof(struct Value) + ((struct Value *)Parser->Pos)->Typ->Sizeof;
if (Value != NULL)
{ /* copy the value out (aligns it in the process) */
memcpy(LexValue, (struct Value *)Parser->Pos, ValueLen);
*Value = &LexValue;
}
if (IncPos)
Parser->Pos += ValueLen + 1;
}
else
{
CachedPos = Lexer->Pos;
CachedToken = LexGetTokenUncached(Lexer, Value);
CachedLexer = *Lexer;
CachedValue = **Value;
CachedAnyValue = *(*Value)->Val;
if (IncPos)
Parser->Pos++;
}
return CachedToken;
return LexToken;
}
enum LexToken LexGetPlainToken(struct LexState *Lexer)
{
struct Value *Value;
return LexGetToken(Lexer, &Value);
}
/* look at the next token without changing the lexer state */
enum LexToken LexPeekToken(struct LexState *Lexer, struct Value **Value)
{
struct LexState LocalState = *Lexer;
return LexGetToken(&LocalState, Value);
}
enum LexToken LexPeekPlainToken(struct LexState *Lexer)
{
struct LexState LocalState = *Lexer;
struct Value *Value;
return LexGetToken(&LocalState, &Value);
}
/*
 * advance the lexer position until it reaches either a '\n' character or
 * the end of the source. the '\n' itself is not consumed
 */
void LexToEndOfLine(struct LexState *Lexer)
{
    const char *Cursor;

    for (Cursor = Lexer->Pos; Cursor != Lexer->End; Cursor++)
    {
        if (*Cursor == '\n')
            break;
    }

    Lexer->Pos = Cursor;
}

386
parse.c
View file

@ -8,25 +8,27 @@ int ParameterUsed = 0;
struct Value *ReturnValue;
/* local prototypes */
int ParseIntExpression(struct LexState *Lexer, int RunIt);
int ParseStatement(struct LexState *Lexer, int RunIt);
int ParseArguments(struct LexState *Lexer, int RunIt);
int ParseIntExpression(struct ParseState *Parser, int RunIt);
int ParseStatement(struct ParseState *Parser, int RunIt);
int ParseArguments(struct ParseState *Parser, int RunIt);
/*
 * initialise the parser by running the start-up routines it relies on, in
 * this order: StrInit(), VariableInit(), IntrinsicInit(&GlobalTable),
 * TypeInit(). takes no arguments and returns nothing.
 * NOTE(review): the call order is presumably significant (IntrinsicInit is
 * handed GlobalTable) - confirm the dependencies before reordering.
 */
void ParseInit()
{
StrInit();
VariableInit();
IntrinsicInit(&GlobalTable);
TypeInit();
}
/* parse a parameter list, defining parameters as local variables in the current scope */
void ParseParameterList(struct LexState *CallLexer, struct LexState *FuncLexer, int RunIt)
void ParseParameterList(struct ParseState *CallLexer, struct FuncDef *Func, int RunIt)
{
XXX - fix this
struct ValueType *Typ;
Str Identifier;
enum LexToken Token = LexGetPlainToken(FuncLexer); /* open bracket */
enum LexToken Token = LexGetToken(FuncLexer, NULL, TRUE); /* open bracket */
int ParamCount;
for (ParamCount = 0; ParamCount < ParameterUsed; ParamCount++)
@ -43,7 +45,7 @@ void ParseParameterList(struct LexState *CallLexer, struct LexState *FuncLexer,
}
}
Token = LexGetPlainToken(FuncLexer);
Token = LexGetToken(FuncLexer, NULL, TRUE);
if (ParamCount < ParameterUsed-1 && Token != TokenComma)
ProgramFail(FuncLexer, "comma expected");
}
@ -52,96 +54,97 @@ void ParseParameterList(struct LexState *CallLexer, struct LexState *FuncLexer,
ProgramFail(FuncLexer, "')' expected");
if (ParameterUsed == 0)
Token = LexGetPlainToken(FuncLexer);
Token = LexGetToken(FuncLexer, NULL, TRUE);
if (Token != TokenCloseBracket)
ProgramFail(CallLexer, "wrong number of arguments");
}
/* do a function call */
void ParseFunctionCall(struct LexState *Lexer, struct Value **Result, int ResultOnHeap, Str *FuncName, int RunIt)
void ParseFunctionCall(struct ParseState *Parser, struct Value **Result, int ResultOnHeap, Str *FuncName, int RunIt)
{
enum LexToken Token = LexGetPlainToken(Lexer); /* open bracket */
XXX - fix this
enum LexToken Token = LexGetToken(Parser, NULL, TRUE); /* open bracket */
/* parse arguments */
ParameterUsed = 0;
do {
if (ParseExpression(Lexer, &Parameter[ParameterUsed], FALSE, RunIt))
if (ParseExpression(Parser, &Parameter[ParameterUsed], FALSE, RunIt))
{
if (RunIt && ParameterUsed >= PARAMETER_MAX)
ProgramFail(Lexer, "too many arguments");
ProgramFail(Parser, "too many arguments");
ParameterUsed++;
Token = LexGetPlainToken(Lexer);
Token = LexGetToken(Parser, NULL, TRUE);
if (Token != TokenComma && Token != TokenCloseBracket)
ProgramFail(Lexer, "comma expected");
ProgramFail(Parser, "comma expected");
}
else
{
Token = LexGetPlainToken(Lexer);
Token = LexGetToken(Parser, NULL, TRUE);
if (!TokenCloseBracket)
ProgramFail(Lexer, "bad argument");
ProgramFail(Parser, "bad argument");
}
} while (Token != TokenCloseBracket);
if (RunIt)
{
struct LexState FuncLexer;
struct ParseState FuncLexer;
struct ValueType *ReturnType;
struct Value *FuncValue;
Str FuncName;
int Count;
VariableGet(Lexer, &FuncName, &FuncValue);
VariableGet(Parser, &FuncName, &FuncValue);
if ((*Result)->Typ->Base != TypeFunction)
ProgramFail(Lexer, "not a function - can't call");
ProgramFail(Parser, "not a function - can't call");
VariableStackFrameAdd(Lexer);
if (FuncValue->Val->Lexer.Line >= 0)
FuncLexer = FuncValue->Val->Lexer;
VariableStackFrameAdd(Parser);
if (FuncValue->Val->Parser.Line >= 0)
FuncLexer = FuncValue->Val->Parser;
else
IntrinsicGetLexer(&FuncLexer, FuncValue->Val->Lexer.Line);
IntrinsicGetLexer(&FuncLexer, FuncValue->Val->Parser.Line);
TypeParse(&FuncLexer, &ReturnType, &FuncName); /* get the return type */
*Result = VariableAllocValueFromType(Lexer, ReturnType, ResultOnHeap);
ParseParameterList(Lexer, &FuncLexer, TRUE); /* parameters */
if (FuncValue->Val->Lexer.Line >= 0)
*Result = VariableAllocValueFromType(Parser, ReturnType, ResultOnHeap);
ParseParameterList(Parser, &FuncLexer, TRUE); /* parameters */
if (FuncValue->Val->Parser.Line >= 0)
{ /* run a user-defined function */
if (LexPeekPlainToken(&FuncLexer) != TokenLeftBrace || !ParseStatement(&FuncLexer, TRUE))
if (LexGetToken(&FuncLexer, NULL, FALSE) != TokenLeftBrace || !ParseStatement(&FuncLexer, TRUE))
ProgramFail(&FuncLexer, "function body expected");
if (ReturnType != (*Result)->Typ)
ProgramFail(&FuncLexer, "bad return value");
}
else
IntrinsicCall(Lexer, *Result, ReturnType, (*Result)->Val->Lexer.Line);
IntrinsicCall(Parser, *Result, ReturnType, (*Result)->Val->Parser.Line);
VariableStackFramePop(Lexer);
VariableStackFramePop(Parser);
for (Count = ParameterUsed-1; Count >= 0; Count--) /* free stack space used by parameters */
VariableStackPop(Lexer, Parameter[Count]);
VariableStackPop(Parser, Parameter[Count]);
}
}
/* parse a single value */
int ParseValue(struct LexState *Lexer, struct Value **Result, int ResultOnHeap, int RunIt)
int ParseValue(struct ParseState *Parser, struct Value **Result, int ResultOnHeap, int RunIt)
{
struct LexState PreState = *Lexer;
struct ParseState PreState = *Parser;
struct Value *LexValue;
int IntValue;
enum LexToken Token = LexGetToken(Lexer, &LexValue);
enum LexToken Token = LexGetToken(Parser, &LexValue, TRUE);
switch (Token)
{
case TokenIntegerConstant: case TokenCharacterConstant: case TokenFPConstant: case TokenStringConstant:
*Result = VariableAllocValueAndCopy(Lexer, LexValue, ResultOnHeap);
*Result = VariableAllocValueAndCopy(Parser, LexValue, ResultOnHeap);
break;
case TokenMinus: case TokenUnaryExor: case TokenUnaryNot:
IntValue = ParseIntExpression(Lexer, RunIt);
IntValue = ParseIntExpression(Parser, RunIt);
if (RunIt)
{
*Result = VariableAllocValueFromType(Lexer, &IntType, ResultOnHeap);
*Result = VariableAllocValueFromType(Parser, &IntType, ResultOnHeap);
switch(Token)
{
case TokenMinus: (*Result)->Val->Integer = -IntValue; break;
@ -153,73 +156,73 @@ int ParseValue(struct LexState *Lexer, struct Value **Result, int ResultOnHeap,
break;
case TokenOpenBracket:
if (!ParseExpression(Lexer, Result, ResultOnHeap, RunIt))
ProgramFail(Lexer, "invalid expression");
if (!ParseExpression(Parser, Result, ResultOnHeap, RunIt))
ProgramFail(Parser, "invalid expression");
if (LexGetPlainToken(Lexer) != TokenCloseBracket)
ProgramFail(Lexer, "')' expected");
if (LexGetToken(Parser, NULL, TRUE) != TokenCloseBracket)
ProgramFail(Parser, "')' expected");
break;
case TokenAsterisk:
case TokenAmpersand:
ProgramFail(Lexer, "not implemented");
ProgramFail(Parser, "not implemented");
case TokenIdentifier:
if (LexPeekPlainToken(Lexer) == TokenOpenBracket)
ParseFunctionCall(Lexer, Result, ResultOnHeap, &LexValue->Val->String, RunIt);
if (LexGetToken(Parser, NULL, FALSE) == TokenOpenBracket)
ParseFunctionCall(Parser, Result, ResultOnHeap, &LexValue->Val->String, RunIt);
else
{
if (RunIt)
{
struct Value *IdentValue;
VariableGet(Lexer, &LexValue->Val->String, &IdentValue);
VariableGet(Parser, &LexValue->Val->String, &IdentValue);
if (IdentValue->Typ->Base == TypeMacro)
{
struct LexState MacroLexer = IdentValue->Val->Lexer;
struct ParseState MacroLexer = IdentValue->Val->Parser;
if (!ParseExpression(&MacroLexer, Result, ResultOnHeap, TRUE))
ProgramFail(&MacroLexer, "expression expected");
}
else if (!ISVALUETYPE(IdentValue->Typ))
ProgramFail(Lexer, "bad variable type");
ProgramFail(Parser, "bad variable type");
}
}
break;
default:
*Lexer = PreState;
*Parser = PreState;
return FALSE;
}
return TRUE;
}
struct Value *ParsePushFP(struct LexState *Lexer, int ResultOnHeap, double NewFP)
struct Value *ParsePushFP(struct ParseState *Parser, int ResultOnHeap, double NewFP)
{
struct Value *Val = VariableAllocValueFromType(Lexer, &FPType, ResultOnHeap);
struct Value *Val = VariableAllocValueFromType(Parser, &FPType, ResultOnHeap);
Val->Val->FP = NewFP;
return Val;
}
struct Value *ParsePushInt(struct LexState *Lexer, int ResultOnHeap, int NewInt)
struct Value *ParsePushInt(struct ParseState *Parser, int ResultOnHeap, int NewInt)
{
struct Value *Val = VariableAllocValueFromType(Lexer, &IntType, ResultOnHeap);
struct Value *Val = VariableAllocValueFromType(Parser, &IntType, ResultOnHeap);
Val->Val->Integer = NewInt;
return Val;
}
/* parse an expression. operator precedence is not supported */
int ParseExpression(struct LexState *Lexer, struct Value **Result, int ResultOnHeap, int RunIt)
int ParseExpression(struct ParseState *Parser, struct Value **Result, int ResultOnHeap, int RunIt)
{
struct Value *CurrentValue;
struct Value *TotalValue;
if (!ParseValue(Lexer, &TotalValue, ResultOnHeap, RunIt))
if (!ParseValue(Parser, &TotalValue, ResultOnHeap, RunIt))
return FALSE;
while (TRUE)
{
enum LexToken Token = LexPeekPlainToken(Lexer);
enum LexToken Token = LexGetToken(Parser, NULL, FALSE);
switch (Token)
{
case TokenPlus: case TokenMinus: case TokenAsterisk: case TokenSlash:
@ -227,18 +230,18 @@ int ParseExpression(struct LexState *Lexer, struct Value **Result, int ResultOnH
case TokenLessEqual: case TokenGreaterEqual: case TokenLogicalAnd:
case TokenLogicalOr: case TokenAmpersand: case TokenArithmeticOr:
case TokenArithmeticExor: case TokenDot:
LexGetPlainToken(Lexer);
LexGetToken(Parser, NULL, TRUE);
break;
case TokenAssign: case TokenAddAssign: case TokenSubtractAssign:
LexGetPlainToken(Lexer);
if (!ParseExpression(Lexer, &CurrentValue, ResultOnHeap, RunIt))
ProgramFail(Lexer, "expression expected");
LexGetToken(Parser, NULL, TRUE);
if (!ParseExpression(Parser, &CurrentValue, ResultOnHeap, RunIt))
ProgramFail(Parser, "expression expected");
if (RunIt)
{
if (CurrentValue->Typ->Base != TypeInt || TotalValue->Typ->Base != TypeInt)
ProgramFail(Lexer, "can't assign");
ProgramFail(Parser, "can't assign");
switch (Token)
{
@ -246,7 +249,7 @@ int ParseExpression(struct LexState *Lexer, struct Value **Result, int ResultOnH
case TokenSubtractAssign: TotalValue->Val->Integer -= CurrentValue->Val->Integer; break;
default: TotalValue->Val->Integer = CurrentValue->Val->Integer; break;
}
VariableStackPop(Lexer, CurrentValue);
VariableStackPop(Parser, CurrentValue);
}
// fallthrough
@ -256,7 +259,7 @@ int ParseExpression(struct LexState *Lexer, struct Value **Result, int ResultOnH
return TRUE;
}
if (!ParseValue(Lexer, &CurrentValue, ResultOnHeap, RunIt))
if (!ParseValue(Parser, &CurrentValue, ResultOnHeap, RunIt))
return FALSE;
if (RunIt)
@ -272,39 +275,39 @@ int ParseExpression(struct LexState *Lexer, struct Value **Result, int ResultOnH
else if (CurrentValue->Typ->Base == TypeFP)
FPCurrent = CurrentValue->Val->FP;
else
ProgramFail(Lexer, "bad type for operator");
ProgramFail(Parser, "bad type for operator");
if (TotalValue->Typ->Base == TypeInt)
FPTotal = (double)TotalValue->Val->Integer;
else if (TotalValue->Typ->Base == TypeFP)
FPTotal = TotalValue->Val->FP;
else
ProgramFail(Lexer, "bad type for operator");
ProgramFail(Parser, "bad type for operator");
}
VariableStackPop(Lexer, CurrentValue);
VariableStackPop(Lexer, TotalValue);
VariableStackPop(Parser, CurrentValue);
VariableStackPop(Parser, TotalValue);
switch (Token)
{
case TokenPlus: TotalValue = ParsePushFP(Lexer, ResultOnHeap, FPTotal + FPCurrent); break;
case TokenMinus: TotalValue = ParsePushFP(Lexer, ResultOnHeap, FPTotal - FPCurrent); break;
case TokenAsterisk: TotalValue = ParsePushFP(Lexer, ResultOnHeap, FPTotal * FPCurrent); break;
case TokenSlash: TotalValue = ParsePushFP(Lexer, ResultOnHeap, FPTotal / FPCurrent); break;
case TokenEquality: TotalValue = ParsePushInt(Lexer, ResultOnHeap, FPTotal == FPCurrent); break;
case TokenLessThan: TotalValue = ParsePushInt(Lexer, ResultOnHeap, FPTotal < FPCurrent); break;
case TokenGreaterThan: TotalValue = ParsePushInt(Lexer, ResultOnHeap, FPTotal > FPCurrent); break;
case TokenLessEqual: TotalValue = ParsePushInt(Lexer, ResultOnHeap, FPTotal <= FPCurrent); break;
case TokenGreaterEqual: TotalValue = ParsePushInt(Lexer, ResultOnHeap, FPTotal >= FPCurrent); break;
case TokenLogicalAnd: case TokenLogicalOr: case TokenAmpersand: case TokenArithmeticOr: case TokenArithmeticExor: ProgramFail(Lexer, "bad type for operator"); break;
case TokenDot: ProgramFail(Lexer, "operator not supported"); break;
case TokenPlus: TotalValue = ParsePushFP(Parser, ResultOnHeap, FPTotal + FPCurrent); break;
case TokenMinus: TotalValue = ParsePushFP(Parser, ResultOnHeap, FPTotal - FPCurrent); break;
case TokenAsterisk: TotalValue = ParsePushFP(Parser, ResultOnHeap, FPTotal * FPCurrent); break;
case TokenSlash: TotalValue = ParsePushFP(Parser, ResultOnHeap, FPTotal / FPCurrent); break;
case TokenEquality: TotalValue = ParsePushInt(Parser, ResultOnHeap, FPTotal == FPCurrent); break;
case TokenLessThan: TotalValue = ParsePushInt(Parser, ResultOnHeap, FPTotal < FPCurrent); break;
case TokenGreaterThan: TotalValue = ParsePushInt(Parser, ResultOnHeap, FPTotal > FPCurrent); break;
case TokenLessEqual: TotalValue = ParsePushInt(Parser, ResultOnHeap, FPTotal <= FPCurrent); break;
case TokenGreaterEqual: TotalValue = ParsePushInt(Parser, ResultOnHeap, FPTotal >= FPCurrent); break;
case TokenLogicalAnd: case TokenLogicalOr: case TokenAmpersand: case TokenArithmeticOr: case TokenArithmeticExor: ProgramFail(Parser, "bad type for operator"); break;
case TokenDot: ProgramFail(Parser, "operator not supported"); break;
default: break;
}
}
else
{
if (CurrentValue->Typ->Base != TypeInt || TotalValue->Typ->Base != TypeInt)
ProgramFail(Lexer, "bad operand types");
ProgramFail(Parser, "bad operand types");
/* integer arithmetic */
switch (Token)
@ -323,11 +326,11 @@ int ParseExpression(struct LexState *Lexer, struct Value **Result, int ResultOnH
case TokenAmpersand: TotalValue->Val->Integer = TotalValue->Val->Integer & CurrentValue->Val->Integer; break;
case TokenArithmeticOr: TotalValue->Val->Integer = TotalValue->Val->Integer | CurrentValue->Val->Integer; break;
case TokenArithmeticExor: TotalValue->Val->Integer = TotalValue->Val->Integer ^ CurrentValue->Val->Integer; break;
case TokenDot: ProgramFail(Lexer, "operator not supported"); break;
case TokenDot: ProgramFail(Parser, "operator not supported"); break;
default: break;
}
}
VariableStackPop(Lexer, CurrentValue);
VariableStackPop(Parser, CurrentValue);
*Result = TotalValue;
}
}
@ -336,123 +339,157 @@ int ParseExpression(struct LexState *Lexer, struct Value **Result, int ResultOnH
}
/* parse an expression. operator precedence is not supported */
int ParseIntExpression(struct LexState *Lexer, int RunIt)
int ParseIntExpression(struct ParseState *Parser, int RunIt)
{
struct Value *Val;
int Result = 0;
if (!ParseExpression(Lexer, &Val, FALSE, RunIt))
ProgramFail(Lexer, "expression expected");
if (!ParseExpression(Parser, &Val, FALSE, RunIt))
ProgramFail(Parser, "expression expected");
if (RunIt)
{
if (Val->Typ->Base != TypeInt)
ProgramFail(Lexer, "integer value expected");
ProgramFail(Parser, "integer value expected");
Result = Val->Val->Integer;
VariableStackPop(Lexer, Val);
VariableStackPop(Parser, Val);
}
return Result;
}
/* parse a function definition and store it for later */
void ParseFunctionDefinition(struct LexState *Lexer, Str *Identifier, struct LexState *PreState)
void ParseFunctionDefinition(struct ParseState *Parser, struct ValueType *ReturnType, Str *Identifier)
{
struct Value *FuncValue = VariableAllocValueAndData(Lexer, sizeof(struct LexState), TRUE);
struct ValueType *ParamTyp;
Str Identifier;
enum LexToken Token;
struct Value *FuncValue;
struct ParseState ParamParser;
int ParamCount = 0;
FuncValue->Val->Lexer = *PreState;
LexGetPlainToken(Lexer);
if (LexGetPlainToken(Lexer) != TokenCloseBracket || LexPeekPlainToken(Lexer) != TokenLeftBrace)
ProgramFail(Lexer, "bad function definition");
LexGetToken(Parser, NULL, TRUE); /* open bracket */
ParamParser = *Parser;
Token = LexGetToken(Parser, NULL, TRUE);
if (Token != TokenCloseBracket && Token != TokenEOF)
{ /* count the number of parameters */
ParamCount++;
while ((Token = LexGetToken(Parser, NULL, TRUE)) != TokenCloseBracket && Token != TokenEOF)
{
if (Token == TokenComma)
ParamCount++;
}
}
if (!ParseStatement(Lexer, FALSE))
ProgramFail(Lexer, "function definition expected");
FuncValue->Val->Lexer.End = Lexer->Pos;
FuncValue = VariableAllocValueAndData(Parser, sizeof(struct FuncDef) + sizeof(struct ValueType *) * ParamCount + sizeof(Str *) * ParamCount, TRUE);
FuncValue->Typ = &FunctionType;
FuncValue->Val->FuncDef.ReturnType = ReturnType;
FuncValue->Val->FuncDef.NumParams = ParamCount;
FuncValue->Val->FuncDef.ParamType = (void *)FuncValue->Val + sizeof(struct FuncDef);
FuncValue->Val->FuncDef.ParamName = (void *)FuncValue->Val->FuncDef.ParamType + sizeof(struct ValueType *) * ParamCount;
FuncValue->Val->FuncDef.Body = *Parser;
for (ParamCount = 0; ParamCount < FuncValue->Val->FuncDef.NumParams; ParamCount++)
{ /* harvest the parameters into the function definition */
TypeParse(ParamParser, &Typ, &Identifier);
FuncValue->Val->FuncDef.ParamType[ParamCount] = Typ;
FuncValue->Val->FuncDef.ParamName[ParamCount] = Typ;
Token = LexGetToken(ParamParser, NULL, TRUE);
if (Token != TokenComma)
ProgramFail(ParamParser, "comma expected");
}
if (LexGetToken(Parser, NULL, FALSE) != TokenLeftBrace)
ProgramFail(Parser, "bad function definition");
if (!ParseStatement(Parser, FALSE))
ProgramFail(Parser, "function definition expected");
FuncValue->Val->FuncDef.Body.End = Parser->Pos;
if (!TableSet(&GlobalTable, Identifier, FuncValue))
ProgramFail(Lexer, "'%S' is already defined", Identifier);
ProgramFail(Parser, "'%S' is already defined", Identifier);
}
/* parse a #define macro definition and store it for later */
void ParseMacroDefinition(struct LexState *Lexer)
void ParseMacroDefinition(struct ParseState *Parser)
{
XXX - fix this
struct Value *MacroName;
struct Value *MacroValue = VariableAllocValueAndData(Lexer, sizeof(struct LexState), TRUE);
struct Value *MacroValue = VariableAllocValueAndData(Parser, sizeof(struct ParseState), TRUE);
if (LexGetToken(Lexer, &MacroName) != TokenIdentifier)
ProgramFail(Lexer, "identifier expected");
if (LexGetToken(Parser, &MacroName, TRUE) != TokenIdentifier)
ProgramFail(Parser, "identifier expected");
MacroValue->Val->Lexer = *Lexer;
LexToEndOfLine(Lexer);
MacroValue->Val->Lexer.End = Lexer->Pos;
MacroValue->Val->Parser = *Parser;
MacroValue->Val->Parser.End = Parser->Pos;
MacroValue->Typ = &MacroType;
if (!TableSet(&GlobalTable, &MacroName->Val->String, MacroValue))
ProgramFail(Lexer, "'%S' is already defined", &MacroName->Val->String);
ProgramFail(Parser, "'%S' is already defined", &MacroName->Val->String);
}
void ParseFor(struct LexState *Lexer, int RunIt)
void ParseFor(struct ParseState *Parser, int RunIt)
{
int Condition;
struct LexState PreConditional;
struct LexState PreIncrement;
struct LexState PreStatement;
struct LexState After;
struct ParseState PreConditional;
struct ParseState PreIncrement;
struct ParseState PreStatement;
struct ParseState After;
if (LexGetPlainToken(Lexer) != TokenOpenBracket)
ProgramFail(Lexer, "'(' expected");
if (LexGetToken(Parser, NULL, TRUE) != TokenOpenBracket)
ProgramFail(Parser, "'(' expected");
if (!ParseStatement(Lexer, RunIt))
ProgramFail(Lexer, "statement expected");
if (!ParseStatement(Parser, RunIt))
ProgramFail(Parser, "statement expected");
PreConditional = *Lexer;
Condition = ParseIntExpression(Lexer, RunIt);
PreConditional = *Parser;
Condition = ParseIntExpression(Parser, RunIt);
if (LexGetPlainToken(Lexer) != TokenSemicolon)
ProgramFail(Lexer, "';' expected");
if (LexGetToken(Parser, NULL, TRUE) != TokenSemicolon)
ProgramFail(Parser, "';' expected");
PreIncrement = *Lexer;
ParseStatement(Lexer, FALSE);
PreIncrement = *Parser;
ParseStatement(Parser, FALSE);
if (LexGetPlainToken(Lexer) != TokenCloseBracket)
ProgramFail(Lexer, "')' expected");
if (LexGetToken(Parser, NULL, TRUE) != TokenCloseBracket)
ProgramFail(Parser, "')' expected");
PreStatement = *Lexer;
if (!ParseStatement(Lexer, RunIt && Condition))
ProgramFail(Lexer, "statement expected");
PreStatement = *Parser;
if (!ParseStatement(Parser, RunIt && Condition))
ProgramFail(Parser, "statement expected");
After = *Lexer;
After = *Parser;
while (Condition && RunIt)
{
*Lexer = PreIncrement;
ParseStatement(Lexer, TRUE);
*Parser = PreIncrement;
ParseStatement(Parser, TRUE);
*Lexer = PreConditional;
Condition = ParseIntExpression(Lexer, RunIt);
*Parser = PreConditional;
Condition = ParseIntExpression(Parser, RunIt);
if (Condition)
{
*Lexer = PreStatement;
ParseStatement(Lexer, TRUE);
*Parser = PreStatement;
ParseStatement(Parser, TRUE);
}
}
*Lexer = After;
*Parser = After;
}
/* parse a statement */
int ParseStatement(struct LexState *Lexer, int RunIt)
int ParseStatement(struct ParseState *Parser, int RunIt)
{
struct Value *CValue;
int Condition;
struct LexState PreState = *Lexer;
struct ParseState PreState = *Parser;
Str Identifier;
struct ValueType *Typ;
enum LexToken Token = LexGetPlainToken(Lexer);
enum LexToken Token = LexGetToken(Parser, NULL, TRUE);
switch (Token)
{
@ -460,43 +497,43 @@ int ParseStatement(struct LexState *Lexer, int RunIt)
return FALSE;
case TokenIdentifier:
*Lexer = PreState;
ParseExpression(Lexer, &CValue, FALSE, RunIt);
if (RunIt) VariableStackPop(Lexer, CValue);
*Parser = PreState;
ParseExpression(Parser, &CValue, FALSE, RunIt);
if (RunIt) VariableStackPop(Parser, CValue);
break;
case TokenLeftBrace:
while (ParseStatement(Lexer, RunIt))
while (ParseStatement(Parser, RunIt))
{}
if (LexGetPlainToken(Lexer) != TokenRightBrace)
ProgramFail(Lexer, "'}' expected");
if (LexGetToken(Parser, NULL, TRUE) != TokenRightBrace)
ProgramFail(Parser, "'}' expected");
break;
case TokenIf:
Condition = ParseIntExpression(Lexer, RunIt);
Condition = ParseIntExpression(Parser, RunIt);
if (!ParseStatement(Lexer, RunIt && Condition))
ProgramFail(Lexer, "statement expected");
if (!ParseStatement(Parser, RunIt && Condition))
ProgramFail(Parser, "statement expected");
if (LexPeekPlainToken(Lexer) == TokenElse)
if (LexGetToken(Parser, NULL, FALSE) == TokenElse)
{
LexGetPlainToken(Lexer);
if (!ParseStatement(Lexer, RunIt && !Condition))
ProgramFail(Lexer, "statement expected");
LexGetToken(Parser, NULL, TRUE);
if (!ParseStatement(Parser, RunIt && !Condition))
ProgramFail(Parser, "statement expected");
}
break;
case TokenWhile:
{
struct LexState PreConditional = *Lexer;
struct ParseState PreConditional = *Parser;
do
{
*Lexer = PreConditional;
Condition = ParseIntExpression(Lexer, RunIt);
*Parser = PreConditional;
Condition = ParseIntExpression(Parser, RunIt);
if (!ParseStatement(Lexer, RunIt && Condition))
ProgramFail(Lexer, "statement expected");
if (!ParseStatement(Parser, RunIt && Condition))
ProgramFail(Parser, "statement expected");
} while (RunIt && Condition);
}
@ -504,21 +541,21 @@ int ParseStatement(struct LexState *Lexer, int RunIt)
case TokenDo:
{
struct LexState PreStatement = *Lexer;
struct ParseState PreStatement = *Parser;
do
{
*Lexer = PreStatement;
if (!ParseStatement(Lexer, RunIt))
ProgramFail(Lexer, "statement expected");
*Parser = PreStatement;
if (!ParseStatement(Parser, RunIt))
ProgramFail(Parser, "statement expected");
Condition = ParseIntExpression(Lexer, RunIt);
Condition = ParseIntExpression(Parser, RunIt);
} while (Condition && RunIt);
}
break;
case TokenFor:
ParseFor(Lexer, RunIt);
ParseFor(Parser, RunIt);
break;
case TokenSemicolon: break;
@ -528,30 +565,29 @@ int ParseStatement(struct LexState *Lexer, int RunIt)
case TokenFloatType:
case TokenDoubleType:
case TokenVoidType:
*Lexer = PreState;
TypeParse(Lexer, &Typ, &Identifier);
*Parser = PreState;
TypeParse(Parser, &Typ, &Identifier);
if (Identifier.Len == 0)
ProgramFail(Lexer, "identifier expected");
ProgramFail(Parser, "identifier expected");
/* handle function definitions */
if (LexPeekPlainToken(Lexer) == TokenOpenBracket)
ParseFunctionDefinition(Lexer, &Identifier, &PreState);
if (LexGetToken(Parser, NULL, FALSE) == TokenOpenBracket)
ParseFunctionDefinition(Parser, &Typ, &Identifier);
else
VariableDefine(Lexer, &Identifier, VariableAllocValueFromType(Lexer, Typ, FALSE));
VariableDefine(Parser, &Identifier, VariableAllocValueFromType(Parser, Typ, FALSE));
break;
case TokenHashDefine:
ParseMacroDefinition(Lexer);
ParseMacroDefinition(Parser);
break;
case TokenHashInclude:
{
struct Value *LexerValue;
if (LexGetToken(Lexer, &LexerValue) != TokenStringConstant)
ProgramFail(Lexer, "\"filename.h\" expected");
if (LexGetToken(Parser, &LexerValue, TRUE) != TokenStringConstant)
ProgramFail(Parser, "\"filename.h\" expected");
ScanFile(&LexerValue->Val->String);
LexToEndOfLine(Lexer);
break;
}
@ -560,11 +596,11 @@ int ParseStatement(struct LexState *Lexer, int RunIt)
case TokenBreak:
case TokenReturn:
case TokenDefault:
ProgramFail(Lexer, "not implemented yet");
ProgramFail(Parser, "not implemented yet");
break;
default:
*Lexer = PreState;
*Parser = PreState;
return FALSE;
}
@ -572,15 +608,15 @@ int ParseStatement(struct LexState *Lexer, int RunIt)
}
/* quick scan a source file for definitions */
void Parse(const Str *FileName, const Str *Source, int RunIt)
void Parse(const Str *FileName, const Str *Source, int SourceLen, int RunIt)
{
struct LexState Lexer;
struct ParseState Parser;
LexInit(&Lexer, Source, FileName, 1);
LexInit(&Parser, Source, SourceLen, FileName, 1);
while (ParseStatement(&Lexer, RunIt))
while (ParseStatement(&Parser, RunIt))
{}
if (Lexer.Pos != Lexer.End)
ProgramFail(&Lexer, "parse error");
if (Parser.Pos != Parser.End)
ProgramFail(&Parser, "parse error");
}

62
picoc.c
View file

@ -9,79 +9,63 @@
/* all platform-dependent code is in this file */
/* report a fatal error which has no source position and exit with status 1.
 * Message is a printf-style format string followed by its arguments.
 * uses vprintf() because the Str-aware vStrPrintf() is compiled out of str.c */
void Fail(const char *Message, ...)
{
    va_list Args;

    va_start(Args, Message);
    vprintf(Message, Args);
    va_end(Args);
    exit(1);
}
void ProgramFail(struct LexState *Lexer, const char *Message, ...)
void ProgramFail(struct ParseState *Parser, const char *Message, ...)
{
va_list Args;
if (Lexer != NULL)
StrPrintf("%S:%d: ", Lexer->FileName, Lexer->Line);
if (Parser != NULL)
printf("%s:%d: ", Parser->FileName, Parser->Line);
va_start(Args, Message);
vStrPrintf(Message, Args);
StrPrintf("\n");
vprintf(Message, Args);
printf("\n");
exit(1);
}
/* read a file into memory. this is the only function using malloc().
* do it differently for embedded devices without malloc */
Str ReadFile(const Str *FileName)
/* read a file into memory */
char *ReadFile(const char *FileName)
{
struct stat FileInfo;
char *ReadText;
Str Text;
FILE *InFile;
char CFileName[PATH_MAX];
StrToC(CFileName, PATH_MAX, FileName);
if (stat(FileName, &FileInfo))
ProgramFail(NULL, "can't read file %s\n", FileName);
if (stat(CFileName, &FileInfo))
Fail("can't read file %s\n", CFileName);
ReadText = malloc(FileInfo.st_size);
ReadText = HeapAlloc(FileInfo.st_size);
if (ReadText == NULL)
Fail("out of memory\n");
ProgramFail(NULL, "out of memory\n");
InFile = fopen(CFileName, "r");
InFile = fopen(FileName, "r");
if (InFile == NULL)
Fail("can't read file %s\n", CFileName);
ProgramFail(NULL, "can't read file %s\n", FileName);
if (fread(ReadText, 1, FileInfo.st_size, InFile) != FileInfo.st_size)
Fail("can't read file %s\n", CFileName);
ProgramFail(NULL, "can't read file %s\n", FileName);
Text.Str = ReadText;
Text.Len = FileInfo.st_size;
fclose(InFile);
return Text;
return ReadText;
}
/* read and scan a file for definitions */
void ScanFile(const Str *FileName)
void ScanFile(const char *FileName)
{
Str SourceStr = ReadFile(FileName);
Parse(FileName, &SourceStr, TRUE);
char *SourceStr = ReadFile(FileName);
Parse(FileName, SourceStr, TRUE);
HeapFree(SourceStr);
}
/* program entry point. argv[1] names the source file to scan and run.
 * prints a usage message and exits if no file is given */
int main(int argc, char **argv)
{
    if (argc < 2)
        ProgramFail(NULL, "Format: picoc <program.c> <args>...\n");

    /* the heap must be ready before the string table and parser allocate */
    HeapInit();
    StrInit();
    ParseInit();

    ScanFile(argv[1]);

    return 0;
}

127
picoc.h
View file

@ -5,15 +5,16 @@
/* configurable options */
#define HEAP_SIZE 2048 /* space for the heap and the stack */
#define GLOBAL_TABLE_SIZE 397 /* global variable table */
#define FUNCTION_STORE_MAX 200 /* maximum number of user-defined functions and macros */
#define STACK_MAX 10 /* maximum function call stack depth */
#define PARAMETER_MAX 10 /* maximum number of parameters to a function */
#define LOCAL_TABLE_SIZE 11 /* maximum number of local variables */
#define STRUCT_TABLE_SIZE 11 /* maximum number of struct/union members */
#define LARGE_INT_POWER_OF_TEN 1000000000 /* the largest power of ten which fits in an int on this architecture */
#define ARCH_ALIGN_WORDSIZE sizeof(int) /* memory alignment boundary on this architecture */
#define GLOBAL_TABLE_SIZE 397 /* global variable table */
#define STRING_TABLE_SIZE 97 /* shared string table size */
#define PARAMETER_MAX 10 /* maximum number of parameters to a function */
#define LINEBUFFER_MAX 256 /* maximum number of characters on a line */
#define LOCAL_TABLE_SIZE 11 /* size of local variable table (can expand) */
#define STRUCT_TABLE_SIZE 11 /* size of struct/union member table (can expand) */
/* handy definitions */
#ifndef TRUE
#define TRUE 1
@ -61,28 +62,13 @@ enum LexToken
TokenHashDefine, TokenHashInclude
};
/* parser state - has all this detail so we can parse nested files */
struct ParseState
{
    const void *Pos;            /* current position in the source */
    const void *End;            /* end of the source */
    int Line;                   /* line number reported in error messages */
    const char *FileName;       /* file name reported in error messages */
};
/* values */
@ -103,18 +89,30 @@ enum BaseType
TypeType /* a type (eg. typedef) */
};
/* data type */
struct ValueType
{
    enum BaseType Base;                 /* what kind of type this is */
    int ArraySize;                      /* the size of an array type */
    int Sizeof;                         /* the storage required */
    const char *Identifier;             /* the name of a struct or union */
    struct ValueType *FromType;         /* the type we're derived from (or NULL) */
    struct ValueType *DerivedTypeList;  /* first in a list of types derived from this one */
    struct ValueType *Next;             /* next item in the derived type list */
    struct Table *Members;              /* members of a struct, union or enum */
};
/* function definition - describes either an intrinsic or a user function */
struct FuncDef
{
struct ValueType *ReturnType; /* the return value type */
int NumParams; /* the number of parameters */
/* NOTE(review): no "struct Typ" is declared in the visible part of this
 * header; "struct ValueType" may have been intended - confirm */
struct Typ *ParamType; /* array of parameter types */
const char **ParamName; /* array of parameter names */
void (*Intrinsic)(); /* intrinsic call address or NULL */
struct ParseState Body; /* lexical tokens of the function body if not intrinsic */
};
struct ArrayValue
{
unsigned int Size; /* the number of elements in the array */
@ -136,11 +134,12 @@ union AnyValue
short ShortInteger;
int Integer;
double FP;
Str String;
char *String;
struct ArrayValue Array;
struct PointerValue Pointer;
struct LexState Lexer;
struct ParseState Parser;
struct ValueType *Typ;
struct FuncDef FuncDef;
};
struct Value
@ -154,7 +153,7 @@ struct Value
/* hash table data structure - one entry in a bucket's chain */
struct TableEntry
{
    const char *Key;            /* NUL terminated key string */
    struct Value *Val;          /* the value stored under the key */
    struct TableEntry *Next;    /* next entry in the same bucket (or NULL) */
};
@ -169,7 +168,7 @@ struct Table
/* stack frame for function calls */
struct StackFrame
{
struct LexState ReturnLex; /* how we got here */
struct ParseState ReturnParser; /* how we got here */
struct Table LocalTable; /* the local variables and parameters */
struct TableEntry *LocalHashTable[LOCAL_TABLE_SIZE];
struct StackFrame *PreviousStackFrame; /* the next lower stack frame */
@ -188,49 +187,38 @@ extern struct ValueType FPType;
extern struct ValueType VoidType;
extern struct ValueType FunctionType;
extern struct ValueType MacroType;
/* picoc.c */
void Fail(const char *Message, ...);
void ProgramFail(struct ParseState *Parser, const char *Message, ...);
void ScanFile(const char *FileName);

/* table.c */
void TableInit(struct Table *Tbl, struct TableEntry **HashTable, int Size, int OnHeap);
int TableSet(struct Table *Tbl, const char *Key, struct Value *Val);
int TableGet(struct Table *Tbl, const char *Key, struct Value **Val);
const char *TableSetKey(struct Table *Tbl, const char *Ident, int IdentLen);

/* lex.c */
void LexInit(struct ParseState *Parser, const char *Source, int SourceLen, const char *FileName, int Line);
/* IncPos is TRUE to consume the token, FALSE to only peek at it */
enum LexToken LexGetToken(struct ParseState *Parser, struct Value **Value, int IncPos);
void LexToEndOfLine(struct ParseState *Parser);

/* parse.c */
void ParseInit(void);
int ParseExpression(struct ParseState *Parser, struct Value **Result, int ResultOnHeap, int RunIt);
int ParseIntExpression(struct ParseState *Parser, int RunIt);
int ParseStatement(struct ParseState *Parser, int RunIt);
void Parse(const char *FileName, const char *Source, int SourceLen, int RunIt);

/* type.c */
void TypeInit();
int TypeSizeof(struct ValueType *Typ);
void TypeParse(struct ParseState *Parser, struct ValueType **Typ, const char **Identifier);

/* intrinsic.c */
void IntrinsicInit(struct Table *GlobalTable);
void IntrinsicGetLexer(struct ParseState *Parser, int IntrinsicId);
void IntrinsicCall(struct ParseState *Parser, struct Value *Result, struct ValueType *ReturnType, int IntrinsicId);
/* heap.c */
void HeapInit();
@ -243,15 +231,20 @@ void HeapFree(void *Mem);
/* variable.c */
void VariableInit();
void *VariableAlloc(struct ParseState *Parser, int Size, int OnHeap);
void VariableStackPop(struct ParseState *Parser, struct Value *Var);
struct Value *VariableAllocValueAndData(struct ParseState *Parser, int DataSize, int OnHeap);
struct Value *VariableAllocValueAndCopy(struct ParseState *Parser, struct Value *FromValue, int OnHeap);
struct Value *VariableAllocValueFromType(struct ParseState *Parser, struct ValueType *Typ, int OnHeap);
void VariableDefine(struct ParseState *Parser, const char *Ident, struct Value *InitValue);
int VariableDefined(const char *Ident);
void VariableGet(struct ParseState *Parser, const char *Ident, struct Value **LVal);
void VariableStackFrameAdd(struct ParseState *Parser);
void VariableStackFramePop(struct ParseState *Parser);

/* str.c */
void StrInit();
const char *StrRegister(const char *Str);
const char *StrRegister2(const char *Str, int Len);
#endif /* PICOC_H */

37
str.c
View file

@ -1,41 +1,29 @@
/* maintains a shared string table so we don't have to worry about string allocation */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
#include <math.h>
#include "picoc.h"
Str StrEmpty = { 0, "" };
struct Table StringTable;
struct TableEntry *StringHashTable[STRING_TABLE_SIZE];
/* convert a Str to a C string */
void StrToC(char *Dest, int DestSize, const Str *Source)
/* initialise the shared string system */
void StrInit()
{
int CopyLen = min(DestSize-1, Source->Len);
memcpy(Dest, Source->Str, CopyLen);
Dest[CopyLen] = '\0';
TableInit(&StringTable, &StringHashTable[0], STRING_TABLE_SIZE, TRUE);
}
/* convert a C string to a Str */
void StrFromC(Str *Dest, const char *Source)
/* register a string in the shared string store */
const char *StrRegister2(const char *Str, int Len)
{
Dest->Str = Source;
Dest->Len = strlen(Source);
return TableSetKey(&StringTable, Str, Len);
}
/* compare two Strs for equality */
int StrEqual(const Str *Str1, const Str *Str2)
const char *StrRegister(const char *Str)
{
if (Str1->Len != Str2->Len)
return FALSE;
return memcmp(Str1->Str, Str2->Str, Str1->Len) == 0;
}
/* compare a Str to a C string */
int StrEqualC(const Str *Str1, const char *Str2)
{
return strncmp(Str1->Str, Str2, Str1->Len) == 0 && Str2[Str1->Len] == '\0';
return StrRegister2(Str, strlen(Str));
}
/* print an integer to a stream without using printf/sprintf */
@ -93,6 +81,7 @@ void StrPrintFP(double Num, FILE *Stream)
}
}
#if 0
/* Str version of printf */
void StrPrintf(const char *Format, ...)
{
@ -128,3 +117,5 @@ void vStrPrintf(const char *Format, va_list Args)
putchar(*FPos);
}
}
#endif

48
table.c
View file

@ -2,12 +2,12 @@
#include "picoc.h"
/* quick hash function - mixes the first, last and middle characters of the
 * key with its length. an empty key hashes to 0.
 * Key need not be NUL terminated; only the first KeyLen characters are used */
static unsigned int TableHash(const char *Key, int KeyLen)
{
    const unsigned char *Bytes = (const unsigned char *)Key;

    if (KeyLen == 0)
        return 0;

    /* work in unsigned arithmetic: shifting a (possibly negative) char left
     * by 24 bits as an int is undefined behaviour */
    return (((unsigned int)Bytes[0] << 24) |
            ((unsigned int)Bytes[KeyLen - 1] << 16) |
            ((unsigned int)Bytes[KeyLen >> 1] << 8)) ^ (unsigned int)KeyLen;
}
/* initialise a table */
@ -20,14 +20,14 @@ void TableInit(struct Table *Tbl, struct TableEntry **HashTable, int Size, int O
}
/* check a hash table entry for a key */
static int TableSearch(struct Table *Tbl, const Str *Key, int *AddAt)
static int TableSearch(struct Table *Tbl, const char *Key, int Len, int *AddAt)
{
struct TableEntry *Entry;
int HashValue = TableHash(Key) % Tbl->Size;;
int HashValue = TableHash(Key, Len) % Tbl->Size;
for (Entry = Tbl->HashTable[HashValue]; Entry != NULL; Entry = Entry->Next)
{
if (StrEqual(&Entry->Key, Key))
if (strncmp(Entry->Key, Key, Len) == 0 && Entry->Key[Len] == '\0')
return HashValue; /* found */
}
@ -36,17 +36,16 @@ static int TableSearch(struct Table *Tbl, const Str *Key, int *AddAt)
}
/* set an identifier to a value. returns FALSE if it already exists */
int TableSet(struct Table *Tbl, const Str *Key, struct Value *Val)
int TableSet(struct Table *Tbl, const char *Key, struct Value *Val)
{
int HashPos;
int AddAt;
int KeyLen = strlen(Key);
int HashPos = TableSearch(Tbl, Key, KeyLen, &AddAt);
HashPos = TableSearch(Tbl, Key, &AddAt);
if (HashPos == -1)
{ /* add it to the table */
struct TableEntry *NewEntry = VariableAlloc(NULL, sizeof(struct TableEntry), Tbl->OnHeap);
NewEntry->Key = *Key;
NewEntry->Key = Key;
NewEntry->Val = Val;
NewEntry->Next = Tbl->HashTable[AddAt];
Tbl->HashTable[AddAt] = NewEntry;
@ -57,16 +56,33 @@ int TableSet(struct Table *Tbl, const Str *Key, struct Value *Val)
}
/* find a value in a table. returns FALSE if not found */
int TableGet(struct Table *Tbl, const Str *Key, struct Value **Val)
int TableGet(struct Table *Tbl, const char *Key, struct Value **Val)
{
int HashPos;
int AddAt;
HashPos = TableSearch(Tbl, Key, &AddAt);
int HashPos = TableSearch(Tbl, Key, strlen(Key), &AddAt);
if (HashPos == -1)
return FALSE;
*Val = Tbl->HashTable[HashPos]->Val;
return TRUE;
}
/* set an identifier and return the identifier. share if possible.
 * Ident need not be NUL terminated; only IdentLen characters are used.
 * returns the table's own NUL terminated copy of the identifier, which is
 * the existing copy if the identifier was already registered */
const char *TableSetKey(struct Table *Tbl, const char *Ident, int IdentLen)
{
    int AddAt;
    struct TableEntry *Entry;
    struct TableEntry *NewEntry;
    char *NewKey;
    int HashPos = TableSearch(Tbl, Ident, IdentLen, &AddAt);

    if (HashPos != -1)
    {
        /* TableSearch() only reports the bucket - find the entry in its chain */
        for (Entry = Tbl->HashTable[HashPos]; Entry != NULL; Entry = Entry->Next)
        {
            if (strncmp(Entry->Key, Ident, IdentLen) == 0 && Entry->Key[IdentLen] == '\0')
                return Entry->Key;
        }

        /* not expected to happen, but keep AddAt valid if it does */
        AddAt = HashPos;
    }

    /* add it to the table. the key is stored directly after the entry */
    NewEntry = HeapAlloc(sizeof(struct TableEntry) + IdentLen + 1);
    if (NewEntry == NULL)
        ProgramFail(NULL, "out of memory");

    NewKey = (char *)NewEntry + sizeof(struct TableEntry);
    strncpy(NewKey, Ident, IdentLen);
    NewKey[IdentLen] = '\0';    /* strncpy() doesn't terminate a truncated copy */

    NewEntry->Key = NewKey;
    NewEntry->Val = NULL;
    NewEntry->Next = Tbl->HashTable[AddAt];
    Tbl->HashTable[AddAt] = NewEntry;

    return NewEntry->Key;
}

View file

@ -18,7 +18,7 @@ void VariableInit()
}
/* allocate some memory, either on the heap or the stack and check if we've run out */
void *VariableAlloc(struct LexState *Lexer, int Size, int OnHeap)
void *VariableAlloc(struct ParseState *Parser, int Size, int OnHeap)
{
void *NewValue;
@ -28,15 +28,15 @@ void *VariableAlloc(struct LexState *Lexer, int Size, int OnHeap)
NewValue = HeapAllocStack(Size);
if (NewValue == NULL)
ProgramFail(Lexer, "out of memory");
ProgramFail(Parser, "out of memory");
return NewValue;
}
/* allocate a value either on the heap or the stack using space dependent on what type we want */
struct Value *VariableAllocValueAndData(struct LexState *Lexer, int DataSize, int OnHeap)
struct Value *VariableAllocValueAndData(struct ParseState *Parser, int DataSize, int OnHeap)
{
struct Value *NewValue = VariableAlloc(Lexer, DataSize, OnHeap);
struct Value *NewValue = VariableAlloc(Parser, DataSize, OnHeap);
NewValue->Val = (union AnyValue *)((void *)NewValue + sizeof(struct Value));
NewValue->ValOnHeap = OnHeap;
NewValue->ValOnStack = !OnHeap;
@ -45,31 +45,31 @@ struct Value *VariableAllocValueAndData(struct LexState *Lexer, int DataSize, in
}
/* allocate a value given its type */
struct Value *VariableAllocValueFromType(struct LexState *Lexer, struct ValueType *Typ, int OnHeap)
struct Value *VariableAllocValueFromType(struct ParseState *Parser, struct ValueType *Typ, int OnHeap)
{
struct Value *NewValue = VariableAllocValueAndData(Lexer, Typ->Sizeof, OnHeap);
struct Value *NewValue = VariableAllocValueAndData(Parser, Typ->Sizeof, OnHeap);
NewValue->Typ = Typ;
return NewValue;
}
/* allocate a value either on the heap or the stack and copy its value */
struct Value *VariableAllocValueAndCopy(struct LexState *Lexer, struct Value *FromValue, int OnHeap)
struct Value *VariableAllocValueAndCopy(struct ParseState *Parser, struct Value *FromValue, int OnHeap)
{
struct Value *NewValue = VariableAllocValueAndData(Lexer, FromValue->Typ->Sizeof, OnHeap);
struct Value *NewValue = VariableAllocValueAndData(Parser, FromValue->Typ->Sizeof, OnHeap);
NewValue->Typ = FromValue->Typ;
memcpy(NewValue->Val, FromValue->Val, FromValue->Typ->Sizeof);
return NewValue;
}
/* define a variable */
void VariableDefine(struct LexState *Lexer, const Str *Ident, struct Value *InitValue)
void VariableDefine(struct ParseState *Parser, const char *Ident, struct Value *InitValue)
{
if (!TableSet((TopStackFrame == NULL) ? &GlobalTable : &TopStackFrame->LocalTable, Ident, VariableAllocValueAndCopy(Lexer, InitValue, TopStackFrame == NULL)))
ProgramFail(Lexer, "'%S' is already defined", Ident);
if (!TableSet((TopStackFrame == NULL) ? &GlobalTable : &TopStackFrame->LocalTable, Ident, VariableAllocValueAndCopy(Parser, InitValue, TopStackFrame == NULL)))
ProgramFail(Parser, "'%S' is already defined", Ident);
}
/* check if a variable with a given name is defined */
int VariableDefined(Str *Ident)
int VariableDefined(const char *Ident)
{
struct Value *FoundValue;
@ -83,17 +83,17 @@ int VariableDefined(Str *Ident)
}
/* get the value of a variable. must be defined */
void VariableGet(struct LexState *Lexer, Str *Ident, struct Value **LVal)
void VariableGet(struct ParseState *Parser, const char *Ident, struct Value **LVal)
{
if (TopStackFrame == NULL || !TableGet(&TopStackFrame->LocalTable, Ident, LVal))
{
if (!TableGet(&GlobalTable, Ident, LVal))
ProgramFail(Lexer, "'%S' is undefined", Ident);
ProgramFail(Parser, "'%S' is undefined", Ident);
}
}
/* free and/or pop the top value off the stack. Var must be the top value on the stack! */
void VariableStackPop(struct LexState *Lexer, struct Value *Var)
void VariableStackPop(struct ParseState *Parser, struct Value *Var)
{
int Success;
@ -108,29 +108,29 @@ void VariableStackPop(struct LexState *Lexer, struct Value *Var)
Success = HeapPopStack(Var, sizeof(struct Value)); /* value isn't our problem */
if (!Success)
ProgramFail(Lexer, "stack underrun");
ProgramFail(Parser, "stack underrun");
}
/* add a stack frame when doing a function call */
void VariableStackFrameAdd(struct LexState *Lexer)
void VariableStackFrameAdd(struct ParseState *Parser)
{
struct StackFrame *NewFrame;
HeapPushStackFrame();
NewFrame = HeapAllocStack(sizeof(struct StackFrame));
NewFrame->ReturnLex = *Lexer;
NewFrame->ReturnParser = *Parser;
TableInit(&NewFrame->LocalTable, &NewFrame->LocalHashTable[0], LOCAL_TABLE_SIZE, FALSE);
NewFrame->PreviousStackFrame = TopStackFrame;
TopStackFrame = NewFrame;
}
/* remove a stack frame */
void VariableStackFramePop(struct LexState *Lexer)
void VariableStackFramePop(struct ParseState *Parser)
{
if (TopStackFrame == NULL)
ProgramFail(Lexer, "stack is empty - can't go back");
ProgramFail(Parser, "stack is empty - can't go back");
TopStackFrame = TopStackFrame->PreviousStackFrame;
*Lexer = TopStackFrame->ReturnLex;
*Parser = TopStackFrame->ReturnParser;
HeapPopStackFrame();
}