214 lines
9.7 KiB
C#
214 lines
9.7 KiB
C#
using System;
|
|
using System.Collections.Generic;
|
|
using System.Linq;
|
|
using System.Text;
|
|
using System.Threading.Tasks;
|
|
|
|
namespace GASi {
|
|
class Lexer {
|
|
// Scanner state: which kind of token, if any, is currently being collected.
private enum kTokenState {
    UNSURE,        // no token in progress; the next glyph decides
    ALPHA,         // word started by a letter
    NUMERIC,       // number started by a digit
    OPERATOR,      // operator, assigner or punctuation
    PREPROCESSOR,  // directive started by '#' as the first glyph of a line
    RAW,           // block opened by "::" as the first glyphs of a line
    STRING         // literal opened by a double quote
}
|
|
|
|
// Words that Interpret classifies as SCOPE tokens.
private static readonly string[] Scopes = {
    "global", "static", "local"
};

// Words that Interpret classifies as TYPE tokens.
private static readonly string[] Types = {
    "string", "date", "bool", "float", "int", "void"
};

// Words that Interpret classifies as CONTROL tokens.
private static readonly string[] Controls = {
    "if", "else", "elseif",
    "switch", "case", "default",
    "for", "while"
};

// Words that Interpret classifies as KEYWORD tokens.
private static readonly string[] Keywords = {
    "return", "ref", "break"
};

// Complete operator spellings (OPERATOR tokens).
private static readonly string[] Operators = {
    "+", "-", "*", "/", "%",
    "==", "!=", ">", ">=", "<", "<=", "&&", "||",
    "&", "|", "^", "~",
    "++", "--"
};

// Complete assignment spellings (ASSIGNER tokens).
private static readonly string[] Assigners = {
    "=", "+=", "-=", "*=", "/="
};

// Every glyph that may begin an operator, assigner or punctuation token.
// Stays null until AssembleSignificantCharactersTable fills it in.
private static string Significants = null;
|
|
|
|
/// <summary>
/// Looks one glyph ahead of <paramref name="column"/> without consuming it.
/// </summary>
/// <param name="column">Index of the current glyph within <paramref name="line"/>.</param>
/// <param name="line">The line being scanned.</param>
/// <returns>The glyph at <c>column + 1</c>, or <c>'\0'</c> when that index is at or past the end of the line.</returns>
private static char Peek(int column, string line) {
    int next = column + 1;
    if(next >= line.Length) {
        return '\0';
    }
    return line[next];
}
|
|
|
|
/// <summary>
/// Looks one glyph behind <paramref name="column"/>.
/// </summary>
/// <param name="column">Index of the current glyph within <paramref name="line"/>.</param>
/// <param name="line">The line being scanned.</param>
/// <returns>The glyph at <c>column - 1</c>, or <c>'\0'</c> when <paramref name="column"/> is the first index or lower.</returns>
private static char Rewind(int column, string line) {
    int previous = column - 1;
    if(previous < 0) {
        return '\0';
    }
    return line[previous];
}
|
|
|
|
/// <summary>
/// Splits program text into a flat list of tokens.
/// </summary>
/// <param name="rawProgramLines">
/// The program, one entry per line. A null entry is read as an empty line.
/// </param>
/// <returns>
/// The tokens in source order. Every value is whitespace-trimmed; STRING values
/// keep their quotes and RAW values keep their "::" delimiters.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="rawProgramLines"/> is null.</exception>
/// <remarks>
/// A glyph that cannot start any token raises the exception built by
/// <c>Transpiler.Exception</c>. Apart from raw blocks, no token spans a line
/// break: whatever is still being collected when a line ends is closed there.
/// </remarks>
public static IEnumerable<Token> Interpret(string[] rawProgramLines) {
    if(rawProgramLines == null)
        throw new ArgumentNullException(nameof(rawProgramLines));

    List<Token> tokenList = new List<Token>();
    Token current = new Token();
    kTokenState state = kTokenState.UNSURE;
    var rawStartLine = -0xFF;

    for(var line = 0; line < rawProgramLines.Length; ++line) {
        var rawProgramLine = rawProgramLines[line] ?? "";
        bool firstCharFound = false;
        var rawStartCol = -0xFF;

        for(var column = 0; column < rawProgramLine.Length; ++column) {
            var glyph = rawProgramLine[column];
            var curWord = current.Value;
            // Set when the glyph ends the pending token without belonging to it;
            // the glyph is then scanned again with a fresh token.
            bool rerunGlyph = false;

            if(state == kTokenState.UNSURE) {
                // ignore tabs and spaces when nothing is being interpreted yet
                if(glyph == ' ' || glyph == '\t')
                    continue;

                if(glyph == '/' && Peek(column, rawProgramLine) == '/') {
                    // line comment: nothing is pending, drop the rest of the line
                    break;
                } else if(glyph == '#' && !firstCharFound) {
                    state = kTokenState.PREPROCESSOR;
                } else if(glyph == ':' && !firstCharFound && Peek(column, rawProgramLine) == ':') {
                    state = kTokenState.RAW;
                    rawStartCol = column;
                    rawStartLine = line;
                } else if(glyph == '"') {
                    state = kTokenState.STRING;
                } else if(IsAlpha(glyph)) {
                    state = kTokenState.ALPHA;
                } else if(IsNumeric(glyph)) {
                    state = kTokenState.NUMERIC;
                } else if(IsSignificant(glyph)) {
                    state = kTokenState.OPERATOR;
                } else {
                    throw Transpiler.Exception("Unexpected glyph " + glyph, line, column);
                }

                firstCharFound = true;
            }

            switch(state) {
                case kTokenState.RAW:
                    // A "::" pair closes the block unless it is the pair that opened it.
                    if(glyph == ':' && Rewind(column, rawProgramLine) == ':' && rawStartCol != column - 1)
                        current.Type = Token.kType.RAW;
                    // The first glyph of each following line contributes the line break.
                    // NOTE(review): a line with no glyphs never reaches this branch, so
                    // blank lines inside a raw block leave no '\n' in the value.
                    else if(column == 0 && line != rawStartLine)
                        current.Value += '\n';
                    break;

                case kTokenState.ALPHA:
                    if(!IsAlpha(glyph) && !IsNumeric(glyph)) {
                        current.Type = ClassifyWord(curWord);
                        rerunGlyph = true;
                    }
                    break;

                case kTokenState.NUMERIC:
                    // digits, plus at most one '.'
                    if(!IsNumeric(glyph) && !(glyph == '.' && !current.Value.Contains('.'))) {
                        current.Type = Token.kType.NUMBER;
                        rerunGlyph = true;
                    }
                    break;

                case kTokenState.PREPROCESSOR:
                    // the leading '#' followed by letters only
                    if(!IsAlpha(glyph) && !(glyph == '#' && curWord == "")) {
                        current.Type = Token.kType.PREPROC;
                        rerunGlyph = true;
                    }
                    break;

                case kTokenState.OPERATOR: {
                    var tmpWord = curWord + glyph;
                    if(curWord != "" && !Operators.Contains(tmpWord) && !Assigners.Contains(tmpWord)) {
                        // The glyph does not extend the pending operator. Close the
                        // operator first so that "i++;" yields "++" and ";" instead
                        // of a single ";"-typed token holding "++;".
                        current.Type = ClassifyOperator(curWord);
                        rerunGlyph = true;
                    } else {
                        // Punctuation is always a one-glyph token; anything else
                        // stays UNDECIDED and keeps collecting.
                        current.Type = ClassifyPunctuation(glyph);
                    }
                    break;
                }

                case kTokenState.STRING:
                    // TODO determine if you can escape double quotes in gas using \"
                    // curWord is empty only on the opening quote itself.
                    if(curWord != "") {
                        bool closingQuote = glyph == '"' && !curWord.EndsWith("\\", StringComparison.Ordinal);
                        if(closingQuote || column == rawProgramLine.Length - 1)
                            current.Type = Token.kType.STRING;
                    }
                    break;
            }

            if(!rerunGlyph)
                current.Value += glyph;
            else
                --column;

            if(current.Type != Token.kType.UNDECIDED) {
                current.Value = current.Value.Trim();
                tokenList.Add(current);
                current = new Token();
                state = kTokenState.UNSURE;
            }
        }

        // Only a raw block may continue on the next line. Close anything else that
        // is still open, so it neither merges with the next line's first glyphs
        // nor gets dropped when this is the last line.
        if(state != kTokenState.UNSURE && state != kTokenState.RAW) {
            current.Type = ClassifyAtLineEnd(state, current.Value);
            current.Value = current.Value.Trim();
            tokenList.Add(current);
            current = new Token();
            state = kTokenState.UNSURE;
        }
    }

    return tokenList;
}

// Maps a finished word to SCOPE, TYPE, CONTROL or KEYWORD, falling back to IDENTIFIER.
private static Token.kType ClassifyWord(string word) {
    var trimmed = word.Trim();
    if(Scopes.Contains(trimmed))
        return Token.kType.SCOPE;
    if(Types.Contains(trimmed))
        return Token.kType.TYPE;
    if(Controls.Contains(trimmed))
        return Token.kType.CONTROL;
    if(Keywords.Contains(trimmed))
        return Token.kType.KEYWORD;
    return Token.kType.IDENTIFIER;
}

// Maps finished operator text to OPERATOR or ASSIGNER.
private static Token.kType ClassifyOperator(string text) {
    if(Operators.Contains(text))
        return Token.kType.OPERATOR;
    if(Assigners.Contains(text))
        return Token.kType.ASSIGNER;
    // With the current tables only a lone '!' gets here: it can start "!=" but is
    // not listed by itself. Emit it as an operator rather than letting it swallow
    // the glyphs that follow.
    return Token.kType.OPERATOR;
}

// Maps a bracket, paren, brace, '.', ',' or ';' to its token type; UNDECIDED for any other glyph.
private static Token.kType ClassifyPunctuation(char glyph) {
    switch(glyph) {
        case '[': return Token.kType.LBRACKET;
        case ']': return Token.kType.RBRACKET;
        case '(': return Token.kType.LPAREN;
        case ')': return Token.kType.RPAREN;
        case '{': return Token.kType.LBRACE;
        case '}': return Token.kType.RBRACE;
        case '.': return Token.kType.PERIOD;
        case ',': return Token.kType.COMMA;
        case ';': return Token.kType.SEMICOL;
        default:  return Token.kType.UNDECIDED;
    }
}

// Picks the type for a token that is still open when its line ends.
private static Token.kType ClassifyAtLineEnd(kTokenState state, string text) {
    switch(state) {
        case kTokenState.ALPHA:
            return ClassifyWord(text);
        case kTokenState.NUMERIC:
            return Token.kType.NUMBER;
        case kTokenState.PREPROCESSOR:
            return Token.kType.PREPROC;
        case kTokenState.OPERATOR:
            return ClassifyOperator(text);
        default:
            // STRING: the opening quote was the last glyph on the line.
            return Token.kType.STRING;
    }
}
|
|
|
|
/// <summary>
/// Tells whether <paramref name="glyph"/> is an ASCII letter (A-Z or a-z).
/// </summary>
private static bool IsAlpha(char glyph) {
    bool isUpper = glyph >= 'A' && glyph <= 'Z';
    bool isLower = glyph >= 'a' && glyph <= 'z';
    return isUpper || isLower;
}
|
|
|
|
/// <summary>
/// Tells whether <paramref name="glyph"/> is an ASCII digit (0-9).
/// </summary>
private static bool IsNumeric(char glyph) {
    return '0' <= glyph && glyph <= '9';
}
|
|
|
|
/// <summary>
/// Tells whether <paramref name="glyph"/> appears in the significant-glyph
/// table, making sure the table has been built first.
/// </summary>
private static bool IsSignificant(char glyph) {
    AssembleSignificantCharactersTable();
    return Significants.IndexOf(glyph) >= 0;
}
|
|
|
|
/// <summary>
/// Builds <c>Significants</c> on first use: the punctuation glyphs followed by
/// every distinct glyph that occurs in <c>Operators</c> and <c>Assigners</c>.
/// Does nothing once the table exists.
/// </summary>
private static void AssembleSignificantCharactersTable() {
    if(Significants != null)
        return;

    const string punctuation = "[]{}(),.;";

    // Build in locals and assign the field once at the end, so the field never
    // holds a half-built table.
    var table = new StringBuilder(punctuation);
    var seen = new HashSet<char>(punctuation);

    foreach(var op in Operators.Concat(Assigners)) {
        foreach(var glyph in op) {
            // Add returns false for a glyph that is already present.
            if(seen.Add(glyph))
                table.Append(glyph);
        }
    }

    Significants = table.ToString();
}
|
|
|
|
/// <summary>
/// One lexical unit: its classification plus the text it was read from.
/// </summary>
public class Token {
    /// <summary>Classification of a token. The member order is kept as declared.</summary>
    public enum kType {
        // not classified yet
        UNDECIDED,
        // '#' directives and "::" raw blocks
        PREPROC, RAW,
        // words
        SCOPE, TYPE, IDENTIFIER,
        OPERATOR, ASSIGNER, CONTROL, KEYWORD,
        // ( ) [ ] { }
        LPAREN, RPAREN, LBRACKET, RBRACKET, LBRACE, RBRACE,
        // . , ;
        PERIOD, COMMA, SEMICOL,
        // literals
        NUMBER, STRING, BOOL
    }

    /// <summary>Classification; a new token starts out as UNDECIDED.</summary>
    public kType Type { get; set; } = kType.UNDECIDED;

    /// <summary>Source text of the token; a new token starts out empty.</summary>
    public string Value { get; set; } = "";
}
|
|
}
|
|
}
|