using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace GASi
{
    /// <summary>
    /// Splits raw program lines into a flat sequence of <see cref="Token"/>s
    /// using a small hand-written state machine.
    /// </summary>
    class Lexer
    {
        /// <summary>Kind of token currently being accumulated.</summary>
        private enum kTokenState
        {
            UNSURE,
            ALPHA,
            NUMERIC,
            OPERATOR,
            PREPROCESSOR,
            RAW,
            STRING
        }

        /// <summary>Sentinel for "no raw block has been opened" (line or column).</summary>
        private const int RawNotStarted = -0xFF;

        private static readonly string[] Scopes = { "global", "static", "local" };
        private static readonly string[] Types = { "string", "date", "bool", "float", "int", "void" };
        private static readonly string[] Controls = { "if", "else", "elseif", "switch", "case", "default", "for", "while" };
        private static readonly string[] Keywords = { "return", "ref", "break" };
        private static readonly string[] Operators = { "+", "-", "*", "/", "%", "==", "!=", ">", ">=", "<", "<=", "&&", "||", "&", "|", "^", "~", "++", "--" };
        private static readonly string[] Assigners = { "=", "+=", "-=", "*=", "/=" };

        // Must stay declared AFTER Operators and Assigners: static field
        // initializers run in textual order and the table is built from both.
        private static readonly string Significants = AssembleSignificantCharactersTable();

        /// <summary>Returns the glyph after <paramref name="column"/>, or '\0' at the end of the line.</summary>
        private static char Peek(int column, string line)
        {
            return (column + 1 >= line.Length) ? '\0' : line[column + 1];
        }

        /// <summary>Returns the glyph before <paramref name="column"/>, or '\0' at the start of the line.</summary>
        private static char Rewind(int column, string line)
        {
            return (column - 1 < 0) ? '\0' : line[column - 1];
        }

        /// <summary>
        /// Tokenizes the given program.
        /// </summary>
        /// <param name="rawProgramLines">Program source, one entry per line.</param>
        /// <returns>The tokens in source order.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="rawProgramLines"/> is null.</exception>
        /// <remarks>
        /// A glyph that cannot start any token is reported through
        /// <c>Transpiler.Exception</c> with its line and column.
        /// Every token except a raw block (<c>:: ... ::</c>) ends at the line
        /// break at the latest; raw blocks may span several lines.
        /// </remarks>
        public static IEnumerable<Token> Interpret(string[] rawProgramLines)
        {
            if (rawProgramLines == null)
            {
                throw new ArgumentNullException(nameof(rawProgramLines));
            }

            List<Token> tokenList = new List<Token>();
            Token current = new Token();
            kTokenState state = kTokenState.UNSURE;
            var rawStartLine = RawNotStarted;

            for (var line = 0; line < rawProgramLines.Length; ++line)
            {
                var rawProgramLine = rawProgramLines[line];
                bool firstCharFound = false;
                // Reset per line, so the "opening ::" guard below only applies
                // on the line where the raw block started.
                var rawStartCol = RawNotStarted;

                for (var column = 0; column < rawProgramLine.Length; ++column)
                {
                    var glyph = rawProgramLine[column];
                    var curWord = current.Value;
                    var tmpWord = current.Value + glyph;
                    bool rerunGlyph = false;

                    if (state == kTokenState.UNSURE)
                    {
                        // ignore tabs and spaces when nothing is being interpreted yet
                        if (glyph == 0x20 || glyph == 0x09)
                        {
                            continue;
                        }

                        if (glyph == '/' && Peek(column, rawProgramLine) == '/')
                        {
                            // line comment: the rest of the line is skipped
                            break;
                        }
                        else if (glyph == '#' && !firstCharFound)
                        {
                            state = kTokenState.PREPROCESSOR;
                        }
                        else if (glyph == ':' && !firstCharFound && Peek(column, rawProgramLine) == ':')
                        {
                            state = kTokenState.RAW;
                            rawStartCol = column;
                            rawStartLine = line;
                        }
                        else if (glyph == '"')
                        {
                            state = kTokenState.STRING;
                        }
                        else if (IsAlpha(glyph))
                        {
                            state = kTokenState.ALPHA;
                        }
                        else if (IsNumeric(glyph))
                        {
                            state = kTokenState.NUMERIC;
                        }
                        else if (IsSignificant(glyph))
                        {
                            state = kTokenState.OPERATOR;
                        }
                        else
                        {
                            throw Transpiler.Exception("Unexpected glyph " + glyph, line, column);
                        }

                        firstCharFound = true;
                    }

                    switch (state)
                    {
                        case kTokenState.RAW:
                            // A "::" closes the block unless it is the opening pair itself.
                            if (glyph == ':' && Rewind(column, rawProgramLine) == ':' && rawStartCol != column - 1)
                            {
                                current.Type = Token.kType.RAW;
                            }
                            else if (column == 0 && line != rawStartLine)
                            {
                                // keep the line structure of multi-line raw blocks
                                current.Value += '\n';
                            }
                            break;

                        case kTokenState.ALPHA:
                            if (!IsAlpha(glyph) && !IsNumeric(glyph))
                            {
                                current.Type = ClassifyWord(curWord);
                                rerunGlyph = true;
                            }
                            break;

                        case kTokenState.NUMERIC:
                            // a single '.' is allowed inside a number
                            if (!IsNumeric(glyph) && !(glyph == '.' && !current.Value.Contains('.')))
                            {
                                current.Type = Token.kType.NUMBER;
                                rerunGlyph = true;
                            }
                            break;

                        case kTokenState.PREPROCESSOR:
                            if (!IsAlpha(glyph) && !(glyph == '#' && curWord == ""))
                            {
                                current.Type = Token.kType.PREPROC;
                                rerunGlyph = true;
                            }
                            break;

                        case kTokenState.OPERATOR:
                            Token.kType punctuation = ClassifyPunctuation(glyph);
                            if (punctuation != Token.kType.UNDECIDED)
                            {
                                // An operator directly followed by punctuation ("a+(b)")
                                // must be emitted on its own first; otherwise both
                                // glyphs end up in one token typed as the punctuation.
                                Token.kType pending = ClassifySymbol(curWord);
                                if (pending != Token.kType.UNDECIDED)
                                {
                                    current.Type = pending;
                                    rerunGlyph = true;
                                }
                                else
                                {
                                    // NOTE(review): an unrecognised pending fragment
                                    // (e.g. a lone "!") is still merged into this token.
                                    current.Type = punctuation;
                                }
                            }
                            else if (Operators.Contains(curWord) && !Operators.Contains(tmpWord) && !Assigners.Contains(tmpWord))
                            {
                                // longest match: stop once one more glyph no longer forms a symbol
                                current.Type = Token.kType.OPERATOR;
                                rerunGlyph = true;
                            }
                            else if (Assigners.Contains(curWord) && !Assigners.Contains(tmpWord) && !Operators.Contains(tmpWord))
                            {
                                current.Type = Token.kType.ASSIGNER;
                                rerunGlyph = true;
                            }
                            break;

                        case kTokenState.STRING:
                            // TODO determine if you can escape double quotes in gas using \"
                            if (curWord != "")
                            {
                                // closed by an unescaped quote, or implicitly by the end of the line
                                if ((glyph == '"' && !curWord.EndsWith("\\")) || column == rawProgramLine.Length - 1)
                                {
                                    current.Type = Token.kType.STRING;
                                }
                            }
                            break;
                    }

                    if (!rerunGlyph)
                    {
                        current.Value += glyph;
                    }
                    else
                    {
                        // the glyph belongs to the next token; visit it again
                        --column;
                    }

                    if (current.Type != Token.kType.UNDECIDED)
                    {
                        current.Value = current.Value.Trim();
                        tokenList.Add(current);
                        current = new Token();
                        state = kTokenState.UNSURE;
                    }
                }

                // A line break ends every token except a multi-line raw block.
                // Without this, a word at the end of one line would be glued to
                // the first word of the next, and the program's last token lost.
                if (state != kTokenState.UNSURE && state != kTokenState.RAW && current.Value.Length > 0)
                {
                    current.Type = ClassifyAtEndOfLine(state, current.Value);
                    if (current.Type != Token.kType.UNDECIDED)
                    {
                        current.Value = current.Value.Trim();
                        tokenList.Add(current);
                        current = new Token();
                        state = kTokenState.UNSURE;
                    }
                    // An unrecognised symbol fragment stays pending and is carried
                    // into the next line.
                }
            }

            return tokenList;
        }

        /// <summary>Maps a finished word to SCOPE, TYPE, CONTROL, KEYWORD or IDENTIFIER.</summary>
        private static Token.kType ClassifyWord(string word)
        {
            string trimmed = word.Trim();

            if (Scopes.Contains(trimmed))
            {
                return Token.kType.SCOPE;
            }
            if (Types.Contains(trimmed))
            {
                return Token.kType.TYPE;
            }
            if (Controls.Contains(trimmed))
            {
                return Token.kType.CONTROL;
            }
            if (Keywords.Contains(trimmed))
            {
                return Token.kType.KEYWORD;
            }
            return Token.kType.IDENTIFIER;
        }

        /// <summary>
        /// Maps a complete symbol to OPERATOR or ASSIGNER; returns UNDECIDED when
        /// <paramref name="symbol"/> is in neither table (including the empty string).
        /// </summary>
        private static Token.kType ClassifySymbol(string symbol)
        {
            if (Operators.Contains(symbol))
            {
                return Token.kType.OPERATOR;
            }
            if (Assigners.Contains(symbol))
            {
                return Token.kType.ASSIGNER;
            }
            return Token.kType.UNDECIDED;
        }

        /// <summary>
        /// Maps a single-glyph punctuation mark to its token type; returns
        /// UNDECIDED for any other glyph.
        /// </summary>
        private static Token.kType ClassifyPunctuation(char glyph)
        {
            switch (glyph)
            {
                case '[': return Token.kType.LBRACKET;
                case ']': return Token.kType.RBRACKET;
                case '(': return Token.kType.LPAREN;
                case ')': return Token.kType.RPAREN;
                case '{': return Token.kType.LBRACE;
                case '}': return Token.kType.RBRACE;
                case '.': return Token.kType.PERIOD;
                case ',': return Token.kType.COMMA;
                case ';': return Token.kType.SEMICOL;
                default: return Token.kType.UNDECIDED;
            }
        }

        /// <summary>
        /// Decides the type of a token that is still pending when its line ends.
        /// Returns UNDECIDED when the pending text cannot be classified.
        /// </summary>
        private static Token.kType ClassifyAtEndOfLine(kTokenState state, string word)
        {
            switch (state)
            {
                case kTokenState.ALPHA: return ClassifyWord(word);
                case kTokenState.NUMERIC: return Token.kType.NUMBER;
                case kTokenState.PREPROCESSOR: return Token.kType.PREPROC;
                case kTokenState.STRING: return Token.kType.STRING;
                case kTokenState.OPERATOR: return ClassifySymbol(word);
                default: return Token.kType.UNDECIDED;
            }
        }

        /// <summary>True for ASCII letters A-Z and a-z.</summary>
        private static bool IsAlpha(char glyph)
        {
            return (glyph >= 'A' && glyph <= 'Z') || (glyph >= 'a' && glyph <= 'z');
        }

        /// <summary>True for ASCII digits 0-9.</summary>
        private static bool IsNumeric(char glyph)
        {
            return glyph >= '0' && glyph <= '9';
        }

        /// <summary>True when the glyph is punctuation or part of any operator/assigner.</summary>
        private static bool IsSignificant(char glyph)
        {
            return Significants.Contains(glyph);
        }

        /// <summary>
        /// Builds the set of glyphs that may start a symbol token: the fixed
        /// punctuation marks plus every distinct glyph used by an operator or assigner.
        /// </summary>
        private static string AssembleSignificantCharactersTable()
        {
            string table = "[]{}(),.;";

            foreach (var symbol in Operators.Concat(Assigners))
            {
                foreach (var glyph in symbol)
                {
                    if (table.IndexOf(glyph) < 0)
                    {
                        table += glyph;
                    }
                }
            }

            return table;
        }

        /// <summary>A single lexical token: its classification and its source text.</summary>
        public class Token
        {
            /// <summary>Classification; UNDECIDED while the token is still being read.</summary>
            public kType Type { get; set; } = kType.UNDECIDED;

            /// <summary>Source text of the token (string tokens keep their quotes).</summary>
            public string Value { get; set; } = "";

            /// <summary>All token classifications.</summary>
            public enum kType
            {
                UNDECIDED,
                PREPROC,
                RAW,
                SCOPE,
                TYPE,
                IDENTIFIER,
                OPERATOR,
                ASSIGNER,
                CONTROL,
                KEYWORD,
                LPAREN,
                RPAREN,
                LBRACKET,
                RBRACKET,
                LBRACE,
                RBRACE,
                PERIOD,
                COMMA,
                SEMICOL,
                NUMBER,
                STRING,
                BOOL
            }
        }
    }
}