some basic parsing

This commit is contained in:
lachrymaLF 2025-04-20 19:56:18 -04:00
parent 0bae4381c8
commit 71373df9d5
6 changed files with 349 additions and 34 deletions

View file

@ -1,9 +1,9 @@
// This new syntax should be easier to parse although parsing wasn't really the issue with the last syntax
// I really want the syntax to feel a lot more like the actual script for a play or something of the sort
# This new syntax should be easier to parse although parsing wasn't really the issue with the last syntax
# I really want the syntax to feel a lot more like the actual script for a play or something of the sort
// THINK ABOUT THIS Newlines are the end of a statement in this syntax, I think this mirrors text formatting pretty well
# THINK ABOUT THIS Newlines are the end of a statement in this syntax, I think this mirrors text formatting pretty well
// I find that the "BEGIN" things in some older languages are actually quite fitting
# I find that the "BEGIN" things in some older languages are actually quite fitting
BEGIN Scene1
Command "test"
@ -11,16 +11,16 @@ Do 5 [2, 3, 6]
Set var 2
// Make the actual commands like Lisp, not the entire language
// strip the outermost brackets since this syntax will be parsed line by line
# Make the actual commands like Lisp, not the entire language
# strip the outermost brackets since this syntax will be parsed line by line
Set var2 (=? var 1)
Set var3 (+ var var2)
// Enter and exit dialogue mode with <<- and ->>
# Enter and exit dialogue mode with <<- and ->>
<<-
// Comments are only legal at the beginning like this in Dialogue mode
// IDEA: once NVL is parsed, set up indices for each line
// Make another program to match audio
# Comments are only legal at the beginning like this in Dialogue mode
# IDEA: once NVL is parsed, set up indices for each line
# Make another program to match audio
[Alex]
Hello. Welcome to dialogue mode.
Every new line is a new "click". I discarded the "pause" idea.
@ -45,7 +45,7 @@ To grab a value from the environment, do it like this: ${var}.
This is also the syntax to evaluate a command from dialogue mode.
If the return is void it will say "undefined" or something like that.
*! This "is a command in dialogue mode."
*! This "is a command in dialogue mode."
*! Set var (+ var 1)
[NARRATION]
@ -55,13 +55,13 @@ Thanks!
->>
// something like Choice can load the index of the choice the user selects into a variable
# something like Choice can load the index of the choice the user selects into a variable
Choice var4 ["Apple", "Orange"]
// After thinking about this for a while I think it would be a good idea
// to have ways to terminate a scene other than END, for instance JUMP can be an alternative like this:
// JUMP Scene2
// we can equally do something like this
// JUMP (switch var4 [[0, RouteA], [1, RouteB], [default, RouteC]])
# After thinking about this for a while I think it would be a good idea
# to have ways to terminate a scene other than END, for instance JUMP can be an alternative like this:
# JUMP Scene2
# we can equally do something like this
# JUMP (switch var4 [[0, RouteA], [1, RouteB], [default, RouteC]])
END

View file

@ -4,7 +4,7 @@ pub fn build(b: *std.Build) void {
const target = b.standardTargetOptions(.{});
const optimize = b.standardOptimizeOption(.{});
const lib_mod = b.createModule(.{
.root_source_file = b.path("src/NouVeL.zig"),
.root_source_file = b.path("src/parser.zig"),
.target = target,
.optimize = optimize,
});

View file

@ -1,6 +0,0 @@
const std = @import("std");
const parser = @import("parser.zig");
/// Returns `a + b`. Declared `export`, so the symbol is visible to non-Zig callers.
pub export fn add(a: i32, b: i32) i32 {
    const sum = a + b;
    return sum;
}

View file

@ -1,4 +1,5 @@
const lib = @import("NouVeL_lib");
const std = @import("std");
const nvl = @import("NouVeL_lib");
const c = @cImport({
@cDefine("SDL_DISABLE_OLD_NAMES", {});
@cInclude("SDL3/SDL.h");
@ -11,7 +12,6 @@ const c = @cImport({
/// Program entry point: marks SDL's main as ready, runs the SDL app-callback
/// loop, and returns its status reduced to a single byte.
pub fn main() u8 {
    c.SDL_SetMainReady();
    // Keep only the low 8 bits of the status, then reinterpret them as unsigned.
    const exit_code: i8 = @truncate(c.SDL_EnterAppMainCallbacks(0, null, init, iterate, on_event, quit));
    return @bitCast(exit_code);
}
@ -33,6 +33,19 @@ fn init(appstate: ?*?*anyopaque, argc: c_int, argv: ?[*:null]?[*:0]u8) callconv(
c.SDL_Log("Couldn't create window/renderer: %s", c.SDL_GetError());
return c.SDL_APP_FAILURE;
}
// var gpa = std.heap.GeneralPurposeAllocator(.{}){};
// const ally = gpa.allocator();
// defer {
// const deinit_status = gpa.deinit();
// if (deinit_status == .leak) @panic("TEST FAIL");
// }
// var list = std.ArrayList(nvl.Command).init(ally);
// defer list.deinit();
// @embedFile("test.nvl")
nvl.parse() catch {};
return c.SDL_APP_CONTINUE;
}

View file

@ -1,9 +1,4 @@
// A value in the script language: exactly one of the variants below is active at a time.
const Object = union(enum) {
    // Text of a symbol (presumably a variable or command name -- confirm against the parser).
    symbol: []u8,
    // Numeric value; every number is stored as a 32-bit float.
    number: f32,
    // Contents of a string value.
    string: []u8,
    // Nested list of values.
    array: []Object,
};
const std = @import("std");
const numeric = "1234567890";
const decimal_dot = ".";
@ -15,7 +10,7 @@ const array_delim = ",";
const group_open = "(";
const group_close = ")";
const quote = "\\";
const comment_begin = "#";
const comment_begin = '#';
const dialogue_open = "<<-";
const dialogue_close = "->>";
const begin = "BEGIN";
@ -24,7 +19,7 @@ const symbol = alpha ++ numeric ++ "_";
const special_symbols = .{ "+", "-", "*", "/", "=?", ">?", "<?", "<=?", ">=?" };
const ws = " \t\r\n"; // there was also \v and \f in C++
// Every byte that ends a token. `comment_begin` is a character literal ('#'), and `++` only
// joins arrays, so it is wrapped in a one-element array before being appended.
const separator = ws ++ array_open ++ array_close ++ group_open ++ group_close ++ array_delim ++ [_]u8{comment_begin};
const newline = "\n";
const newline = '\n';
const escaped = "\\\"";
const escape = "\\";
@ -41,3 +36,249 @@ const template_close = "}";
const command_escape = "*!";
const dialogue_escaped_single = escape ++ markup_open ++ markup_close ++ markup_text_open ++ markup_text_close ++ template_ind;
/// Result type used by the parsers in this file.
///
/// A successful parse carries the produced value in `parse` and the
/// unconsumed input in `rest`; a failed parse is `null`.
fn Parse(comptime T: type) type {
    const Success = struct {
        parse: T,
        rest: []const u8,
    };
    return ?Success;
}
/// Returns the field descriptors of the struct or tuple value `T`
/// (for a tuple of parsers: one entry per parser, named "0", "1", ...).
fn parsers_fields(comptime T: anytype) []const std.builtin.Type.StructField {
    const container = @typeInfo(@TypeOf(T));
    return container.@"struct".fields;
}
/// Builds a parser that tries each parser in `parsers` in order and reports
/// which one matched.
///
/// `parsers` is a tuple of parser types, each exposing
/// `fn parse([]const u8) Parse(Out)`. The combined parser's payload is a
/// tagged union with one variant per parser. A variant is named after the
/// parser's type name with everything up to and including the first '.'
/// removed, and holds that parser's output type. The first parser that
/// succeeds wins; if none succeeds the result is `null`.
fn alternative_tagged(comptime parsers: anytype) type {
    const fields = parsers_fields(parsers);
    std.debug.assert(fields.len > 1);

    var tag_fields: [fields.len]std.builtin.Type.EnumField = undefined;
    var out_types: [fields.len]std.builtin.Type.UnionField = undefined;
    inline for (fields, 0..) |field, i| {
        const name = @typeName(@field(parsers, field.name));
        // `indexOf` yields `?usize`; a name without a dot must start at offset 0
        // (a `-1` fallback cannot be represented in `usize`).
        const variant_start = if (std.mem.indexOf(u8, name, ".")) |dot| dot + 1 else 0;
        const variant_name = name[variant_start..];

        const parser_out = @typeInfo(@TypeOf(@field(parsers, field.name).parse)).@"fn".return_type.?;
        const parse_struct = @typeInfo(parser_out).optional.child;
        const out = @typeInfo(parse_struct).@"struct".fields[0];
        std.debug.assert(std.mem.eql(u8, out.name, "parse"));

        tag_fields[i] = .{ .name = variant_name, .value = i };
        out_types[i] = .{
            .name = variant_name,
            .type = out.type,
            .alignment = 0,
        };
    }
    // Freeze the tables so the generated `parse` below can refer to them.
    const tags = tag_fields;
    const variants = out_types;

    const out_tag = @Type(.{ .@"enum" = .{
        .tag_type = std.math.IntFittingRange(0, fields.len - 1),
        .fields = &tags,
        .decls = &.{},
        .is_exhaustive = true,
    } });
    const out_union = @Type(.{ .@"union" = .{
        .layout = .auto,
        .tag_type = out_tag,
        .fields = &variants,
        .decls = &.{},
    } });

    return struct {
        fn parse(buf: []const u8) Parse(out_union) {
            inline for (fields, variants) |field, variant| {
                if (@field(parsers, field.name).parse(buf)) |hit| {
                    // Wrap the sub-parser's payload in the variant that belongs to it.
                    return .{
                        .parse = @unionInit(out_union, variant.name, hit.parse),
                        .rest = hit.rest,
                    };
                }
            }
            return null;
        }
    };
}
/// Builds a parser that tries each parser in `parsers` in order and returns
/// the first success, or `null` if every parser fails.
///
/// `parsers` is a tuple of parser types, each exposing
/// `fn parse([]const u8) Parse(Out)`. If all parsers share one output type,
/// that type is the combined payload. Otherwise the payload is an untagged
/// union with one field per distinct output type, named by `@typeName` of that
/// type; being untagged, it does not let the caller switch on which field is
/// active.
fn alternative(comptime parsers: anytype) type {
    const fields = parsers_fields(parsers);
    std.debug.assert(fields.len > 1);

    var out_types: [fields.len]std.builtin.Type.UnionField = undefined;
    var unique_out = 0;
    outer: inline for (fields) |field| {
        const parser_out = @typeInfo(@TypeOf(@field(parsers, field.name).parse)).@"fn".return_type.?;
        const parse_struct = @typeInfo(parser_out).optional.child;
        const out = @typeInfo(parse_struct).@"struct".fields[0];
        std.debug.assert(std.mem.eql(u8, out.name, "parse"));
        // Record each distinct output type only once.
        for (0..unique_out) |i| {
            if (out_types[i].type == out.type) {
                continue :outer;
            }
        }
        out_types[unique_out] = .{
            .name = @typeName(out.type),
            .type = out.type,
            .alignment = 0,
        };
        unique_out += 1;
    }

    const single_output = unique_out == 1;
    const combinator_out = if (single_output) out_types[0].type else @Type(.{ .@"union" = .{
        .layout = .auto,
        .tag_type = null,
        .fields = out_types[0..unique_out],
        .decls = &.{},
    } });

    return struct {
        fn parse(buf: []const u8) Parse(combinator_out) {
            inline for (fields) |field| {
                if (@field(parsers, field.name).parse(buf)) |hit| {
                    if (single_output) {
                        // Every parser already produces `Parse(combinator_out)`.
                        return hit;
                    } else {
                        // The union field is named after the payload's type (see above).
                        return .{
                            .parse = @unionInit(combinator_out, @typeName(@TypeOf(hit.parse)), hit.parse),
                            .rest = hit.rest,
                        };
                    }
                }
            }
            return null;
        }
    };
}
/// Builds a parser that runs every parser in `parsers` one after another, each
/// on the input left over by the previous one.
///
/// Succeeds only if all parsers succeed. The payload is the prefix of `buf`
/// consumed by the whole sequence (not the individual parsers' payloads), so
/// the result type is always `Parse([]const u8)` regardless of what the
/// sub-parsers produce. The prefix length is computed from the length of the
/// final `rest`, which assumes every sub-parser returns a suffix of its input.
fn sequence(comptime parsers: anytype) type {
    const fields = parsers_fields(parsers);
    std.debug.assert(fields.len > 1);

    // Check that every element looks like a parser: `parse` returns an
    // optional struct whose first field is called "parse".
    inline for (fields) |field| {
        const parser_out = @typeInfo(@TypeOf(@field(parsers, field.name).parse)).@"fn".return_type.?;
        const parse_struct = @typeInfo(parser_out).optional.child;
        const out = @typeInfo(parse_struct).@"struct".fields[0];
        std.debug.assert(std.mem.eql(u8, out.name, "parse"));
    }

    return struct {
        fn parse(buf: []const u8) Parse([]const u8) {
            var rest = buf;
            inline for (fields) |field| {
                const step = @field(parsers, field.name).parse(rest) orelse return null;
                rest = step.rest;
            }
            return .{ .parse = buf[0 .. buf.len - rest.len], .rest = rest };
        }
    };
}
/// Builds a parser that matches exactly the text `s` at the start of the input.
///
/// On success the payload is `s` and `rest` is the input after it; if the
/// input does not start with `s` the result is `null`.
fn LiteralParser(comptime s: []const u8) type {
    return struct {
        fn parse(buf: []const u8) Parse([]const u8) {
            if (!std.mem.startsWith(u8, buf, s)) return null;
            return .{ .parse = s, .rest = buf[s.len..] };
        }
    };
}
/// Builds a parser that consumes the longest leading run of bytes that all
/// occur in `cs`.
///
/// The run may be empty, so this parser never returns `null`: the payload is
/// the matched prefix and `rest` is everything after it.
fn AnyParser(comptime cs: []const u8) type {
    return struct {
        fn parse(buf: []const u8) Parse([]const u8) {
            // Index of the first byte that is not in `cs`; none means the whole input matched.
            const stop = std.mem.indexOfNone(u8, buf, cs) orelse
                return .{ .parse = buf, .rest = &[0]u8{} };
            return .{ .parse = buf[0..stop], .rest = buf[stop..] };
        }
    };
}
// Consumes the leading run of decimal digits. The run may be empty, so this never fails.
const IntParser = AnyParser(numeric);
// Digits, then the decimal dot, then digits; the payload is the whole matched text.
// Because IntParser accepts an empty run, either side of the dot may be empty.
const FloatParser = sequence(.{ IntParser, LiteralParser(decimal_dot), IntParser });
// const SymbolParser = struct {
// fn parse(buf: []u8) Parse([]u8) {
// return .{ .parse = 0, .rest = buf };
// }
// };
// const NumberParser = struct {
// fn parse(buf: []u8) Parse([]u8) {
// return .{ .parse = 0, .rest = buf };
// }
// };
// const asdf = sequence(.{ ParseSymbol, ParseNumber });
// fn skip_whitespace(buf: []const u8) usize {
// for (0..buf.len) |i| {
// if (std.mem.containsAtLeastScalar(u8, ws, 1, buf[i])) {
// continue;
// } else {
// return i;
// }
// }
// return buf.len;
// }
// fn match_scalar(buf: []const u8, match: u8) ?[]const u8 {
// const i = skip_whitespace(buf);
// if (buf.len > i and buf[i] == match) {
// return buf[i + 1..];
// } else {
// return null;
// }
// }
// fn match_sequence(buf: []const u8, match: []const u8) ?[]const u8 {
// const i = skip_whitespace(buf);
// if (buf.len > i and std.mem.startsWith(u8, buf[i..], match)) {
// return buf[i + match.len..];
// } else {
// return null;
// }
// }
// fn match_comment(buf: []const u8) ?[]const u8 {
// return match_scalar(buf, '#');
// }
// fn parse_symbol() !Object {
// }
// fn parse_args(buf: []const u8) ![]Object {
// _ = buf;
// }
// fn parse_command(buf: []const u8) !Command {
// _ = buf;
// return ParseError.BadParse;
// }
// Consumes the leading run of characters from `ws`. The run may be empty, so this never fails.
const WhitespaceParser = AnyParser(ws);
/// Smoke test for the parser combinators: runs `FloatParser` and `IntParser`
/// on fixed inputs at compile time and prints the matched text and the
/// remainder to stderr. The line-by-line script parser is still the
/// commented-out draft below.
pub fn parse() !void {
    const float_demo = comptime FloatParser.parse("23.45325asdasd");
    if (float_demo) |p| {
        std.debug.print("parse: {s}, rest: {s}\n", .{ p.parse, p.rest });
    }

    const int_demo = comptime IntParser.parse("114514 errr...");
    if (int_demo) |p| {
        std.debug.print("parse: {s}, rest: {s}\n", .{ p.parse, p.rest });
    }

    // var dialogue = false;
    // var it = std.mem.tokenizeScalar(u8, nvl, newline);
    // while (it.next()) |line| {
    // _ = match_comment(line) orelse {
    // if (dialogue) {
    // _ = match_sequence(line, dialogue_close) orelse {
    // dialogue = false;
    // continue;
    // };
    // if (match_sequence(line, command_escape)) |cmd| {
    // try commands.append(try parse_command(cmd));
    // } else {
    // return ParseError.BadParse;
    // }
    // }
    // else {
    // _ = match_sequence(line, dialogue_open) orelse {
    // dialogue = true;
    // continue;
    // };
    // try commands.append(try parse_command(line));
    // }
    // };
    // }
}

67
src/test.nvl Normal file
View file

@ -0,0 +1,67 @@
# This new syntax should be easier to parse although parsing wasn't really the issue with the last syntax
# I really want the syntax to feel a lot more like the actual script for a play or something of the sort
# THINK ABOUT THIS Newlines are the end of a statement in this syntax, I think this mirrors text formatting pretty well
# I find that the "BEGIN" things in some older languages are actually quite fitting
BEGIN Scene1
Command "test"
Do 5 [2, 3, 6]
Set var 2
# Make the actual commands like Lisp, not the entire language
# strip the outermost brackets since this syntax will be parsed line by line
Set var2 (=? var 1)
Set var3 (+ var var2)
# Enter and exit dialogue mode with <<- and ->>
<<-
# Comments are only legal at the beginning like this in Dialogue mode
# IDEA: once NVL is parsed, set up indices for each line
# Make another program to match audio
[Alex]
Hello. Welcome to dialogue mode.
Every new line is a new "click". I discarded the "pause" idea.
[Bailey]
Je ne suis plus Alex.
Ça ne m'arrête pas de vous démontrer notre support multilingue noblement distingué.
[Catherine]
CJKテスト
夏はマシンガン。
[Dick]
I present my idea for markup syntax here.
It is something like [b,i]{this}.
It's kind of like KP's TyperTags, only that you can specify more than one effect in the square brackets in front.
If the effect is parametrized in some way invoke it like [wiggle_y(5)]{this}.
Then naturally we would have ruby like [rb("that")]{this} (important feature, trust me).
[Elliot]
To grab a value from the environment, do it like this: ${var}.
This is also the syntax to evaluate a command from dialogue mode.
If the return is void it will say "undefined" or something like that.
*! This "is a command in dialogue mode."
*! Set var (+ var 1)
[NARRATION]
Thanks!
*! ClearDialog
->>
# something like Choice can load the index of the choice the user selects into a variable
Choice var4 ["Apple", "Orange"]
# After thinking about this for a while I think it would be a good idea
# to have ways to terminate a scene other than END, for instance JUMP can be an alternative like this:
# JUMP Scene2
# we can equally do something like this
# JUMP (switch var4 [[0, RouteA], [1, RouteB], [default, RouteC]])
END