diff --git a/.editorconfig b/.editorconfig
index b3102b7..beced30 100644
--- a/.editorconfig
+++ b/.editorconfig
@@ -15,12 +15,12 @@ indent_size = 2
indent_style = space
indent_size = 2
-[*.rs]
-indent_style = space
-indent_size = 4
-
[*.{c,cc,h}]
indent_style = space
+indent_size = 2
+
+[*.rs]
+indent_style = space
indent_size = 4
[*.{py,pyi}]
diff --git a/.gitattributes b/.gitattributes
index ffb52ab..78fcbf8 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -4,6 +4,9 @@ src/*.json linguist-generated
src/parser.c linguist-generated
src/tree_sitter/* linguist-generated
+src/schema.generated.c linguist-generated
+schema/src/** linguist-generated
+
bindings/** linguist-generated
binding.gyp linguist-generated
setup.py linguist-generated
diff --git a/Makefile b/Makefile
index 6abb10a..0944cb0 100644
--- a/Makefile
+++ b/Makefile
@@ -27,11 +27,13 @@ INCLUDEDIR ?= $(PREFIX)/include
LIBDIR ?= $(PREFIX)/lib
PCLIBDIR ?= $(LIBDIR)/pkgconfig
-# object files
-OBJS := $(patsubst %.c,%.o,$(wildcard $(SRC_DIR)/*.c))
+# source/object files
+PARSER := src/parser.c
+SCANNER := src/scanner.c
+OBJS := $(patsubst %.c,%.o,$(PARSER) $(SCANNER))
# flags
-ARFLAGS := rcs
+ARFLAGS ?= rcs
override CFLAGS += -I$(SRC_DIR) -std=c11 -fPIC
# OS-specific bits
@@ -81,8 +83,8 @@ $(LANGUAGE_NAME).pc: bindings/c/$(LANGUAGE_NAME).pc.in
-e 's|=$(PREFIX)|=$${prefix}|' \
-e 's|@PREFIX@|$(PREFIX)|' $< > $@
-$(SRC_DIR)/parser.c: grammar.js
- $(TS) generate --no-bindings
+$(PARSER): $(SRC_DIR)/grammar.json
+ $(TS) generate --no-bindings $^
install: all
install -d '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter '$(DESTDIR)$(PCLIBDIR)' '$(DESTDIR)$(LIBDIR)'
diff --git a/grammar-schema.js b/grammar-schema.js
deleted file mode 100644
index bbb0389..0000000
--- a/grammar-schema.js
+++ /dev/null
@@ -1,25 +0,0 @@
-// Ref: https://yaml.org/spec/1.2/spec.html#schema/core/
-
-module.exports = grammar({
- name: "schema",
- extras: $ => [],
- rules: {
- pln: $ => choice($.nul, $.bol, $.int, $.flt),
- nul: $ => /~|null|Null|NULL/,
- bol: $ => /true|True|TRUE|false|False|FALSE/,
- int: $ => or([
- /[-+]?[0-9]+/, // base 10
- /0o[0-7]+/, // base 8
- /0x[0-9a-fA-F]+/, // base 16
- ]),
- flt: $ => or([
- /[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?/, // number
- /[-+]?(\.inf|\.Inf|\.INF)/, // infinity
- /(\.nan|\.NaN|\.NAN)/, // not a number
- ]),
- },
-});
-
-function or(regexList) {
- return new RegExp(regexList.map(regex => `(${regex.source})`).join('|'));
-}
diff --git a/package.json b/package.json
index d371c9e..d2fb394 100644
--- a/package.json
+++ b/package.json
@@ -29,7 +29,8 @@
"prebuilds/**",
"bindings/node/*",
"queries/*",
- "src/**"
+ "src/**",
+ "schema/**"
],
"dependencies": {
"node-addon-api": "^8.0.0",
@@ -49,6 +50,7 @@
},
"scripts": {
"build": "tree-sitter generate --no-bindings",
+ "postbuild": "node schema/update-schema.js",
"test": "tree-sitter test",
"install": "node-gyp-build",
"prebuildify": "prebuildify --napi --strip"
@@ -63,7 +65,10 @@
"yml",
"yaml"
],
- "injection-regex": "^ya?ml$"
+ "injection-regex": "^ya?ml$",
+ "external-files": [
+ "src/schema.generated.c"
+ ]
}
]
}
diff --git a/schema/core/grammar.js b/schema/core/grammar.js
new file mode 100644
index 0000000..f4bbede
--- /dev/null
+++ b/schema/core/grammar.js
@@ -0,0 +1,34 @@
+/**
+ * @see {@link https://yaml.org/spec/1.2/spec.html#schema/core|Core Schema}
+ */
+
+/// <reference types="tree-sitter-cli/dsl" />
+
+module.exports = grammar({
+ name: "core_schema",
+
+ extras: _ => [], // empty on purpose: no token (not even whitespace) may appear around the scalar
+
+ rules: {
+ scalar: $ => choice($.null, $.bool, $.int, $.float), // first rule listed, i.e. the start rule
+
+ null: _ => token(choice("~", "null", "Null", "NULL")), // only these exact spellings match
+
+ bool: _ => token(choice("true", "True", "TRUE", "false", "False", "FALSE")), // only these exact spellings match
+
+ int: _ => token(choice(
+ /[-+]?[0-9]+/, // base 10 (optional sign)
+ /0o[0-7]+/, // base 8 (no sign accepted)
+ /0x[0-9a-fA-F]+/, // base 16 (no sign accepted)
+ )),
+
+ float: _ => token(choice(
+ /[-+]?(\.\d+|\d+(\.\d*)?)([eE][-+]?\d+)?/, // number
+ seq(
+ optional(choice("-", "+")),
+ choice(".inf", ".Inf", ".INF")
+ ), // infinity
+ choice(".nan", ".NaN", ".NAN"), // not a number
+ )),
+ },
+});
diff --git a/schema/core/package.json b/schema/core/package.json
new file mode 100644
index 0000000..90ca874
--- /dev/null
+++ b/schema/core/package.json
@@ -0,0 +1,6 @@
+{
+ "private": true,
+ "scripts": {
+ "build": "tree-sitter generate --no-bindings"
+ }
+}
diff --git a/schema/core/src/grammar.json b/schema/core/src/grammar.json
new file mode 100644
index 0000000..5b48018
--- /dev/null
+++ b/schema/core/src/grammar.json
@@ -0,0 +1,180 @@
+{
+ "name": "core_schema",
+ "rules": {
+ "scalar": {
+ "type": "CHOICE",
+ "members": [
+ {
+ "type": "SYMBOL",
+ "name": "null"
+ },
+ {
+ "type": "SYMBOL",
+ "name": "bool"
+ },
+ {
+ "type": "SYMBOL",
+ "name": "int"
+ },
+ {
+ "type": "SYMBOL",
+ "name": "float"
+ }
+ ]
+ },
+ "null": {
+ "type": "TOKEN",
+ "content": {
+ "type": "CHOICE",
+ "members": [
+ {
+ "type": "STRING",
+ "value": "~"
+ },
+ {
+ "type": "STRING",
+ "value": "null"
+ },
+ {
+ "type": "STRING",
+ "value": "Null"
+ },
+ {
+ "type": "STRING",
+ "value": "NULL"
+ }
+ ]
+ }
+ },
+ "bool": {
+ "type": "TOKEN",
+ "content": {
+ "type": "CHOICE",
+ "members": [
+ {
+ "type": "STRING",
+ "value": "true"
+ },
+ {
+ "type": "STRING",
+ "value": "True"
+ },
+ {
+ "type": "STRING",
+ "value": "TRUE"
+ },
+ {
+ "type": "STRING",
+ "value": "false"
+ },
+ {
+ "type": "STRING",
+ "value": "False"
+ },
+ {
+ "type": "STRING",
+ "value": "FALSE"
+ }
+ ]
+ }
+ },
+ "int": {
+ "type": "TOKEN",
+ "content": {
+ "type": "CHOICE",
+ "members": [
+ {
+ "type": "PATTERN",
+ "value": "[-+]?[0-9]+"
+ },
+ {
+ "type": "PATTERN",
+ "value": "0o[0-7]+"
+ },
+ {
+ "type": "PATTERN",
+ "value": "0x[0-9a-fA-F]+"
+ }
+ ]
+ }
+ },
+ "float": {
+ "type": "TOKEN",
+ "content": {
+ "type": "CHOICE",
+ "members": [
+ {
+ "type": "PATTERN",
+ "value": "[-+]?(\\.\\d+|\\d+(\\.\\d*)?)([eE][-+]?\\d+)?"
+ },
+ {
+ "type": "SEQ",
+ "members": [
+ {
+ "type": "CHOICE",
+ "members": [
+ {
+ "type": "CHOICE",
+ "members": [
+ {
+ "type": "STRING",
+ "value": "-"
+ },
+ {
+ "type": "STRING",
+ "value": "+"
+ }
+ ]
+ },
+ {
+ "type": "BLANK"
+ }
+ ]
+ },
+ {
+ "type": "CHOICE",
+ "members": [
+ {
+ "type": "STRING",
+ "value": ".inf"
+ },
+ {
+ "type": "STRING",
+ "value": ".Inf"
+ },
+ {
+ "type": "STRING",
+ "value": ".INF"
+ }
+ ]
+ }
+ ]
+ },
+ {
+ "type": "CHOICE",
+ "members": [
+ {
+ "type": "STRING",
+ "value": ".nan"
+ },
+ {
+ "type": "STRING",
+ "value": ".NaN"
+ },
+ {
+ "type": "STRING",
+ "value": ".NAN"
+ }
+ ]
+ }
+ ]
+ }
+ }
+ },
+ "extras": [],
+ "conflicts": [],
+ "precedences": [],
+ "externals": [],
+ "inline": [],
+ "supertypes": []
+}
diff --git a/schema/core/src/node-types.json b/schema/core/src/node-types.json
new file mode 100644
index 0000000..29cca72
--- /dev/null
+++ b/schema/core/src/node-types.json
@@ -0,0 +1,45 @@
+[
+ {
+ "type": "scalar",
+ "named": true,
+ "fields": {},
+ "children": {
+ "multiple": false,
+ "required": true,
+ "types": [
+ {
+ "type": "bool",
+ "named": true
+ },
+ {
+ "type": "float",
+ "named": true
+ },
+ {
+ "type": "int",
+ "named": true
+ },
+ {
+ "type": "null",
+ "named": true
+ }
+ ]
+ }
+ },
+ {
+ "type": "bool",
+ "named": true
+ },
+ {
+ "type": "float",
+ "named": true
+ },
+ {
+ "type": "int",
+ "named": true
+ },
+ {
+ "type": "null",
+ "named": true
+ }
+]
\ No newline at end of file
diff --git a/schema/core/src/parser.c b/schema/core/src/parser.c
new file mode 100644
index 0000000..d60811e
--- /dev/null
+++ b/schema/core/src/parser.c
@@ -0,0 +1,360 @@
+#include "tree_sitter/parser.h"
+
+#if defined(__GNUC__) || defined(__clang__)
+#pragma GCC diagnostic ignored "-Wmissing-field-initializers"
+#endif
+
+#define LANGUAGE_VERSION 14
+#define STATE_COUNT 4
+#define LARGE_STATE_COUNT 2
+#define SYMBOL_COUNT 6
+#define ALIAS_COUNT 0
+#define TOKEN_COUNT 5
+#define EXTERNAL_TOKEN_COUNT 0
+#define FIELD_COUNT 0
+#define MAX_ALIAS_SEQUENCE_LENGTH 1
+#define PRODUCTION_ID_COUNT 1
+
+enum ts_symbol_identifiers {
+ sym_null = 1,
+ sym_bool = 2,
+ sym_int = 3,
+ sym_float = 4,
+ sym_scalar = 5,
+};
+
+static const char * const ts_symbol_names[] = {
+ [ts_builtin_sym_end] = "end",
+ [sym_null] = "null",
+ [sym_bool] = "bool",
+ [sym_int] = "int",
+ [sym_float] = "float",
+ [sym_scalar] = "scalar",
+};
+
+static const TSSymbol ts_symbol_map[] = {
+ [ts_builtin_sym_end] = ts_builtin_sym_end,
+ [sym_null] = sym_null,
+ [sym_bool] = sym_bool,
+ [sym_int] = sym_int,
+ [sym_float] = sym_float,
+ [sym_scalar] = sym_scalar,
+};
+
+static const TSSymbolMetadata ts_symbol_metadata[] = {
+ [ts_builtin_sym_end] = {
+ .visible = false,
+ .named = true,
+ },
+ [sym_null] = {
+ .visible = true,
+ .named = true,
+ },
+ [sym_bool] = {
+ .visible = true,
+ .named = true,
+ },
+ [sym_int] = {
+ .visible = true,
+ .named = true,
+ },
+ [sym_float] = {
+ .visible = true,
+ .named = true,
+ },
+ [sym_scalar] = {
+ .visible = true,
+ .named = true,
+ },
+};
+
+static const TSSymbol ts_alias_sequences[PRODUCTION_ID_COUNT][MAX_ALIAS_SEQUENCE_LENGTH] = {
+ [0] = {0},
+};
+
+static const uint16_t ts_non_terminal_alias_map[] = {
+ 0,
+};
+
+static const TSStateId ts_primary_state_ids[STATE_COUNT] = {
+ [0] = 0,
+ [1] = 1,
+ [2] = 2,
+ [3] = 3,
+};
+
+static bool ts_lex(TSLexer *lexer, TSStateId state) {
+ START_LEXER();
+ eof = lexer->eof(lexer);
+ switch (state) {
+ case 0:
+ if (eof) ADVANCE(34);
+ if (lookahead == '+' ||
+ lookahead == '-') ADVANCE(1);
+ if (lookahead == '.') ADVANCE(6);
+ if (lookahead == '0') ADVANCE(37);
+ if (lookahead == 'F') ADVANCE(2);
+ if (lookahead == 'N') ADVANCE(16);
+ if (lookahead == 'T') ADVANCE(13);
+ if (lookahead == 'f') ADVANCE(17);
+ if (lookahead == 'n') ADVANCE(29);
+ if (lookahead == 't') ADVANCE(26);
+ if (lookahead == '~') ADVANCE(35);
+ if (('1' <= lookahead && lookahead <= '9')) ADVANCE(38);
+ END_STATE();
+ case 1:
+ if (lookahead == '.') ADVANCE(7);
+ if (('0' <= lookahead && lookahead <= '9')) ADVANCE(38);
+ END_STATE();
+ case 2:
+ if (lookahead == 'A') ADVANCE(9);
+ if (lookahead == 'a') ADVANCE(22);
+ END_STATE();
+ case 3:
+ if (lookahead == 'A') ADVANCE(12);
+ if (lookahead == 'a') ADVANCE(12);
+ END_STATE();
+ case 4:
+ if (lookahead == 'E') ADVANCE(36);
+ END_STATE();
+ case 5:
+ if (lookahead == 'F') ADVANCE(41);
+ END_STATE();
+ case 6:
+ if (lookahead == 'I') ADVANCE(11);
+ if (lookahead == 'N') ADVANCE(3);
+ if (lookahead == 'i') ADVANCE(24);
+ if (lookahead == 'n') ADVANCE(18);
+ if (('0' <= lookahead && lookahead <= '9')) ADVANCE(42);
+ END_STATE();
+ case 7:
+ if (lookahead == 'I') ADVANCE(11);
+ if (lookahead == 'i') ADVANCE(24);
+ if (('0' <= lookahead && lookahead <= '9')) ADVANCE(42);
+ END_STATE();
+ case 8:
+ if (lookahead == 'L') ADVANCE(35);
+ END_STATE();
+ case 9:
+ if (lookahead == 'L') ADVANCE(14);
+ END_STATE();
+ case 10:
+ if (lookahead == 'L') ADVANCE(8);
+ END_STATE();
+ case 11:
+ if (lookahead == 'N') ADVANCE(5);
+ if (lookahead == 'n') ADVANCE(20);
+ END_STATE();
+ case 12:
+ if (lookahead == 'N') ADVANCE(41);
+ END_STATE();
+ case 13:
+ if (lookahead == 'R') ADVANCE(15);
+ if (lookahead == 'r') ADVANCE(28);
+ END_STATE();
+ case 14:
+ if (lookahead == 'S') ADVANCE(4);
+ END_STATE();
+ case 15:
+ if (lookahead == 'U') ADVANCE(4);
+ END_STATE();
+ case 16:
+ if (lookahead == 'U') ADVANCE(10);
+ if (lookahead == 'u') ADVANCE(23);
+ END_STATE();
+ case 17:
+ if (lookahead == 'a') ADVANCE(22);
+ END_STATE();
+ case 18:
+ if (lookahead == 'a') ADVANCE(25);
+ END_STATE();
+ case 19:
+ if (lookahead == 'e') ADVANCE(36);
+ END_STATE();
+ case 20:
+ if (lookahead == 'f') ADVANCE(41);
+ END_STATE();
+ case 21:
+ if (lookahead == 'l') ADVANCE(35);
+ END_STATE();
+ case 22:
+ if (lookahead == 'l') ADVANCE(27);
+ END_STATE();
+ case 23:
+ if (lookahead == 'l') ADVANCE(21);
+ END_STATE();
+ case 24:
+ if (lookahead == 'n') ADVANCE(20);
+ END_STATE();
+ case 25:
+ if (lookahead == 'n') ADVANCE(41);
+ END_STATE();
+ case 26:
+ if (lookahead == 'r') ADVANCE(28);
+ END_STATE();
+ case 27:
+ if (lookahead == 's') ADVANCE(19);
+ END_STATE();
+ case 28:
+ if (lookahead == 'u') ADVANCE(19);
+ END_STATE();
+ case 29:
+ if (lookahead == 'u') ADVANCE(23);
+ END_STATE();
+ case 30:
+ if (lookahead == '+' ||
+ lookahead == '-') ADVANCE(32);
+ if (('0' <= lookahead && lookahead <= '9')) ADVANCE(43);
+ END_STATE();
+ case 31:
+ if (('0' <= lookahead && lookahead <= '7')) ADVANCE(39);
+ END_STATE();
+ case 32:
+ if (('0' <= lookahead && lookahead <= '9')) ADVANCE(43);
+ END_STATE();
+ case 33:
+ if (('0' <= lookahead && lookahead <= '9') ||
+ ('A' <= lookahead && lookahead <= 'F') ||
+ ('a' <= lookahead && lookahead <= 'f')) ADVANCE(40);
+ END_STATE();
+ case 34:
+ ACCEPT_TOKEN(ts_builtin_sym_end);
+ END_STATE();
+ case 35:
+ ACCEPT_TOKEN(sym_null);
+ END_STATE();
+ case 36:
+ ACCEPT_TOKEN(sym_bool);
+ END_STATE();
+ case 37:
+ ACCEPT_TOKEN(sym_int);
+ if (lookahead == '.') ADVANCE(42);
+ if (lookahead == 'o') ADVANCE(31);
+ if (lookahead == 'x') ADVANCE(33);
+ if (lookahead == 'E' ||
+ lookahead == 'e') ADVANCE(30);
+ if (('0' <= lookahead && lookahead <= '9')) ADVANCE(38);
+ END_STATE();
+ case 38:
+ ACCEPT_TOKEN(sym_int);
+ if (lookahead == '.') ADVANCE(42);
+ if (lookahead == 'E' ||
+ lookahead == 'e') ADVANCE(30);
+ if (('0' <= lookahead && lookahead <= '9')) ADVANCE(38);
+ END_STATE();
+ case 39:
+ ACCEPT_TOKEN(sym_int);
+ if (('0' <= lookahead && lookahead <= '7')) ADVANCE(39);
+ END_STATE();
+ case 40:
+ ACCEPT_TOKEN(sym_int);
+ if (('0' <= lookahead && lookahead <= '9') ||
+ ('A' <= lookahead && lookahead <= 'F') ||
+ ('a' <= lookahead && lookahead <= 'f')) ADVANCE(40);
+ END_STATE();
+ case 41:
+ ACCEPT_TOKEN(sym_float);
+ END_STATE();
+ case 42:
+ ACCEPT_TOKEN(sym_float);
+ if (lookahead == 'E' ||
+ lookahead == 'e') ADVANCE(30);
+ if (('0' <= lookahead && lookahead <= '9')) ADVANCE(42);
+ END_STATE();
+ case 43:
+ ACCEPT_TOKEN(sym_float);
+ if (('0' <= lookahead && lookahead <= '9')) ADVANCE(43);
+ END_STATE();
+ default:
+ return false;
+ }
+}
+
+static const TSLexMode ts_lex_modes[STATE_COUNT] = {
+ [0] = {.lex_state = 0},
+ [1] = {.lex_state = 0},
+ [2] = {.lex_state = 0},
+ [3] = {.lex_state = 0},
+};
+
+static const uint16_t ts_parse_table[LARGE_STATE_COUNT][SYMBOL_COUNT] = {
+ [0] = {
+ [ts_builtin_sym_end] = ACTIONS(1),
+ [sym_null] = ACTIONS(1),
+ [sym_bool] = ACTIONS(1),
+ [sym_int] = ACTIONS(1),
+ [sym_float] = ACTIONS(1),
+ },
+ [1] = {
+ [sym_scalar] = STATE(3),
+ [sym_null] = ACTIONS(3),
+ [sym_bool] = ACTIONS(3),
+ [sym_int] = ACTIONS(5),
+ [sym_float] = ACTIONS(5),
+ },
+};
+
+static const uint16_t ts_small_parse_table[] = {
+ [0] = 1,
+ ACTIONS(7), 1,
+ ts_builtin_sym_end,
+ [4] = 1,
+ ACTIONS(9), 1,
+ ts_builtin_sym_end,
+};
+
+static const uint32_t ts_small_parse_table_map[] = {
+ [SMALL_STATE(2)] = 0,
+ [SMALL_STATE(3)] = 4,
+};
+
+static const TSParseActionEntry ts_parse_actions[] = {
+ [0] = {.entry = {.count = 0, .reusable = false}},
+ [1] = {.entry = {.count = 1, .reusable = false}}, RECOVER(),
+ [3] = {.entry = {.count = 1, .reusable = true}}, SHIFT(2),
+ [5] = {.entry = {.count = 1, .reusable = false}}, SHIFT(2),
+ [7] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_scalar, 1),
+ [9] = {.entry = {.count = 1, .reusable = true}}, ACCEPT_INPUT(),
+};
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+#ifdef TREE_SITTER_HIDE_SYMBOLS
+#define TS_PUBLIC
+#elif defined(_WIN32)
+#define TS_PUBLIC __declspec(dllexport)
+#else
+#define TS_PUBLIC __attribute__((visibility("default")))
+#endif
+
+TS_PUBLIC const TSLanguage *tree_sitter_core_schema() {
+ static const TSLanguage language = {
+ .version = LANGUAGE_VERSION,
+ .symbol_count = SYMBOL_COUNT,
+ .alias_count = ALIAS_COUNT,
+ .token_count = TOKEN_COUNT,
+ .external_token_count = EXTERNAL_TOKEN_COUNT,
+ .state_count = STATE_COUNT,
+ .large_state_count = LARGE_STATE_COUNT,
+ .production_id_count = PRODUCTION_ID_COUNT,
+ .field_count = FIELD_COUNT,
+ .max_alias_sequence_length = MAX_ALIAS_SEQUENCE_LENGTH,
+ .parse_table = &ts_parse_table[0][0],
+ .small_parse_table = ts_small_parse_table,
+ .small_parse_table_map = ts_small_parse_table_map,
+ .parse_actions = ts_parse_actions,
+ .symbol_names = ts_symbol_names,
+ .symbol_metadata = ts_symbol_metadata,
+ .public_symbol_map = ts_symbol_map,
+ .alias_map = ts_non_terminal_alias_map,
+ .alias_sequences = &ts_alias_sequences[0][0],
+ .lex_modes = ts_lex_modes,
+ .lex_fn = ts_lex,
+ .primary_state_ids = ts_primary_state_ids,
+ };
+ return &language;
+}
+#ifdef __cplusplus
+}
+#endif
diff --git a/schema/core/src/tree_sitter/alloc.h b/schema/core/src/tree_sitter/alloc.h
new file mode 100644
index 0000000..1f4466d
--- /dev/null
+++ b/schema/core/src/tree_sitter/alloc.h
@@ -0,0 +1,54 @@
+#ifndef TREE_SITTER_ALLOC_H_
+#define TREE_SITTER_ALLOC_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+// Allow clients to override allocation functions
+#ifdef TREE_SITTER_REUSE_ALLOCATOR
+
+extern void *(*ts_current_malloc)(size_t);
+extern void *(*ts_current_calloc)(size_t, size_t);
+extern void *(*ts_current_realloc)(void *, size_t);
+extern void (*ts_current_free)(void *);
+
+#ifndef ts_malloc
+#define ts_malloc ts_current_malloc
+#endif
+#ifndef ts_calloc
+#define ts_calloc ts_current_calloc
+#endif
+#ifndef ts_realloc
+#define ts_realloc ts_current_realloc
+#endif
+#ifndef ts_free
+#define ts_free ts_current_free
+#endif
+
+#else
+
+#ifndef ts_malloc
+#define ts_malloc malloc
+#endif
+#ifndef ts_calloc
+#define ts_calloc calloc
+#endif
+#ifndef ts_realloc
+#define ts_realloc realloc
+#endif
+#ifndef ts_free
+#define ts_free free
+#endif
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // TREE_SITTER_ALLOC_H_
diff --git a/schema/core/src/tree_sitter/array.h b/schema/core/src/tree_sitter/array.h
new file mode 100644
index 0000000..15a3b23
--- /dev/null
+++ b/schema/core/src/tree_sitter/array.h
@@ -0,0 +1,290 @@
+#ifndef TREE_SITTER_ARRAY_H_
+#define TREE_SITTER_ARRAY_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "./alloc.h"
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#ifdef _MSC_VER
+#pragma warning(disable : 4101)
+#elif defined(__GNUC__) || defined(__clang__)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#endif
+
+#define Array(T) \
+ struct { \
+ T *contents; \
+ uint32_t size; \
+ uint32_t capacity; \
+ }
+
+/// Initialize an array.
+#define array_init(self) \
+ ((self)->size = 0, (self)->capacity = 0, (self)->contents = NULL)
+
+/// Create an empty array.
+#define array_new() \
+ { NULL, 0, 0 }
+
+/// Get a pointer to the element at a given `index` in the array.
+#define array_get(self, _index) \
+ (assert((uint32_t)(_index) < (self)->size), &(self)->contents[_index])
+
+/// Get a pointer to the first element in the array.
+#define array_front(self) array_get(self, 0)
+
+/// Get a pointer to the last element in the array.
+#define array_back(self) array_get(self, (self)->size - 1)
+
+/// Clear the array, setting its size to zero. Note that this does not free any
+/// memory allocated for the array's contents.
+#define array_clear(self) ((self)->size = 0)
+
+/// Reserve `new_capacity` elements of space in the array. If `new_capacity` is
+/// less than the array's current capacity, this function has no effect.
+#define array_reserve(self, new_capacity) \
+ _array__reserve((Array *)(self), array_elem_size(self), new_capacity)
+
+/// Free any memory allocated for this array. Note that this does not free any
+/// memory allocated for the array's contents.
+#define array_delete(self) _array__delete((Array *)(self))
+
+/// Push a new `element` onto the end of the array.
+#define array_push(self, element) \
+ (_array__grow((Array *)(self), 1, array_elem_size(self)), \
+ (self)->contents[(self)->size++] = (element))
+
+/// Increase the array's size by `count` elements.
+/// New elements are zero-initialized.
+#define array_grow_by(self, count) \
+ do { \
+ if ((count) == 0) break; \
+ _array__grow((Array *)(self), count, array_elem_size(self)); \
+ memset((self)->contents + (self)->size, 0, (count) * array_elem_size(self)); \
+ (self)->size += (count); \
+ } while (0)
+
+/// Append all elements from one array to the end of another.
+#define array_push_all(self, other) \
+ array_extend((self), (other)->size, (other)->contents)
+
+/// Append `count` elements to the end of the array, reading their values from the
+/// `contents` pointer.
+#define array_extend(self, count, contents) \
+ _array__splice( \
+ (Array *)(self), array_elem_size(self), (self)->size, \
+ 0, count, contents \
+ )
+
+/// Remove `old_count` elements from the array starting at the given `index`. At
+/// the same index, insert `new_count` new elements, reading their values from the
+/// `new_contents` pointer.
+#define array_splice(self, _index, old_count, new_count, new_contents) \
+ _array__splice( \
+ (Array *)(self), array_elem_size(self), _index, \
+ old_count, new_count, new_contents \
+ )
+
+/// Insert one `element` into the array at the given `index`.
+#define array_insert(self, _index, element) \
+ _array__splice((Array *)(self), array_elem_size(self), _index, 0, 1, &(element))
+
+/// Remove one element from the array at the given `index`.
+#define array_erase(self, _index) \
+ _array__erase((Array *)(self), array_elem_size(self), _index)
+
+/// Pop the last element off the array, returning the element by value.
+#define array_pop(self) ((self)->contents[--(self)->size])
+
+/// Assign the contents of one array to another, reallocating if necessary.
+#define array_assign(self, other) \
+ _array__assign((Array *)(self), (const Array *)(other), array_elem_size(self))
+
+/// Swap one array with another
+#define array_swap(self, other) \
+ _array__swap((Array *)(self), (Array *)(other))
+
+/// Get the size of the array contents
+#define array_elem_size(self) (sizeof *(self)->contents)
+
+/// Search a sorted array for a given `needle` value, using the given `compare`
+/// callback to determine the order.
+///
+/// If an existing element is found to be equal to `needle`, then the `index`
+/// out-parameter is set to the existing value's index, and the `exists`
+/// out-parameter is set to true. Otherwise, `index` is set to an index where
+/// `needle` should be inserted in order to preserve the sorting, and `exists`
+/// is set to false.
+#define array_search_sorted_with(self, compare, needle, _index, _exists) \
+ _array__search_sorted(self, 0, compare, , needle, _index, _exists)
+
+/// Search a sorted array for a given `needle` value, using integer comparisons
+/// of a given struct field (specified with a leading dot) to determine the order.
+///
+/// See also `array_search_sorted_with`.
+#define array_search_sorted_by(self, field, needle, _index, _exists) \
+ _array__search_sorted(self, 0, _compare_int, field, needle, _index, _exists)
+
+/// Insert a given `value` into a sorted array, using the given `compare`
+/// callback to determine the order.
+#define array_insert_sorted_with(self, compare, value) \
+ do { \
+ unsigned _index, _exists; \
+ array_search_sorted_with(self, compare, &(value), &_index, &_exists); \
+ if (!_exists) array_insert(self, _index, value); \
+ } while (0)
+
+/// Insert a given `value` into a sorted array, using integer comparisons of
+/// a given struct field (specified with a leading dot) to determine the order.
+///
+/// See also `array_search_sorted_by`.
+#define array_insert_sorted_by(self, field, value) \
+ do { \
+ unsigned _index, _exists; \
+ array_search_sorted_by(self, field, (value) field, &_index, &_exists); \
+ if (!_exists) array_insert(self, _index, value); \
+ } while (0)
+
+// Private
+
+typedef Array(void) Array;
+
+/// This is not what you're looking for, see `array_delete`.
+static inline void _array__delete(Array *self) {
+ if (self->contents) {
+ ts_free(self->contents);
+ self->contents = NULL;
+ self->size = 0;
+ self->capacity = 0;
+ }
+}
+
+/// This is not what you're looking for, see `array_erase`.
+static inline void _array__erase(Array *self, size_t element_size,
+ uint32_t index) {
+ assert(index < self->size);
+ char *contents = (char *)self->contents;
+ memmove(contents + index * element_size, contents + (index + 1) * element_size,
+ (self->size - index - 1) * element_size);
+ self->size--;
+}
+
+/// This is not what you're looking for, see `array_reserve`.
+static inline void _array__reserve(Array *self, size_t element_size, uint32_t new_capacity) {
+ if (new_capacity > self->capacity) {
+ if (self->contents) {
+ self->contents = ts_realloc(self->contents, new_capacity * element_size);
+ } else {
+ self->contents = ts_malloc(new_capacity * element_size);
+ }
+ self->capacity = new_capacity;
+ }
+}
+
+/// This is not what you're looking for, see `array_assign`.
+static inline void _array__assign(Array *self, const Array *other, size_t element_size) {
+ _array__reserve(self, element_size, other->size);
+ self->size = other->size;
+ memcpy(self->contents, other->contents, self->size * element_size);
+}
+
+/// This is not what you're looking for, see `array_swap`.
+static inline void _array__swap(Array *self, Array *other) {
+ Array swap = *other;
+ *other = *self;
+ *self = swap;
+}
+
+/// This is not what you're looking for, see `array_push` or `array_grow_by`.
+static inline void _array__grow(Array *self, uint32_t count, size_t element_size) {
+ uint32_t new_size = self->size + count;
+ if (new_size > self->capacity) {
+ uint32_t new_capacity = self->capacity * 2;
+ if (new_capacity < 8) new_capacity = 8;
+ if (new_capacity < new_size) new_capacity = new_size;
+ _array__reserve(self, element_size, new_capacity);
+ }
+}
+
+/// This is not what you're looking for, see `array_splice`.
+static inline void _array__splice(Array *self, size_t element_size,
+ uint32_t index, uint32_t old_count,
+ uint32_t new_count, const void *elements) {
+ uint32_t new_size = self->size + new_count - old_count;
+ uint32_t old_end = index + old_count;
+ uint32_t new_end = index + new_count;
+ assert(old_end <= self->size);
+
+ _array__reserve(self, element_size, new_size);
+
+ char *contents = (char *)self->contents;
+ if (self->size > old_end) {
+ memmove(
+ contents + new_end * element_size,
+ contents + old_end * element_size,
+ (self->size - old_end) * element_size
+ );
+ }
+ if (new_count > 0) {
+ if (elements) {
+ memcpy(
+ (contents + index * element_size),
+ elements,
+ new_count * element_size
+ );
+ } else {
+ memset(
+ (contents + index * element_size),
+ 0,
+ new_count * element_size
+ );
+ }
+ }
+ self->size += new_count - old_count;
+}
+
+/// A binary search routine, based on Rust's `std::slice::binary_search_by`.
+/// This is not what you're looking for, see `array_search_sorted_with` or `array_search_sorted_by`.
+#define _array__search_sorted(self, start, compare, suffix, needle, _index, _exists) \
+ do { \
+ *(_index) = start; \
+ *(_exists) = false; \
+ uint32_t size = (self)->size - *(_index); \
+ if (size == 0) break; \
+ int comparison; \
+ while (size > 1) { \
+ uint32_t half_size = size / 2; \
+ uint32_t mid_index = *(_index) + half_size; \
+ comparison = compare(&((self)->contents[mid_index] suffix), (needle)); \
+ if (comparison <= 0) *(_index) = mid_index; \
+ size -= half_size; \
+ } \
+ comparison = compare(&((self)->contents[*(_index)] suffix), (needle)); \
+ if (comparison == 0) *(_exists) = true; \
+ else if (comparison < 0) *(_index) += 1; \
+ } while (0)
+
+/// Helper macro for the `_sorted_by` routines below. This takes the left (existing)
+/// parameter by reference in order to work with the generic sorting function above.
+#define _compare_int(a, b) ((int)*(a) - (int)(b))
+
+#ifdef _MSC_VER
+#pragma warning(default : 4101)
+#elif defined(__GNUC__) || defined(__clang__)
+#pragma GCC diagnostic pop
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // TREE_SITTER_ARRAY_H_
diff --git a/schema/core/src/tree_sitter/parser.h b/schema/core/src/tree_sitter/parser.h
new file mode 100644
index 0000000..17b4fde
--- /dev/null
+++ b/schema/core/src/tree_sitter/parser.h
@@ -0,0 +1,230 @@
+#ifndef TREE_SITTER_PARSER_H_
+#define TREE_SITTER_PARSER_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+#define ts_builtin_sym_error ((TSSymbol)-1)
+#define ts_builtin_sym_end 0
+#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024
+
+#ifndef TREE_SITTER_API_H_
+typedef uint16_t TSStateId;
+typedef uint16_t TSSymbol;
+typedef uint16_t TSFieldId;
+typedef struct TSLanguage TSLanguage;
+#endif
+
+typedef struct {
+ TSFieldId field_id;
+ uint8_t child_index;
+ bool inherited;
+} TSFieldMapEntry;
+
+typedef struct {
+ uint16_t index;
+ uint16_t length;
+} TSFieldMapSlice;
+
+typedef struct {
+ bool visible;
+ bool named;
+ bool supertype;
+} TSSymbolMetadata;
+
+typedef struct TSLexer TSLexer;
+
+struct TSLexer {
+ int32_t lookahead;
+ TSSymbol result_symbol;
+ void (*advance)(TSLexer *, bool);
+ void (*mark_end)(TSLexer *);
+ uint32_t (*get_column)(TSLexer *);
+ bool (*is_at_included_range_start)(const TSLexer *);
+ bool (*eof)(const TSLexer *);
+};
+
+typedef enum {
+ TSParseActionTypeShift,
+ TSParseActionTypeReduce,
+ TSParseActionTypeAccept,
+ TSParseActionTypeRecover,
+} TSParseActionType;
+
+typedef union {
+ struct {
+ uint8_t type;
+ TSStateId state;
+ bool extra;
+ bool repetition;
+ } shift;
+ struct {
+ uint8_t type;
+ uint8_t child_count;
+ TSSymbol symbol;
+ int16_t dynamic_precedence;
+ uint16_t production_id;
+ } reduce;
+ uint8_t type;
+} TSParseAction;
+
+typedef struct {
+ uint16_t lex_state;
+ uint16_t external_lex_state;
+} TSLexMode;
+
+typedef union {
+ TSParseAction action;
+ struct {
+ uint8_t count;
+ bool reusable;
+ } entry;
+} TSParseActionEntry;
+
+struct TSLanguage {
+ uint32_t version;
+ uint32_t symbol_count;
+ uint32_t alias_count;
+ uint32_t token_count;
+ uint32_t external_token_count;
+ uint32_t state_count;
+ uint32_t large_state_count;
+ uint32_t production_id_count;
+ uint32_t field_count;
+ uint16_t max_alias_sequence_length;
+ const uint16_t *parse_table;
+ const uint16_t *small_parse_table;
+ const uint32_t *small_parse_table_map;
+ const TSParseActionEntry *parse_actions;
+ const char * const *symbol_names;
+ const char * const *field_names;
+ const TSFieldMapSlice *field_map_slices;
+ const TSFieldMapEntry *field_map_entries;
+ const TSSymbolMetadata *symbol_metadata;
+ const TSSymbol *public_symbol_map;
+ const uint16_t *alias_map;
+ const TSSymbol *alias_sequences;
+ const TSLexMode *lex_modes;
+ bool (*lex_fn)(TSLexer *, TSStateId);
+ bool (*keyword_lex_fn)(TSLexer *, TSStateId);
+ TSSymbol keyword_capture_token;
+ struct {
+ const bool *states;
+ const TSSymbol *symbol_map;
+ void *(*create)(void);
+ void (*destroy)(void *);
+ bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist);
+ unsigned (*serialize)(void *, char *);
+ void (*deserialize)(void *, const char *, unsigned);
+ } external_scanner;
+ const TSStateId *primary_state_ids;
+};
+
+/*
+ * Lexer Macros
+ */
+
+#ifdef _MSC_VER
+#define UNUSED __pragma(warning(suppress : 4101))
+#else
+#define UNUSED __attribute__((unused))
+#endif
+
+#define START_LEXER() \
+ bool result = false; \
+ bool skip = false; \
+ UNUSED \
+ bool eof = false; \
+ int32_t lookahead; \
+ goto start; \
+ next_state: \
+ lexer->advance(lexer, skip); \
+ start: \
+ skip = false; \
+ lookahead = lexer->lookahead;
+
+#define ADVANCE(state_value) \
+ { \
+ state = state_value; \
+ goto next_state; \
+ }
+
+#define SKIP(state_value) \
+ { \
+ skip = true; \
+ state = state_value; \
+ goto next_state; \
+ }
+
+#define ACCEPT_TOKEN(symbol_value) \
+ result = true; \
+ lexer->result_symbol = symbol_value; \
+ lexer->mark_end(lexer);
+
+#define END_STATE() return result;
+
+/*
+ * Parse Table Macros
+ */
+
+#define SMALL_STATE(id) ((id) - LARGE_STATE_COUNT)
+
+#define STATE(id) id
+
+#define ACTIONS(id) id
+
+#define SHIFT(state_value) \
+ {{ \
+ .shift = { \
+ .type = TSParseActionTypeShift, \
+ .state = (state_value) \
+ } \
+ }}
+
+#define SHIFT_REPEAT(state_value) \
+ {{ \
+ .shift = { \
+ .type = TSParseActionTypeShift, \
+ .state = (state_value), \
+ .repetition = true \
+ } \
+ }}
+
+#define SHIFT_EXTRA() \
+ {{ \
+ .shift = { \
+ .type = TSParseActionTypeShift, \
+ .extra = true \
+ } \
+ }}
+
+#define REDUCE(symbol_val, child_count_val, ...) \
+ {{ \
+ .reduce = { \
+ .type = TSParseActionTypeReduce, \
+ .symbol = symbol_val, \
+ .child_count = child_count_val, \
+ __VA_ARGS__ \
+ }, \
+ }}
+
+#define RECOVER() \
+ {{ \
+ .type = TSParseActionTypeRecover \
+ }}
+
+#define ACCEPT_INPUT() \
+ {{ \
+ .type = TSParseActionTypeAccept \
+ }}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // TREE_SITTER_PARSER_H_
diff --git a/schema/update-schema.js b/schema/update-schema.js
new file mode 100755
index 0000000..22fa805
--- /dev/null
+++ b/schema/update-schema.js
@@ -0,0 +1,158 @@
+#!/usr/bin/env node
+
+// @ts-nocheck
+
+// Regenerates src/schema.generated.c from the lexer in <schema>/src/parser.c:
+// each case of ts_lex's `switch (state)` becomes a branch of a standalone
+// adv_sch_stt() function, and accepted symbols become ResultSchema enumerators.
+//
+// Usage: update-schema.js [schema]   (the schema directory defaults to "core")
+
+const { writeFileSync } = require("fs");
+const { readFile } = require("fs/promises");
+const { join } = require("path");
+
+readFile(join(__dirname, process.argv[2] ?? "core", "src", "parser.c"), "utf8").then(input => {
+  const cases = extractCases(input);
+  // RS_STR is seeded so it is always declared: the fallbacks below emit it
+  // even if no ACCEPT_TOKEN maps to it.
+  const enums = ["RS_STR"];
+  const content = "switch (sch_stt) " + block([
+    "case SCH_STT_FRZ:\n break;",
+    cases
+      .map(([key, { content }]) => `${(key === "default" ? "default:" : `case ${key}:`)}\n${indent(content)}`)
+      .join("\n END_STATE();\n")
+      // A transition returns the next state; if the target state begins by
+      // accepting a token, that acceptance is inlined, else RS_STR is reported.
+      .replace(/ADVANCE\((\d+)\);/g, (_, state) => {
+        const stateCase = cases.find(([key]) => key === state);
+        if (stateCase) {
+          const [, { acceptToken }] = stateCase;
+          if (acceptToken) {
+            return `{${acceptToken} return ${state};}`;
+          }
+        }
+        return `{*rlt_sch = RS_STR; return ${state};}`;
+      })
+      .replace("ACCEPT_TOKEN(ts_builtin_sym_end);", "abort();")
+      // Every other accepted symbol becomes an RS_* enumerator, collected in
+      // order of first appearance.
+      .replace(/ACCEPT_TOKEN\((\w+)\);/g, (_, name) => {
+        const newName = "RS_" + convertName(name);
+        if (!enums.includes(newName)) {
+          enums.push(newName);
+        }
+        return `*rlt_sch = ${newName};`;
+      })
+      .replace(/END_STATE\(\);/g, `break;`)
+      .replace("return false;", '*rlt_sch = RS_STR;\n return SCH_STT_FRZ;')
+      .replace(/lookahead/g, "cur_chr"),
+  ]);
+  writeFileSync(
+    join(__dirname, "..", "src", "schema.generated.c"),
+    [
+      // abort() in the generated code is declared by <stdlib.h>.
+      "#include <stdlib.h>",
+      "#define SCH_STT_FRZ -1",
+      `typedef enum ${block(enums.map((k) => `${k},`))} ResultSchema;`,
+      `static int8_t adv_sch_stt(int8_t sch_stt, int32_t cur_chr, ResultSchema *rlt_sch) ${block([
+        content,
+        `if (cur_chr != '\\r' && cur_chr != '\\n' && cur_chr != ' ' && cur_chr != 0) *rlt_sch = RS_STR;`,
+        "return SCH_STT_FRZ;",
+      ])}`,
+    ].join("\n\n") + "\n",
+  );
+}).catch(error => {
+  // Report the failure and exit non-zero instead of leaving the rejection unhandled.
+  console.error(error);
+  process.exitCode = 1;
+});
+
+/**
+ * Splits the body of ts_lex's `switch (state)` into one entry per case.
+ *
+ * @param {string} input - Contents of a parser.c file.
+ * @returns {Array} `[label, { content, acceptToken, hasAcceptTokenOnly }]` pairs;
+ *   label is the text before ":\n" with any leading "case " removed.
+ * @throws {Error} If ts_lex, its state switch, or the switch's end is not found.
+ */
+function extractCases(input) {
+  const MAIN_SIGNATURE = "static bool ts_lex(TSLexer *lexer, TSStateId state) {";
+  const SWITCH_CASE = "switch (state) {\n";
+  const signatureIndex = input.indexOf(MAIN_SIGNATURE);
+  if (signatureIndex === -1) {
+    throw new Error(`Cannot find "${MAIN_SIGNATURE}" in the parser source`);
+  }
+  const switchIndex = input.indexOf(SWITCH_CASE, signatureIndex);
+  if (switchIndex === -1) {
+    throw new Error("Cannot find the state switch inside ts_lex");
+  }
+  const startIndex = switchIndex + SWITCH_CASE.length;
+  const endIndex = input.indexOf("}\n}", startIndex);
+  if (endIndex === -1) {
+    throw new Error("Cannot find the end of the state switch inside ts_lex");
+  }
+  // Drop `if (eof) ...` lines: adv_sch_stt has no eof variable.
+  const content = input.slice(startIndex, endIndex).replace(/^\s*if \(eof\).+\n/mg, "").trimEnd();
+  return dedent(dedent(content)).split("END_STATE();").map(text => {
+    const index = text.indexOf(":\n");
+    const key = text.slice(0, index).trim().replace(/^case /, "");
+    const content = dedent(text.slice(index + 2)).trim();
+    const matchAcceptToken = content.match(/^ACCEPT_TOKEN\(\w+\);/);
+    const acceptToken = matchAcceptToken && matchAcceptToken[0];
+    const hasAcceptTokenOnly = acceptToken && acceptToken.length === content.length;
+    return [key, { content, acceptToken, hasAcceptTokenOnly }];
+  });
+}
+
+/**
+ * Derives the suffix of an RS_* enumerator from a symbol name: the first
+ * "sym_" is removed and the rest is upper-cased.
+ *
+ * @param {string} name - Symbol name captured from ACCEPT_TOKEN(...).
+ * @returns {string} The upper-cased suffix.
+ */
+function convertName(name) {
+  return name.replace("sym_", "").toUpperCase();
+}
+
+/**
+ * Wraps the given lines in braces, indenting them one level.
+ *
+ * @param {string|string[]} contents - A string, or a list of (possibly multi-line) strings.
+ * @returns {string} "{", the indented contents and "}" on separate lines.
+ */
+function block(contents) {
+  return `{\n${indent(contents)}\n}`;
+}
+
+/**
+ * Flattens a string or list of strings into an array of single lines.
+ *
+ * @param {string|string[]} contents - A string, or a list of (possibly multi-line) strings.
+ * @returns {string[]} One entry per "\n"-separated line.
+ */
+function lines(contents) {
+  return [].concat(contents).join("\n").split("\n");
+}
+
+/**
+ * Prefixes every line with two spaces.
+ *
+ * @param {string|string[]} contents - A string, or a list of (possibly multi-line) strings.
+ * @returns {string} The indented lines joined with "\n".
+ */
+function indent(contents) {
+  return lines(contents).map(x => " ".repeat(2) + x).join("\n");
+}
+
+/**
+ * Removes the leading whitespace matched by the pattern below from every line.
+ * NOTE(review): the pattern removes one space while indent() adds two - confirm the intended width.
+ *
+ * @param {string|string[]} contents - A string, or a list of (possibly multi-line) strings.
+ * @returns {string} The dedented lines joined with "\n".
+ */
+function dedent(contents) {
+  return lines(contents).map(x => x.replace(/^ /mg, "")).join("\n");
+}
diff --git a/scripts/update-schema.js b/scripts/update-schema.js
old mode 100644
new mode 100755
index 124c6b2..0689349
--- a/scripts/update-schema.js
+++ b/scripts/update-schema.js
@@ -1,14 +1,15 @@
+#!/usr/bin/env node
+
+// @ts-nocheck
+
const fs = require("fs");
const path = require("path");
-const getStdin = require("get-stdin");
-const STATE_FREEZE = -1;
-
-getStdin().then(stdin => {
- const cases = extractCases(stdin);
- const enums = ["RS_STR"];
+fs.promises.readFile("./schema/src/parser.c", "utf8").then(input => {
+ const cases = extractCases(input);
+ const enums = [];
const content = "switch (sch_stt) " + block([
- `case ${STATE_FREEZE}:\n break;`,
+ "case SCH_STT_FRZ:\n break;",
cases
.map(([key, { content }]) => `${(key === "default" ? "default:" : `case ${key}:`)}\n${indent(content)}`)
.join("\n END_STATE();\n")
@@ -22,41 +23,39 @@ getStdin().then(stdin => {
}
return `{*rlt_sch = RS_STR; return ${state};}`;
})
- .replace("ACCEPT_TOKEN(ts_builtin_sym_end);", "assert(false);")
+ .replace("ACCEPT_TOKEN(ts_builtin_sym_end);", "abort();")
.replace(/ACCEPT_TOKEN\((\w+)\);/g, (_, name) => {
- const newName = "RS_" + name.replace("sym_", "").toUpperCase();
+ const newName = "RS_" + convertName(name);
if (!enums.includes(newName)) {
enums.push(newName);
}
return `*rlt_sch = ${newName};`;
})
.replace(/END_STATE\(\);/g, `break;`)
- .replace("return false;", `*rlt_sch = RS_STR;\n return ${STATE_FREEZE};`)
+ .replace("return false;", '*rlt_sch = RS_STR;\n return SCH_STT_FRZ;')
.replace(/lookahead/g, "cur_chr"),
]);
fs.writeFileSync(
- path.resolve(__dirname, "../src/schema.generated.cc"),
+ path.resolve(__dirname, "../src/schema.generated.c"),
[
- `#include `,
- `namespace tree_sitter_yaml {`,
- `const int8_t SCH_STT_FRZ = ${STATE_FREEZE};`,
- `enum ResultSchema ${block(enums.map((k) => `${k},`))};`,
+ "#include ",
+ "#define SCH_STT_FRZ -1",
+ `typedef enum ${block(enums.map((k) => `${k},`))} ResultSchema;`,
`int8_t adv_sch_stt(int8_t sch_stt, int32_t cur_chr, ResultSchema *rlt_sch) ${block([
content,
`if (cur_chr != '\\r' && cur_chr != '\\n' && cur_chr != ' ' && cur_chr != 0) *rlt_sch = RS_STR;`,
- `return ${STATE_FREEZE};`,
+ "return SCH_STT_FRZ;",
])}`,
- `}`,
].join("\n\n"),
);
});
-function extractCases(stdin) {
+function extractCases(input) {
const MAIN_SIGNATURE = "static bool ts_lex(TSLexer *lexer, TSStateId state) {";
const SWITCH_CASE = "switch (state) {\n";
- const startIndex = stdin.indexOf(SWITCH_CASE, stdin.indexOf(MAIN_SIGNATURE)) + SWITCH_CASE.length;
- const endIndex = stdin.indexOf("}\n}", startIndex);
- const content = stdin.slice(startIndex, endIndex).replace(/^\s*if \(eof\).+\n/mg, "").trimEnd();
+ const startIndex = input.indexOf(SWITCH_CASE, input.indexOf(MAIN_SIGNATURE)) + SWITCH_CASE.length;
+ const endIndex = input.indexOf("}\n}", startIndex);
+ const content = input.slice(startIndex, endIndex).replace(/^\s*if \(eof\).+\n/mg, "").trimEnd();
return dedent(dedent(content)).split("END_STATE();").map(text => {
const index = text.indexOf(":\n");
const key = text.slice(0, index).trim().replace(/^case /, "");
@@ -68,6 +67,19 @@ function extractCases(stdin) {
});
}
+function convertName(name) { // symbol name -> suffix that the caller appends to "RS_"
+ return { // explicit overrides for specific symbol names
+ anon_sym_: "NULL",
+ sym__base_10: "INT",
+ sym__base_8: "INT",
+ sym__base_16: "INT",
+ sym__number: "FLOAT",
+ sym__infinity: "FLOAT",
+ sym__not_a_number: "FLOAT",
+ aux_sym_str_token1: "STR",
+ }[name] || name.replace("sym_", "").toUpperCase(); // no override: drop the first "sym_" and upper-case
+}
+
function block(contents) {
return `{\n${indent(contents)}\n}`;
}
diff --git a/src/scanner.c b/src/scanner.c
index b037dce..b7d41f2 100644
--- a/src/scanner.c
+++ b/src/scanner.c
@@ -2,11 +2,10 @@
#include "tree_sitter/parser.h"
#include "./schema.generated.c"
-#include
// clang-format off
-typedef enum {
+typedef enum {
END_OF_FILE,
S_DIR_YML_BGN, R_DIR_YML_VER,
@@ -118,10 +117,10 @@ typedef enum {
}
#define SGL_PLN_SYM(POS, CTX) \
- (scanner->rlt_sch == RS_NUL ? POS##_SGL_PLN_NUL_##CTX \
- : scanner->rlt_sch == RS_BOL ? POS##_SGL_PLN_BOL_##CTX \
+ (scanner->rlt_sch == RS_NULL ? POS##_SGL_PLN_NUL_##CTX \
+ : scanner->rlt_sch == RS_BOOL ? POS##_SGL_PLN_BOL_##CTX \
: scanner->rlt_sch == RS_INT ? POS##_SGL_PLN_INT_##CTX \
- : scanner->rlt_sch == RS_FLT ? POS##_SGL_PLN_FLT_##CTX \
+ : scanner->rlt_sch == RS_FLOAT ? POS##_SGL_PLN_FLT_##CTX \
: POS##_SGL_PLN_STR_##CTX)
typedef struct {
@@ -549,6 +548,7 @@ static bool scn_dqt_esc_seq(Scanner *scanner, TSLexer *lexer, TSSymbol result_sy
case 'v':
case 'r':
case 'e':
+ case 'f':
case ' ':
case '"':
case '/':
diff --git a/src/schema.generated.c b/src/schema.generated.c
index 2b77b09..1e7e318 100644
--- a/src/schema.generated.c
+++ b/src/schema.generated.c
@@ -1,22 +1,22 @@
-#include
-#include
-#include
+#include
-const int8_t SCH_STT_FRZ = -1;
+#define SCH_STT_FRZ -1
typedef enum {
RS_STR,
RS_INT,
- RS_NUL,
- RS_BOL,
- RS_FLT,
+ RS_NULL,
+ RS_BOOL,
+ RS_FLOAT,
} ResultSchema;
-int8_t adv_sch_stt(int8_t sch_stt, int32_t cur_chr, ResultSchema *rlt_sch) {
+static int8_t adv_sch_stt(int8_t sch_stt, int32_t cur_chr, ResultSchema *rlt_sch) {
switch (sch_stt) {
- case -1:
+ case SCH_STT_FRZ:
break;
case 0:
+ if (cur_chr == '+' ||
+ cur_chr == '-') {*rlt_sch = RS_STR; return 1;}
if (cur_chr == '.') {*rlt_sch = RS_STR; return 6;}
if (cur_chr == '0') {*rlt_sch = RS_INT; return 37;}
if (cur_chr == 'F') {*rlt_sch = RS_STR; return 2;}
@@ -25,9 +25,7 @@ int8_t adv_sch_stt(int8_t sch_stt, int32_t cur_chr, ResultSchema *rlt_sch) {
if (cur_chr == 'f') {*rlt_sch = RS_STR; return 17;}
if (cur_chr == 'n') {*rlt_sch = RS_STR; return 29;}
if (cur_chr == 't') {*rlt_sch = RS_STR; return 26;}
- if (cur_chr == '~') {*rlt_sch = RS_NUL; return 35;}
- if (cur_chr == '+' ||
- cur_chr == '-') {*rlt_sch = RS_STR; return 1;}
+ if (cur_chr == '~') {*rlt_sch = RS_NULL; return 35;}
if (('1' <= cur_chr && cur_chr <= '9')) {*rlt_sch = RS_INT; return 38;}
break;
case 1:
@@ -43,25 +41,25 @@ int8_t adv_sch_stt(int8_t sch_stt, int32_t cur_chr, ResultSchema *rlt_sch) {
if (cur_chr == 'a') {*rlt_sch = RS_STR; return 12;}
break;
case 4:
- if (cur_chr == 'E') {*rlt_sch = RS_BOL; return 36;}
+ if (cur_chr == 'E') {*rlt_sch = RS_BOOL; return 36;}
break;
case 5:
- if (cur_chr == 'F') {*rlt_sch = RS_FLT; return 41;}
+ if (cur_chr == 'F') {*rlt_sch = RS_FLOAT; return 41;}
break;
case 6:
if (cur_chr == 'I') {*rlt_sch = RS_STR; return 11;}
if (cur_chr == 'N') {*rlt_sch = RS_STR; return 3;}
if (cur_chr == 'i') {*rlt_sch = RS_STR; return 24;}
if (cur_chr == 'n') {*rlt_sch = RS_STR; return 18;}
- if (('0' <= cur_chr && cur_chr <= '9')) {*rlt_sch = RS_FLT; return 42;}
+ if (('0' <= cur_chr && cur_chr <= '9')) {*rlt_sch = RS_FLOAT; return 42;}
break;
case 7:
if (cur_chr == 'I') {*rlt_sch = RS_STR; return 11;}
if (cur_chr == 'i') {*rlt_sch = RS_STR; return 24;}
- if (('0' <= cur_chr && cur_chr <= '9')) {*rlt_sch = RS_FLT; return 42;}
+ if (('0' <= cur_chr && cur_chr <= '9')) {*rlt_sch = RS_FLOAT; return 42;}
break;
case 8:
- if (cur_chr == 'L') {*rlt_sch = RS_NUL; return 35;}
+ if (cur_chr == 'L') {*rlt_sch = RS_NULL; return 35;}
break;
case 9:
if (cur_chr == 'L') {*rlt_sch = RS_STR; return 14;}
@@ -74,7 +72,7 @@ int8_t adv_sch_stt(int8_t sch_stt, int32_t cur_chr, ResultSchema *rlt_sch) {
if (cur_chr == 'n') {*rlt_sch = RS_STR; return 20;}
break;
case 12:
- if (cur_chr == 'N') {*rlt_sch = RS_FLT; return 41;}
+ if (cur_chr == 'N') {*rlt_sch = RS_FLOAT; return 41;}
break;
case 13:
if (cur_chr == 'R') {*rlt_sch = RS_STR; return 15;}
@@ -97,13 +95,13 @@ int8_t adv_sch_stt(int8_t sch_stt, int32_t cur_chr, ResultSchema *rlt_sch) {
if (cur_chr == 'a') {*rlt_sch = RS_STR; return 25;}
break;
case 19:
- if (cur_chr == 'e') {*rlt_sch = RS_BOL; return 36;}
+ if (cur_chr == 'e') {*rlt_sch = RS_BOOL; return 36;}
break;
case 20:
- if (cur_chr == 'f') {*rlt_sch = RS_FLT; return 41;}
+ if (cur_chr == 'f') {*rlt_sch = RS_FLOAT; return 41;}
break;
case 21:
- if (cur_chr == 'l') {*rlt_sch = RS_NUL; return 35;}
+ if (cur_chr == 'l') {*rlt_sch = RS_NULL; return 35;}
break;
case 22:
if (cur_chr == 'l') {*rlt_sch = RS_STR; return 27;}
@@ -115,7 +113,7 @@ int8_t adv_sch_stt(int8_t sch_stt, int32_t cur_chr, ResultSchema *rlt_sch) {
if (cur_chr == 'n') {*rlt_sch = RS_STR; return 20;}
break;
case 25:
- if (cur_chr == 'n') {*rlt_sch = RS_FLT; return 41;}
+ if (cur_chr == 'n') {*rlt_sch = RS_FLOAT; return 41;}
break;
case 26:
if (cur_chr == 'r') {*rlt_sch = RS_STR; return 28;}
@@ -132,13 +130,13 @@ int8_t adv_sch_stt(int8_t sch_stt, int32_t cur_chr, ResultSchema *rlt_sch) {
case 30:
if (cur_chr == '+' ||
cur_chr == '-') {*rlt_sch = RS_STR; return 32;}
- if (('0' <= cur_chr && cur_chr <= '9')) {*rlt_sch = RS_FLT; return 43;}
+ if (('0' <= cur_chr && cur_chr <= '9')) {*rlt_sch = RS_FLOAT; return 43;}
break;
case 31:
if (('0' <= cur_chr && cur_chr <= '7')) {*rlt_sch = RS_INT; return 39;}
break;
case 32:
- if (('0' <= cur_chr && cur_chr <= '9')) {*rlt_sch = RS_FLT; return 43;}
+ if (('0' <= cur_chr && cur_chr <= '9')) {*rlt_sch = RS_FLOAT; return 43;}
break;
case 33:
if (('0' <= cur_chr && cur_chr <= '9') ||
@@ -146,17 +144,17 @@ int8_t adv_sch_stt(int8_t sch_stt, int32_t cur_chr, ResultSchema *rlt_sch) {
('a' <= cur_chr && cur_chr <= 'f')) {*rlt_sch = RS_INT; return 40;}
break;
case 34:
- assert(false);
+ abort();
break;
case 35:
- *rlt_sch = RS_NUL;
+ *rlt_sch = RS_NULL;
break;
case 36:
- *rlt_sch = RS_BOL;
+ *rlt_sch = RS_BOOL;
break;
case 37:
*rlt_sch = RS_INT;
- if (cur_chr == '.') {*rlt_sch = RS_FLT; return 42;}
+ if (cur_chr == '.') {*rlt_sch = RS_FLOAT; return 42;}
if (cur_chr == 'o') {*rlt_sch = RS_STR; return 31;}
if (cur_chr == 'x') {*rlt_sch = RS_STR; return 33;}
if (cur_chr == 'E' ||
@@ -165,7 +163,7 @@ int8_t adv_sch_stt(int8_t sch_stt, int32_t cur_chr, ResultSchema *rlt_sch) {
break;
case 38:
*rlt_sch = RS_INT;
- if (cur_chr == '.') {*rlt_sch = RS_FLT; return 42;}
+ if (cur_chr == '.') {*rlt_sch = RS_FLOAT; return 42;}
if (cur_chr == 'E' ||
cur_chr == 'e') {*rlt_sch = RS_STR; return 30;}
if (('0' <= cur_chr && cur_chr <= '9')) {*rlt_sch = RS_INT; return 38;}
@@ -181,24 +179,22 @@ int8_t adv_sch_stt(int8_t sch_stt, int32_t cur_chr, ResultSchema *rlt_sch) {
('a' <= cur_chr && cur_chr <= 'f')) {*rlt_sch = RS_INT; return 40;}
break;
case 41:
- *rlt_sch = RS_FLT;
+ *rlt_sch = RS_FLOAT;
break;
case 42:
- *rlt_sch = RS_FLT;
+ *rlt_sch = RS_FLOAT;
if (cur_chr == 'E' ||
cur_chr == 'e') {*rlt_sch = RS_STR; return 30;}
- if (('0' <= cur_chr && cur_chr <= '9')) {*rlt_sch = RS_FLT; return 42;}
+ if (('0' <= cur_chr && cur_chr <= '9')) {*rlt_sch = RS_FLOAT; return 42;}
break;
case 43:
- *rlt_sch = RS_FLT;
- if (('0' <= cur_chr && cur_chr <= '9')) {*rlt_sch = RS_FLT; return 43;}
+ *rlt_sch = RS_FLOAT;
+ if (('0' <= cur_chr && cur_chr <= '9')) {*rlt_sch = RS_FLOAT; return 43;}
break;
default:
*rlt_sch = RS_STR;
- return -1;
+ return SCH_STT_FRZ;
}
- if (cur_chr != '\r' && cur_chr != '\n' && cur_chr != ' ' && cur_chr != 0) {
- *rlt_sch = RS_STR;
- }
- return -1;
+ if (cur_chr != '\r' && cur_chr != '\n' && cur_chr != ' ' && cur_chr != 0) *rlt_sch = RS_STR;
+ return SCH_STT_FRZ;
}