From 49b7f874e68fd2d1e2f4046912ebdd6e74fb1ea5 Mon Sep 17 00:00:00 2001 From: ObserverOfTime Date: Tue, 30 Apr 2024 12:06:12 +0300 Subject: [PATCH] build: regenerate parser --- schema/core/src/parser.c | 28 ++++++++------- schema/core/src/tree_sitter/parser.h | 51 +++++++++++++++++++++++----- schema/json/src/parser.c | 4 +-- schema/json/src/tree_sitter/parser.h | 51 +++++++++++++++++++++++----- schema/update-schema.js | 3 ++ src/parser.c | 2 +- src/schema.core.c | 26 +++++++------- src/schema.json.c | 2 +- src/tree_sitter/parser.h | 7 ++-- 9 files changed, 125 insertions(+), 49 deletions(-) diff --git a/schema/core/src/parser.c b/schema/core/src/parser.c index d60811e..6a2bdcd 100644 --- a/schema/core/src/parser.c +++ b/schema/core/src/parser.c @@ -89,17 +89,19 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) { switch (state) { case 0: if (eof) ADVANCE(34); - if (lookahead == '+' || - lookahead == '-') ADVANCE(1); - if (lookahead == '.') ADVANCE(6); - if (lookahead == '0') ADVANCE(37); - if (lookahead == 'F') ADVANCE(2); - if (lookahead == 'N') ADVANCE(16); - if (lookahead == 'T') ADVANCE(13); - if (lookahead == 'f') ADVANCE(17); - if (lookahead == 'n') ADVANCE(29); - if (lookahead == 't') ADVANCE(26); - if (lookahead == '~') ADVANCE(35); + ADVANCE_MAP( + '.', 6, + '0', 37, + 'F', 2, + 'N', 16, + 'T', 13, + 'f', 17, + 'n', 29, + 't', 26, + '~', 35, + '+', 1, + '-', 1, + ); if (('1' <= lookahead && lookahead <= '9')) ADVANCE(38); END_STATE(); case 1: @@ -313,7 +315,7 @@ static const TSParseActionEntry ts_parse_actions[] = { [1] = {.entry = {.count = 1, .reusable = false}}, RECOVER(), [3] = {.entry = {.count = 1, .reusable = true}}, SHIFT(2), [5] = {.entry = {.count = 1, .reusable = false}}, SHIFT(2), - [7] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_scalar, 1), + [7] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_scalar, 1, 0, 0), [9] = {.entry = {.count = 1, .reusable = true}}, ACCEPT_INPUT(), }; @@ -328,7 +330,7 @@ extern "C" { #define TS_PUBLIC __attribute__((visibility("default"))) #endif -TS_PUBLIC const TSLanguage *tree_sitter_core_schema() { +TS_PUBLIC const TSLanguage *tree_sitter_core_schema(void) { static const TSLanguage language = { .version = LANGUAGE_VERSION, .symbol_count = SYMBOL_COUNT, diff --git a/schema/core/src/tree_sitter/parser.h b/schema/core/src/tree_sitter/parser.h index 17b4fde..17f0e94 100644 --- a/schema/core/src/tree_sitter/parser.h +++ b/schema/core/src/tree_sitter/parser.h @@ -86,6 +86,11 @@ typedef union { } entry; } TSParseActionEntry; +typedef struct { + int32_t start; + int32_t end; +} TSCharacterRange; + struct TSLanguage { uint32_t version; uint32_t symbol_count; @@ -125,6 +130,24 @@ struct TSLanguage { const TSStateId *primary_state_ids; }; +static inline bool set_contains(TSCharacterRange *ranges, uint32_t len, int32_t lookahead) { + uint32_t index = 0; + uint32_t size = len - index; + while (size > 1) { + uint32_t half_size = size / 2; + uint32_t mid_index = index + half_size; + TSCharacterRange *range = &ranges[mid_index]; + if (lookahead >= range->start && lookahead <= range->end) { + return true; + } else if (lookahead > range->end) { + index = mid_index; + } + size -= half_size; + } + TSCharacterRange *range = &ranges[index]; + return (lookahead >= range->start && lookahead <= range->end); +} + /* * Lexer Macros */ @@ -154,6 +177,17 @@ struct TSLanguage { goto next_state; \ } +#define ADVANCE_MAP(...) \ + { \ + static const uint16_t map[] = { __VA_ARGS__ }; \ + for (uint32_t i = 0; i < sizeof(map) / sizeof(map[0]); i += 2) { \ + if (map[i] == lookahead) { \ + state = map[i + 1]; \ + goto next_state; \ + } \ + } \ + } + #define SKIP(state_value) \ { \ skip = true; \ @@ -203,14 +237,15 @@ struct TSLanguage { } \ }} -#define REDUCE(symbol_val, child_count_val, ...) \ - {{ \ - .reduce = { \ - .type = TSParseActionTypeReduce, \ - .symbol = symbol_val, \ - .child_count = child_count_val, \ - __VA_ARGS__ \ - }, \ +#define REDUCE(symbol_name, children, precedence, prod_id) \ + {{ \ + .reduce = { \ + .type = TSParseActionTypeReduce, \ + .symbol = symbol_name, \ + .child_count = children, \ + .dynamic_precedence = precedence, \ + .production_id = prod_id \ + }, \ }} #define RECOVER() \ diff --git a/schema/json/src/parser.c b/schema/json/src/parser.c index 30e364f..729a432 100644 --- a/schema/json/src/parser.c +++ b/schema/json/src/parser.c @@ -215,7 +215,7 @@ static const TSParseActionEntry ts_parse_actions[] = { [1] = {.entry = {.count = 1, .reusable = false}}, RECOVER(), [3] = {.entry = {.count = 1, .reusable = true}}, SHIFT(2), [5] = {.entry = {.count = 1, .reusable = false}}, SHIFT(2), - [7] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_scalar, 1), + [7] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_scalar, 1, 0, 0), [9] = {.entry = {.count = 1, .reusable = true}}, ACCEPT_INPUT(), }; @@ -230,7 +230,7 @@ extern "C" { #define TS_PUBLIC __attribute__((visibility("default"))) #endif -TS_PUBLIC const TSLanguage *tree_sitter_json_schema() { +TS_PUBLIC const TSLanguage *tree_sitter_json_schema(void) { static const TSLanguage language = { .version = LANGUAGE_VERSION, .symbol_count = SYMBOL_COUNT, diff --git a/schema/json/src/tree_sitter/parser.h b/schema/json/src/tree_sitter/parser.h index 17b4fde..17f0e94 100644 --- a/schema/json/src/tree_sitter/parser.h +++ b/schema/json/src/tree_sitter/parser.h @@ -86,6 +86,11 @@ typedef union { } entry; } TSParseActionEntry; +typedef struct { + int32_t start; + int32_t end; +} TSCharacterRange; + struct TSLanguage { uint32_t version; uint32_t symbol_count; @@ -125,6 +130,24 @@ struct TSLanguage { const TSStateId *primary_state_ids; }; +static inline bool set_contains(TSCharacterRange *ranges, uint32_t len, int32_t lookahead) { + uint32_t index = 0; + uint32_t size = len - index; + while (size > 1) { + uint32_t half_size = size / 2; + uint32_t mid_index = index + half_size; + TSCharacterRange *range = &ranges[mid_index]; + if (lookahead >= range->start && lookahead <= range->end) { + return true; + } else if (lookahead > range->end) { + index = mid_index; + } + size -= half_size; + } + TSCharacterRange *range = &ranges[index]; + return (lookahead >= range->start && lookahead <= range->end); +} + /* * Lexer Macros */ @@ -154,6 +177,17 @@ struct TSLanguage { goto next_state; \ } +#define ADVANCE_MAP(...) \ + { \ + static const uint16_t map[] = { __VA_ARGS__ }; \ + for (uint32_t i = 0; i < sizeof(map) / sizeof(map[0]); i += 2) { \ + if (map[i] == lookahead) { \ + state = map[i + 1]; \ + goto next_state; \ + } \ + } \ + } + #define SKIP(state_value) \ { \ skip = true; \ @@ -203,14 +237,15 @@ struct TSLanguage { } \ }} -#define REDUCE(symbol_val, child_count_val, ...) \ - {{ \ - .reduce = { \ - .type = TSParseActionTypeReduce, \ - .symbol = symbol_val, \ - .child_count = child_count_val, \ - __VA_ARGS__ \ - }, \ +#define REDUCE(symbol_name, children, precedence, prod_id) \ + {{ \ + .reduce = { \ + .type = TSParseActionTypeReduce, \ + .symbol = symbol_name, \ + .child_count = children, \ + .dynamic_precedence = precedence, \ + .production_id = prod_id \ + }, \ }} #define RECOVER() \ diff --git a/schema/update-schema.js b/schema/update-schema.js index 8c8cf27..73941ae 100755 --- a/schema/update-schema.js +++ b/schema/update-schema.js @@ -16,6 +16,9 @@ readFile(join(__dirname, schema, "src", "parser.c"), "utf8").then(input => { cases .map(([key, { content }]) => `${(key === "default" ? "default:" : `case ${key}:`)}\n${indent(content)}`) .join("\n END_STATE();\n") + .replace(/\s+ADVANCE_MAP\(([^]+?)\);\n/, (_, map) => { + return map.replace(/'(.)', (\d+),/g, "if (lookahead == '$1') ADVANCE($2);"); + }) .replace(/ADVANCE\((\d+)\);/g, (_, state) => { const stateCase = cases.find(([key]) => key === state); if (stateCase) { diff --git a/src/parser.c b/src/parser.c index 1e033ee..b427599 100644 --- a/src/parser.c +++ b/src/parser.c @@ -40504,7 +40504,7 @@ void tree_sitter_yaml_external_scanner_deserialize(void *, const char *, unsigne #define TS_PUBLIC __attribute__((visibility("default"))) #endif -TS_PUBLIC const TSLanguage *tree_sitter_yaml() { +TS_PUBLIC const TSLanguage *tree_sitter_yaml(void) { static const TSLanguage language = { .version = LANGUAGE_VERSION, .symbol_count = SYMBOL_COUNT, diff --git a/src/schema.core.c b/src/schema.core.c index cb53d7d..389061e 100644 --- a/src/schema.core.c +++ b/src/schema.core.c @@ -15,18 +15,18 @@ static int8_t adv_sch_stt(int8_t sch_stt, int32_t cur_chr, ResultSchema *rlt_sch case SCH_STT_FRZ: break; case 0: - if (cur_chr == '+' || - cur_chr == '-') {*rlt_sch = RS_STR; return 1;} - if (cur_chr == '.') {*rlt_sch = RS_STR; return 6;} - if (cur_chr == '0') {*rlt_sch = RS_INT; return 37;} - if (cur_chr == 'F') {*rlt_sch = RS_STR; return 2;} - if (cur_chr == 'N') {*rlt_sch = RS_STR; return 16;} - if (cur_chr == 'T') {*rlt_sch = RS_STR; return 13;} - if (cur_chr == 'f') {*rlt_sch = RS_STR; return 17;} - if (cur_chr == 'n') {*rlt_sch = RS_STR; return 29;} - if (cur_chr == 't') {*rlt_sch = RS_STR; return 26;} - if (cur_chr == '~') {*rlt_sch = RS_NULL; return 35;} - if (('1' <= cur_chr && cur_chr <= '9')) {*rlt_sch = RS_INT; return 38;} + if (cur_chr == '.') {*rlt_sch = RS_STR; return 6;} + if (cur_chr == '0') {*rlt_sch = RS_INT; return 37;} + if (cur_chr == 'F') {*rlt_sch = RS_STR; return 2;} + if (cur_chr == 'N') {*rlt_sch = RS_STR; return 16;} + if (cur_chr == 'T') {*rlt_sch = RS_STR; return 13;} + if (cur_chr == 'f') {*rlt_sch = RS_STR; return 17;} + if (cur_chr == 'n') {*rlt_sch = RS_STR; return 29;} + if (cur_chr == 't') {*rlt_sch = RS_STR; return 26;} + if (cur_chr == '~') {*rlt_sch = RS_NULL; return 35;} + if (cur_chr == '+') {*rlt_sch = RS_STR; return 1;} + if (cur_chr == '-') {*rlt_sch = RS_STR; return 1;} + if (('1' <= cur_chr && cur_chr <= '9')) {*rlt_sch = RS_INT; return 38;} break; case 1: if (cur_chr == '.') {*rlt_sch = RS_STR; return 7;} @@ -195,6 +195,6 @@ static int8_t adv_sch_stt(int8_t sch_stt, int32_t cur_chr, ResultSchema *rlt_sch *rlt_sch = RS_STR; return SCH_STT_FRZ; } - if (cur_chr != '\r' && cur_chr != '\n' && cur_chr != ' ' && cur_chr != 0) {*rlt_sch = RS_STR;} + if (cur_chr != '\r' && cur_chr != '\n' && cur_chr != ' ' && cur_chr != 0) *rlt_sch = RS_STR; return SCH_STT_FRZ; } diff --git a/src/schema.json.c b/src/schema.json.c index ba879cb..ce809ac 100644 --- a/src/schema.json.c +++ b/src/schema.json.c @@ -97,6 +97,6 @@ static int8_t adv_sch_stt(int8_t sch_stt, int32_t cur_chr, ResultSchema *rlt_sch *rlt_sch = RS_STR; return SCH_STT_FRZ; } - if (cur_chr != '\r' && cur_chr != '\n' && cur_chr != ' ' && cur_chr != 0) {*rlt_sch = RS_STR;} + if (cur_chr != '\r' && cur_chr != '\n' && cur_chr != ' ' && cur_chr != 0) *rlt_sch = RS_STR; return SCH_STT_FRZ; } diff --git a/src/tree_sitter/parser.h b/src/tree_sitter/parser.h index 0a9843a..17f0e94 100644 --- a/src/tree_sitter/parser.h +++ b/src/tree_sitter/parser.h @@ -161,8 +161,9 @@ static inline bool set_contains(TSCharacterRange *ranges, uint32_t len, int32_t #define START_LEXER() \ bool result = false; \ bool skip = false; \ - UNUSED bool eof = false; \ - UNUSED int32_t lookahead; \ + UNUSED \ + bool eof = false; \ + int32_t lookahead; \ goto start; \ next_state: \ lexer->advance(lexer, skip); \ @@ -244,7 +245,7 @@ static inline bool set_contains(TSCharacterRange *ranges, uint32_t len, int32_t .child_count = children, \ .dynamic_precedence = precedence, \ .production_id = prod_id \ - } \ + }, \ }} #define RECOVER() \