fix: improve error recovery

This commit is contained in:
ObserverOfTime 2024-04-13 16:44:23 +03:00
parent 3975596d84
commit ee09311821
No known key found for this signature in database
GPG key ID: 8A2DEA1DBAEBCA9E
5 changed files with 655 additions and 602 deletions

View file

@ -64,6 +64,8 @@ module.exports = grammar({
$._bl, $._bl,
$.comment, $.comment,
$._err_rec,
], ],
extras: $ => [$.comment], extras: $ => [$.comment],

4
src/grammar.json generated
View file

@ -6950,6 +6950,10 @@
{ {
"type": "SYMBOL", "type": "SYMBOL",
"name": "comment" "name": "comment"
},
{
"type": "SYMBOL",
"name": "_err_rec"
} }
], ],
"inline": [ "inline": [

1107
src/parser.c generated

File diff suppressed because it is too large Load diff

View file

@ -60,6 +60,8 @@ typedef enum {
BL, BL,
COMMENT, COMMENT,
ERR_REC,
} TokenType; } TokenType;
// clang-format on // clang-format on
@ -1355,7 +1357,7 @@ static bool scan(Scanner *scanner, TSLexer *lexer, const bool *valid_symbols) {
return false; return false;
} }
return false; return !valid_symbols[ERR_REC];
} }
void *tree_sitter_yaml_external_scanner_create() { void *tree_sitter_yaml_external_scanner_create() {

View file

@ -86,6 +86,11 @@ typedef union {
} entry; } entry;
} TSParseActionEntry; } TSParseActionEntry;
/*
 * One contiguous range of character codes, used as an entry of a character
 * set. Both bounds are inclusive: set_contains() accepts a lookahead when
 * start <= lookahead <= end.
 */
typedef struct {
/* Lowest character code covered by the range. */
int32_t start;
/* Highest character code covered by the range. */
int32_t end;
} TSCharacterRange;
struct TSLanguage { struct TSLanguage {
uint32_t version; uint32_t version;
uint32_t symbol_count; uint32_t symbol_count;
@ -125,6 +130,24 @@ struct TSLanguage {
const TSStateId *primary_state_ids; const TSStateId *primary_state_ids;
}; };
static inline bool set_contains(TSCharacterRange *ranges, uint32_t len, int32_t lookahead) {
uint32_t index = 0;
uint32_t size = len - index;
while (size > 1) {
uint32_t half_size = size / 2;
uint32_t mid_index = index + half_size;
TSCharacterRange *range = &ranges[mid_index];
if (lookahead >= range->start && lookahead <= range->end) {
return true;
} else if (lookahead > range->end) {
index = mid_index;
}
size -= half_size;
}
TSCharacterRange *range = &ranges[index];
return (lookahead >= range->start && lookahead <= range->end);
}
/* /*
* Lexer Macros * Lexer Macros
*/ */
@ -138,9 +161,8 @@ struct TSLanguage {
#define START_LEXER() \ #define START_LEXER() \
bool result = false; \ bool result = false; \
bool skip = false; \ bool skip = false; \
UNUSED \ UNUSED bool eof = false; \
bool eof = false; \ UNUSED int32_t lookahead; \
int32_t lookahead; \
goto start; \ goto start; \
next_state: \ next_state: \
lexer->advance(lexer, skip); \ lexer->advance(lexer, skip); \
@ -154,6 +176,17 @@ struct TSLanguage {
goto next_state; \ goto next_state; \
} }
/*
 * ADVANCE_MAP(c0, s0, c1, s1, ...)
 *
 * Takes a flat list of (character, state) pairs and stores it in a static
 * uint16_t table. The table is scanned two entries at a time; on the first
 * pair whose character equals `lookahead`, the paired value is assigned to
 * `state` and control jumps to the `next_state` label. If no pair matches,
 * execution falls through to the code after the macro.
 *
 * The expansion site must provide `lookahead`, `state` and the `next_state`
 * label. Entries are held as uint16_t, so every argument must fit in 16 bits,
 * and the argument count must be even because map[i + 1] is read for each
 * matching map[i].
 */
#define ADVANCE_MAP(...) \
{ \
static const uint16_t map[] = { __VA_ARGS__ }; \
for (uint32_t i = 0; i < sizeof(map) / sizeof(map[0]); i += 2) { \
if (map[i] == lookahead) { \
state = map[i + 1]; \
goto next_state; \
} \
} \
}
#define SKIP(state_value) \ #define SKIP(state_value) \
{ \ { \
skip = true; \ skip = true; \
@ -203,14 +236,15 @@ struct TSLanguage {
} \ } \
}} }}
#define REDUCE(symbol_val, child_count_val, ...) \ #define REDUCE(symbol_name, children, precedence, prod_id) \
{{ \ {{ \
.reduce = { \ .reduce = { \
.type = TSParseActionTypeReduce, \ .type = TSParseActionTypeReduce, \
.symbol = symbol_val, \ .symbol = symbol_name, \
.child_count = child_count_val, \ .child_count = children, \
__VA_ARGS__ \ .dynamic_precedence = precedence, \
}, \ .production_id = prod_id \
} \
}} }}
#define RECOVER() \ #define RECOVER() \