feat!: rewrite the scanner in C
This commit is contained in:
parent
828d5c2e40
commit
9e59b9bbf8
7
src/parser.c
generated
7
src/parser.c
generated
|
@ -1,7 +1,6 @@
|
||||||
#include "tree_sitter/parser.h"
|
#include "tree_sitter/parser.h"
|
||||||
|
|
||||||
#if defined(__GNUC__) || defined(__clang__)
|
#if defined(__GNUC__) || defined(__clang__)
|
||||||
#pragma GCC diagnostic push
|
|
||||||
#pragma GCC diagnostic ignored "-Wmissing-field-initializers"
|
#pragma GCC diagnostic ignored "-Wmissing-field-initializers"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -40287,10 +40286,12 @@ unsigned tree_sitter_yaml_external_scanner_serialize(void *, char *);
|
||||||
void tree_sitter_yaml_external_scanner_deserialize(void *, const char *, unsigned);
|
void tree_sitter_yaml_external_scanner_deserialize(void *, const char *, unsigned);
|
||||||
|
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
#define extern __declspec(dllexport)
|
#define TS_PUBLIC __declspec(dllexport)
|
||||||
|
#else
|
||||||
|
#define TS_PUBLIC __attribute__((visibility("default")))
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
extern const TSLanguage *tree_sitter_yaml(void) {
|
TS_PUBLIC const TSLanguage *tree_sitter_yaml() {
|
||||||
static const TSLanguage language = {
|
static const TSLanguage language = {
|
||||||
.version = LANGUAGE_VERSION,
|
.version = LANGUAGE_VERSION,
|
||||||
.symbol_count = SYMBOL_COUNT,
|
.symbol_count = SYMBOL_COUNT,
|
||||||
|
|
1371
src/scanner.c
generated
Normal file
1371
src/scanner.c
generated
Normal file
File diff suppressed because it is too large
Load diff
981
src/scanner.cc
981
src/scanner.cc
|
@ -1,981 +0,0 @@
|
||||||
#include <tree_sitter/parser.h>
|
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
// tree-sitter does not support multiple files for external scanner
|
|
||||||
#include "./schema.generated.cc"
|
|
||||||
|
|
||||||
namespace {
|
|
||||||
|
|
||||||
using std::vector;
|
|
||||||
using namespace tree_sitter_yaml;
|
|
||||||
|
|
||||||
// Token kinds handed back to the parser through lexer->result_symbol.
// Enumerators are listed one per line in their original order, so the implicit
// numeric values are unchanged (presumably they have to line up with the
// grammar's `externals` list -- confirm against the grammar).
// Name prefixes follow the position flags computed in scan():
//   R_  - token on the same row as the previous token (is_r)
//   BR_ - token after a newline, indented deeper than the current indent (is_br)
//   B_  - token after a newline, at exactly the current indent (is_b)
//   S_  - token that starts at column 0 (is_s)
enum TokenType {
  END_OF_FILE,

  // directives
  S_DIR_YML_BGN,
  R_DIR_YML_VER,
  S_DIR_TAG_BGN,
  R_DIR_TAG_HDL,
  R_DIR_TAG_PFX,
  S_DIR_RSV_BGN,
  R_DIR_RSV_PRM,

  // document markers ("---" / "...")
  S_DRS_END,
  S_DOC_END,

  // block collections and block scalars
  R_BLK_SEQ_BGN,
  BR_BLK_SEQ_BGN,
  B_BLK_SEQ_BGN,
  R_BLK_KEY_BGN,
  BR_BLK_KEY_BGN,
  B_BLK_KEY_BGN,
  R_BLK_VAL_BGN,
  BR_BLK_VAL_BGN,
  B_BLK_VAL_BGN,
  R_BLK_IMP_BGN,
  R_BLK_LIT_BGN,
  BR_BLK_LIT_BGN,
  R_BLK_FLD_BGN,
  BR_BLK_FLD_BGN,
  BR_BLK_STR_CTN,

  // flow collections
  R_FLW_SEQ_BGN,
  BR_FLW_SEQ_BGN,
  B_FLW_SEQ_BGN,
  R_FLW_SEQ_END,
  BR_FLW_SEQ_END,
  R_FLW_MAP_BGN,
  BR_FLW_MAP_BGN,
  B_FLW_MAP_BGN,
  R_FLW_MAP_END,
  BR_FLW_MAP_END,
  R_FLW_SEP_BGN,
  BR_FLW_SEP_BGN,
  R_FLW_KEY_BGN,
  BR_FLW_KEY_BGN,
  R_FLW_JSV_BGN,
  BR_FLW_JSV_BGN,
  R_FLW_NJV_BGN,
  BR_FLW_NJV_BGN,

  // double-quoted strings
  R_DQT_STR_BGN,
  BR_DQT_STR_BGN,
  B_DQT_STR_BGN,
  R_DQT_STR_CTN,
  BR_DQT_STR_CTN,
  R_DQT_ESC_NWL,
  BR_DQT_ESC_NWL,
  R_DQT_ESC_SEQ,
  BR_DQT_ESC_SEQ,
  R_DQT_STR_END,
  BR_DQT_STR_END,

  // single-quoted strings
  R_SQT_STR_BGN,
  BR_SQT_STR_BGN,
  B_SQT_STR_BGN,
  R_SQT_STR_CTN,
  BR_SQT_STR_CTN,
  R_SQT_ESC_SQT,
  BR_SQT_ESC_SQT,
  R_SQT_STR_END,
  BR_SQT_STR_END,

  // single-line plain scalars, one group per schema result (see SGL_PLN_SYM)
  R_SGL_PLN_NUL_BLK,
  BR_SGL_PLN_NUL_BLK,
  B_SGL_PLN_NUL_BLK,
  R_SGL_PLN_NUL_FLW,
  BR_SGL_PLN_NUL_FLW,
  R_SGL_PLN_BOL_BLK,
  BR_SGL_PLN_BOL_BLK,
  B_SGL_PLN_BOL_BLK,
  R_SGL_PLN_BOL_FLW,
  BR_SGL_PLN_BOL_FLW,
  R_SGL_PLN_INT_BLK,
  BR_SGL_PLN_INT_BLK,
  B_SGL_PLN_INT_BLK,
  R_SGL_PLN_INT_FLW,
  BR_SGL_PLN_INT_FLW,
  R_SGL_PLN_FLT_BLK,
  BR_SGL_PLN_FLT_BLK,
  B_SGL_PLN_FLT_BLK,
  R_SGL_PLN_FLT_FLW,
  BR_SGL_PLN_FLT_FLW,
  R_SGL_PLN_STR_BLK,
  BR_SGL_PLN_STR_BLK,
  B_SGL_PLN_STR_BLK,
  R_SGL_PLN_STR_FLW,
  BR_SGL_PLN_STR_FLW,

  // multi-line plain scalars
  R_MTL_PLN_STR_BLK,
  BR_MTL_PLN_STR_BLK,
  R_MTL_PLN_STR_FLW,
  BR_MTL_PLN_STR_FLW,

  // node properties and aliases
  R_TAG,
  BR_TAG,
  B_TAG,
  R_ACR_BGN,
  BR_ACR_BGN,
  B_ACR_BGN,
  R_ACR_CTN,
  R_ALS_BGN,
  BR_ALS_BGN,
  B_ALS_BGN,
  R_ALS_CTN,

  BL,
  COMMENT,
};
|
|
||||||
|
|
||||||
// Shorthands used inside Scanner methods; each one relies on a local variable
// named `lexer` (and `valid_symbols` for VLD) being in scope at the call site.
#define ADV() adv(lexer)
#define ADV_NWL() adv_nwl(lexer)
#define SKP() skp(lexer)
#define SKP_NWL() skp_nwl(lexer)
#define MRK_END() mrk_end(lexer)
#define LKA lexer->lookahead
#define VLD valid_symbols

// Tri-state result codes returned by the low-level scn_* helpers.
#define SCN_SUCC 1
#define SCN_STOP 0
// Parenthesized so the negative literal stays a single operand wherever the
// macro is expanded.
#define SCN_FAIL (-1)

// Kinds of entries kept in ind_typ_stk.
#define IND_ROT 'r'  // bottom entry pushed by deserialize()
#define IND_MAP 'm'  // pushed for block mapping keys/values
#define IND_SEQ 'q'  // pushed for block sequence entries
#define IND_STR 's'  // pushed for block scalars
|
|
||||||
|
|
||||||
// Statement macros used by the scn_* methods and scan(). They expand to bare
// `{ ... }` blocks rather than do/while(0): several call sites invoke them
// without a trailing semicolon, so the block form has to be kept.

// Commits the marked end position (flush) and returns RESULT_SYMBOL to the
// parser from the enclosing function.
#define RET_SYM(RESULT_SYMBOL) { \
  flush(); \
  lexer->result_symbol = RESULT_SYMBOL; \
  return true; \
}

// Pops the innermost indentation entry. If only the root entry is left
// (inconsistent state caused by error recovery) the enclosing function
// returns false instead.
#define POP_IND() { \
  if (ind_typ_stk.size() == 1) { \
    return false; \
  } \
  pop_ind(); \
}

// Pushes an indentation entry of kind TYP with length LEN.
#define PUSH_IND(TYP, LEN) push_ind(TYP, LEN)

// Pushes an entry of kind TYP at the column where the current token began;
// makes the enclosing function return false when the line was indented with tabs.
#define PUSH_BGN_IND(TYP) { \
  if (has_tab_ind) return false; \
  push_ind(TYP, bgn_col); \
}

// Opens an implicit block mapping at blk_imp_col unless the current indent
// already equals it. NOTE(review): TYP is not used; IND_MAP is always pushed.
#define MAY_PUSH_IMP_IND(TYP) { \
  if (cur_ind != blk_imp_col) { \
    if (blk_imp_tab) return false; \
    push_ind(IND_MAP, blk_imp_col); \
  } \
}

// Pushes a sequence entry at the token's begin column when the innermost
// entry is a mapping.
#define MAY_PUSH_SPC_SEQ_IND() { \
  if (cur_ind_typ == IND_MAP) { \
    push_ind(IND_SEQ, bgn_col); \
  } \
}

// Records row/column/tab flag of the current token as the implicit-key
// candidate, but only once per row.
#define MAY_UPD_IMP_COL() { \
  if (blk_imp_row != bgn_row) { \
    blk_imp_row = bgn_row; \
    blk_imp_col = bgn_col; \
    blk_imp_tab = has_tab_ind; \
  } \
}

// Feeds the most recently consumed character into the schema state machine.
#define UPD_SCH_STT() { \
  sch_stt = adv_sch_stt(sch_stt, cur_chr, &rlt_sch); \
}

// Selects the single-line plain scalar token for position prefix POS and
// context CTX according to the schema result; strings are the fallback.
#define SGL_PLN_SYM(POS, CTX) ( \
  rlt_sch == RS_NUL ? POS##_SGL_PLN_NUL_##CTX : \
  rlt_sch == RS_BOL ? POS##_SGL_PLN_BOL_##CTX : \
  rlt_sch == RS_INT ? POS##_SGL_PLN_INT_##CTX : \
  rlt_sch == RS_FLT ? POS##_SGL_PLN_FLT_##CTX : \
  POS##_SGL_PLN_STR_##CTX \
)
|
|
||||||
|
|
||||||
struct Scanner {
|
|
||||||
int16_t row;
|
|
||||||
int16_t col;
|
|
||||||
int16_t blk_imp_row;
|
|
||||||
int16_t blk_imp_col;
|
|
||||||
int16_t blk_imp_tab;
|
|
||||||
vector<int16_t> ind_typ_stk;
|
|
||||||
vector<int16_t> ind_len_stk;
|
|
||||||
|
|
||||||
// temp
|
|
||||||
int16_t end_row;
|
|
||||||
int16_t end_col;
|
|
||||||
int16_t cur_row;
|
|
||||||
int16_t cur_col;
|
|
||||||
int32_t cur_chr;
|
|
||||||
int8_t sch_stt;
|
|
||||||
ResultSchema rlt_sch;
|
|
||||||
|
|
||||||
// A new scanner starts in the same state as one restored from an empty buffer.
Scanner() {
  const char *no_state = NULL;
  deserialize(no_state, 0);
}
|
|
||||||
|
|
||||||
unsigned serialize(char *buffer) {
|
|
||||||
size_t i = 0;
|
|
||||||
buffer[i++] = row;
|
|
||||||
buffer[i++] = col;
|
|
||||||
buffer[i++] = blk_imp_row;
|
|
||||||
buffer[i++] = blk_imp_col;
|
|
||||||
buffer[i++] = blk_imp_tab;
|
|
||||||
vector<int16_t>::iterator
|
|
||||||
typ_itr = ind_typ_stk.begin() + 1,
|
|
||||||
typ_end = ind_typ_stk.end(),
|
|
||||||
len_itr = ind_len_stk.begin() + 1;
|
|
||||||
for (; typ_itr != typ_end && i < TREE_SITTER_SERIALIZATION_BUFFER_SIZE; ++typ_itr, ++len_itr) {
|
|
||||||
buffer[i++] = *typ_itr;
|
|
||||||
buffer[i++] = *len_itr;
|
|
||||||
}
|
|
||||||
return i;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Resets the scanner to its initial state and, when `buffer` holds a state
// written by serialize(), restores that state.
//
// buffer: bytes produced by serialize(); may be NULL when length is 0.
// length: number of valid bytes in `buffer`.
void deserialize(const char *buffer, unsigned length) {
  row = 0;
  col = 0;
  blk_imp_row = -1;
  blk_imp_col = -1;
  blk_imp_tab = 0;
  // Both stacks always keep the root entry at the bottom.
  ind_typ_stk.clear();
  ind_typ_stk.push_back(IND_ROT);
  ind_len_stk.clear();
  ind_len_stk.push_back(-1);

  // serialize() always emits the five header bytes; anything shorter cannot be
  // a valid state, so keep the defaults rather than read past the buffer.
  if (buffer == NULL || length < 5) return;

  size_t i = 0;
  row = buffer[i++];
  col = buffer[i++];
  blk_imp_row = buffer[i++];
  blk_imp_col = buffer[i++];
  blk_imp_tab = buffer[i++];
  // Entries come in (type, length) pairs; a dangling single byte is ignored.
  while (i + 1 < length) {
    ind_typ_stk.push_back(buffer[i++]);
    ind_len_stk.push_back(buffer[i++]);
  }
}
|
|
||||||
|
|
||||||
// Consumes the lookahead character (advance with skip = false), remembering it
// in cur_chr and moving the tracked column one to the right.
void adv(TSLexer *lexer) {
  cur_chr = lexer->lookahead;
  ++cur_col;
  lexer->advance(lexer, false);
}
|
|
||||||
|
|
||||||
// Consumes the lookahead as a line break (advance with skip = false): the
// tracked position moves to column 0 of the next row.
void adv_nwl(TSLexer *lexer) {
  cur_chr = lexer->lookahead;
  cur_col = 0;
  ++cur_row;
  lexer->advance(lexer, false);
}
|
|
||||||
|
|
||||||
// Same bookkeeping as adv(), but the character is passed to the lexer with
// skip = true.
void skp(TSLexer *lexer) {
  cur_chr = lexer->lookahead;
  ++cur_col;
  lexer->advance(lexer, true);
}
|
|
||||||
|
|
||||||
// Same bookkeeping as adv_nwl(), but the line break is passed to the lexer
// with skip = true.
void skp_nwl(TSLexer *lexer) {
  cur_chr = lexer->lookahead;
  cur_col = 0;
  ++cur_row;
  lexer->advance(lexer, true);
}
|
|
||||||
|
|
||||||
// Marks the current position as the end of the token, both in the lexer and
// in end_row/end_col (which flush() later copies into row/col).
void mrk_end(TSLexer *lexer) {
  lexer->mark_end(lexer);
  end_col = cur_col;
  end_row = cur_row;
}
|
|
||||||
|
|
||||||
// Resets the per-scan scratch state: the working position starts at the end
// of the previous token and the schema state machine starts over.
void init() {
  cur_chr = 0;
  cur_col = col;
  cur_row = row;
  rlt_sch = RS_STR;
  sch_stt = 0;
}
|
|
||||||
|
|
||||||
// Makes the last marked end position the persistent token end position.
void flush() {
  col = end_col;
  row = end_row;
}
|
|
||||||
|
|
||||||
// Drops the innermost indentation entry from both parallel stacks.
// Callers use POP_IND(), which refuses to pop the root entry.
void pop_ind() {
  ind_typ_stk.pop_back();
  ind_len_stk.pop_back();
}
|
|
||||||
|
|
||||||
// Pushes a new innermost indentation entry of kind `typ` (one of IND_*) with
// indentation length `len` onto both parallel stacks.
void push_ind(int16_t typ, int16_t len) {
  ind_typ_stk.push_back(typ);
  ind_len_stk.push_back(len);
}
|
|
||||||
|
|
||||||
// True for the two in-line white space characters: space and tab.
bool is_wsp(int32_t c) {
  switch (c) {
    case ' ':
    case '\t':
      return true;
    default:
      return false;
  }
}
|
|
||||||
|
|
||||||
// True for the line break characters: carriage return and line feed.
bool is_nwl(int32_t c) {
  switch (c) {
    case '\r':
    case '\n':
      return true;
    default:
      return false;
  }
}
|
|
||||||
|
|
||||||
bool is_wht(int32_t c) {
|
|
||||||
return is_wsp(c) || is_nwl(c) || c == 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
// True for the ASCII decimal digits '0'..'9'.
bool is_ns_dec_digit(int32_t c) {
  if (c < '0') return false;
  return c <= '9';
}
|
|
||||||
|
|
||||||
// True for ASCII hexadecimal digits in either letter case.
bool is_ns_hex_digit(int32_t c) {
  const bool dec = '0' <= c && c <= '9';
  const bool lower = 'a' <= c && c <= 'f';
  const bool upper = 'A' <= c && c <= 'F';
  return dec || lower || upper;
}
|
|
||||||
|
|
||||||
// True for '-', ASCII digits and ASCII letters.
bool is_ns_word_char(int32_t c) {
  if (c == '-') return true;
  const bool digit = '0' <= c && c <= '9';
  const bool lower = 'a' <= c && c <= 'z';
  const bool upper = 'A' <= c && c <= 'Z';
  return digit || lower || upper;
}
|
|
||||||
|
|
||||||
// True for tab and for every code point from 0x20 up to 0x10ffff.
bool is_nb_json(int32_t c) {
  if (c == '\t') return true;
  return 0x20 <= c && c <= 0x10ffff;
}
|
|
||||||
|
|
||||||
bool is_nb_double_char(int32_t c) {
|
|
||||||
return is_nb_json(c) && c != '\\' && c != '"';
|
|
||||||
}
|
|
||||||
|
|
||||||
bool is_nb_single_char(int32_t c) {
|
|
||||||
return is_nb_json(c) && c != '\'';
|
|
||||||
}
|
|
||||||
|
|
||||||
// True for printable, non-space characters: 0x21-0x7e, 0x85, 0xa0-0xd7ff,
// 0xe000-0xfefe, 0xff00-0xfffd and 0x10000-0x10ffff. The ranges are walked in
// ascending order, so each test only has to look at one bound.
bool is_ns_char(int32_t c) {
  if (c < 0x21) return false;
  if (c <= 0x7e) return true;
  if (c == 0x85) return true;
  if (c < 0xa0) return false;
  if (c <= 0xd7ff) return true;
  if (c < 0xe000) return false;
  if (c <= 0xfefe) return true;
  if (c < 0xff00) return false;
  if (c <= 0xfffd) return true;
  return c >= 0x10000 && c <= 0x10ffff;
}
|
|
||||||
|
|
||||||
// True for the indicator punctuation characters listed below.
bool is_c_indicator(int32_t c) {
  switch (c) {
    case '-': case '?': case ':': case ',':
    case '[': case ']': case '{': case '}':
    case '#': case '&': case '*': case '!':
    case '|': case '>': case '\'': case '"':
    case '%': case '@': case '`':
      return true;
    default:
      return false;
  }
}
|
|
||||||
|
|
||||||
// True for the characters that delimit flow collections: , [ ] { }
bool is_c_flow_indicator(int32_t c) {
  switch (c) {
    case ',':
    case '[':
    case ']':
    case '{':
    case '}':
      return true;
    default:
      return false;
  }
}
|
|
||||||
|
|
||||||
bool is_plain_safe_in_block(int32_t c) {
|
|
||||||
return is_ns_char(c);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool is_plain_safe_in_flow(int32_t c) {
|
|
||||||
return is_ns_char(c) && !is_c_flow_indicator(c);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool is_ns_uri_char(int32_t c) {
|
|
||||||
return is_ns_word_char(c)
|
|
||||||
|| c == '#' || c == ';' || c == '/' || c == '?' || c == ':' || c == '@' || c == '&'
|
|
||||||
|| c == '=' || c == '+' || c == '$' || c == ',' || c == '_' || c == '.' || c == '!'
|
|
||||||
|| c == '~' || c == '*' || c == '\'' || c == '(' || c == ')' || c == '[' || c == ']';
|
|
||||||
}
|
|
||||||
|
|
||||||
bool is_ns_tag_char(int32_t c) {
|
|
||||||
return is_ns_word_char(c)
|
|
||||||
|| c == '#' || c == ';' || c == '/' || c == '?' || c == ':' || c == '@' || c == '&'
|
|
||||||
|| c == '=' || c == '+' || c == '$' || c == '_' || c == '.'
|
|
||||||
|| c == '~' || c == '*' || c == '\'' || c == '(' || c == ')';
|
|
||||||
}
|
|
||||||
|
|
||||||
bool is_ns_anchor_char(int32_t c) {
|
|
||||||
return is_ns_char(c) && !is_c_flow_indicator(c);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Scans one "%XX" escape (percent sign followed by two hex digits).
//
// Returns SCN_STOP when the lookahead is not '%' (nothing consumed),
// SCN_SUCC when a complete escape was consumed, and SCN_FAIL when '%' was
// consumed but not followed by two hex digits. The end mark is placed before
// the '%' so a caller can still return the token scanned so far on failure.
//
// The return type is int8_t rather than plain char: SCN_FAIL is -1, and where
// plain char is unsigned it would come back as 255 and never compare equal to
// SCN_FAIL in the callers.
int8_t scn_uri_esc(TSLexer *lexer) {
  if (LKA != '%') return SCN_STOP;
  MRK_END();
  ADV();
  for (int digits = 0; digits < 2; ++digits) {
    if (!is_ns_hex_digit(LKA)) return SCN_FAIL;
    ADV();
  }
  return SCN_SUCC;
}
|
|
||||||
|
|
||||||
// Consumes one URI character: either a plain is_ns_uri_char character or a
// "%XX" escape. Returns SCN_SUCC, SCN_STOP or SCN_FAIL as described for
// scn_uri_esc().
//
// The return type is int8_t rather than plain char so that SCN_FAIL (-1)
// keeps its value where plain char is unsigned.
int8_t scn_ns_uri_char(TSLexer *lexer) {
  if (!is_ns_uri_char(LKA)) return scn_uri_esc(lexer);
  ADV();
  return SCN_SUCC;
}
|
|
||||||
|
|
||||||
// Consumes one tag character: either a plain is_ns_tag_char character or a
// "%XX" escape. Returns SCN_SUCC, SCN_STOP or SCN_FAIL as described for
// scn_uri_esc().
//
// The return type is int8_t rather than plain char so that SCN_FAIL (-1)
// keeps its value where plain char is unsigned.
int8_t scn_ns_tag_char(TSLexer *lexer) {
  if (!is_ns_tag_char(LKA)) return scn_uri_esc(lexer);
  ADV();
  return SCN_SUCC;
}
|
|
||||||
|
|
||||||
// Scans the name of a directive. scan() calls this with the lookahead on '%'.
//
// Emits S_DIR_YML_BGN for "%YAML" and S_DIR_TAG_BGN for "%TAG" when the name
// is followed by white space or end of input. Any other non-empty name made
// of is_ns_char characters is emitted as S_DIR_RSV_BGN. Returns false when no
// directive name could be recognized.
bool scn_dir_bgn(TSLexer *lexer) {
  ADV();  // the leading '%'

  // Pick the well-known name to try, based on its first letter.
  const char *keyword = NULL;
  TSSymbol keyword_sym = S_DIR_RSV_BGN;
  if (LKA == 'Y') {
    keyword = "YAML";
    keyword_sym = S_DIR_YML_BGN;
  } else if (LKA == 'T') {
    keyword = "TAG";
    keyword_sym = S_DIR_TAG_BGN;
  }

  if (keyword != NULL) {
    // Consume as many leading characters of the keyword as match.
    while (*keyword != '\0' && LKA == *keyword) {
      ADV();
      ++keyword;
    }
    if (*keyword == '\0' && is_wht(LKA)) {
      MRK_END();
      RET_SYM(keyword_sym);
    }
  }

  // Not a well-known directive: take the rest of the name as reserved.
  while (is_ns_char(LKA)) ADV();
  // cur_col > 1 requires at least one character after the '%' at column 0.
  if (cur_col > 1 && is_wht(LKA)) {
    MRK_END();
    RET_SYM(S_DIR_RSV_BGN);
  }
  return false;
}
|
|
||||||
|
|
||||||
// Scans a version number of the form <digits>.<digits> and emits it as
// `result_symbol`. Returns false when the dot is missing or either side of
// the dot has no digits.
bool scn_dir_yml_ver(TSLexer *lexer, TSSymbol result_symbol) {
  uint16_t major_digits = 0;
  for (; is_ns_dec_digit(LKA); ++major_digits) ADV();
  if (LKA != '.') return false;
  ADV();
  uint16_t minor_digits = 0;
  for (; is_ns_dec_digit(LKA); ++minor_digits) ADV();
  if (major_digits == 0 || minor_digits == 0) return false;
  MRK_END();
  RET_SYM(result_symbol);
}
|
|
||||||
|
|
||||||
// Scans what follows the first '!' of a tag handle.
//
// Returns true when the lookahead is '!' (consumed, i.e. the handle "!!"),
// when no word character follows (nothing consumed, i.e. the handle "!"), or
// when a run of word characters is closed by '!' (all consumed). Returns
// false when word characters were consumed but no closing '!' follows.
bool scn_tag_hdl_tal(TSLexer *lexer) {
  if (LKA == '!') {
    ADV();
    return true;
  }
  if (!is_ns_word_char(LKA)) return true;
  do {
    ADV();
  } while (is_ns_word_char(LKA));
  if (LKA != '!') return false;
  ADV();
  return true;
}
|
|
||||||
|
|
||||||
// Scans a complete tag handle ("!", "!!" or "!word!") and emits it as
// `result_symbol`; returns false when the lookahead does not start a handle
// or the handle is malformed.
bool scn_dir_tag_hdl(TSLexer *lexer, TSSymbol result_symbol) {
  if (LKA != '!') return false;
  ADV();
  if (!scn_tag_hdl_tal(lexer)) return false;
  MRK_END();
  RET_SYM(result_symbol);
}
|
|
||||||
|
|
||||||
// Scans the prefix of a %TAG directive and emits it as `result_symbol`.
//
// The first character must be '!' or a tag character (possibly a "%XX"
// escape); after that any number of URI characters is consumed. Returns false
// only when the first character does not qualify.
bool scn_dir_tag_pfx(TSLexer *lexer, TSSymbol result_symbol) {
  if (LKA == '!') {
    ADV();
  } else if (scn_ns_tag_char(lexer) != SCN_SUCC) {
    return false;
  }
  for (;;) {
    // Read the result through signed char: the helper may hand SCN_FAIL (-1)
    // back as plain char, which would be 255 where plain char is unsigned.
    const int rlt = static_cast<signed char>(scn_ns_uri_char(lexer));
    if (rlt == SCN_SUCC) continue;
    // SCN_STOP: the prefix ends right here, so mark the end now.
    // SCN_FAIL: a broken escape; the end mark set before its '%' is kept.
    if (rlt == SCN_STOP) MRK_END();
    RET_SYM(result_symbol);
  }
}
|
|
||||||
|
|
||||||
// Scans one parameter of a reserved directive: a non-empty run of is_ns_char
// characters, emitted as `result_symbol`.
bool scn_dir_rsv_prm(TSLexer *lexer, TSSymbol result_symbol) {
  if (!is_ns_char(LKA)) return false;
  do {
    ADV();
  } while (is_ns_char(LKA));
  MRK_END();
  RET_SYM(result_symbol);
}
|
|
||||||
|
|
||||||
// Scans a node tag and emits it as `result_symbol`.
//
// Accepted forms:
//   "!"        followed by white space or end of input
//   "!<uri>"   a non-empty URI enclosed in angle brackets
//   otherwise  an optional handle followed by tag characters
// Returns false when the lookahead is not '!' or the tag is malformed.
bool scn_tag(TSLexer *lexer, TSSymbol result_symbol) {
  if (LKA != '!') return false;
  ADV();
  if (is_wht(LKA)) {
    MRK_END();
    RET_SYM(result_symbol);
  }

  if (LKA == '<') {
    ADV();
    // At least one URI character is required.
    if (scn_ns_uri_char(lexer) != SCN_SUCC) return false;
    for (;;) {
      // Read the result through signed char: the helper may hand SCN_FAIL
      // (-1) back as plain char, which would be 255 where char is unsigned.
      const int rlt = static_cast<signed char>(scn_ns_uri_char(lexer));
      if (rlt == SCN_SUCC) continue;
      if (rlt == SCN_STOP && LKA == '>') {
        ADV();
        MRK_END();
        RET_SYM(result_symbol);
      }
      // Unterminated URI or broken escape.
      return false;
    }
  }

  // When a handle tail was scanned, at least one tag character has to follow.
  if (scn_tag_hdl_tal(lexer) && scn_ns_tag_char(lexer) != SCN_SUCC) return false;
  for (;;) {
    const int rlt = static_cast<signed char>(scn_ns_tag_char(lexer));
    if (rlt == SCN_SUCC) continue;
    // SCN_STOP: the tag ends right here, so mark the end now.
    // SCN_FAIL: a broken escape; the end mark set before its '%' is kept.
    if (rlt == SCN_STOP) MRK_END();
    RET_SYM(result_symbol);
  }
}
|
|
||||||
|
|
||||||
// Scans the '&' that starts an anchor and emits it as `result_symbol`.
// The '&' must be followed by an anchor character; that character is only
// inspected here, not consumed.
bool scn_acr_bgn(TSLexer *lexer, TSSymbol result_symbol) {
  if (LKA == '&') {
    ADV();
    if (is_ns_anchor_char(LKA)) {
      MRK_END();
      RET_SYM(result_symbol);
    }
  }
  return false;
}
|
|
||||||
|
|
||||||
// Scans the name of an anchor: consumes the run of anchor characters at the
// lookahead (possibly empty) and emits it as `result_symbol`.
bool scn_acr_ctn(TSLexer *lexer, TSSymbol result_symbol) {
  for (;;) {
    if (!is_ns_anchor_char(LKA)) break;
    ADV();
  }
  MRK_END();
  RET_SYM(result_symbol);
}
|
|
||||||
|
|
||||||
// Scans the '*' that starts an alias and emits it as `result_symbol`.
// The '*' must be followed by an anchor character; that character is only
// inspected here, not consumed.
bool scn_als_bgn(TSLexer *lexer, TSSymbol result_symbol) {
  if (LKA == '*') {
    ADV();
    if (is_ns_anchor_char(LKA)) {
      MRK_END();
      RET_SYM(result_symbol);
    }
  }
  return false;
}
|
|
||||||
|
|
||||||
// Scans the name of an alias: consumes the run of anchor characters at the
// lookahead (possibly empty) and emits it as `result_symbol`.
bool scn_als_ctn(TSLexer *lexer, TSSymbol result_symbol) {
  for (;;) {
    if (!is_ns_anchor_char(LKA)) break;
    ADV();
  }
  MRK_END();
  RET_SYM(result_symbol);
}
|
|
||||||
|
|
||||||
// Scans the part of a double-quoted escape sequence at the lookahead (the
// escape character itself plus any hex digits it requires) and emits it as
// `result_symbol`. Returns false for an unknown escape character or when a
// numeric escape has too few hex digits.
bool scn_dqt_esc_seq(TSLexer *lexer, TSSymbol result_symbol) {
  // Number of hex digits that must follow the escape character.
  uint16_t hex_len;
  switch (LKA) {
    case '0': case 'a': case 'b': case 't': case '\t': case 'n': case 'v':
    case 'r': case 'e': case ' ': case '"': case '/': case '\\': case 'N':
    case '_': case 'L': case 'P':
      hex_len = 0;
      break;
    case 'x':
      hex_len = 2;
      break;
    case 'u':
      hex_len = 4;
      break;
    case 'U':
      hex_len = 8;
      break;
    default:
      return false;
  }
  ADV();  // the escape character itself
  for (uint16_t k = 0; k < hex_len; k++) {
    if (!is_ns_hex_digit(LKA)) return false;
    ADV();
  }
  MRK_END();
  RET_SYM(result_symbol);
}
|
|
||||||
|
|
||||||
// Scans a run of literal characters inside a double-quoted string and emits
// it as `result_symbol`.
//
// At column 0 a "---" or "..." marker takes precedence and is emitted as
// S_DRS_END or S_DOC_END instead. Returns false when the lookahead is not a
// valid double-quoted character.
bool scn_dqt_str_cnt(TSLexer *lexer, TSSymbol result_symbol) {
  if (!is_nb_double_char(LKA)) return false;
  const bool at_line_start = cur_col == 0;
  if (at_line_start && scn_drs_doc_end(lexer)) {
    MRK_END();
    // cur_chr is the last marker character that was consumed.
    if (cur_chr == '-') RET_SYM(S_DRS_END);
    RET_SYM(S_DOC_END);
  }
  // NOTE(review): when the marker probe above consumed characters and failed,
  // one more character is still consumed here without being re-checked.
  ADV();
  while (is_nb_double_char(LKA)) ADV();
  MRK_END();
  RET_SYM(result_symbol);
}
|
|
||||||
|
|
||||||
// Scans a run of literal characters inside a single-quoted string and emits
// it as `result_symbol`.
//
// At column 0 a "---" or "..." marker takes precedence and is emitted as
// S_DRS_END or S_DOC_END instead. Returns false when the lookahead is not a
// valid single-quoted character.
bool scn_sqt_str_cnt(TSLexer *lexer, TSSymbol result_symbol) {
  if (!is_nb_single_char(LKA)) return false;
  const bool at_line_start = cur_col == 0;
  if (at_line_start && scn_drs_doc_end(lexer)) {
    MRK_END();
    // cur_chr is the last marker character that was consumed.
    if (cur_chr == '-') RET_SYM(S_DRS_END);
    RET_SYM(S_DOC_END);
  }
  // NOTE(review): when the marker probe above consumed characters and failed,
  // one more character is still consumed here without being re-checked.
  ADV();
  while (is_nb_single_char(LKA)) ADV();
  MRK_END();
  RET_SYM(result_symbol);
}
|
|
||||||
|
|
||||||
// Scans the header of a block scalar ('|' or '>' plus optional indicators),
// emits it as `result_symbol` and pushes an IND_STR entry holding the content
// indentation.
//
// With an explicit indentation digit n the pushed length is the enclosing
// indent + (n - 1). Without one, the length is detected by looking ahead over
// the following lines; that look-ahead happens after the end mark, so it is
// not part of the emitted token.
bool scn_blk_str_bgn(TSLexer *lexer, TSSymbol result_symbol) {
  if (LKA != '|' && LKA != '>') return false;
  ADV();

  const int16_t parent_ind = ind_len_stk.back();
  int16_t ind = -1;  // -1: no explicit indentation indicator seen

  // Header: an optional digit 1-9 and an optional '+'/'-', in either order.
  if (LKA == '+' || LKA == '-') {
    ADV();
    if (LKA >= '1' && LKA <= '9') {
      ind = LKA - '1';
      ADV();
    }
  } else if (LKA >= '1' && LKA <= '9') {
    ind = LKA - '1';
    ADV();
    if (LKA == '+' || LKA == '-') ADV();
  }
  if (!is_wht(LKA)) return false;
  MRK_END();

  if (ind != -1) {
    ind += parent_ind;
  } else {
    ind = parent_ind;
    // Skip the rest of the header line: trailing blanks and a comment.
    while (is_wsp(LKA)) ADV();
    if (LKA == '#') {
      ADV();
      while (!is_nwl(LKA) && LKA != 0) ADV();
    }
    if (is_nwl(LKA)) ADV_NWL();
    // Walk the leading spaces of the following lines.
    for (;;) {
      if (LKA == 0) break;
      if (LKA == ' ') {
        ADV();
        continue;
      }
      const int last_col = cur_col - 1;  // column of the last space seen
      if (is_nwl(LKA)) {
        // Blank line: it may raise the indent, a shorter one ends the search.
        if (last_col < ind) break;
        ind = last_col;
        ADV_NWL();
        continue;
      }
      // First content character: it may raise the indent, never lower it.
      if (last_col > ind) ind = last_col;
      break;
    }
  }

  PUSH_IND(IND_STR, ind);
  RET_SYM(result_symbol);
}
|
|
||||||
|
|
||||||
// Scans one line of block scalar content and emits it as `result_symbol`.
//
// The token ends after the last non-space character; trailing spaces and tabs
// are consumed but left outside the end mark. At column 0 a "---" or "..."
// marker ends the block scalar instead: the indentation entry is popped and
// BL is emitted.
bool scn_blk_str_cnt(TSLexer *lexer, TSSymbol result_symbol) {
  if (!is_ns_char(LKA)) return false;
  if (cur_col == 0 && scn_drs_doc_end(lexer)) {
    POP_IND();
    RET_SYM(BL);
  }
  ADV();
  MRK_END();
  for (;;) {
    if (is_ns_char(LKA)) {
      do {
        ADV();
      } while (is_ns_char(LKA));
      MRK_END();
    }
    if (!is_wsp(LKA)) break;
    do {
      ADV();
    } while (is_wsp(LKA));
  }
  RET_SYM(result_symbol);
}
|
|
||||||
|
|
||||||
// Scans the continuation of a plain scalar on the current line.
//
// is_plain_safe: member predicate that decides which characters may appear in
//                the scalar (is_plain_safe_in_block or is_plain_safe_in_flow).
//
// Returns SCN_STOP when the lookahead is neither a safe character nor white
// space (nothing consumed), SCN_FAIL when a ':' is not followed by a safe
// character, and SCN_SUCC otherwise. The end mark is only moved after
// characters that belong to the scalar, so trailing white space and a
// trailing ':' stay outside the token. Consumed characters are fed into the
// schema state machine through UPD_SCH_STT().
//
// The return type is int8_t rather than plain char so that SCN_FAIL (-1)
// keeps its value where plain char is unsigned.
int8_t scn_pln_cnt(TSLexer *lexer, bool (Scanner::*is_plain_safe)(int32_t)) {
  bool is_cur_saf = (this->*is_plain_safe)(cur_chr);
  bool is_lka_wsp = is_wsp(LKA);
  bool is_lka_saf = (this->*is_plain_safe)(LKA);
  if (!is_lka_saf && !is_lka_wsp) return SCN_STOP;

  for (;;) {
    if (is_lka_saf && LKA != '#' && LKA != ':') {
      ADV(); MRK_END(); UPD_SCH_STT();
    } else if (is_cur_saf && LKA == '#') {
      // '#' directly after a safe character is content, not a comment.
      ADV(); MRK_END(); UPD_SCH_STT();
    } else if (is_lka_wsp) {
      // White space only counts if more content follows, so no end mark yet.
      ADV(); UPD_SCH_STT();
    } else if (LKA == ':') {
      ADV();  // accepted or rejected below, once the next character is known
    } else {
      break;
    }

    is_cur_saf = is_lka_saf;
    is_lka_wsp = is_wsp(LKA);
    is_lka_saf = (this->*is_plain_safe)(LKA);

    if (cur_chr == ':') {
      // A ':' belongs to the scalar only when a safe character follows it.
      if (is_lka_saf) {
        MRK_END(); UPD_SCH_STT();
      } else {
        return SCN_FAIL;
      }
    }
  }
  return SCN_SUCC;
}
|
|
||||||
|
|
||||||
// Checks for a "---" or "..." marker at the lookahead.
//
// Returns true when three identical marker characters followed by white space
// or end of input were consumed; the caller is expected to set the end mark.
// Returns false otherwise. If the lookahead was '-' or '.', the characters
// consumed while probing stay consumed and the end mark is placed after them.
bool scn_drs_doc_end(TSLexer *lexer) {
  if (LKA != '-' && LKA != '.') return false;
  const int32_t marker = LKA;
  ADV();
  int seen = 1;
  while (seen < 3 && LKA == marker) {
    ADV();
    ++seen;
  }
  if (seen == 3 && is_wht(LKA)) return true;
  MRK_END();
  return false;
}
|
|
||||||
|
|
||||||
bool scan(TSLexer *lexer, const bool *valid_symbols) {
|
|
||||||
init();
|
|
||||||
MRK_END();
|
|
||||||
|
|
||||||
bool allow_comment = !(VLD[R_DQT_STR_CTN] || VLD[BR_DQT_STR_CTN] || VLD[R_SQT_STR_CTN] || VLD[BR_SQT_STR_CTN]);
|
|
||||||
|
|
||||||
vector<int16_t>::reverse_iterator ind_ptr = ind_len_stk.rbegin();
|
|
||||||
vector<int16_t>::reverse_iterator ind_end = ind_len_stk.rend();
|
|
||||||
int16_t cur_ind = *ind_ptr++;
|
|
||||||
int16_t prt_ind = ind_ptr == ind_end ? -1 : *ind_ptr;
|
|
||||||
int16_t cur_ind_typ = ind_typ_stk.back();
|
|
||||||
|
|
||||||
bool has_tab_ind = false;
|
|
||||||
int16_t leading_spaces = 0;
|
|
||||||
|
|
||||||
for (;;) {
|
|
||||||
if (LKA == ' ') {
|
|
||||||
if (!has_tab_ind) leading_spaces++;
|
|
||||||
SKP();
|
|
||||||
} else if (LKA == '\t') {
|
|
||||||
has_tab_ind = true;
|
|
||||||
SKP();
|
|
||||||
} else if (is_nwl(LKA)) {
|
|
||||||
has_tab_ind = false;
|
|
||||||
leading_spaces = 0;
|
|
||||||
SKP_NWL();
|
|
||||||
} else if (allow_comment && LKA == '#') {
|
|
||||||
if (VLD[BR_BLK_STR_CTN] && VLD[BL] && cur_col <= cur_ind) {POP_IND();RET_SYM(BL);}
|
|
||||||
if (
|
|
||||||
VLD[BR_BLK_STR_CTN]
|
|
||||||
? cur_row == row
|
|
||||||
: cur_col == 0 || cur_row != row || cur_col > col
|
|
||||||
) {
|
|
||||||
ADV();
|
|
||||||
while (!is_nwl(LKA) && LKA != 0) ADV();
|
|
||||||
MRK_END();
|
|
||||||
RET_SYM(COMMENT);
|
|
||||||
} else break;
|
|
||||||
} else break;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (LKA == 0) {
|
|
||||||
if (VLD[BL]) {MRK_END();POP_IND();RET_SYM(BL)}
|
|
||||||
if (VLD[END_OF_FILE]) {MRK_END();RET_SYM(END_OF_FILE)}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
int16_t bgn_row = cur_row;
|
|
||||||
int16_t bgn_col = cur_col;
|
|
||||||
int32_t bgn_chr = LKA;
|
|
||||||
|
|
||||||
if (VLD[BL] && bgn_col <= cur_ind && !has_tab_ind) {
|
|
||||||
if (
|
|
||||||
cur_ind == prt_ind && cur_ind_typ == IND_SEQ
|
|
||||||
? bgn_col < cur_ind || LKA != '-'
|
|
||||||
: bgn_col <= prt_ind || cur_ind_typ == IND_STR
|
|
||||||
) {POP_IND();RET_SYM(BL);}
|
|
||||||
}
|
|
||||||
|
|
||||||
bool has_nwl = cur_row > row;
|
|
||||||
bool is_r = !has_nwl;
|
|
||||||
bool is_br = has_nwl && leading_spaces > cur_ind;
|
|
||||||
bool is_b = has_nwl && leading_spaces == cur_ind && !has_tab_ind;
|
|
||||||
bool is_s = bgn_col == 0;
|
|
||||||
|
|
||||||
if (VLD[R_DIR_YML_VER] && is_r) return scn_dir_yml_ver(lexer, R_DIR_YML_VER);
|
|
||||||
if (VLD[R_DIR_TAG_HDL] && is_r) return scn_dir_tag_hdl(lexer, R_DIR_TAG_HDL);
|
|
||||||
if (VLD[R_DIR_TAG_PFX] && is_r) return scn_dir_tag_pfx(lexer, R_DIR_TAG_PFX);
|
|
||||||
if (VLD[R_DIR_RSV_PRM] && is_r) return scn_dir_rsv_prm(lexer, R_DIR_RSV_PRM);
|
|
||||||
if (VLD[BR_BLK_STR_CTN] && is_br && scn_blk_str_cnt(lexer, BR_BLK_STR_CTN)) return true;
|
|
||||||
|
|
||||||
if (
|
|
||||||
(VLD[R_DQT_STR_CTN] && is_r && scn_dqt_str_cnt(lexer, R_DQT_STR_CTN))
|
|
||||||
|| (VLD[BR_DQT_STR_CTN] && is_br && scn_dqt_str_cnt(lexer, BR_DQT_STR_CTN))
|
|
||||||
) return true;
|
|
||||||
|
|
||||||
if (
|
|
||||||
(VLD[R_SQT_STR_CTN] && is_r && scn_sqt_str_cnt(lexer, R_SQT_STR_CTN))
|
|
||||||
|| (VLD[BR_SQT_STR_CTN] && is_br && scn_sqt_str_cnt(lexer, BR_SQT_STR_CTN))
|
|
||||||
) return true;
|
|
||||||
|
|
||||||
if (VLD[R_ACR_CTN] && is_r) return scn_acr_ctn(lexer, R_ACR_CTN);
|
|
||||||
if (VLD[R_ALS_CTN] && is_r) return scn_als_ctn(lexer, R_ALS_CTN);
|
|
||||||
|
|
||||||
if (LKA == '%') {
|
|
||||||
if (VLD[S_DIR_YML_BGN] && is_s) return scn_dir_bgn(lexer);
|
|
||||||
} else if (LKA == '*') {
|
|
||||||
if (VLD[R_ALS_BGN] && is_r) {MAY_UPD_IMP_COL();return scn_als_bgn(lexer, R_ALS_BGN);}
|
|
||||||
if (VLD[BR_ALS_BGN] && is_br) {MAY_UPD_IMP_COL();return scn_als_bgn(lexer, BR_ALS_BGN);}
|
|
||||||
if (VLD[B_ALS_BGN] && is_b) {MAY_UPD_IMP_COL();return scn_als_bgn(lexer, B_ALS_BGN);}
|
|
||||||
} else if (LKA == '&') {
|
|
||||||
if (VLD[R_ACR_BGN] && is_r) {MAY_UPD_IMP_COL();return scn_acr_bgn(lexer, R_ACR_BGN);}
|
|
||||||
if (VLD[BR_ACR_BGN] && is_br) {MAY_UPD_IMP_COL();return scn_acr_bgn(lexer, BR_ACR_BGN);}
|
|
||||||
if (VLD[B_ACR_BGN] && is_b) {MAY_UPD_IMP_COL();return scn_acr_bgn(lexer, B_ACR_BGN);}
|
|
||||||
} else if (LKA == '!') {
|
|
||||||
if (VLD[R_TAG] && is_r) {MAY_UPD_IMP_COL();return scn_tag(lexer, R_TAG);}
|
|
||||||
if (VLD[BR_TAG] && is_br) {MAY_UPD_IMP_COL();return scn_tag(lexer, BR_TAG);}
|
|
||||||
if (VLD[B_TAG] && is_b) {MAY_UPD_IMP_COL();return scn_tag(lexer, B_TAG);}
|
|
||||||
} else if (LKA == '[') {
|
|
||||||
if (VLD[R_FLW_SEQ_BGN] && is_r) {MAY_UPD_IMP_COL();ADV();MRK_END();RET_SYM(R_FLW_SEQ_BGN)}
|
|
||||||
if (VLD[BR_FLW_SEQ_BGN] && is_br) {MAY_UPD_IMP_COL();ADV();MRK_END();RET_SYM(BR_FLW_SEQ_BGN)}
|
|
||||||
if (VLD[B_FLW_SEQ_BGN] && is_b) {MAY_UPD_IMP_COL();ADV();MRK_END();RET_SYM(B_FLW_SEQ_BGN)}
|
|
||||||
} else if (LKA == ']') {
|
|
||||||
if (VLD[R_FLW_SEQ_END] && is_r) {ADV();MRK_END();RET_SYM(R_FLW_SEQ_END)}
|
|
||||||
if (VLD[BR_FLW_SEQ_END] && is_br) {ADV();MRK_END();RET_SYM(BR_FLW_SEQ_END)}
|
|
||||||
} else if (LKA == '{') {
|
|
||||||
if (VLD[R_FLW_MAP_BGN] && is_r) {MAY_UPD_IMP_COL();ADV();MRK_END();RET_SYM(R_FLW_MAP_BGN)}
|
|
||||||
if (VLD[BR_FLW_MAP_BGN] && is_br) {MAY_UPD_IMP_COL();ADV();MRK_END();RET_SYM(BR_FLW_MAP_BGN)}
|
|
||||||
if (VLD[B_FLW_MAP_BGN] && is_b) {MAY_UPD_IMP_COL();ADV();MRK_END();RET_SYM(B_FLW_MAP_BGN)}
|
|
||||||
} else if (LKA == '}') {
|
|
||||||
if (VLD[R_FLW_MAP_END] && is_r) {ADV();MRK_END();RET_SYM(R_FLW_MAP_END)}
|
|
||||||
if (VLD[BR_FLW_MAP_END] && is_br) {ADV();MRK_END();RET_SYM(BR_FLW_MAP_END)}
|
|
||||||
} else if (LKA == ',') {
|
|
||||||
if (VLD[R_FLW_SEP_BGN] && is_r) {ADV();MRK_END();RET_SYM(R_FLW_SEP_BGN)}
|
|
||||||
if (VLD[BR_FLW_SEP_BGN] && is_br) {ADV();MRK_END();RET_SYM(BR_FLW_SEP_BGN)}
|
|
||||||
} else if (LKA == '"') {
|
|
||||||
if (VLD[R_DQT_STR_BGN] && is_r) {MAY_UPD_IMP_COL();ADV();MRK_END();RET_SYM(R_DQT_STR_BGN)}
|
|
||||||
if (VLD[BR_DQT_STR_BGN] && is_br) {MAY_UPD_IMP_COL();ADV();MRK_END();RET_SYM(BR_DQT_STR_BGN)}
|
|
||||||
if (VLD[B_DQT_STR_BGN] && is_b) {MAY_UPD_IMP_COL();ADV();MRK_END();RET_SYM(B_DQT_STR_BGN)}
|
|
||||||
if (VLD[R_DQT_STR_END] && is_r) {ADV();MRK_END();RET_SYM(R_DQT_STR_END)}
|
|
||||||
if (VLD[BR_DQT_STR_END] && is_br) {ADV();MRK_END();RET_SYM(BR_DQT_STR_END)}
|
|
||||||
} else if (LKA == '\'') {
|
|
||||||
if (VLD[R_SQT_STR_BGN] && is_r) {MAY_UPD_IMP_COL();ADV();MRK_END();RET_SYM(R_SQT_STR_BGN)}
|
|
||||||
if (VLD[BR_SQT_STR_BGN] && is_br) {MAY_UPD_IMP_COL();ADV();MRK_END();RET_SYM(BR_SQT_STR_BGN)}
|
|
||||||
if (VLD[B_SQT_STR_BGN] && is_b) {MAY_UPD_IMP_COL();ADV();MRK_END();RET_SYM(B_SQT_STR_BGN)}
|
|
||||||
if (VLD[R_SQT_STR_END] && is_r) {
|
|
||||||
ADV();
|
|
||||||
if (LKA == '\'') {ADV();MRK_END();RET_SYM(R_SQT_ESC_SQT)}
|
|
||||||
else {MRK_END();RET_SYM(R_SQT_STR_END)}
|
|
||||||
}
|
|
||||||
if (VLD[BR_SQT_STR_END] && is_br) {
|
|
||||||
ADV();
|
|
||||||
if (LKA == '\'') {ADV();MRK_END();RET_SYM(BR_SQT_ESC_SQT)}
|
|
||||||
else {MRK_END();RET_SYM(BR_SQT_STR_END)}
|
|
||||||
}
|
|
||||||
} else if (LKA == '?') {
|
|
||||||
bool is_r_blk_key_bgn = VLD[R_BLK_KEY_BGN] && is_r;
|
|
||||||
bool is_br_blk_key_bgn = VLD[BR_BLK_KEY_BGN] && is_br;
|
|
||||||
bool is_b_blk_key_bgn = VLD[B_BLK_KEY_BGN] && is_b;
|
|
||||||
bool is_r_flw_key_bgn = VLD[R_FLW_KEY_BGN] && is_r;
|
|
||||||
bool is_br_flw_key_bgn = VLD[BR_FLW_KEY_BGN] && is_br;
|
|
||||||
if (is_r_blk_key_bgn || is_br_blk_key_bgn || is_b_blk_key_bgn || is_r_flw_key_bgn || is_br_flw_key_bgn) {
|
|
||||||
ADV();
|
|
||||||
if (is_wht(LKA)) {
|
|
||||||
MRK_END();
|
|
||||||
if (is_r_blk_key_bgn) {PUSH_BGN_IND(IND_MAP);RET_SYM(R_BLK_KEY_BGN);}
|
|
||||||
if (is_br_blk_key_bgn) {PUSH_BGN_IND(IND_MAP);RET_SYM(BR_BLK_KEY_BGN);}
|
|
||||||
if (is_b_blk_key_bgn) RET_SYM(B_BLK_KEY_BGN);
|
|
||||||
if (is_r_flw_key_bgn) RET_SYM(R_FLW_KEY_BGN);
|
|
||||||
if (is_br_flw_key_bgn) RET_SYM(BR_FLW_KEY_BGN);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else if (LKA == ':') {
|
|
||||||
if (VLD[R_FLW_JSV_BGN] && is_r) {ADV();MRK_END();RET_SYM(R_FLW_JSV_BGN);}
|
|
||||||
if (VLD[BR_FLW_JSV_BGN] && is_br) {ADV();MRK_END();RET_SYM(BR_FLW_JSV_BGN);}
|
|
||||||
bool is_r_blk_val_bgn = VLD[R_BLK_VAL_BGN] && is_r;
|
|
||||||
bool is_br_blk_val_bgn = VLD[BR_BLK_VAL_BGN] && is_br;
|
|
||||||
bool is_b_blk_val_bgn = VLD[B_BLK_VAL_BGN] && is_b;
|
|
||||||
bool is_r_blk_imp_bgn = VLD[R_BLK_IMP_BGN] && is_r;
|
|
||||||
bool is_r_flw_njv_bgn = VLD[R_FLW_NJV_BGN] && is_r;
|
|
||||||
bool is_br_flw_njv_bgn = VLD[BR_FLW_NJV_BGN] && is_br;
|
|
||||||
if (is_r_blk_val_bgn || is_br_blk_val_bgn || is_b_blk_val_bgn || is_r_blk_imp_bgn || is_r_flw_njv_bgn || is_br_flw_njv_bgn) {
|
|
||||||
ADV();
|
|
||||||
bool is_lka_wht = is_wht(LKA);
|
|
||||||
if (is_lka_wht) {
|
|
||||||
if (is_r_blk_val_bgn) {PUSH_BGN_IND(IND_MAP);MRK_END();RET_SYM(R_BLK_VAL_BGN);}
|
|
||||||
if (is_br_blk_val_bgn) {PUSH_BGN_IND(IND_MAP);MRK_END();RET_SYM(BR_BLK_VAL_BGN);}
|
|
||||||
if (is_b_blk_val_bgn) {MRK_END();RET_SYM(B_BLK_VAL_BGN);}
|
|
||||||
if (is_r_blk_imp_bgn) {MAY_PUSH_IMP_IND();MRK_END();RET_SYM(R_BLK_IMP_BGN);}
|
|
||||||
}
|
|
||||||
if (is_lka_wht || LKA == ',' || LKA == ']' || LKA == '}') {
|
|
||||||
if (is_r_flw_njv_bgn) {MRK_END();RET_SYM(R_FLW_NJV_BGN);}
|
|
||||||
if (is_br_flw_njv_bgn) {MRK_END();RET_SYM(BR_FLW_NJV_BGN);}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else if (LKA == '-') {
|
|
||||||
bool is_r_blk_seq_bgn = VLD[R_BLK_SEQ_BGN] && is_r;
|
|
||||||
bool is_br_blk_seq_bgn = VLD[BR_BLK_SEQ_BGN] && is_br;
|
|
||||||
bool is_b_blk_seq_bgn = VLD[B_BLK_SEQ_BGN] && is_b;
|
|
||||||
bool is_s_drs_end = is_s;
|
|
||||||
if (is_r_blk_seq_bgn || is_br_blk_seq_bgn || is_b_blk_seq_bgn || is_s_drs_end) {
|
|
||||||
ADV();
|
|
||||||
if (is_wht(LKA)) {
|
|
||||||
if (is_r_blk_seq_bgn) {PUSH_BGN_IND(IND_SEQ);MRK_END();RET_SYM(R_BLK_SEQ_BGN)}
|
|
||||||
if (is_br_blk_seq_bgn) {PUSH_BGN_IND(IND_SEQ);MRK_END();RET_SYM(BR_BLK_SEQ_BGN)}
|
|
||||||
if (is_b_blk_seq_bgn) {MAY_PUSH_SPC_SEQ_IND();MRK_END();RET_SYM(B_BLK_SEQ_BGN)}
|
|
||||||
} else if (LKA == '-' && is_s_drs_end) {
|
|
||||||
ADV();
|
|
||||||
if (LKA == '-') {
|
|
||||||
ADV();
|
|
||||||
if (is_wht(LKA)) {
|
|
||||||
if (VLD[BL]) {POP_IND();RET_SYM(BL);}
|
|
||||||
MRK_END();
|
|
||||||
RET_SYM(S_DRS_END);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else if (LKA == '.') {
|
|
||||||
if (is_s) {
|
|
||||||
ADV();
|
|
||||||
if (LKA == '.') {
|
|
||||||
ADV();
|
|
||||||
if (LKA == '.') {
|
|
||||||
ADV();
|
|
||||||
if (is_wht(LKA)) {
|
|
||||||
if (VLD[BL]) {POP_IND();RET_SYM(BL);}
|
|
||||||
MRK_END();
|
|
||||||
RET_SYM(S_DOC_END);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else if (LKA == '\\') {
|
|
||||||
bool is_r_dqt_esc_nwl = VLD[R_DQT_ESC_NWL] && is_r;
|
|
||||||
bool is_br_dqt_esc_nwl = VLD[BR_DQT_ESC_NWL] && is_br;
|
|
||||||
bool is_r_dqt_esc_seq = VLD[R_DQT_ESC_SEQ] && is_r;
|
|
||||||
bool is_br_dqt_esc_seq = VLD[BR_DQT_ESC_SEQ] && is_br;
|
|
||||||
if (is_r_dqt_esc_nwl || is_br_dqt_esc_nwl || is_r_dqt_esc_seq || is_br_dqt_esc_seq) {
|
|
||||||
ADV();
|
|
||||||
if (is_nwl(LKA)) {
|
|
||||||
if (is_r_dqt_esc_nwl) {MRK_END();RET_SYM(R_DQT_ESC_NWL)}
|
|
||||||
if (is_br_dqt_esc_nwl) {MRK_END();RET_SYM(BR_DQT_ESC_NWL)}
|
|
||||||
}
|
|
||||||
if (is_r_dqt_esc_seq) return scn_dqt_esc_seq(lexer, R_DQT_ESC_SEQ);
|
|
||||||
if (is_br_dqt_esc_seq) return scn_dqt_esc_seq(lexer, BR_DQT_ESC_SEQ);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
} else if (LKA == '|') {
|
|
||||||
if (VLD[R_BLK_LIT_BGN] && is_r) return scn_blk_str_bgn(lexer, R_BLK_LIT_BGN);
|
|
||||||
if (VLD[BR_BLK_LIT_BGN] && is_br) return scn_blk_str_bgn(lexer, BR_BLK_LIT_BGN);
|
|
||||||
} else if (LKA == '>') {
|
|
||||||
if (VLD[R_BLK_FLD_BGN] && is_r) return scn_blk_str_bgn(lexer, R_BLK_FLD_BGN);
|
|
||||||
if (VLD[BR_BLK_FLD_BGN] && is_br) return scn_blk_str_bgn(lexer, BR_BLK_FLD_BGN);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool maybe_sgl_pln_blk = (VLD[R_SGL_PLN_STR_BLK] && is_r) || (VLD[BR_SGL_PLN_STR_BLK] && is_br) || (VLD[B_SGL_PLN_STR_BLK] && is_b);
|
|
||||||
bool maybe_sgl_pln_flw = (VLD[R_SGL_PLN_STR_FLW] && is_r) || (VLD[BR_SGL_PLN_STR_FLW] && is_br);
|
|
||||||
bool maybe_mtl_pln_blk = (VLD[R_MTL_PLN_STR_BLK] && is_r) || (VLD[BR_MTL_PLN_STR_BLK] && is_br);
|
|
||||||
bool maybe_mtl_pln_flw = (VLD[R_MTL_PLN_STR_FLW] && is_r) || (VLD[BR_MTL_PLN_STR_FLW] && is_br);
|
|
||||||
|
|
||||||
if (maybe_sgl_pln_blk || maybe_sgl_pln_flw || maybe_mtl_pln_blk || maybe_mtl_pln_flw) {
|
|
||||||
bool is_in_blk = maybe_sgl_pln_blk || maybe_mtl_pln_blk;
|
|
||||||
bool (Scanner::*is_plain_safe)(int32_t) = is_in_blk ? &Scanner::is_plain_safe_in_block : &Scanner::is_plain_safe_in_flow;
|
|
||||||
if (cur_col - bgn_col == 0) ADV();
|
|
||||||
if (cur_col - bgn_col == 1) {
|
|
||||||
bool is_plain_first =
|
|
||||||
(is_ns_char(bgn_chr) && !is_c_indicator(bgn_chr))
|
|
||||||
|| ((bgn_chr == '-' || bgn_chr == '?' || bgn_chr == ':')
|
|
||||||
&& (this->*is_plain_safe)(LKA));
|
|
||||||
if (!is_plain_first) return false;
|
|
||||||
UPD_SCH_STT();
|
|
||||||
} else {
|
|
||||||
// no need to check the following cases:
|
|
||||||
// ..X
|
|
||||||
// ...X
|
|
||||||
// --X
|
|
||||||
// ---X
|
|
||||||
// X: lookahead
|
|
||||||
sch_stt = SCH_STT_FRZ; // must be RS_STR
|
|
||||||
}
|
|
||||||
|
|
||||||
MRK_END();
|
|
||||||
|
|
||||||
for (;;) {
|
|
||||||
if (!is_nwl(LKA)) {
|
|
||||||
if (scn_pln_cnt(lexer, is_plain_safe) != SCN_SUCC) break;
|
|
||||||
}
|
|
||||||
if (LKA == 0 || !is_nwl(LKA)) break;
|
|
||||||
for (;;) {
|
|
||||||
if (is_nwl(LKA)) ADV_NWL();
|
|
||||||
else if (is_wsp(LKA)) ADV();
|
|
||||||
else break;
|
|
||||||
}
|
|
||||||
if (LKA == 0 || cur_col <= cur_ind) break;
|
|
||||||
if (cur_col == 0 && scn_drs_doc_end(lexer)) break;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (end_row == bgn_row) {
|
|
||||||
if (maybe_sgl_pln_blk) {MAY_UPD_IMP_COL();RET_SYM(is_r ? SGL_PLN_SYM(R, BLK) : is_br ? SGL_PLN_SYM(BR, BLK) : SGL_PLN_SYM(B, BLK));}
|
|
||||||
if (maybe_sgl_pln_flw) RET_SYM(is_r ? SGL_PLN_SYM(R, FLW) : SGL_PLN_SYM(BR, FLW));
|
|
||||||
} else {
|
|
||||||
if (maybe_mtl_pln_blk) {MAY_UPD_IMP_COL();RET_SYM(is_r ? R_MTL_PLN_STR_BLK : BR_MTL_PLN_STR_BLK);}
|
|
||||||
if (maybe_mtl_pln_flw) RET_SYM(is_r ? R_MTL_PLN_STR_FLW : BR_MTL_PLN_STR_FLW);
|
|
||||||
}
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
extern "C" {
|
|
||||||
|
|
||||||
void *tree_sitter_yaml_external_scanner_create() {
|
|
||||||
return new Scanner();
|
|
||||||
}
|
|
||||||
|
|
||||||
// Deletes the Scanner behind `payload`. The pointer is reinterpreted as a
// Scanner*, so it must be one that was produced as a Scanner.
void tree_sitter_yaml_external_scanner_destroy(void *payload) {
  delete static_cast<Scanner *>(payload);
}
|
|
||||||
|
|
||||||
unsigned tree_sitter_yaml_external_scanner_serialize(void *payload, char *buffer) {
|
|
||||||
Scanner *scanner = static_cast<Scanner *>(payload);
|
|
||||||
return scanner->serialize(buffer);
|
|
||||||
}
|
|
||||||
|
|
||||||
void tree_sitter_yaml_external_scanner_deserialize(void *payload, const char *buffer, unsigned length) {
|
|
||||||
Scanner *scanner = static_cast<Scanner *>(payload);
|
|
||||||
scanner->deserialize(buffer, length);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool tree_sitter_yaml_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) {
|
|
||||||
Scanner *scanner = static_cast<Scanner *>(payload);
|
|
||||||
return scanner->scan(lexer, valid_symbols);
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
16
src/schema.generated.cc → src/schema.generated.c
generated
16
src/schema.generated.cc → src/schema.generated.c
generated
|
@ -1,16 +1,16 @@
|
||||||
#include <cassert>
|
#include <assert.h>
|
||||||
|
#include <stdbool.h>
|
||||||
namespace tree_sitter_yaml {
|
#include <stdint.h>
|
||||||
|
|
||||||
const int8_t SCH_STT_FRZ = -1;
|
const int8_t SCH_STT_FRZ = -1;
|
||||||
|
|
||||||
enum ResultSchema {
|
typedef enum {
|
||||||
RS_STR,
|
RS_STR,
|
||||||
RS_INT,
|
RS_INT,
|
||||||
RS_NUL,
|
RS_NUL,
|
||||||
RS_BOL,
|
RS_BOL,
|
||||||
RS_FLT,
|
RS_FLT,
|
||||||
};
|
} ResultSchema;
|
||||||
|
|
||||||
int8_t adv_sch_stt(int8_t sch_stt, int32_t cur_chr, ResultSchema *rlt_sch) {
|
int8_t adv_sch_stt(int8_t sch_stt, int32_t cur_chr, ResultSchema *rlt_sch) {
|
||||||
switch (sch_stt) {
|
switch (sch_stt) {
|
||||||
|
@ -197,8 +197,8 @@ int8_t adv_sch_stt(int8_t sch_stt, int32_t cur_chr, ResultSchema *rlt_sch) {
|
||||||
*rlt_sch = RS_STR;
|
*rlt_sch = RS_STR;
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
if (cur_chr != '\r' && cur_chr != '\n' && cur_chr != ' ' && cur_chr != 0) *rlt_sch = RS_STR;
|
if (cur_chr != '\r' && cur_chr != '\n' && cur_chr != ' ' && cur_chr != 0) {
|
||||||
|
*rlt_sch = RS_STR;
|
||||||
|
}
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
|
54
src/tree_sitter/alloc.h
generated
Normal file
54
src/tree_sitter/alloc.h
generated
Normal file
|
@ -0,0 +1,54 @@
|
||||||
|
#ifndef TREE_SITTER_ALLOC_H_
|
||||||
|
#define TREE_SITTER_ALLOC_H_
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <stdbool.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
// Allow clients to override allocation functions
|
||||||
|
#ifdef TREE_SITTER_REUSE_ALLOCATOR
|
||||||
|
|
||||||
|
extern void *(*ts_current_malloc)(size_t);
|
||||||
|
extern void *(*ts_current_calloc)(size_t, size_t);
|
||||||
|
extern void *(*ts_current_realloc)(void *, size_t);
|
||||||
|
extern void (*ts_current_free)(void *);
|
||||||
|
|
||||||
|
#ifndef ts_malloc
|
||||||
|
#define ts_malloc ts_current_malloc
|
||||||
|
#endif
|
||||||
|
#ifndef ts_calloc
|
||||||
|
#define ts_calloc ts_current_calloc
|
||||||
|
#endif
|
||||||
|
#ifndef ts_realloc
|
||||||
|
#define ts_realloc ts_current_realloc
|
||||||
|
#endif
|
||||||
|
#ifndef ts_free
|
||||||
|
#define ts_free ts_current_free
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
#ifndef ts_malloc
|
||||||
|
#define ts_malloc malloc
|
||||||
|
#endif
|
||||||
|
#ifndef ts_calloc
|
||||||
|
#define ts_calloc calloc
|
||||||
|
#endif
|
||||||
|
#ifndef ts_realloc
|
||||||
|
#define ts_realloc realloc
|
||||||
|
#endif
|
||||||
|
#ifndef ts_free
|
||||||
|
#define ts_free free
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif // TREE_SITTER_ALLOC_H_
|
290
src/tree_sitter/array.h
generated
Normal file
290
src/tree_sitter/array.h
generated
Normal file
|
@ -0,0 +1,290 @@
|
||||||
|
#ifndef TREE_SITTER_ARRAY_H_
|
||||||
|
#define TREE_SITTER_ARRAY_H_
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include "./alloc.h"
|
||||||
|
|
||||||
|
#include <assert.h>
|
||||||
|
#include <stdbool.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
#pragma warning(disable : 4101)
|
||||||
|
#elif defined(__GNUC__) || defined(__clang__)
|
||||||
|
#pragma GCC diagnostic push
|
||||||
|
#pragma GCC diagnostic ignored "-Wunused-variable"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define Array(T) \
|
||||||
|
struct { \
|
||||||
|
T *contents; \
|
||||||
|
uint32_t size; \
|
||||||
|
uint32_t capacity; \
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Initialize an array.
|
||||||
|
#define array_init(self) \
|
||||||
|
((self)->size = 0, (self)->capacity = 0, (self)->contents = NULL)
|
||||||
|
|
||||||
|
/// Create an empty array.
|
||||||
|
#define array_new() \
|
||||||
|
{ NULL, 0, 0 }
|
||||||
|
|
||||||
|
/// Get a pointer to the element at a given `index` in the array.
|
||||||
|
#define array_get(self, _index) \
|
||||||
|
(assert((uint32_t)(_index) < (self)->size), &(self)->contents[_index])
|
||||||
|
|
||||||
|
/// Get a pointer to the first element in the array.
|
||||||
|
#define array_front(self) array_get(self, 0)
|
||||||
|
|
||||||
|
/// Get a pointer to the last element in the array.
|
||||||
|
#define array_back(self) array_get(self, (self)->size - 1)
|
||||||
|
|
||||||
|
/// Clear the array, setting its size to zero. Note that this does not free any
|
||||||
|
/// memory allocated for the array's contents.
|
||||||
|
#define array_clear(self) ((self)->size = 0)
|
||||||
|
|
||||||
|
/// Reserve `new_capacity` elements of space in the array. If `new_capacity` is
|
||||||
|
/// less than the array's current capacity, this function has no effect.
|
||||||
|
#define array_reserve(self, new_capacity) \
|
||||||
|
_array__reserve((Array *)(self), array_elem_size(self), new_capacity)
|
||||||
|
|
||||||
|
/// Free the array's backing storage and reset it to the empty state. Note that
/// this does not free any memory owned by the individual elements themselves.
|
||||||
|
#define array_delete(self) _array__delete((Array *)(self))
|
||||||
|
|
||||||
|
/// Push a new `element` onto the end of the array.
|
||||||
|
#define array_push(self, element) \
|
||||||
|
(_array__grow((Array *)(self), 1, array_elem_size(self)), \
|
||||||
|
(self)->contents[(self)->size++] = (element))
|
||||||
|
|
||||||
|
/// Increase the array's size by `count` elements.
|
||||||
|
/// New elements are zero-initialized.
|
||||||
|
#define array_grow_by(self, count) \
|
||||||
|
do { \
|
||||||
|
if ((count) == 0) break; \
|
||||||
|
_array__grow((Array *)(self), count, array_elem_size(self)); \
|
||||||
|
memset((self)->contents + (self)->size, 0, (count) * array_elem_size(self)); \
|
||||||
|
(self)->size += (count); \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
/// Append all elements from one array to the end of another.
|
||||||
|
#define array_push_all(self, other) \
|
||||||
|
array_extend((self), (other)->size, (other)->contents)
|
||||||
|
|
||||||
|
/// Append `count` elements to the end of the array, reading their values from the
|
||||||
|
/// `contents` pointer.
|
||||||
|
#define array_extend(self, count, contents) \
|
||||||
|
_array__splice( \
|
||||||
|
(Array *)(self), array_elem_size(self), (self)->size, \
|
||||||
|
0, count, contents \
|
||||||
|
)
|
||||||
|
|
||||||
|
/// Remove `old_count` elements from the array starting at the given `index`. At
|
||||||
|
/// the same index, insert `new_count` new elements, reading their values from the
|
||||||
|
/// `new_contents` pointer.
|
||||||
|
#define array_splice(self, _index, old_count, new_count, new_contents) \
|
||||||
|
_array__splice( \
|
||||||
|
(Array *)(self), array_elem_size(self), _index, \
|
||||||
|
old_count, new_count, new_contents \
|
||||||
|
)
|
||||||
|
|
||||||
|
/// Insert one `element` into the array at the given `index`.
|
||||||
|
#define array_insert(self, _index, element) \
|
||||||
|
_array__splice((Array *)(self), array_elem_size(self), _index, 0, 1, &(element))
|
||||||
|
|
||||||
|
/// Remove one element from the array at the given `index`.
|
||||||
|
#define array_erase(self, _index) \
|
||||||
|
_array__erase((Array *)(self), array_elem_size(self), _index)
|
||||||
|
|
||||||
|
/// Pop the last element off the array, returning the element by value.
|
||||||
|
#define array_pop(self) ((self)->contents[--(self)->size])
|
||||||
|
|
||||||
|
/// Assign the contents of one array to another, reallocating if necessary.
|
||||||
|
#define array_assign(self, other) \
|
||||||
|
_array__assign((Array *)(self), (const Array *)(other), array_elem_size(self))
|
||||||
|
|
||||||
|
/// Swap one array with another
|
||||||
|
#define array_swap(self, other) \
|
||||||
|
_array__swap((Array *)(self), (Array *)(other))
|
||||||
|
|
||||||
|
/// Get the size of the array contents
|
||||||
|
#define array_elem_size(self) (sizeof *(self)->contents)
|
||||||
|
|
||||||
|
/// Search a sorted array for a given `needle` value, using the given `compare`
|
||||||
|
/// callback to determine the order.
|
||||||
|
///
|
||||||
|
/// If an existing element is found to be equal to `needle`, then the `index`
|
||||||
|
/// out-parameter is set to the existing value's index, and the `exists`
|
||||||
|
/// out-parameter is set to true. Otherwise, `index` is set to an index where
|
||||||
|
/// `needle` should be inserted in order to preserve the sorting, and `exists`
|
||||||
|
/// is set to false.
|
||||||
|
#define array_search_sorted_with(self, compare, needle, _index, _exists) \
|
||||||
|
_array__search_sorted(self, 0, compare, , needle, _index, _exists)
|
||||||
|
|
||||||
|
/// Search a sorted array for a given `needle` value, using integer comparisons
|
||||||
|
/// of a given struct field (specified with a leading dot) to determine the order.
|
||||||
|
///
|
||||||
|
/// See also `array_search_sorted_with`.
|
||||||
|
#define array_search_sorted_by(self, field, needle, _index, _exists) \
|
||||||
|
_array__search_sorted(self, 0, _compare_int, field, needle, _index, _exists)
|
||||||
|
|
||||||
|
/// Insert a given `value` into a sorted array, using the given `compare`
|
||||||
|
/// callback to determine the order.
|
||||||
|
#define array_insert_sorted_with(self, compare, value) \
|
||||||
|
do { \
|
||||||
|
unsigned _index, _exists; \
|
||||||
|
array_search_sorted_with(self, compare, &(value), &_index, &_exists); \
|
||||||
|
if (!_exists) array_insert(self, _index, value); \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
/// Insert a given `value` into a sorted array, using integer comparisons of
|
||||||
|
/// a given struct field (specified with a leading dot) to determine the order.
|
||||||
|
///
|
||||||
|
/// See also `array_search_sorted_by`.
|
||||||
|
#define array_insert_sorted_by(self, field, value) \
|
||||||
|
do { \
|
||||||
|
unsigned _index, _exists; \
|
||||||
|
array_search_sorted_by(self, field, (value) field, &_index, &_exists); \
|
||||||
|
if (!_exists) array_insert(self, _index, value); \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
// Private
|
||||||
|
|
||||||
|
typedef Array(void) Array;
|
||||||
|
|
||||||
|
/// This is not what you're looking for, see `array_delete`.
|
||||||
|
static inline void _array__delete(Array *self) {
|
||||||
|
if (self->contents) {
|
||||||
|
ts_free(self->contents);
|
||||||
|
self->contents = NULL;
|
||||||
|
self->size = 0;
|
||||||
|
self->capacity = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// This is not what you're looking for, see `array_erase`.
|
||||||
|
static inline void _array__erase(Array *self, size_t element_size,
|
||||||
|
uint32_t index) {
|
||||||
|
assert(index < self->size);
|
||||||
|
char *contents = (char *)self->contents;
|
||||||
|
memmove(contents + index * element_size, contents + (index + 1) * element_size,
|
||||||
|
(self->size - index - 1) * element_size);
|
||||||
|
self->size--;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// This is not what you're looking for, see `array_reserve`.
|
||||||
|
static inline void _array__reserve(Array *self, size_t element_size, uint32_t new_capacity) {
|
||||||
|
if (new_capacity > self->capacity) {
|
||||||
|
if (self->contents) {
|
||||||
|
self->contents = ts_realloc(self->contents, new_capacity * element_size);
|
||||||
|
} else {
|
||||||
|
self->contents = ts_malloc(new_capacity * element_size);
|
||||||
|
}
|
||||||
|
self->capacity = new_capacity;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// This is not what you're looking for, see `array_assign`.
|
||||||
|
static inline void _array__assign(Array *self, const Array *other, size_t element_size) {
|
||||||
|
_array__reserve(self, element_size, other->size);
|
||||||
|
self->size = other->size;
|
||||||
|
memcpy(self->contents, other->contents, self->size * element_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// This is not what you're looking for, see `array_swap`.
|
||||||
|
static inline void _array__swap(Array *self, Array *other) {
|
||||||
|
Array swap = *other;
|
||||||
|
*other = *self;
|
||||||
|
*self = swap;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// This is not what you're looking for, see `array_push` or `array_grow_by`.
|
||||||
|
static inline void _array__grow(Array *self, uint32_t count, size_t element_size) {
|
||||||
|
uint32_t new_size = self->size + count;
|
||||||
|
if (new_size > self->capacity) {
|
||||||
|
uint32_t new_capacity = self->capacity * 2;
|
||||||
|
if (new_capacity < 8) new_capacity = 8;
|
||||||
|
if (new_capacity < new_size) new_capacity = new_size;
|
||||||
|
_array__reserve(self, element_size, new_capacity);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// This is not what you're looking for, see `array_splice`.
|
||||||
|
static inline void _array__splice(Array *self, size_t element_size,
|
||||||
|
uint32_t index, uint32_t old_count,
|
||||||
|
uint32_t new_count, const void *elements) {
|
||||||
|
uint32_t new_size = self->size + new_count - old_count;
|
||||||
|
uint32_t old_end = index + old_count;
|
||||||
|
uint32_t new_end = index + new_count;
|
||||||
|
assert(old_end <= self->size);
|
||||||
|
|
||||||
|
_array__reserve(self, element_size, new_size);
|
||||||
|
|
||||||
|
char *contents = (char *)self->contents;
|
||||||
|
if (self->size > old_end) {
|
||||||
|
memmove(
|
||||||
|
contents + new_end * element_size,
|
||||||
|
contents + old_end * element_size,
|
||||||
|
(self->size - old_end) * element_size
|
||||||
|
);
|
||||||
|
}
|
||||||
|
if (new_count > 0) {
|
||||||
|
if (elements) {
|
||||||
|
memcpy(
|
||||||
|
(contents + index * element_size),
|
||||||
|
elements,
|
||||||
|
new_count * element_size
|
||||||
|
);
|
||||||
|
} else {
|
||||||
|
memset(
|
||||||
|
(contents + index * element_size),
|
||||||
|
0,
|
||||||
|
new_count * element_size
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
self->size += new_count - old_count;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A binary search routine, based on Rust's `std::slice::binary_search_by`.
|
||||||
|
/// This is not what you're looking for, see `array_search_sorted_with` or `array_search_sorted_by`.
|
||||||
|
#define _array__search_sorted(self, start, compare, suffix, needle, _index, _exists) \
|
||||||
|
do { \
|
||||||
|
*(_index) = start; \
|
||||||
|
*(_exists) = false; \
|
||||||
|
uint32_t size = (self)->size - *(_index); \
|
||||||
|
if (size == 0) break; \
|
||||||
|
int comparison; \
|
||||||
|
while (size > 1) { \
|
||||||
|
uint32_t half_size = size / 2; \
|
||||||
|
uint32_t mid_index = *(_index) + half_size; \
|
||||||
|
comparison = compare(&((self)->contents[mid_index] suffix), (needle)); \
|
||||||
|
if (comparison <= 0) *(_index) = mid_index; \
|
||||||
|
size -= half_size; \
|
||||||
|
} \
|
||||||
|
comparison = compare(&((self)->contents[*(_index)] suffix), (needle)); \
|
||||||
|
if (comparison == 0) *(_exists) = true; \
|
||||||
|
else if (comparison < 0) *(_index) += 1; \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
/// Helper macro for the `_sorted_by` routines below. This takes the left (existing)
|
||||||
|
/// parameter by reference in order to work with the generic sorting function above.
|
||||||
|
#define _compare_int(a, b) ((int)*(a) - (int)(b))
|
||||||
|
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
#pragma warning(default : 4101)
|
||||||
|
#elif defined(__GNUC__) || defined(__clang__)
|
||||||
|
#pragma GCC diagnostic pop
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif // TREE_SITTER_ARRAY_H_
|
Loading…
Reference in a new issue