The parser now understands comments (line and bracket)

This commit is contained in:
Uy Ha 2021-06-27 13:14:31 +02:00
parent 9bc53cd0df
commit bd57f88f2d
6 changed files with 11353 additions and 12937 deletions

View file

@ -6,7 +6,7 @@ Bracket comment [comment]
---
(source_file
(comment (bracket_argument))
(bracket_comment)
)
==========================================================
@ -19,12 +19,37 @@ message(STATUS #[[Some comment]] "comment is next" #[[Some comment]])
(source_file
(message_command
(message)
(comment (bracket_argument))
(bracket_comment)
(argument (quoted_argument (quoted_element)))
(comment (bracket_argument))
(bracket_comment)
)
)
======================
Line comment [comment]
======================
# [[Some comment]] "comment is next" #[[Some comment]]
---
(source_file
(line_comment)
)
===================================
Message with Line comment [comment]
===================================
message(STATUS # Some line comment
message #Some other line comment
)
---
(source_file
(message_command
(message)
(line_comment)
(argument (unquoted_argument))
(line_comment)
)
)

View file

@ -79,8 +79,8 @@ message_args = [
module.exports = grammar({
name: "cmake",
externals: ($) => [$.bracket_argument, $.bracket_comment],
extras: ($) => [/[\s\n\r]/, $.comment],
externals: ($) => [$.bracket_argument, $.bracket_comment, $.line_comment],
extras: ($) => [/[\s\n\r]/, $.bracket_comment, $.line_comment],
rules: {
source_file: ($) => repeat($._command_invocation),
@ -141,8 +141,6 @@ module.exports = grammar({
$.message_command
),
comment: ($) => choice($.bracket_comment),
...commandNames(...commands),
identifier: (_) => /[A-Za-z_][A-Za-z0-9_]*/,
integer: (_) => /[+-]*\d+/,

View file

@ -1910,15 +1910,6 @@
}
]
},
"comment": {
"type": "CHOICE",
"members": [
{
"type": "SYMBOL",
"name": "bracket_comment"
}
]
},
"if": {
"type": "PATTERN",
"value": "[iI][fF]"
@ -1987,7 +1978,11 @@
},
{
"type": "SYMBOL",
"name": "comment"
"name": "bracket_comment"
},
{
"type": "SYMBOL",
"name": "line_comment"
}
],
"conflicts": [],
@ -2000,6 +1995,10 @@
{
"type": "SYMBOL",
"name": "bracket_comment"
},
{
"type": "SYMBOL",
"name": "line_comment"
}
],
"inline": [],

View file

@ -37,21 +37,6 @@
]
}
},
{
"type": "comment",
"named": true,
"fields": {},
"children": {
"multiple": false,
"required": true,
"types": [
{
"type": "bracket_comment",
"named": true
}
]
}
},
{
"type": "else_command",
"named": true,
@ -1062,6 +1047,10 @@
"type": "if",
"named": true
},
{
"type": "line_comment",
"named": true
},
{
"type": "macro",
"named": true

24161
src/parser.c

File diff suppressed because it is too large Load diff

View file

@ -1,18 +1,16 @@
#include <cwctype>
#include <iostream>
#include <tree_sitter/parser.h>
namespace {
enum TokenType { BRACKET_ARGUMENT, BRACKET_COMMENT };
enum TokenType { BRACKET_ARGUMENT, BRACKET_COMMENT, LINE_COMMENT };
// Moves the lexer past the current lookahead character by calling
// lexer->advance with its second argument set to true.
// NOTE(review): in tree-sitter that flag presumably marks the character as
// skipped (not part of the token being scanned) — confirm against parser.h.
void skip(TSLexer *lexer) { lexer->advance(lexer, true); }
// Moves the lexer past the current lookahead character by calling
// lexer->advance with its second argument set to false.
// NOTE(review): presumably this keeps the character as part of the token
// being scanned, unlike skip() — confirm against tree-sitter's parser.h.
void advance(TSLexer *lexer) { lexer->advance(lexer, false); }
bool scan_bracket_argument(TSLexer *lexer, bool skip_wspace) {
if (skip_wspace) {
void skip_wspace(TSLexer *lexer) {
while (std::iswspace(lexer->lookahead)) {
skip(lexer);
}
}
bool is_bracket_argument(TSLexer *lexer) {
if (lexer->lookahead != '[') {
return false;
}
@ -41,29 +39,35 @@ bool scan_bracket_argument(TSLexer *lexer, bool skip_wspace) {
if (lexer->lookahead == ']' && close_level == open_level) {
advance(lexer);
return true;
}
}
}
return false;
}
bool scan(void *payload, TSLexer *lexer, bool const *valid_symbols) {
skip_wspace(lexer);
if (lexer->lookahead != '#' && valid_symbols[BRACKET_ARGUMENT]) {
if (is_bracket_argument(lexer)) {
lexer->result_symbol = BRACKET_ARGUMENT;
return true;
}
}
}
return false;
}
bool scan_bracket_comment(TSLexer *lexer) {
if (lexer->lookahead != '#') {
return false;
}
if (lexer->lookahead == '#' &&
(valid_symbols[BRACKET_COMMENT] || valid_symbols[LINE_COMMENT])) {
advance(lexer);
if (scan_bracket_argument(lexer, false)) {
if (is_bracket_argument(lexer)) {
lexer->result_symbol = BRACKET_COMMENT;
return true;
} else {
while (lexer->lookahead != '\n' && lexer->lookahead != '\0') {
advance(lexer);
}
lexer->result_symbol = LINE_COMMENT;
return true;
}
return false;
}
bool scan(void *payload, TSLexer *lexer, const bool *valid_symbols) {
if (valid_symbols[BRACKET_ARGUMENT])
return scan_bracket_argument(lexer, true);
if (valid_symbols[BRACKET_ARGUMENT])
return scan_bracket_comment(lexer);
return false;
}
@ -77,10 +81,10 @@ unsigned tree_sitter_cmake_external_scanner_serialize(void *payload,
return 0;
}
void tree_sitter_cmake_external_scanner_deserialize(void *payload,
const char *buffer,
char const *buffer,
unsigned length) {}
bool tree_sitter_cmake_external_scanner_scan(void *payload, TSLexer *lexer,
const bool *valid_symbols) {
bool const *valid_symbols) {
return scan(payload, lexer, valid_symbols);
}
}