The parser now understands comments (line and bracket)

This commit is contained in:
Uy Ha 2021-06-27 13:14:31 +02:00
parent 9bc53cd0df
commit bd57f88f2d
6 changed files with 11353 additions and 12937 deletions

View file

@ -6,7 +6,7 @@ Bracket comment [comment]
--- ---
(source_file (source_file
(comment (bracket_argument)) (bracket_comment)
) )
========================================================== ==========================================================
@ -19,12 +19,37 @@ message(STATUS #[[Some comment]] "comment is next" #[[Some comment]])
(source_file (source_file
(message_command (message_command
(message) (message)
(comment (bracket_argument)) (bracket_comment)
(argument (quoted_argument (quoted_element))) (argument (quoted_argument (quoted_element)))
(comment (bracket_argument)) (bracket_comment)
) )
) )
====================== ======================
Line comment [comment] Line comment [comment]
====================== ======================
# [[Some comment]] "comment is next" #[[Some comment]]
---
(source_file
(line_comment)
)
===================================
Message with Line comment [comment]
===================================
message(STATUS # Some line comment
message #Some other line comment
)
---
(source_file
(message_command
(message)
(line_comment)
(argument (unquoted_argument))
(line_comment)
)
)

View file

@ -79,8 +79,8 @@ message_args = [
module.exports = grammar({ module.exports = grammar({
name: "cmake", name: "cmake",
externals: ($) => [$.bracket_argument, $.bracket_comment], externals: ($) => [$.bracket_argument, $.bracket_comment, $.line_comment],
extras: ($) => [/[\s\n\r]/, $.comment], extras: ($) => [/[\s\n\r]/, $.bracket_comment, $.line_comment],
rules: { rules: {
source_file: ($) => repeat($._command_invocation), source_file: ($) => repeat($._command_invocation),
@ -141,8 +141,6 @@ module.exports = grammar({
$.message_command $.message_command
), ),
comment: ($) => choice($.bracket_comment),
...commandNames(...commands), ...commandNames(...commands),
identifier: (_) => /[A-Za-z_][A-Za-z0-9_]*/, identifier: (_) => /[A-Za-z_][A-Za-z0-9_]*/,
integer: (_) => /[+-]*\d+/, integer: (_) => /[+-]*\d+/,

View file

@ -1910,15 +1910,6 @@
} }
] ]
}, },
"comment": {
"type": "CHOICE",
"members": [
{
"type": "SYMBOL",
"name": "bracket_comment"
}
]
},
"if": { "if": {
"type": "PATTERN", "type": "PATTERN",
"value": "[iI][fF]" "value": "[iI][fF]"
@ -1987,7 +1978,11 @@
}, },
{ {
"type": "SYMBOL", "type": "SYMBOL",
"name": "comment" "name": "bracket_comment"
},
{
"type": "SYMBOL",
"name": "line_comment"
} }
], ],
"conflicts": [], "conflicts": [],
@ -2000,6 +1995,10 @@
{ {
"type": "SYMBOL", "type": "SYMBOL",
"name": "bracket_comment" "name": "bracket_comment"
},
{
"type": "SYMBOL",
"name": "line_comment"
} }
], ],
"inline": [], "inline": [],

View file

@ -37,21 +37,6 @@
] ]
} }
}, },
{
"type": "comment",
"named": true,
"fields": {},
"children": {
"multiple": false,
"required": true,
"types": [
{
"type": "bracket_comment",
"named": true
}
]
}
},
{ {
"type": "else_command", "type": "else_command",
"named": true, "named": true,
@ -1062,6 +1047,10 @@
"type": "if", "type": "if",
"named": true "named": true
}, },
{
"type": "line_comment",
"named": true
},
{ {
"type": "macro", "type": "macro",
"named": true "named": true

24161
src/parser.c

File diff suppressed because it is too large Load diff

View file

@ -1,18 +1,16 @@
#include <cwctype> #include <cwctype>
#include <iostream>
#include <tree_sitter/parser.h> #include <tree_sitter/parser.h>
namespace { namespace {
enum TokenType { BRACKET_ARGUMENT, BRACKET_COMMENT }; enum TokenType { BRACKET_ARGUMENT, BRACKET_COMMENT, LINE_COMMENT };
void skip(TSLexer *lexer) { lexer->advance(lexer, true); } void skip(TSLexer *lexer) { lexer->advance(lexer, true); }
void advance(TSLexer *lexer) { lexer->advance(lexer, false); } void advance(TSLexer *lexer) { lexer->advance(lexer, false); }
bool scan_bracket_argument(TSLexer *lexer, bool skip_wspace) { void skip_wspace(TSLexer *lexer) {
if (skip_wspace) { while (std::iswspace(lexer->lookahead)) {
while (std::iswspace(lexer->lookahead)) { skip(lexer);
skip(lexer);
}
} }
}
bool is_bracket_argument(TSLexer *lexer) {
if (lexer->lookahead != '[') { if (lexer->lookahead != '[') {
return false; return false;
} }
@ -41,29 +39,35 @@ bool scan_bracket_argument(TSLexer *lexer, bool skip_wspace) {
if (lexer->lookahead == ']' && close_level == open_level) { if (lexer->lookahead == ']' && close_level == open_level) {
advance(lexer); advance(lexer);
lexer->result_symbol = BRACKET_ARGUMENT;
return true; return true;
} }
} }
} }
return false; return false;
} }
bool scan_bracket_comment(TSLexer *lexer) { bool scan(void *payload, TSLexer *lexer, bool const *valid_symbols) {
if (lexer->lookahead != '#') { skip_wspace(lexer);
return false;
if (lexer->lookahead != '#' && valid_symbols[BRACKET_ARGUMENT]) {
if (is_bracket_argument(lexer)) {
lexer->result_symbol = BRACKET_ARGUMENT;
return true;
}
} }
advance(lexer); if (lexer->lookahead == '#' &&
if (scan_bracket_argument(lexer, false)) { (valid_symbols[BRACKET_COMMENT] || valid_symbols[LINE_COMMENT])) {
lexer->result_symbol = BRACKET_COMMENT; advance(lexer);
return true; if (is_bracket_argument(lexer)) {
lexer->result_symbol = BRACKET_COMMENT;
return true;
} else {
while (lexer->lookahead != '\n' && lexer->lookahead != '\0') {
advance(lexer);
}
lexer->result_symbol = LINE_COMMENT;
return true;
}
} }
return false;
}
bool scan(void *payload, TSLexer *lexer, const bool *valid_symbols) {
if (valid_symbols[BRACKET_ARGUMENT])
return scan_bracket_argument(lexer, true);
if (valid_symbols[BRACKET_ARGUMENT])
return scan_bracket_comment(lexer);
return false; return false;
} }
@ -77,10 +81,10 @@ unsigned tree_sitter_cmake_external_scanner_serialize(void *payload,
return 0; return 0;
} }
void tree_sitter_cmake_external_scanner_deserialize(void *payload, void tree_sitter_cmake_external_scanner_deserialize(void *payload,
const char *buffer, char const *buffer,
unsigned length) {} unsigned length) {}
bool tree_sitter_cmake_external_scanner_scan(void *payload, TSLexer *lexer, bool tree_sitter_cmake_external_scanner_scan(void *payload, TSLexer *lexer,
const bool *valid_symbols) { bool const *valid_symbols) {
return scan(payload, lexer, valid_symbols); return scan(payload, lexer, valid_symbols);
} }
} }