feat: initial implementation

This commit is contained in:
Ika 2019-08-31 19:14:13 +08:00
commit f45f18f407
26 changed files with 43786 additions and 0 deletions

12
.editorconfig Normal file
View file

@ -0,0 +1,12 @@
root = true
[*]
charset = utf-8
end_of_line = lf
indent_size = 2
indent_style = space
insert_final_newline = true
trim_trailing_whitespace = true
[corpus/*]
trim_trailing_whitespace = false

6
.gitattributes vendored Normal file
View file

@ -0,0 +1,6 @@
* text eol=lf
/src/** linguist-generated
/src/scanner.cc linguist-generated=false
/index.js linguist-generated
/binding.gyp linguist-detectable=false

3
.gitignore vendored Normal file
View file

@ -0,0 +1,3 @@
/build
/node_modules
/tree-sitter

4
.gitmodules vendored Normal file
View file

@ -0,0 +1,4 @@
[submodule "yaml-test-suite"]
path = yaml-test-suite
url = https://github.com/yaml/yaml-test-suite
branch = data

0
.npmignore Normal file
View file

13
.travis.yml Normal file
View file

@ -0,0 +1,13 @@
language: rust
rust:
- stable
script:
- if [ ! -d "./tree-sitter" ]; then bash ./scripts/setup-tree-sitter.sh; fi
- ./tree-sitter/target/release/tree-sitter test
cache:
cargo: true
directories:
- ./tree-sitter

21
LICENSE Normal file
View file

@ -0,0 +1,21 @@
The MIT License (MIT)
Copyright (c) Ika <ikatyang@gmail.com> (https://github.com/ikatyang)
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

51
README.md Normal file
View file

@ -0,0 +1,51 @@
# tree-sitter-yaml
[![npm](https://img.shields.io/npm/v/tree-sitter-yaml.svg)](https://www.npmjs.com/package/tree-sitter-yaml)
[![build](https://img.shields.io/travis/com/ikatyang/tree-sitter-yaml/master.svg)](https://travis-ci.com/ikatyang/tree-sitter-yaml/builds)
YAML ([YAML Spec v1.2](https://yaml.org/spec/1.2/spec.html)) grammar for [tree-sitter](https://github.com/tree-sitter/tree-sitter)
[Changelog](https://github.com/ikatyang/tree-sitter-yaml/blob/master/CHANGELOG.md)
## Install
```sh
npm install tree-sitter-yaml tree-sitter
```
## Usage
```js
const Parser = require("tree-sitter");
const YAML = require("tree-sitter-yaml");
const parser = new Parser();
parser.setLanguage(YAML);
const sourceCode = `
!!map
hello: &anchor
- world
`;
const tree = parser.parse(sourceCode);
console.log(tree.rootNode.toString());
// (stream
// (document
// (block_node
// (tag)
// (block_mapping
// (block_mapping_pair
// key: (flow_node
// (plain_scalar))
// value: (block_node
// (anchor)
// (block_sequence
// (block_sequence_item
// (flow_node
// (plain_scalar))))))))))
```
## License
MIT © [Ika](https://github.com/ikatyang)

19
binding.gyp Normal file
View file

@ -0,0 +1,19 @@
{
"targets": [
{
"target_name": "tree_sitter_yaml_binding",
"include_dirs": [
"<!(node -e \"require('nan')\")",
"src"
],
"sources": [
"src/binding.cc",
"src/parser.c",
"src/scanner.cc",
],
"cflags_c": [
"-std=c99",
]
}
]
}

8542
corpus/spec.txt Normal file

File diff suppressed because it is too large Load diff

539
grammar.js Normal file
View file

@ -0,0 +1,539 @@
module.exports = grammar({
name: "yaml",
// Tokens declared as externals, i.e. not defined by a rule in this file
// (binding.gyp compiles src/scanner.cc alongside the generated parser).
// Every name has the shape `_<position>_<token>`: the position prefix is one
// of the abbreviations explained just below, and the trailing comment on each
// line shows the source text that the tokens on that line stand for.
externals: $ => [
$._eof,
// s = starts at column 0 in the current or following row
// r = starts with 0 or more s_white in the current row
// br = starts with more than `current_indent` s_white in the following row
// b = starts with `current_indent` s_white in the following row
// bl = starts with `parent_indent` or less s_white in the following row
$._s_dir_yml_bgn, $._r_dir_yml_ver, // %YAML 1.2
$._s_dir_tag_bgn, $._r_dir_tag_hdl, $._r_dir_tag_pfx, // %TAG !yaml! tag:yaml.org,2002:
$._s_dir_rsv_bgn, $._r_dir_rsv_prm, // %FOO bar baz
$._s_drs_end, // ---
$._s_doc_end, // ...
$._r_blk_seq_bgn, $._br_blk_seq_bgn, $._b_blk_seq_bgn, // -
$._r_blk_key_bgn, $._br_blk_key_bgn, $._b_blk_key_bgn, // ?
$._r_blk_val_bgn, $._br_blk_val_bgn, $._b_blk_val_bgn, // :
$._r_blk_imp_bgn, // : (implicit)
$._r_blk_str_bgn, $._br_blk_str_bgn, // | >
$._br_blk_str_ctn, // block scalar content
$._r_flw_seq_bgn, $._br_flw_seq_bgn, $._b_flw_seq_bgn, // [
$._r_flw_seq_end, $._br_flw_seq_end, // ]
$._r_flw_map_bgn, $._br_flw_map_bgn, $._b_flw_map_bgn, // {
$._r_flw_map_end, $._br_flw_map_end, // }
$._r_flw_sep_bgn, $._br_flw_sep_bgn, // ,
$._r_flw_key_bgn, $._br_flw_key_bgn, // ?
$._r_flw_jsv_bgn, $._br_flw_jsv_bgn, // : (json key)
$._r_flw_njv_bgn, $._br_flw_njv_bgn, // : (non-json key)
$._r_dqt_str_bgn, $._br_dqt_str_bgn, $._b_dqt_str_bgn, // " (start)
$._r_dqt_str_ctn, $._br_dqt_str_ctn, // double quote scalar content
$._r_dqt_esc_nwl, $._br_dqt_esc_nwl, // escape newline
$._r_dqt_esc_seq, $._br_dqt_esc_seq, // escape sequence
$._r_dqt_str_end, $._br_dqt_str_end, // " (end)
$._r_sqt_str_bgn, $._br_sqt_str_bgn, $._b_sqt_str_bgn, // ' (start)
$._r_sqt_str_ctn, $._br_sqt_str_ctn, // single quote scalar content
$._r_sqt_esc_sqt, $._br_sqt_esc_sqt, // ''
$._r_sqt_str_end, $._br_sqt_str_end, // ' (end)
$._r_sgl_pln_blk, $._br_sgl_pln_blk, $._b_sgl_pln_blk, // plain scalar (singleline in block)
$._r_sgl_pln_flw, $._br_sgl_pln_flw, // plain scalar (singleline in flow)
$._r_mtl_pln_blk, $._br_mtl_pln_blk, // plain scalar (multiline in block)
$._r_mtl_pln_flw, $._br_mtl_pln_flw, // plain scalar (multiline in flow)
$._r_tag, $._br_tag, $._b_tag, // !tag
$._r_acr, $._br_acr, $._b_acr, // &id
$._r_als, $._br_als, $._b_als, // *id
$._bl,
$.comment,
],
extras: $ => [$.comment],
// Rule sets declared as expected conflicts. Most entries pair a rule with its
// `_sgl_` counterpart (judging by the token comments in `externals`, `sgl`
// marks the single-line variant); the two trailing single-rule entries are
// explained by the example in the block comment below.
conflicts: $ => [
[$._r_prp, $._r_sgl_prp],
[$._br_prp, $._br_sgl_prp],
[$._flw_seq_tal, $._sgl_flw_seq_tal],
[$._flw_map_tal, $._sgl_flw_map_tal],
[$._flw_ann_par_tal, $._sgl_flw_ann_par_tal],
[$._r_flw_seq_itm, $._r_sgl_flw_col_itm],
[$._r_flw_map_itm, $._r_sgl_flw_col_itm],
[$._r_flw_njl_ann_par, $._r_sgl_flw_njl_ann_par],
[$._r_flw_exp_par, $._r_sgl_flw_exp_par],
[$._r_dqt_str, $._r_sgl_dqt_str],
[$._r_sqt_str, $._r_sgl_sqt_str],
[$._r_pln_flw_val, $._r_sgl_pln_flw_val],
/**
* (_r_prp _r_acr _br_tag)
*
* &str
* !!str a
*
* (_r_prp _r_acr) _br_tag
*
* &map
* !!str a: 1
*/
[$._r_prp],
[$._br_prp],
],
// Helper rules registered for inlining. All of them are pure `choice(...)`
// groupings defined further down in `rules`.
// NOTE(review): per tree-sitter's `inline` option these should be substituted
// at their use sites and never produce a node of their own — confirm against
// the tree-sitter documentation for the pinned CLI version.
inline: $ => [
$._r_pln_blk,
$._br_pln_blk,
$._r_pln_flw,
$._br_pln_flw,
$._r_blk_seq_val,
$._r_blk_map_val,
$._r_flw_val_blk,
$._br_flw_val_blk,
$._r_sgl_flw_val_blk,
$._br_sgl_flw_val_blk,
$._b_sgl_flw_val_blk,
$._r_flw_val_flw,
$._br_flw_val_flw,
$._r_sgl_flw_val_flw,
$._r_flw_jsl_val,
$._br_flw_jsl_val,
$._r_sgl_flw_jsl_val,
$._br_sgl_flw_jsl_val,
$._b_sgl_flw_jsl_val,
$._r_flw_njl_val_blk,
$._br_flw_njl_val_blk,
$._r_sgl_flw_njl_val_blk,
$._br_sgl_flw_njl_val_blk,
$._b_sgl_flw_njl_val_blk,
$._r_flw_njl_val_flw,
$._br_flw_njl_val_flw,
$._r_sgl_flw_njl_val_flw,
],
rules: {
// Top level: an optional run of documents followed by the `_eof` token.
// The first document has its own alternatives (`_bgn_imp_doc*` may only appear
// here); every later document is reached through the `_doc_*_seq` chains.
//
// Naming used by the helper rules below:
//   w_end  = the alternative finishes with `_s_doc_end` (`...`)
//   wo_end = the alternative has no `_s_doc_end`
//   w_bgn  = `_exp_doc` / `_exp_doc_end` (which start with `_s_drs_end`, `---`)
//            or a bare `_doc_end`
//   wo_bgn = `_drs_doc*` (directives first) or `_imp_doc*`
// A `wo_end` document can only be followed by a `w_bgn` sequence, whereas a
// `w_end` document can be followed by any of the four sequences.
stream: $ => seq(optional(choice(
seq(
choice($._bgn_imp_doc, $._drs_doc, $._exp_doc),
optional(choice($._doc_w_bgn_w_end_seq, $._doc_w_bgn_wo_end_seq))),
seq(
choice($._bgn_imp_doc_end, $._drs_doc_end, $._exp_doc_end, $._doc_end),
optional(choice($._doc_w_bgn_w_end_seq, $._doc_w_bgn_wo_end_seq, $._doc_wo_bgn_w_end_seq, $._doc_wo_bgn_wo_end_seq))),
)), $._eof),
// Right-recursive chains: one document followed by whichever sequences may legally come next.
_doc_w_bgn_w_end_seq: $ => seq($._doc_w_bgn_w_end, optional(choice($._doc_w_bgn_w_end_seq, $._doc_w_bgn_wo_end_seq, $._doc_wo_bgn_w_end_seq, $._doc_wo_bgn_wo_end_seq))),
_doc_w_bgn_wo_end_seq: $ => seq($._doc_w_bgn_wo_end, optional(choice($._doc_w_bgn_w_end_seq, $._doc_w_bgn_wo_end_seq))),
_doc_wo_bgn_w_end_seq: $ => seq($._doc_wo_bgn_w_end, optional(choice($._doc_w_bgn_w_end_seq, $._doc_w_bgn_wo_end_seq, $._doc_wo_bgn_w_end_seq, $._doc_wo_bgn_wo_end_seq))),
_doc_wo_bgn_wo_end_seq: $ => seq($._doc_wo_bgn_wo_end, optional(choice($._doc_w_bgn_w_end_seq, $._doc_w_bgn_wo_end_seq))),
// The four document categories the chains above are built from.
_doc_w_bgn_w_end: $ => choice($._exp_doc_end, $._doc_end),
_doc_w_bgn_wo_end: $ => $._exp_doc,
_doc_wo_bgn_w_end: $ => choice($._drs_doc_end, $._imp_doc_end),
_doc_wo_bgn_wo_end: $ => choice($._drs_doc, $._imp_doc),
// document
_bgn_imp_doc: $ => choice($._exp_doc_tal, $._r_blk_seq_r_val, $._r_blk_map_r_val),
_drs_doc: $ => seq(repeat1($._s_dir), $._exp_doc),
_exp_doc: $ => seq($._s_drs_end, optional($._exp_doc_tal)),
_imp_doc: $ => choice($._br_blk_seq_val, $._br_blk_map_val, $._br_blk_str_val, $._br_flw_val_blk),
_drs_doc_end: $ => prec(1, seq($._drs_doc, $._s_doc_end)),
_exp_doc_end: $ => prec(1, seq($._exp_doc, $._s_doc_end)),
_imp_doc_end: $ => prec(1, seq($._imp_doc, $._s_doc_end)),
_bgn_imp_doc_end: $ => prec(1, seq($._bgn_imp_doc, $._s_doc_end)),
_doc_end: $ => $._s_doc_end,
_exp_doc_tal: $ => choice($._r_blk_seq_br_val, $._br_blk_seq_val, $._r_blk_map_br_val, $._br_blk_map_val, $._r_blk_str_val, $._br_blk_str_val, $._r_flw_val_blk, $._br_flw_val_blk),
// directive
_s_dir: $ => choice($._s_dir_yml, $._s_dir_tag, $._s_dir_rsv),
_s_dir_yml: $ => seq($._s_dir_yml_bgn, $._r_dir_yml_ver),
_s_dir_tag: $ => seq($._s_dir_tag_bgn, $._r_dir_tag_hdl, $._r_dir_tag_pfx),
_s_dir_rsv: $ => seq($._s_dir_rsv_bgn, repeat($._r_dir_rsv_prm)),
// property
_r_prp_val: $ => $._r_prp,
_br_prp_val: $ => $._br_prp,
_r_sgl_prp_val: $ => $._r_sgl_prp,
_br_sgl_prp_val: $ => $._br_sgl_prp,
_b_sgl_prp_val: $ => $._b_sgl_prp,
_r_prp: $ => choice(seq($._r_acr, optional(choice($._r_tag, $._br_tag))), seq($._r_tag, optional(choice($._r_acr, $._br_acr)))),
_br_prp: $ => choice(seq($._br_acr, optional(choice($._r_tag, $._br_tag))), seq($._br_tag, optional(choice($._r_acr, $._br_acr)))),
_r_sgl_prp: $ => choice(seq($._r_acr, optional($._r_tag)), seq($._r_tag, optional($._r_acr))),
_br_sgl_prp: $ => choice(seq($._br_acr, optional($._r_tag)), seq($._br_tag, optional($._r_acr))),
_b_sgl_prp: $ => choice(seq($._b_acr, optional($._r_tag)), seq($._b_tag, optional($._r_acr))),
// block sequence
_r_blk_seq_val: $ => choice($._r_blk_seq_r_val, $._r_blk_seq_br_val),
_r_blk_seq_r_val: $ => $._r_blk_seq,
_r_blk_seq_br_val: $ => seq($._r_prp, $._br_blk_seq),
_br_blk_seq_val: $ => choice($._br_blk_seq, seq($._br_prp, $._br_blk_seq)),
_r_blk_seq_spc_val: $ => seq($._r_prp, $._b_blk_seq_spc),
_br_blk_seq_spc_val: $ => seq($._br_prp, $._b_blk_seq_spc),
_b_blk_seq_spc_val: $ => $._b_blk_seq_spc,
_r_blk_seq: $ => seq($._r_blk_seq_itm, repeat($._b_blk_seq_itm), $._bl),
_br_blk_seq: $ => seq($._br_blk_seq_itm, repeat($._b_blk_seq_itm), $._bl),
_b_blk_seq_spc: $ => seq(repeat1($._b_blk_seq_itm), $._bl),
_r_blk_seq_itm: $ => seq($._r_blk_seq_bgn, optional($._blk_seq_itm_tal)),
_br_blk_seq_itm: $ => seq($._br_blk_seq_bgn, optional($._blk_seq_itm_tal)),
_b_blk_seq_itm: $ => seq($._b_blk_seq_bgn, optional($._blk_seq_itm_tal)),
_blk_seq_itm_tal: $ => choice($._r_blk_seq_val, $._br_blk_seq_val, $._r_blk_map_val, $._br_blk_map_val, $._r_blk_str_val, $._br_blk_str_val, $._r_flw_val_blk, $._br_flw_val_blk),
// block mapping
_r_blk_map_val: $ => choice($._r_blk_map_r_val, $._r_blk_map_br_val),
_r_blk_map_r_val: $ => $._r_blk_map,
_r_blk_map_br_val: $ => seq($._r_prp, $._br_blk_map),
_br_blk_map_val: $ => choice($._br_blk_map, seq($._br_prp, $._br_blk_map)),
_r_blk_map: $ => seq($._r_blk_map_itm, repeat($._b_blk_map_itm), $._bl),
_br_blk_map: $ => seq($._br_blk_map_itm, repeat($._b_blk_map_itm), $._bl),
_r_blk_map_itm: $ => choice($._r_blk_exp_itm, $._r_blk_imp_itm),
_br_blk_map_itm: $ => choice($._br_blk_exp_itm, $._br_blk_imp_itm),
_b_blk_map_itm: $ => choice($._b_blk_exp_itm, $._b_blk_imp_itm),
_r_blk_exp_itm: $ => prec.right(choice(seq($._r_blk_key_itm, optional($._b_blk_val_itm)), $._r_blk_val_itm)),
_br_blk_exp_itm: $ => prec.right(choice(seq($._br_blk_key_itm, optional($._b_blk_val_itm)), $._br_blk_val_itm)),
_b_blk_exp_itm: $ => prec.right(choice(seq($._b_blk_key_itm, optional($._b_blk_val_itm)), $._b_blk_val_itm)),
_r_blk_key_itm: $ => seq($._r_blk_key_bgn, optional(field("key", $._blk_exp_itm_tal))),
_br_blk_key_itm: $ => seq($._br_blk_key_bgn, optional(field("key", $._blk_exp_itm_tal))),
_b_blk_key_itm: $ => seq($._b_blk_key_bgn, optional(field("key", $._blk_exp_itm_tal))),
_r_blk_val_itm: $ => seq($._r_blk_val_bgn, optional(field("value", $._blk_exp_itm_tal))),
_br_blk_val_itm: $ => seq($._br_blk_val_bgn, optional(field("value", $._blk_exp_itm_tal))),
_b_blk_val_itm: $ => seq($._b_blk_val_bgn, optional(field("value", $._blk_exp_itm_tal))),
_r_blk_imp_itm: $ => seq(field("key", $._r_sgl_flw_val_blk), $._blk_imp_itm_tal),
_br_blk_imp_itm: $ => seq(field("key", $._br_sgl_flw_val_blk), $._blk_imp_itm_tal),
_b_blk_imp_itm: $ => seq(field("key", $._b_sgl_flw_val_blk), $._blk_imp_itm_tal),
_blk_exp_itm_tal: $ => choice($._blk_seq_itm_tal, $._r_blk_seq_spc_val, $._br_blk_seq_spc_val, $._b_blk_seq_spc_val),
_blk_imp_itm_tal: $ => seq($._r_blk_imp_bgn, optional(field("value", choice($._r_blk_seq_br_val, $._br_blk_seq_val, $._r_blk_seq_spc_val, $._br_blk_seq_spc_val, $._b_blk_seq_spc_val, $._r_blk_map_br_val, $._br_blk_map_val, $._r_blk_str_val, $._br_blk_str_val, $._r_flw_val_blk, $._br_flw_val_blk)))),
// block scalar
_r_blk_str_val: $ => choice($._r_blk_str, seq($._r_prp, choice($._r_blk_str, $._br_blk_str))),
_br_blk_str_val: $ => choice($._br_blk_str, seq($._br_prp, choice($._r_blk_str, $._br_blk_str))),
_r_blk_str: $ => seq($._r_blk_str_bgn, repeat($._br_blk_str_ctn), $._bl),
_br_blk_str: $ => seq($._br_blk_str_bgn, repeat($._br_blk_str_ctn), $._bl),
// flow value in block
_r_flw_val_blk: $ => choice($._r_flw_jsl_val, $._r_flw_njl_val_blk),
_br_flw_val_blk: $ => choice($._br_flw_jsl_val, $._br_flw_njl_val_blk),
_r_sgl_flw_val_blk: $ => choice($._r_sgl_flw_jsl_val, $._r_sgl_flw_njl_val_blk),
_br_sgl_flw_val_blk: $ => choice($._br_sgl_flw_jsl_val, $._br_sgl_flw_njl_val_blk),
_b_sgl_flw_val_blk: $ => choice($._b_sgl_flw_jsl_val, $._b_sgl_flw_njl_val_blk),
// flow value in flow
_r_flw_val_flw: $ => choice($._r_flw_jsl_val, $._r_flw_njl_val_flw),
_br_flw_val_flw: $ => choice($._br_flw_jsl_val, $._br_flw_njl_val_flw),
_r_sgl_flw_val_flw: $ => choice($._r_sgl_flw_jsl_val, $._r_sgl_flw_njl_val_flw),
// json-like flow value
_r_flw_jsl_val: $ => choice($._r_flw_seq_val, $._r_flw_map_val, $._r_dqt_str_val, $._r_sqt_str_val),
_br_flw_jsl_val: $ => choice($._br_flw_seq_val, $._br_flw_map_val, $._br_dqt_str_val, $._br_sqt_str_val),
_r_sgl_flw_jsl_val: $ => choice($._r_sgl_flw_seq_val, $._r_sgl_flw_map_val, $._r_sgl_dqt_str_val, $._r_sgl_sqt_str_val),
_br_sgl_flw_jsl_val: $ => choice($._br_sgl_flw_seq_val, $._br_sgl_flw_map_val, $._br_sgl_dqt_str_val, $._br_sgl_sqt_str_val),
_b_sgl_flw_jsl_val: $ => choice($._b_sgl_flw_seq_val, $._b_sgl_flw_map_val, $._b_sgl_dqt_str_val, $._b_sgl_sqt_str_val),
// non-json-like flow value in block
_r_flw_njl_val_blk: $ => choice($._r_als_val, $._r_prp_val, $._r_pln_blk_val),
_br_flw_njl_val_blk: $ => choice($._br_als_val, $._br_prp_val, $._br_pln_blk_val),
_r_sgl_flw_njl_val_blk: $ => choice($._r_als_val, $._r_sgl_prp_val, $._r_sgl_pln_blk_val),
_br_sgl_flw_njl_val_blk: $ => choice($._br_als_val, $._br_sgl_prp_val, $._br_sgl_pln_blk_val),
_b_sgl_flw_njl_val_blk: $ => choice($._b_als_val, $._b_sgl_prp_val, $._b_sgl_pln_blk_val),
// non-json-like flow value in flow
_r_flw_njl_val_flw: $ => choice($._r_als_val, $._r_prp_val, $._r_pln_flw_val),
_br_flw_njl_val_flw: $ => choice($._br_als_val, $._br_prp_val, $._br_pln_flw_val),
_r_sgl_flw_njl_val_flw: $ => choice($._r_als_val, $._r_sgl_prp_val, $._r_sgl_pln_flw_val),
// flow sequence
_r_flw_seq_val: $ => choice($._r_flw_seq, seq($._r_prp, choice($._r_flw_seq, $._br_flw_seq))),
_br_flw_seq_val: $ => choice($._br_flw_seq, seq($._br_prp, choice($._r_flw_seq, $._br_flw_seq))),
_r_sgl_flw_seq_val: $ => choice($._r_sgl_flw_seq, seq($._r_sgl_prp, $._r_sgl_flw_seq)),
_br_sgl_flw_seq_val: $ => choice($._br_sgl_flw_seq, seq($._br_sgl_prp, $._r_sgl_flw_seq)),
_b_sgl_flw_seq_val: $ => choice($._b_sgl_flw_seq, seq($._b_sgl_prp, $._r_sgl_flw_seq)),
_r_flw_seq: $ => seq($._r_flw_seq_bgn, $._flw_seq_tal),
_br_flw_seq: $ => seq($._br_flw_seq_bgn, $._flw_seq_tal),
_r_sgl_flw_seq: $ => seq($._r_flw_seq_bgn, $._sgl_flw_seq_tal),
_br_sgl_flw_seq: $ => seq($._br_flw_seq_bgn, $._sgl_flw_seq_tal),
_b_sgl_flw_seq: $ => seq($._b_flw_seq_bgn, $._sgl_flw_seq_tal),
_flw_seq_tal: $ => seq(optional(choice($._r_flw_seq_dat, $._br_flw_seq_dat)), choice($._r_flw_seq_end, $._br_flw_seq_end)),
_sgl_flw_seq_tal: $ => seq(optional($._r_sgl_flw_col_dat), $._r_flw_seq_end),
// flow mapping
_r_flw_map_val: $ => choice($._r_flw_map, seq($._r_prp, choice($._r_flw_map, $._br_flw_map))),
_br_flw_map_val: $ => choice($._br_flw_map, seq($._br_prp, choice($._r_flw_map, $._br_flw_map))),
_r_sgl_flw_map_val: $ => choice($._r_sgl_flw_map, seq($._r_sgl_prp, $._r_sgl_flw_map)),
_br_sgl_flw_map_val: $ => choice($._br_sgl_flw_map, seq($._br_sgl_prp, $._r_sgl_flw_map)),
_b_sgl_flw_map_val: $ => choice($._b_sgl_flw_map, seq($._b_sgl_prp, $._r_sgl_flw_map)),
_r_flw_map: $ => seq($._r_flw_map_bgn, $._flw_map_tal),
_br_flw_map: $ => seq($._br_flw_map_bgn, $._flw_map_tal),
_r_sgl_flw_map: $ => seq($._r_flw_map_bgn, $._sgl_flw_map_tal),
_br_sgl_flw_map: $ => seq($._br_flw_map_bgn, $._sgl_flw_map_tal),
_b_sgl_flw_map: $ => seq($._b_flw_map_bgn, $._sgl_flw_map_tal),
_flw_map_tal: $ => seq(optional(choice($._r_flw_map_dat, $._br_flw_map_dat)), choice($._r_flw_map_end, $._br_flw_map_end)),
_sgl_flw_map_tal: $ => seq(optional($._r_sgl_flw_col_dat), $._r_flw_map_end),
// flow collection data
_r_flw_seq_dat: $ => seq($._r_flw_seq_itm, repeat($._flw_seq_dat_rpt), optional(choice($._r_flw_sep_bgn, $._br_flw_sep_bgn))),
_br_flw_seq_dat: $ => seq($._br_flw_seq_itm, repeat($._flw_seq_dat_rpt), optional(choice($._r_flw_sep_bgn, $._br_flw_sep_bgn))),
_r_flw_map_dat: $ => seq($._r_flw_map_itm, repeat($._flw_map_dat_rpt), optional(choice($._r_flw_sep_bgn, $._br_flw_sep_bgn))),
_br_flw_map_dat: $ => seq($._br_flw_map_itm, repeat($._flw_map_dat_rpt), optional(choice($._r_flw_sep_bgn, $._br_flw_sep_bgn))),
_r_sgl_flw_col_dat: $ => seq($._r_sgl_flw_col_itm, repeat($._sgl_flw_col_dat_rpt), optional($._r_flw_sep_bgn)),
_flw_seq_dat_rpt: $ => seq(choice($._r_flw_sep_bgn, $._br_flw_sep_bgn), choice($._r_flw_seq_itm, $._br_flw_seq_itm)),
_flw_map_dat_rpt: $ => seq(choice($._r_flw_sep_bgn, $._br_flw_sep_bgn), choice($._r_flw_map_itm, $._br_flw_map_itm)),
_sgl_flw_col_dat_rpt: $ => seq($._r_flw_sep_bgn, $._r_sgl_flw_col_itm),
// flow collection item
_r_flw_seq_itm: $ => choice($._r_flw_val_flw, $._r_flw_exp_par, $._r_flw_imp_r_par, $._r_flw_njl_ann_par),
_br_flw_seq_itm: $ => choice($._br_flw_val_flw, $._br_flw_exp_par, $._br_flw_imp_r_par, $._br_flw_njl_ann_par),
_r_flw_map_itm: $ => choice($._r_flw_val_flw, $._r_flw_exp_par, $._r_flw_imp_r_par, $._r_flw_imp_br_par, $._r_flw_njl_ann_par),
_br_flw_map_itm: $ => choice($._br_flw_val_flw, $._br_flw_exp_par, $._br_flw_imp_r_par, $._br_flw_imp_br_par, $._br_flw_njl_ann_par),
_r_sgl_flw_col_itm: $ => choice($._r_sgl_flw_val_flw, $._r_sgl_flw_exp_par, $._r_sgl_flw_imp_par, $._r_sgl_flw_njl_ann_par),
// explicit flow pair
_r_flw_exp_par: $ => seq($._r_flw_key_bgn, optional(choice($._r_flw_imp_r_par, $._r_flw_imp_br_par, $._br_flw_imp_r_par, $._br_flw_imp_br_par))),
_br_flw_exp_par: $ => seq($._br_flw_key_bgn, optional(choice($._r_flw_imp_r_par, $._r_flw_imp_br_par, $._br_flw_imp_r_par, $._br_flw_imp_br_par))),
_r_sgl_flw_exp_par: $ => seq($._r_flw_key_bgn, optional($._r_sgl_flw_imp_par)),
// implicit flow pair
_r_flw_imp_r_par: $ => choice(seq(field("key", $._r_flw_jsl_val), $._r_flw_jsl_ann_par), seq(field("key", $._r_flw_njl_val_flw), $._r_flw_njl_ann_par)),
_r_flw_imp_br_par: $ => choice(seq(field("key", $._r_flw_jsl_val), $._br_flw_jsl_ann_par), seq(field("key", $._r_flw_njl_val_flw), $._br_flw_njl_ann_par)),
_br_flw_imp_r_par: $ => choice(seq(field("key", $._br_flw_jsl_val), $._r_flw_jsl_ann_par), seq(field("key", $._br_flw_njl_val_flw), $._r_flw_njl_ann_par)),
_br_flw_imp_br_par: $ => choice(seq(field("key", $._br_flw_jsl_val), $._br_flw_jsl_ann_par), seq(field("key", $._br_flw_njl_val_flw), $._br_flw_njl_ann_par)),
_r_sgl_flw_imp_par: $ => choice(seq(field("key", $._r_sgl_flw_jsl_val), $._r_sgl_flw_jsl_ann_par), seq(field("key", $._r_sgl_flw_njl_val_flw), $._r_sgl_flw_njl_ann_par)),
// anonymous flow pair
_r_flw_jsl_ann_par: $ => seq($._r_flw_jsv_bgn, optional(field("value", $._flw_ann_par_tal))),
_br_flw_jsl_ann_par: $ => seq($._br_flw_jsv_bgn, optional(field("value", $._flw_ann_par_tal))),
_r_sgl_flw_jsl_ann_par: $ => seq($._r_flw_jsv_bgn, optional(field("value", $._sgl_flw_ann_par_tal))),
_r_flw_njl_ann_par: $ => seq($._r_flw_njv_bgn, optional(field("value", $._flw_ann_par_tal))),
_br_flw_njl_ann_par: $ => seq($._br_flw_njv_bgn, optional(field("value", $._flw_ann_par_tal))),
_r_sgl_flw_njl_ann_par: $ => seq($._r_flw_njv_bgn, optional(field("value", $._sgl_flw_ann_par_tal))),
_flw_ann_par_tal: $ => choice($._r_flw_val_flw, $._br_flw_val_flw),
_sgl_flw_ann_par_tal: $ => $._r_sgl_flw_val_flw,
// double quote scalar
_r_dqt_str_val: $ => choice($._r_dqt_str, seq($._r_prp, choice($._r_dqt_str, $._br_dqt_str))),
_br_dqt_str_val: $ => choice($._br_dqt_str, seq($._br_prp, choice($._r_dqt_str, $._br_dqt_str))),
_r_sgl_dqt_str_val: $ => choice($._r_sgl_dqt_str, seq($._r_sgl_prp, $._r_sgl_dqt_str)),
_br_sgl_dqt_str_val: $ => choice($._br_sgl_dqt_str, seq($._br_sgl_prp, $._r_sgl_dqt_str)),
_b_sgl_dqt_str_val: $ => choice($._b_sgl_dqt_str, seq($._b_sgl_prp, $._r_sgl_dqt_str)),
_r_dqt_str: $ => seq($._r_dqt_str_bgn, optional($._r_sgl_dqt_ctn), optional($._r_dqt_esc_nwl), repeat($._br_mtl_dqt_ctn), choice($._r_dqt_str_end, $._br_dqt_str_end)),
_br_dqt_str: $ => seq($._br_dqt_str_bgn, optional($._r_sgl_dqt_ctn), optional($._r_dqt_esc_nwl), repeat($._br_mtl_dqt_ctn), choice($._r_dqt_str_end, $._br_dqt_str_end)),
_r_sgl_dqt_str: $ => seq($._r_dqt_str_bgn, optional($._r_sgl_dqt_ctn), $._r_dqt_str_end),
_br_sgl_dqt_str: $ => seq($._br_dqt_str_bgn, optional($._r_sgl_dqt_ctn), $._r_dqt_str_end),
_b_sgl_dqt_str: $ => seq($._b_dqt_str_bgn, optional($._r_sgl_dqt_ctn), $._r_dqt_str_end),
_r_sgl_dqt_ctn: $ => repeat1(choice($._r_dqt_str_ctn, $._r_dqt_esc_seq)),
_br_mtl_dqt_ctn: $ => choice($._br_dqt_esc_nwl, seq(choice($._br_dqt_str_ctn, $._br_dqt_esc_seq), repeat(choice($._r_dqt_str_ctn, $._r_dqt_esc_seq)), optional($._r_dqt_esc_nwl))),
// single quote scalar
_r_sqt_str_val: $ => choice($._r_sqt_str, seq($._r_prp, choice($._r_sqt_str, $._br_sqt_str))),
_br_sqt_str_val: $ => choice($._br_sqt_str, seq($._br_prp, choice($._r_sqt_str, $._br_sqt_str))),
_r_sgl_sqt_str_val: $ => choice($._r_sgl_sqt_str, seq($._r_sgl_prp, $._r_sgl_sqt_str)),
_br_sgl_sqt_str_val: $ => choice($._br_sgl_sqt_str, seq($._br_sgl_prp, $._r_sgl_sqt_str)),
_b_sgl_sqt_str_val: $ => choice($._b_sgl_sqt_str, seq($._b_sgl_prp, $._r_sgl_sqt_str)),
_r_sqt_str: $ => seq($._r_sqt_str_bgn, optional($._r_sgl_sqt_ctn), repeat($._br_mtl_sqt_ctn), choice($._r_sqt_str_end, $._br_sqt_str_end)),
_br_sqt_str: $ => seq($._br_sqt_str_bgn, optional($._r_sgl_sqt_ctn), repeat($._br_mtl_sqt_ctn), choice($._r_sqt_str_end, $._br_sqt_str_end)),
_r_sgl_sqt_str: $ => seq($._r_sqt_str_bgn, optional($._r_sgl_sqt_ctn), $._r_sqt_str_end),
_br_sgl_sqt_str: $ => seq($._br_sqt_str_bgn, optional($._r_sgl_sqt_ctn), $._r_sqt_str_end),
_b_sgl_sqt_str: $ => seq($._b_sqt_str_bgn, optional($._r_sgl_sqt_ctn), $._r_sqt_str_end),
_r_sgl_sqt_ctn: $ => repeat1(choice($._r_sqt_str_ctn, $._r_sqt_esc_sqt)),
_br_mtl_sqt_ctn: $ => seq(choice($._br_sqt_str_ctn, $._br_sqt_esc_sqt), repeat(choice($._r_sqt_str_ctn, $._r_sqt_esc_sqt))),
// plain scalar in block
_r_pln_blk_val: $ => choice($._r_pln_blk, seq($._r_prp, choice($._r_pln_blk, $._br_pln_blk))),
_br_pln_blk_val: $ => choice($._br_pln_blk, seq($._br_prp, choice($._r_pln_blk, $._br_pln_blk))),
_r_sgl_pln_blk_val: $ => choice($._r_sgl_pln_blk, seq($._r_sgl_prp, $._r_sgl_pln_blk)),
_br_sgl_pln_blk_val: $ => choice($._br_sgl_pln_blk, seq($._br_sgl_prp, $._r_sgl_pln_blk)),
_b_sgl_pln_blk_val: $ => choice($._b_sgl_pln_blk, seq($._b_sgl_prp, $._r_sgl_pln_blk)),
_r_pln_blk: $ => choice($._r_sgl_pln_blk, $._r_mtl_pln_blk),
_br_pln_blk: $ => choice($._br_sgl_pln_blk, $._br_mtl_pln_blk),
// plain scalar in flow
_r_pln_flw_val: $ => choice($._r_pln_flw, seq($._r_prp, choice($._r_pln_flw, $._br_pln_flw))),
_br_pln_flw_val: $ => choice($._br_pln_flw, seq($._br_prp, choice($._r_pln_flw, $._br_pln_flw))),
_r_sgl_pln_flw_val: $ => choice($._r_sgl_pln_flw, seq($._r_sgl_prp, $._r_sgl_pln_flw)),
_r_pln_flw: $ => choice($._r_sgl_pln_flw, $._r_mtl_pln_flw),
_br_pln_flw: $ => choice($._br_sgl_pln_flw, $._br_mtl_pln_flw),
// alias
_r_als_val: $ => $._r_als,
_br_als_val: $ => $._br_als,
_b_als_val: $ => $._b_als,
},
});
// Post-process the generated grammar: every reference to one of the internal,
// position-specific rules listed below is wrapped in a named alias, so the
// many `_r_` / `_br_` / `_b_` variants share one public node name.
//
// - `_("name", ...rules)`   aliases references to the listed rules everywhere.
// - `_(["name"], ...rules)` does the same but marks the rules as "shallow":
//   global_alias then keeps the body of those rules exactly as written, so
//   symbols referenced inside them are not wrapped again.
module.exports = global_alias(module.exports, {
..._("yaml_directive", "_s_dir_yml"),
..._("yaml_version", "_r_dir_yml_ver"),
..._("tag_directive", "_s_dir_tag"),
..._("tag_handle", "_r_dir_tag_hdl"),
..._("tag_prefix", "_r_dir_tag_pfx"),
..._("reserved_directive", "_s_dir_rsv"),
..._("directive_name", "_s_dir_rsv_bgn"),
..._("directive_parameter", "_r_dir_rsv_prm"),
..._("flow_node", "_r_prp_val", "_br_prp_val", "_r_sgl_prp_val", "_br_sgl_prp_val", "_b_sgl_prp_val"),
..._("tag", "_r_tag", "_br_tag", "_b_tag"),
..._("anchor", "_r_acr", "_br_acr", "_b_acr"),
..._("flow_node", "_r_als_val", "_br_als_val", "_b_als_val"),
..._("alias", "_r_als", "_br_als", "_b_als"),
..._("document", "_bgn_imp_doc", "_imp_doc"),
..._(["document"], "_drs_doc", "_exp_doc", "_doc_end",
"_bgn_imp_doc_end", "_drs_doc_end", "_exp_doc_end", "_imp_doc_end"),
..._("block_node", "_r_blk_seq_r_val", "_r_blk_seq_br_val", "_br_blk_seq_val", "_r_blk_seq_spc_val", "_br_blk_seq_spc_val", "_b_blk_seq_spc_val"),
..._("block_node", "_r_blk_map_r_val", "_r_blk_map_br_val", "_br_blk_map_val"),
..._("block_node", "_r_blk_str_val", "_br_blk_str_val"),
..._("block_sequence", "_r_blk_seq", "_br_blk_seq", "_b_blk_seq_spc"),
..._("block_mapping", "_r_blk_map", "_br_blk_map"),
..._("block_scalar", "_r_blk_str", "_br_blk_str"),
..._("block_sequence_item", "_r_blk_seq_itm", "_br_blk_seq_itm", "_b_blk_seq_itm"),
..._("block_mapping_pair", "_r_blk_exp_itm", "_br_blk_exp_itm", "_b_blk_exp_itm"),
..._("block_mapping_pair", "_r_blk_imp_itm", "_br_blk_imp_itm", "_b_blk_imp_itm"),
..._("flow_node", "_r_flw_seq_val", "_br_flw_seq_val", "_r_sgl_flw_seq_val", "_br_sgl_flw_seq_val", "_b_sgl_flw_seq_val"),
..._("flow_node", "_r_flw_map_val", "_br_flw_map_val", "_r_sgl_flw_map_val", "_br_sgl_flw_map_val", "_b_sgl_flw_map_val"),
..._("flow_sequence", "_r_flw_seq", "_br_flw_seq", "_r_sgl_flw_seq", "_br_sgl_flw_seq", "_b_sgl_flw_seq"),
..._("flow_mapping", "_r_flw_map", "_br_flw_map", "_r_sgl_flw_map", "_br_sgl_flw_map", "_b_sgl_flw_map"),
..._(["flow_pair"], "_r_flw_exp_par", "_br_flw_exp_par", "_r_sgl_flw_exp_par",
"_r_flw_imp_r_par", "_r_flw_imp_br_par", "_br_flw_imp_r_par", "_br_flw_imp_br_par", "_r_sgl_flw_imp_par",
"_r_flw_njl_ann_par", "_br_flw_njl_ann_par", "_r_sgl_flw_njl_ann_par"),
..._("flow_node", "_r_dqt_str_val", "_br_dqt_str_val", "_r_sgl_dqt_str_val", "_br_sgl_dqt_str_val", "_b_sgl_dqt_str_val"),
..._("flow_node", "_r_sqt_str_val", "_br_sqt_str_val", "_r_sgl_sqt_str_val", "_br_sgl_sqt_str_val", "_b_sgl_sqt_str_val"),
..._("flow_node", "_r_pln_blk_val", "_br_pln_blk_val", "_r_sgl_pln_blk_val", "_br_sgl_pln_blk_val", "_b_sgl_pln_blk_val",
"_r_pln_flw_val", "_br_pln_flw_val", "_r_sgl_pln_flw_val"),
..._("double_quote_scalar", "_r_dqt_str", "_br_dqt_str", "_r_sgl_dqt_str", "_br_sgl_dqt_str", "_b_sgl_dqt_str"),
..._("single_quote_scalar", "_r_sqt_str", "_br_sqt_str", "_r_sgl_sqt_str", "_br_sgl_sqt_str", "_b_sgl_sqt_str"),
..._("plain_scalar", "_r_mtl_pln_blk", "_br_mtl_pln_blk", "_r_sgl_pln_blk", "_br_sgl_pln_blk", "_b_sgl_pln_blk",
"_r_mtl_pln_flw", "_br_mtl_pln_flw", "_r_sgl_pln_flw", "_br_sgl_pln_flw"),
..._("escape_sequence", "_r_dqt_esc_nwl", "_br_dqt_esc_nwl",
"_r_dqt_esc_seq", "_br_dqt_esc_seq",
"_r_sqt_esc_sqt", "_br_sqt_esc_sqt"),
});
/**
 * Builds alias-map entries that give every listed rule the same alias.
 *
 * @param {string | string[]} alias_value - The alias name. Passing it wrapped
 *   in an array (only the first element is read) also flags the entry as
 *   `shallow`.
 * @param {...string} rule_names - Names of the rules that receive the alias.
 * @returns {Object<string, {name: string, shallow?: boolean}>} Map from rule
 *   name to the alias descriptor; every key shares one descriptor object.
 * @throws {Error} When `alias_value` is neither a string nor an array.
 */
function _(alias_value, ...rule_names) {
  const is_shallow = Array.isArray(alias_value);
  if (!is_shallow && typeof alias_value !== "string") {
    throw new Error(`Unexpected value ${JSON.stringify(alias_value)}`);
  }

  const descriptor = is_shallow
    ? { name: alias_value[0], shallow: true }
    : { name: alias_value };

  return rule_names.reduce((entries, rule_name) => {
    entries[rule_name] = descriptor;
    return entries;
  }, {});
}
/**
 * Returns a copy of the grammar in which references to aliased rules are
 * wrapped in ALIAS nodes.
 *
 * Rules whose alias entry is flagged `shallow` keep their own body untouched;
 * every other rule body is rewritten with `recursive_alias`. A warning is
 * printed for each alias-map key that was never applied.
 *
 * @param {{rules: Object<string, object>}} grammar_json - Grammar to rewrite;
 *   it is not modified.
 * @param {Object<string, {name: string, shallow?: boolean}>} alias_map - Alias
 *   descriptors keyed by rule name.
 * @returns {object} Shallow copy of `grammar_json` with a new `rules` object.
 */
function global_alias(grammar_json, alias_map) {
  const rewritten_rules = {};
  const rewritten_grammar = { ...grammar_json, rules: rewritten_rules };

  // Number of times each alias was applied; starts at zero for every key.
  const usage_counts = Object.fromEntries(
    Object.keys(alias_map).map(aliased_name => [aliased_name, 0]),
  );

  Object.entries(grammar_json.rules).forEach(([rule_name, rule]) => {
    const keep_body = rule_name in alias_map && alias_map[rule_name].shallow;
    rewritten_rules[rule_name] = keep_body
      ? rule
      : recursive_alias(rule, alias_map, usage_counts);
  });

  Object.entries(usage_counts)
    .filter(([, count]) => count === 0)
    .forEach(([rule_name]) => {
      console.warn(`warning: global_alias for ${JSON.stringify(rule_name)} is not used.`);
    });

  return rewritten_grammar;
}
/**
 * Rewrites one grammar rule tree, wrapping every SYMBOL whose name is a key of
 * `alias_map` in a named ALIAS node.
 *
 * @param {object} rule - Rule node; `type` selects how it is traversed.
 * @param {Object<string, {name: string}>} alias_map - Alias descriptors keyed
 *   by rule name.
 * @param {Object<string, number>} checklist - Usage counters keyed by rule
 *   name; the matching counter is incremented for each wrapped symbol.
 * @returns {object} A rewritten copy for container nodes, or the original
 *   node itself for BLANK and for symbols without an alias.
 * @throws {Error} When the rule type is not one of the handled types.
 */
function recursive_alias(rule, alias_map, checklist) {
  const descend = child => recursive_alias(child, alias_map, checklist);
  const { type } = rule;

  // Containers holding a list of members.
  if (type === "CHOICE" || type === "SEQ") {
    return { ...rule, members: rule.members.map(member => descend(member)) };
  }

  // Wrappers around a single child.
  const single_child_types = ["REPEAT", "REPEAT1", "FIELD", "PREC", "PREC_RIGHT"];
  if (single_child_types.includes(type)) {
    return { ...rule, content: descend(rule.content) };
  }

  if (type === "SYMBOL" && rule.name in alias_map) {
    checklist[rule.name]++;
    return { type: "ALIAS", content: rule, named: true, value: alias_map[rule.name].name };
  }

  // Symbols without an alias and blanks are returned unchanged.
  if (type === "SYMBOL" || type === "BLANK") {
    return rule;
  }

  throw new Error(`Unexpected rule type ${JSON.stringify(rule.type)}`);
}

13
index.js generated Normal file
View file

@ -0,0 +1,13 @@
// Load the compiled parser binding: the Release build is tried first and the
// Debug build second. When neither can be loaded, the error from the Release
// attempt is the one that propagates.
let binding;
try {
  binding = require("./build/Release/tree_sitter_yaml_binding");
} catch (releaseError) {
  try {
    binding = require("./build/Debug/tree_sitter_yaml_binding");
  } catch (debugError) {
    throw releaseError;
  }
}
module.exports = binding;

// Attach the node type metadata when it can be loaded; any failure to load it
// is ignored and the binding is exported without `nodeTypeInfo`.
try {
  module.exports.nodeTypeInfo = require("./src/node-types.json");
} catch (ignored) {}

37
package.json Normal file
View file

@ -0,0 +1,37 @@
{
"name": "tree-sitter-yaml",
"version": "0.1.0",
"description": "YAML grammar for tree-sitter",
"keywords": [
"parser",
"lexer"
],
"main": "index.js",
"repository": "https://github.com/ikatyang/tree-sitter-yaml",
"homepage": "https://github.com/ikatyang/tree-sitter-yaml#readme",
"author": {
"name": "Ika",
"email": "ikatyang@gmail.com",
"url": "https://github.com/ikatyang"
},
"license": "MIT",
"scripts": {
"test": "yarn tree-sitter test",
"prepack": "yarn tree-sitter generate",
"release": "standard-version",
"tree-sitter": "./tree-sitter/target/release/tree-sitter"
},
"dependencies": {
"nan": "^2.14.0"
},
"devDependencies": {
"natural-orderby": "2.0.3",
"standard-version": "7.0.0"
},
"files": [
"/src/",
"/binding.gyp",
"/grammar.js",
"/index.js"
]
}

View file

@ -0,0 +1,6 @@
# Fetch the tree-sitter sources at 0.15.7 (shallow clone) into ./tree-sitter,
# patch them, and build the CLI that .travis.yml and package.json invoke from
# ./tree-sitter/target/release/tree-sitter.
# NOTE(review): there is no `set -e`, so a failing step does not stop the
# following ones — confirm whether that is intended (e.g. for build-wasm).
git clone https://github.com/tree-sitter/tree-sitter --branch 0.15.7 --depth 1
cd tree-sitter
git submodule update --init
# Apply the local patch; it changes DIVIDER_REGEX in cli/src/test.rs so that a
# corpus divider needs at least six dashes instead of three.
git apply ../scripts/tree-sitter.diff
./script/build-wasm
cargo build --release

13
scripts/tree-sitter.diff Normal file
View file

@ -0,0 +1,13 @@
diff --git a/cli/src/test.rs b/cli/src/test.rs
index bc05f29..3578a0b 100644
--- a/cli/src/test.rs
+++ b/cli/src/test.rs
@@ -17,7 +17,7 @@ lazy_static! {
.multi_line(true)
.build()
.unwrap();
- static ref DIVIDER_REGEX: ByteRegex = ByteRegexBuilder::new(r"\r?\n---+\r?\n")
+ static ref DIVIDER_REGEX: ByteRegex = ByteRegexBuilder::new(r"\r?\n------+\r?\n")
.multi_line(true)
.build()
.unwrap();

135
scripts/update-snapshot.js Normal file
View file

@ -0,0 +1,135 @@
const child_process = require("child_process");
const { loadTests, writeTests } = require("./utils");
updateTestOutputs("./corpus");
/**
 * Regenerates the expected output of every corpus test under `dirname`.
 *
 * Every expected output is first replaced by a placeholder so that each test
 * fails; the actual S-expressions are then harvested from the failure report
 * printed by `tree-sitter test` and written back, pretty-printed.
 *
 * If no failure report can be obtained (the command could not be run, or it
 * exited successfully), the original outputs are restored instead of leaving
 * the corpus filled with placeholders.
 *
 * @param {string} dirname Directory containing the corpus files.
 * @throws Rethrows the spawn error when the command produced no stdout.
 */
function updateTestOutputs(dirname) {
  const tests = loadTests(dirname);

  // Remember the current outputs so they can be restored on failure.
  const originalOutputs = new Map();
  for (const [, testCases] of Object.entries(tests)) {
    for (const [, testCase] of Object.entries(testCases)) {
      originalOutputs.set(testCase, testCase.output);
      testCase.output = "(x y: (z))";
    }
  }
  writeTests(dirname, tests);

  const restoreOriginalOutputs = () => {
    for (const [testCase, output] of originalOutputs) {
      testCase.output = output;
    }
    writeTests(dirname, tests);
  };

  let stdout;
  try {
    child_process.execFileSync(
      "./tree-sitter/target/release/tree-sitter",
      ["test"],
    );
  } catch (error) {
    if (error.stdout == null) {
      // The command never ran (e.g. missing binary): nothing to harvest.
      restoreOriginalOutputs();
      throw error;
    }
    stdout = error.stdout.toString();
  }

  if (stdout === undefined) {
    // The run reported no failures, so there is no report to parse.
    restoreOriginalOutputs();
    return;
  }

  const sexps = parseTestStdout(stdout);
  for (const [basename, testCases] of Object.entries(sexps)) {
    for (const [title, sexp] of Object.entries(testCases)) {
      tests[basename][title].output = printSExpression(parseSExpression(sexp));
    }
  }
  writeTests(dirname, tests);
}
/**
 * Extracts the actual S-expression of every failed test from the output of
 * `tree-sitter test`.
 *
 * The output is split into a table of contents (file headings followed by
 * their failed test titles) and a detail section in which the second line of
 * each three-line group carries "<actual> <placeholder expected>".
 *
 * @param {string} stdout Raw stdout of the test command.
 * @returns {Object<string, Object<string, string>>} basename -> title -> sexp
 */
function parseTestStdout(stdout) {
  const plainText = stdout.replace(/\u001b\[\d+m/g, ""); // ansi color
  const [tocText, detailText] = plainText.split(
    /\n\n\d+ failures?:\n\nexpected \/ actual\n\n/,
  );

  // Failed tests in report order, as [basename, title] pairs.
  const failedTests = [];
  let currentBasename;
  tocText
    .trim()
    .split("\n")
    .forEach(tocLine => {
      if (tocLine.startsWith(" ")) {
        failedTests.push([currentBasename, tocLine.slice(" ✗ ".length)]);
      } else {
        currentBasename = tocLine.slice(0, -1) + ".txt";
      }
    });

  const result = {};
  const detailLines = detailText.split("\n");
  let nextFailure = 0;
  for (let lineNo = 1; lineNo < detailLines.length; lineNo += 3) {
    const sexp = detailLines[lineNo].replace(/\(x y: \(z\)\)\s*$/, "").trim();
    const [basename, title] = failedTests[nextFailure++];
    if (!result[basename]) {
      result[basename] = {};
    }
    result[basename][title] = sexp;
  }
  return result;
}
/**
 * Parses a single-line S-expression such as `(a (b) key: (c))` into a tree of
 * `{ type, children, key? }` nodes.
 *
 * A `MISSING` node keeps the word that follows it as part of its type
 * (e.g. `MISSING foo`). A `name:` prefix becomes the `key` of the next node.
 *
 * @param {string} text The S-expression.
 * @returns {{type: string, children: Array, key?: string}|undefined} The
 *   last node that was closed (the root for well-formed input).
 */
function parseSExpression(text) {
  const openNodes = [];
  let current;
  let pendingKey = null;

  let pos = 0;
  while (pos < text.length) {
    const ch = text[pos];
    if (ch === "(") {
      const rest = text.slice(pos + 1);
      const typePattern = /^MISSING/.test(rest)
        ? /^[^) ]+( \w+)?/
        : /^[^) ]+/;
      current = { type: rest.match(typePattern)[0], children: [] };
      // Land on the last character of the type; the shared increment below
      // then steps past it.
      pos += current.type.length;
      if (pendingKey) {
        current.key = pendingKey;
        pendingKey = null;
      }
      if (openNodes.length) {
        openNodes[openNodes.length - 1].children.push(current);
      }
      openNodes.push(current);
    } else if (ch === ")") {
      current = openNodes.pop();
    } else if (ch !== " ") {
      // Anything else starts a field name, which runs up to the colon.
      pendingKey = text.slice(pos).match(/^[^:]+/)[0];
      pos += pendingKey.length;
    }
    pos += 1;
  }
  return current;
}
/**
 * Pretty-prints a node tree produced by `parseSExpression`.
 *
 * - A node without children is printed on one line.
 * - A node whose children are all leaves prints them together on one
 *   indented line.
 * - Otherwise each child is printed recursively, one per line, with every
 *   line of its output indented by one space.
 *
 * @param {{type: string, children: Array, key?: string}} node
 * @param {string} [indent] Passed down the recursion; it does not affect the
 *   printed text.
 * @returns {string}
 */
function printSExpression(node, indent = " ") {
  const head = printNodeHead(node);
  const { children } = node;

  if (children.length === 0) {
    return `${head})`;
  }

  const isLeaf = child => child.children.length === 0;
  if (children.every(isLeaf)) {
    const leaves = children.map(child => `${printNodeHead(child)})`).join(" ");
    return `${head}\n ${leaves})`;
  }

  const printedChildren = children.map(child => {
    const printed = printSExpression(child, indent + " ");
    return printed
      .split("\n")
      .map(line => ` ${line}`)
      .join("\n");
  });
  return `${head}\n${printedChildren.join(`\n`)})`;
}
/**
 * Returns the opening part of a printed node: `(type`, prefixed with
 * `key: ` when the node carries a field name. The closing paren is added by
 * the caller.
 */
function printNodeHead(node) {
  const opening = `(${node.type}`;
  if (node.key) {
    return `${node.key}: ${opening}`;
  }
  return opening;
}

View file

@ -0,0 +1,36 @@
const fs = require("fs");
const { orderBy } = require("natural-orderby");
const path = require("path");
const { writeTest } = require("./utils");
// Builds ./corpus/spec.txt from the yaml-test-suite checkout: one corpus test
// per suite case reachable through the `tags` directory, ordered by title.
const testSuiteDirname = "./yaml-test-suite";

// Titles of suite cases that carry an `error` marker but are nevertheless
// written without the "[INVALID]" suffix.
const whitelistForValidTest = [
  "Duplicate YAML directive", // semantic error
  "Missing space in YAML directive", // https://github.com/yaml/yaml-test-suite/issues/38
  "Tag shorthand used in documents but only defined in the first", // semantic error
];

const readText = (...segments) =>
  fs.readFileSync(path.join(...segments), "utf8");

const testCases = {};
const testTagsDirname = path.join(testSuiteDirname, "tags");
fs.readdirSync(testTagsDirname).forEach(tagName => {
  const testTagDirname = path.join(testTagsDirname, tagName);
  fs.readdirSync(testTagDirname).forEach(testCaseId => {
    const dirname = path.join(testSuiteDirname, testCaseId);
    const title = readText(dirname, "===").trim();
    const content = readText(dirname, "in.yaml");
    const hasErrorMarker = fs.existsSync(path.join(dirname, "error"));
    const isInvalid =
      hasErrorMarker && whitelistForValidTest.indexOf(title) === -1;
    const caseTitle = isInvalid ? `${title} [INVALID]` : title;
    // A case listed under several tags is simply written again.
    testCases[caseTitle] = {
      input: content,
      output: "()",
    };
  });
});

// Re-insert the cases in natural title order so the corpus file is stable.
const finalTestCases = {};
orderBy(Object.keys(testCases)).forEach(title => {
  finalTestCases[title] = testCases[title];
});
writeTest("./corpus/spec.txt", finalTestCases);

78
scripts/utils.js Normal file
View file

@ -0,0 +1,78 @@
const fs = require("fs");
const path = require("path");
/**
 * Loads every corpus file found directly inside `dirname`.
 *
 * @param {string} dirname Directory to read.
 * @returns {Object<string, Object>} File basename -> result of `loadTest`.
 */
function loadTests(dirname) {
  return fs.readdirSync(dirname).reduce((tests, basename) => {
    tests[basename] = loadTest(path.join(dirname, basename));
    return tests;
  }, {});
}
function loadTest(filename) {
const testCases = {};
const text = fs.readFileSync(filename, "utf8");
let state = "start"; // start -> Loop(title -> input -> output)
let titleBuffer = [];
let inputBuffer = [];
let outputBuffer = [];
for (const lineText of (text + "\n" + "=".repeat(80)).split("\n")) {
if (
(state === "start" || state === "title" || state === "output") &&
/^=====+$/.test(lineText)
) {
if (state === "output") {
testCases[titleBuffer.join("\n")] = {
input: inputBuffer.join("\n"),
output: outputBuffer.join("\n"),
};
titleBuffer = [];
inputBuffer = [];
outputBuffer = [];
}
state = state === "title" ? "input" : "title";
} else if (state === "input" && /^-----+$/.test(lineText)) {
state = "output";
} else if (state === "title") {
titleBuffer.push(lineText);
} else if (state === "input") {
inputBuffer.push(lineText);
} else if (state === "output") {
outputBuffer.push(lineText);
} else {
// do nothing
}
}
return testCases;
}
/**
 * Writes every entry of `tests` (basename -> test cases) into `dirname`,
 * one corpus file per basename.
 */
function writeTests(dirname, tests) {
  Object.keys(tests).forEach(basename => {
    writeTest(path.join(dirname, basename), tests[basename]);
  });
}
/**
 * Serialises test cases into the corpus file format and writes them to
 * `filename`.
 *
 * Each case becomes: an 80-character `=` line, the title, another `=` line,
 * the input, an 80-character `-` line, a blank line, the trimmed output and
 * a trailing blank line.
 *
 * @param {string} filename Destination path.
 * @param {Object<string, {input: string, output: string}>} testCases
 */
function writeTest(filename, testCases) {
  const headerRule = "=".repeat(80);
  const dividerRule = "-".repeat(80);

  const sections = Object.entries(testCases).map(([title, testCase]) =>
    [
      headerRule,
      title,
      headerRule,
      testCase.input,
      dividerRule,
      "",
      testCase.output.trim(),
      "",
    ].join("\n"),
  );
  fs.writeFileSync(filename, sections.join("\n"));
}
module.exports = {
loadTests,
loadTest,
writeTests,
writeTest,
};

28
src/binding.cc generated Normal file
View file

@ -0,0 +1,28 @@
#include "tree_sitter/parser.h"
#include <node.h>
#include "nan.h"
using namespace v8;
extern "C" TSLanguage * tree_sitter_yaml();
namespace {
// Constructor callback for the "Language" function template; intentionally
// does nothing.
NAN_METHOD(New) {}
// Module initialiser: replaces `module.exports` with a single "Language"
// object that carries the YAML grammar.
void Init(Local<Object> exports, Local<Object> module) {
// Build a "Language" class whose instances have one internal field.
Local<FunctionTemplate> tpl = Nan::New<FunctionTemplate>(New);
tpl->SetClassName(Nan::New("Language").ToLocalChecked());
tpl->InstanceTemplate()->SetInternalFieldCount(1);
// Create one instance and store the pointer returned by tree_sitter_yaml()
// in its internal field 0.
Local<Function> constructor = Nan::GetFunction(tpl).ToLocalChecked();
Local<Object> instance = constructor->NewInstance(Nan::GetCurrentContext()).ToLocalChecked();
Nan::SetInternalFieldPointer(instance, 0, tree_sitter_yaml());
// Expose the grammar name and make the instance the module's export.
Nan::Set(instance, Nan::New("name").ToLocalChecked(), Nan::New("yaml").ToLocalChecked());
Nan::Set(module, Nan::New("exports").ToLocalChecked(), instance);
}
NODE_MODULE(tree_sitter_yaml_binding, Init)
} // namespace

6006
src/grammar.json generated Normal file

File diff suppressed because it is too large Load diff

42
src/node-types.json generated Normal file
View file

@ -0,0 +1,42 @@
[
{
"type": "stream",
"named": true,
"fields": {
"key": {
"multiple": false,
"required": false,
"types": [
{
"type": "flow_pair",
"named": true
}
]
},
"value": {
"multiple": false,
"required": false,
"types": [
{
"type": "flow_pair",
"named": true
}
]
}
},
"children": {
"multiple": true,
"required": false,
"types": [
{
"type": "document",
"named": true
},
{
"type": "flow_pair",
"named": true
}
]
}
}
]

25708
src/parser.c generated Normal file

File diff suppressed because it is too large Load diff

939
src/scanner.cc Normal file
View file

@ -0,0 +1,939 @@
#include <tree_sitter/parser.h>
#include <vector>
namespace {
using std::vector;
// Symbols produced by the external scanner; the enumerator values index the
// `valid_symbols` array passed to Scanner::scan.
// NOTE(review): the order presumably has to match the `externals` list of the
// grammar -- confirm before reordering.
//
// Name prefixes encode where the token starts, as tested in Scanner::scan:
//   R_  : on the same row as the end of the previous token
//   BR_ : after a line break, with more leading spaces than the current indent
//   B_  : after a line break, with exactly the current indent (no tabs)
//   S_  : at column 0
enum TokenType {
END_OF_FILE,
// directives: begin markers and their parameters
S_DIR_YML_BGN, R_DIR_YML_VER,
S_DIR_TAG_BGN, R_DIR_TAG_HDL, R_DIR_TAG_PFX,
S_DIR_RSV_BGN, R_DIR_RSV_PRM,
// `---` and `...`
S_DRS_END,
S_DOC_END,
// block indicators: `-`, `?`, `:`, and `:` after an implicit key
R_BLK_SEQ_BGN, BR_BLK_SEQ_BGN, B_BLK_SEQ_BGN,
R_BLK_KEY_BGN, BR_BLK_KEY_BGN, B_BLK_KEY_BGN,
R_BLK_VAL_BGN, BR_BLK_VAL_BGN, B_BLK_VAL_BGN,
R_BLK_IMP_BGN,
// block scalars: `|` / `>` header and content lines
R_BLK_STR_BGN, BR_BLK_STR_BGN,
BR_BLK_STR_CTN,
// flow indicators: `[`, `]`, `{`, `}`, `,`, `?`, `:`
R_FLW_SEQ_BGN, BR_FLW_SEQ_BGN, B_FLW_SEQ_BGN,
R_FLW_SEQ_END, BR_FLW_SEQ_END,
R_FLW_MAP_BGN, BR_FLW_MAP_BGN, B_FLW_MAP_BGN,
R_FLW_MAP_END, BR_FLW_MAP_END,
R_FLW_SEP_BGN, BR_FLW_SEP_BGN,
R_FLW_KEY_BGN, BR_FLW_KEY_BGN,
R_FLW_JSV_BGN, BR_FLW_JSV_BGN,
R_FLW_NJV_BGN, BR_FLW_NJV_BGN,
// double-quoted scalars
R_DQT_STR_BGN, BR_DQT_STR_BGN, B_DQT_STR_BGN,
R_DQT_STR_CTN, BR_DQT_STR_CTN,
R_DQT_ESC_NWL, BR_DQT_ESC_NWL,
R_DQT_ESC_SEQ, BR_DQT_ESC_SEQ,
R_DQT_STR_END, BR_DQT_STR_END,
// single-quoted scalars
R_SQT_STR_BGN, BR_SQT_STR_BGN, B_SQT_STR_BGN,
R_SQT_STR_CTN, BR_SQT_STR_CTN,
R_SQT_ESC_SQT, BR_SQT_ESC_SQT,
R_SQT_STR_END, BR_SQT_STR_END,
// plain scalars: single-line / multi-line, block / flow context
R_SGL_PLN_BLK, BR_SGL_PLN_BLK, B_SGL_PLN_BLK,
R_SGL_PLN_FLW, BR_SGL_PLN_FLW,
R_MTL_PLN_BLK, BR_MTL_PLN_BLK,
R_MTL_PLN_FLW, BR_MTL_PLN_FLW,
// node properties: `!tag`, `&anchor`, `*alias`
R_TAG, BR_TAG, B_TAG,
R_ACR, BR_ACR, B_ACR,
R_ALS, BR_ALS, B_ALS,
// emitted when an indentation level is popped
BL,
COMMENT,
};
// Shorthands that forward to the Scanner helpers using the `lexer` and
// `valid_symbols` variables of the enclosing method.
#define ADV() adv(lexer)
#define ADV_NWL() adv_nwl(lexer)
#define SKP() skp(lexer)
#define SKP_NWL() skp_nwl(lexer)
#define MRK_END() mrk_end(lexer)
#define LKA lexer->lookahead
#define VLD valid_symbols
// Tri-state result of the scn_* character scanners.
#define SCN_SUCC 1
#define SCN_STOP 0
#define SCN_FAIL -1
// Indentation stack entry types: root, mapping, sequence, block scalar.
#define IND_ROT 'r'
#define IND_MAP 'm'
#define IND_SEQ 'q'
#define IND_STR 's'
// Commit the marked end position to row/col, set the result symbol and
// return true from the enclosing function.
#define RET_SYM(RESULT_SYMBOL) { \
flush(); \
lexer->result_symbol = RESULT_SYMBOL; \
return true; \
}
// Pop one indentation level; returns false from the enclosing function when
// only the root entry is left.
#define POP_IND() { \
/* incorrect status caused by error recovering */ \
if (ind_typ_stk.size() == 1) { \
return false; \
} \
pop_ind(); \
}
#define PUSH_IND(TYP, LEN) push_ind(TYP, LEN)
// Push an indentation level at the token's start column (`bgn_col`);
// returns false from the enclosing function if the line was indented with a tab.
#define PUSH_BGN_IND(TYP) { \
if (has_tab_ind) return false; \
push_ind(TYP, bgn_col); \
}
// Push a mapping level at the recorded implicit-key column unless the current
// indent already equals it.
// NOTE(review): the TYP parameter is not used in the body; IND_MAP is always
// pushed, and the call site passes an empty argument.
#define MAY_PUSH_IMP_IND(TYP) { \
if (cur_ind != blk_imp_col) { \
if (blk_imp_tab) return false; \
push_ind(IND_MAP, blk_imp_col); \
} \
}
// Push a sequence level at `bgn_col` when the current level is a mapping.
#define MAY_PUSH_SPC_SEQ_IND() { \
if (cur_ind_typ == IND_MAP) { \
push_ind(IND_SEQ, bgn_col); \
} \
}
// Record row, column and tab-indent flag of the first candidate implicit key
// seen on a row; later tokens on the same row leave the record untouched.
#define MAY_UPD_IMP_COL() { \
if (blk_imp_row != bgn_row) { \
blk_imp_row = bgn_row; \
blk_imp_col = bgn_col; \
blk_imp_tab = has_tab_ind; \
} \
}
struct Scanner {
int16_t row;
int16_t col;
int16_t blk_imp_row;
int16_t blk_imp_col;
int16_t blk_imp_tab;
vector<int16_t> ind_typ_stk;
vector<int16_t> ind_len_stk;
// temp
int16_t end_row;
int16_t end_col;
int16_t cur_row;
int16_t cur_col;
int32_t cur_chr;
Scanner() {
deserialize(NULL, 0);
}
// Append a 16-bit value to `buffer` as two bytes, low byte first.
static void put_i16(char *buffer, size_t &i, int16_t value) {
  buffer[i++] = static_cast<char>(value & 0xff);
  buffer[i++] = static_cast<char>((value >> 8) & 0xff);
}
// Read back a value written by put_i16 and advance `i` past it.
static int16_t get_i16(const char *buffer, size_t &i) {
  uint16_t lo = static_cast<unsigned char>(buffer[i++]);
  uint16_t hi = static_cast<unsigned char>(buffer[i++]);
  return static_cast<int16_t>(static_cast<uint16_t>(lo | (hi << 8)));
}
// Save the persistent scanner state into `buffer` and return the number of
// bytes written.
//
// Layout (all values 16-bit, low byte first): row, col, blk_imp_row,
// blk_imp_col, blk_imp_tab, then one (type, length) pair per indentation
// level above the root entry. Storing full 16-bit values keeps rows and
// columns above 127 intact, which a single `char` per field cannot do.
// Levels that do not fit into the buffer are dropped.
unsigned serialize(char *buffer) {
  size_t i = 0;
  put_i16(buffer, i, row);
  put_i16(buffer, i, col);
  put_i16(buffer, i, blk_imp_row);
  put_i16(buffer, i, blk_imp_col);
  put_i16(buffer, i, blk_imp_tab);
  // Index 0 is the root entry, which deserialize() always recreates.
  // Each level needs 4 bytes; never start one that would overrun the buffer.
  for (size_t k = 1;
       k < ind_typ_stk.size() && i + 4 <= TREE_SITTER_SERIALIZATION_BUFFER_SIZE;
       ++k) {
    put_i16(buffer, i, ind_typ_stk[k]);
    put_i16(buffer, i, ind_len_stk[k]);
  }
  return i;
}
// Reset the scanner to its initial state and, when `length` covers at least
// the fixed header, restore the state written by serialize().
void deserialize(const char *buffer, unsigned length) {
  row = 0;
  col = 0;
  blk_imp_row = -1;
  blk_imp_col = -1;
  blk_imp_tab = 0;
  ind_typ_stk.clear();
  ind_typ_stk.push_back(IND_ROT);
  ind_len_stk.clear();
  ind_len_stk.push_back(-1);
  // The fixed header is five 16-bit values.
  if (length >= 10) {
    size_t i = 0;
    row = get_i16(buffer, i);
    col = get_i16(buffer, i);
    blk_imp_row = get_i16(buffer, i);
    blk_imp_col = get_i16(buffer, i);
    blk_imp_tab = get_i16(buffer, i);
    // Only read complete (type, length) pairs.
    while (i + 4 <= length) {
      ind_typ_stk.push_back(get_i16(buffer, i));
      ind_len_stk.push_back(get_i16(buffer, i));
    }
  }
}
void adv(TSLexer *lexer) {
cur_col++;
cur_chr = lexer->lookahead;
lexer->advance(lexer, false);
}
void adv_nwl(TSLexer *lexer) {
cur_row++;
cur_col = 0;
cur_chr = lexer->lookahead;
lexer->advance(lexer, false);
}
void skp(TSLexer *lexer) {
cur_col++;
cur_chr = lexer->lookahead;
lexer->advance(lexer, true);
}
void skp_nwl(TSLexer *lexer) {
cur_row++;
cur_col = 0;
cur_chr = lexer->lookahead;
lexer->advance(lexer, true);
}
void mrk_end(TSLexer *lexer) {
end_row = cur_row;
end_col = cur_col;
lexer->mark_end(lexer);
}
void init() {
cur_row = row;
cur_col = col;
cur_chr = 0;
}
void flush() {
row = end_row;
col = end_col;
}
void pop_ind() {
ind_len_stk.pop_back();
ind_typ_stk.pop_back();
}
void push_ind(int16_t typ, int16_t len) {
ind_len_stk.push_back(len);
ind_typ_stk.push_back(typ);
}
bool is_wsp(int32_t c) {
return c == ' ' || c == '\t';
}
bool is_nwl(int32_t c) {
return c == '\r' || c == '\n';
}
bool is_wht(int32_t c) {
return is_wsp(c) || is_nwl(c) || c == 0;
}
bool is_ns_dec_digit(int32_t c) {
return c >= '0' && c <= '9';
}
bool is_ns_hex_digit(int32_t c) {
return is_ns_dec_digit(c)
|| (c >= 'a' && c <= 'f')
|| (c >= 'A' && c <= 'F');
}
bool is_ns_word_char(int32_t c) {
return c == '-'
|| (c >= '0' && c <= '9')
|| (c >= 'a' && c <= 'z')
|| (c >= 'A' && c <= 'Z');
}
bool is_nb_json(int32_t c) {
return c == 0x09 || (c >= 0x20 && c <= 0x10ffff);
}
bool is_nb_double_char(int32_t c) {
return is_nb_json(c) && c != '\\' && c != '"';
}
bool is_nb_single_char(int32_t c) {
return is_nb_json(c) && c != '\'';
}
bool is_ns_char(int32_t c) {
return (c >= 0x21 && c <= 0x7e)
|| c == 0x85
|| (c >= 0xa0 && c <= 0xd7ff)
|| (c >= 0xe000 && c <= 0xfefe)
|| (c >= 0xff00 && c <= 0xfffd)
|| (c >= 0x10000 && c <= 0x10ffff);
}
bool is_c_indicator(int32_t c) {
return c == '-' || c == '?' || c == ':' || c == ',' || c == '[' || c == ']' || c == '{' || c == '}'
|| c == '#' || c == '&' || c == '*' || c == '!' || c == '|' || c == '>' || c == '\'' || c == '"'
|| c == '%' || c == '@' || c == '`';
}
bool is_c_flow_indicator(int32_t c) {
return c == ',' || c == '[' || c == ']' || c == '{' || c == '}';
}
bool is_plain_safe_in_block(int32_t c) {
return is_ns_char(c);
}
bool is_plain_safe_in_flow(int32_t c) {
return is_ns_char(c) && !is_c_flow_indicator(c);
}
bool is_ns_uri_char(int32_t c) {
return is_ns_word_char(c)
|| c == '#' || c == ';' || c == '/' || c == '?' || c == ':' || c == '@' || c == '&'
|| c == '=' || c == '+' || c == '$' || c == ',' || c == '_' || c == '.' || c == '!'
|| c == '~' || c == '*' || c == '\'' || c == '(' || c == ')' || c == '[' || c == ']';
}
bool is_ns_tag_char(int32_t c) {
return is_ns_word_char(c)
|| c == '#' || c == ';' || c == '/' || c == '?' || c == ':' || c == '@' || c == '&'
|| c == '=' || c == '+' || c == '$' || c == '_' || c == '.'
|| c == '~' || c == '*' || c == '\'' || c == '(' || c == ')';
}
bool is_ns_anchor_char(int32_t c) {
return is_ns_char(c) && !is_c_flow_indicator(c);
}
// The three scanners below return SCN_SUCC, SCN_STOP or SCN_FAIL. They return
// `int` rather than `char`: SCN_FAIL is -1, and where plain `char` is
// unsigned it would come back as 255 and never match `case SCN_FAIL`.

// Scan a `%XX` escape. Returns SCN_STOP without consuming anything when the
// lookahead is not `%`; otherwise marks the token end before the `%`,
// consumes it and requires two hex digits (SCN_FAIL if either is missing).
int scn_uri_esc(TSLexer *lexer) {
  if (LKA != '%') return SCN_STOP;
  MRK_END();
  ADV();
  if (!is_ns_hex_digit(LKA)) return SCN_FAIL;
  ADV();
  if (!is_ns_hex_digit(LKA)) return SCN_FAIL;
  ADV();
  return SCN_SUCC;
}
// Consume one URI character: a literal accepted by is_ns_uri_char or a `%XX`
// escape.
int scn_ns_uri_char(TSLexer *lexer) {
  if (is_ns_uri_char(LKA)) {ADV(); return SCN_SUCC;}
  return scn_uri_esc(lexer);
}
// Consume one tag character: a literal accepted by is_ns_tag_char or a `%XX`
// escape.
int scn_ns_tag_char(TSLexer *lexer) {
  if (is_ns_tag_char(LKA)) {ADV(); return SCN_SUCC;}
  return scn_uri_esc(lexer);
}
bool scn_dir_bgn(TSLexer *lexer) {
ADV();
if (LKA == 'Y') {
ADV();
if (LKA == 'A') {
ADV();
if (LKA == 'M') {
ADV();
if (LKA == 'L') {
ADV();
if (is_wht(LKA)) {
MRK_END();
RET_SYM(S_DIR_YML_BGN);
}
}
}
}
} else if (LKA == 'T') {
ADV();
if (LKA == 'A') {
ADV();
if (LKA == 'G') {
ADV();
if (is_wht(LKA)) {
MRK_END();
RET_SYM(S_DIR_TAG_BGN);
}
}
}
}
for (;;) {
if (!is_ns_char(LKA)) break;
ADV();
}
if (cur_col > 1 && is_wht(LKA)) {
MRK_END();
RET_SYM(S_DIR_RSV_BGN);
}
return false;
}
bool scn_dir_yml_ver(TSLexer *lexer, TSSymbol result_symbol) {
uint16_t n1 = 0;
uint16_t n2 = 0;
while (is_ns_dec_digit(LKA)) {ADV();n1++;}
if (LKA != '.') return false;
ADV();
while (is_ns_dec_digit(LKA)) {ADV();n2++;}
if (n1 == 0 || n2 == 0) return false;
MRK_END();
RET_SYM(result_symbol);
}
bool scn_tag_hdl_tal(TSLexer *lexer) {
if (LKA == '!') {ADV();return true;}
uint16_t n = 0;
while (is_ns_word_char(LKA)) {ADV();n++;}
if (n == 0) return true;
if (LKA == '!') {ADV();return true;}
return false;
}
bool scn_dir_tag_hdl(TSLexer *lexer, TSSymbol result_symbol) {
if (LKA == '!') {
ADV();
if (scn_tag_hdl_tal(lexer)) {MRK_END();RET_SYM(result_symbol);}
}
return false;
}
bool scn_dir_tag_pfx(TSLexer *lexer, TSSymbol result_symbol) {
if (LKA == '!') ADV();
else if (scn_ns_tag_char(lexer) == SCN_SUCC);
else return false;
for (;;) {
switch (scn_ns_uri_char(lexer)) {
case SCN_STOP:
MRK_END();
case SCN_FAIL:
RET_SYM(result_symbol);
}
}
}
bool scn_dir_rsv_prm(TSLexer *lexer, TSSymbol result_symbol) {
if (!is_ns_char(LKA)) return false;
ADV();
while (is_ns_char(LKA)) ADV();
MRK_END();
RET_SYM(result_symbol);
}
bool scn_tag(TSLexer *lexer, TSSymbol result_symbol) {
if (LKA != '!') return false;
ADV();
if (is_wht(LKA)) {MRK_END();RET_SYM(result_symbol);}
if (LKA == '<') {
ADV();
if (scn_ns_uri_char(lexer) != SCN_SUCC) return false;
for (;;) {
switch (scn_ns_uri_char(lexer)) {
case SCN_STOP:
if (LKA == '>') {
ADV();
MRK_END();
RET_SYM(result_symbol);
}
case SCN_FAIL:
return false;
}
}
} else {
if (scn_tag_hdl_tal(lexer) && scn_ns_tag_char(lexer) != SCN_SUCC) return false;
for (;;) {
switch (scn_ns_tag_char(lexer)) {
case SCN_STOP:
MRK_END();
case SCN_FAIL:
RET_SYM(result_symbol);
}
}
}
return false;
}
bool scn_acr(TSLexer *lexer, TSSymbol result_symbol) {
if (LKA != '&') return false;
ADV();
if (!is_ns_anchor_char(LKA)) return false;
ADV();
while (is_ns_anchor_char(LKA)) ADV();
MRK_END();
RET_SYM(result_symbol);
}
bool scn_als(TSLexer *lexer, TSSymbol result_symbol) {
if (LKA != '*') return false;
ADV();
if (!is_ns_anchor_char(LKA)) return false;
ADV();
while (is_ns_anchor_char(LKA)) ADV();
MRK_END();
RET_SYM(result_symbol);
}
bool scn_dqt_esc_seq(TSLexer *lexer, TSSymbol result_symbol) {
uint16_t i;
switch (LKA) {
case '0': case 'a': case 'b': case 't': case '\t': case 'n': case 'v':
case 'r': case 'e': case ' ': case '"': case '/': case '\\': case 'N':
case '_': case 'L': case 'P':
ADV();
break;
case 'U':
ADV();
for (i = 0; i < 8; i++) if (is_ns_hex_digit(LKA)) ADV(); else return false;
break;
case 'u':
ADV();
for (i = 0; i < 4; i++) if (is_ns_hex_digit(LKA)) ADV(); else return false;
break;
case 'x':
ADV();
for (i = 0; i < 2; i++) if (is_ns_hex_digit(LKA)) ADV(); else return false;
break;
default:
return false;
}
MRK_END();
RET_SYM(result_symbol);
}
// Scan a run of double-quoted string content on the current line.
//
// At column 0 the text is first probed for a `---` / `...` marker, which is
// returned as S_DRS_END / S_DOC_END instead of string content.
bool scn_dqt_str_cnt(TSLexer *lexer, TSSymbol result_symbol) {
  if (!is_nb_double_char(LKA)) return false;
  int16_t start_col = cur_col;
  if (cur_col == 0 && scn_drs_doc_end(lexer)) {
    MRK_END();
    RET_SYM(cur_chr == '-' ? S_DRS_END : S_DOC_END);
  }
  // A failed marker probe may already have consumed leading `-` / `.`
  // characters. Only consume the first (already validated) character here if
  // nothing was consumed; otherwise the next character -- possibly the
  // closing quote -- would be swallowed unchecked.
  if (cur_col == start_col) ADV();
  while (is_nb_double_char(LKA)) ADV();
  MRK_END();
  RET_SYM(result_symbol);
}
// Scan a run of single-quoted string content on the current line.
//
// At column 0 the text is first probed for a `---` / `...` marker, which is
// returned as S_DRS_END / S_DOC_END instead of string content.
bool scn_sqt_str_cnt(TSLexer *lexer, TSSymbol result_symbol) {
  if (!is_nb_single_char(LKA)) return false;
  int16_t start_col = cur_col;
  if (cur_col == 0 && scn_drs_doc_end(lexer)) {
    MRK_END();
    RET_SYM(cur_chr == '-' ? S_DRS_END : S_DOC_END);
  }
  // A failed marker probe may already have consumed leading `-` / `.`
  // characters. Only consume the first (already validated) character here if
  // nothing was consumed; otherwise the next character -- possibly the
  // closing quote -- would be swallowed unchecked.
  if (cur_col == start_col) ADV();
  while (is_nb_single_char(LKA)) ADV();
  MRK_END();
  RET_SYM(result_symbol);
}
bool scn_blk_str_bgn(TSLexer *lexer, TSSymbol result_symbol) {
if (LKA != '|' && LKA != '>') return false;
ADV();
int16_t cur_ind = ind_len_stk.back();
int16_t ind = -1;
if (LKA >= '1' && LKA <= '9') {
ind = LKA - '1';
ADV();
if (LKA == '+' || LKA == '-') {
ADV();
}
} else if (LKA == '+' || LKA == '-') {
ADV();
if (LKA >= '1' && LKA <= '9') {
ind = LKA - '1';
ADV();
}
}
if (!is_wht(LKA)) return false;
MRK_END();
if (ind != -1) ind += cur_ind;
else {
ind = cur_ind;
while (is_wsp(LKA)) ADV();
if (LKA == '#') {
ADV();
while (!is_nwl(LKA) && LKA != 0) ADV();
}
if (is_nwl(LKA)) ADV_NWL();
while (LKA != 0) {
if (LKA == ' ') ADV();
else if (is_nwl(LKA)) {
if (cur_col - 1 < ind) break;
ind = cur_col - 1;
ADV_NWL();
} else {
if (cur_col - 1 > ind) ind = cur_col - 1;
break;
}
}
}
PUSH_IND(IND_STR, ind);
RET_SYM(result_symbol);
}
// Scan one line of block scalar content. The token ends after the last
// non-space character, so trailing spaces and tabs are left out.
//
// At column 0 a `---` / `...` marker ends the block scalar: the indentation
// level is popped and BL is returned.
bool scn_blk_str_cnt(TSLexer *lexer, TSSymbol result_symbol) {
  if (!is_ns_char(LKA)) return false;
  int16_t start_col = cur_col;
  if (cur_col == 0 && scn_drs_doc_end(lexer)) {POP_IND();RET_SYM(BL);}
  // A failed marker probe may already have consumed leading `-` / `.`
  // characters. Only consume the first (already validated) character here if
  // nothing was consumed; otherwise the next character -- possibly a line
  // break -- would be taken as content unchecked.
  if (cur_col == start_col) ADV();
  MRK_END();
  for (;;) {
    if (is_ns_char(LKA)) {
      ADV();
      while (is_ns_char(LKA)) ADV();
      MRK_END();
    }
    if (is_wsp(LKA)) {
      ADV();
      while (is_wsp(LKA)) ADV();
    } else break;
  }
  RET_SYM(result_symbol);
}
char scn_pln_cnt(TSLexer *lexer, bool (Scanner::*is_plain_safe)(int32_t)) {
bool is_cur_wsp = is_wsp(cur_chr);
bool is_cur_saf = (this->*is_plain_safe)(cur_chr);
bool is_lka_wsp = is_wsp(LKA);
bool is_lka_saf = (this->*is_plain_safe)(LKA);
if (is_lka_saf || is_lka_wsp) {
for (;;) {
if (is_lka_saf && LKA != '#' && LKA != ':') {ADV();MRK_END();}
else if (is_cur_saf && LKA == '#') {ADV();MRK_END();}
else if (is_lka_wsp) ADV();
else if (LKA == ':') ADV(); // check later
else break;
is_cur_wsp = is_lka_wsp;
is_cur_saf = is_lka_saf;
is_lka_wsp = is_wsp(LKA);
is_lka_saf = (this->*is_plain_safe)(LKA);
if (cur_chr == ':') {
if (is_lka_saf) MRK_END();
else return SCN_FAIL;
}
}
} else return SCN_STOP;
return SCN_SUCC;
}
bool scn_drs_doc_end(TSLexer *lexer) {
if (LKA != '-' && LKA != '.') return false;
int32_t delimeter = LKA;
ADV();
if (LKA == delimeter) {
ADV();
if (LKA == delimeter) {
ADV();
if (is_wht(LKA)) return true;
}
}
MRK_END();
return false;
}
// Main entry point of the external scanner.
//
// Skips whitespace and line breaks (emitting COMMENT tokens where allowed),
// emits BL when the indentation drops below the current level, and otherwise
// tries each token the parser marked valid, chosen by the lookahead
// character and by where the token starts (see the is_r / is_br / is_b / is_s
// flags below). Returns true when a token was recognised.
bool scan(TSLexer *lexer, const bool *valid_symbols) {
  init();
  MRK_END();

  // Inside quoted strings `#` is content, not a comment.
  bool allow_comment = !(VLD[R_DQT_STR_CTN] || VLD[BR_DQT_STR_CTN] || VLD[R_SQT_STR_CTN] || VLD[BR_SQT_STR_CTN]);

  // Current and parent indentation. With only the root entry on the stack
  // there is no parent; use the root's own value (-1) instead of reading the
  // element before the start of the vector.
  const size_t ind_depth = ind_len_stk.size();
  int16_t cur_ind = ind_len_stk.back();
  int16_t prt_ind = ind_depth > 1 ? ind_len_stk[ind_depth - 2] : cur_ind;
  int16_t cur_ind_typ = ind_typ_stk.back();

  // Skip leading whitespace, line breaks and comments.
  bool has_tab_ind = false;
  int16_t leading_spaces = 0;
  for (;;) {
    if (LKA == ' ') {
      if (!has_tab_ind) leading_spaces++;
      SKP();
    } else if (LKA == '\t') {
      has_tab_ind = true;
      SKP();
    } else if (is_nwl(LKA)) {
      has_tab_ind = false;
      leading_spaces = 0;
      SKP_NWL();
    } else if (allow_comment && LKA == '#') {
      if (VLD[BR_BLK_STR_CTN] && VLD[BL] && cur_col <= cur_ind) {POP_IND();RET_SYM(BL);}
      if (
        VLD[BR_BLK_STR_CTN]
          ? cur_row == row
          : cur_col == 0 || cur_row != row || cur_col > col
      ) {
        ADV();
        while (!is_nwl(LKA) && LKA != 0) ADV();
        MRK_END();
        RET_SYM(COMMENT);
      } else break;
    } else break;
  }

  // End of input: close open indentation levels first, then report EOF.
  if (LKA == 0) {
    if (VLD[BL]) {MRK_END();POP_IND();RET_SYM(BL)}
    if (VLD[END_OF_FILE]) {MRK_END();RET_SYM(END_OF_FILE)}
    return false;
  }

  int16_t bgn_row = cur_row;
  int16_t bgn_col = cur_col;
  int32_t bgn_chr = LKA;

  // Dedent: pop the current indentation level and emit BL.
  if (VLD[BL] && bgn_col <= cur_ind && !has_tab_ind) {
    if (
      cur_ind == prt_ind && cur_ind_typ == IND_SEQ
        ? bgn_col < cur_ind || LKA != '-'
        : bgn_col <= prt_ind || cur_ind_typ == IND_STR
    ) {POP_IND();RET_SYM(BL);}
  }

  // Position of the token start relative to the previous token and the
  // current indentation.
  bool has_nwl = cur_row > row;
  bool is_r = !has_nwl;
  bool is_br = has_nwl && leading_spaces > cur_ind;
  bool is_b = has_nwl && leading_spaces == cur_ind && !has_tab_ind;
  bool is_s = bgn_col == 0;

  // Directive parameters, block scalar content and quoted string content.
  if (VLD[R_DIR_YML_VER] && is_r) return scn_dir_yml_ver(lexer, R_DIR_YML_VER);
  if (VLD[R_DIR_TAG_HDL] && is_r) return scn_dir_tag_hdl(lexer, R_DIR_TAG_HDL);
  if (VLD[R_DIR_TAG_PFX] && is_r) return scn_dir_tag_pfx(lexer, R_DIR_TAG_PFX);
  if (VLD[R_DIR_RSV_PRM] && is_r) return scn_dir_rsv_prm(lexer, R_DIR_RSV_PRM);
  if (VLD[BR_BLK_STR_CTN] && is_br && scn_blk_str_cnt(lexer, BR_BLK_STR_CTN)) return true;

  if (
    (VLD[R_DQT_STR_CTN] && is_r && scn_dqt_str_cnt(lexer, R_DQT_STR_CTN))
    || (VLD[BR_DQT_STR_CTN] && is_br && scn_dqt_str_cnt(lexer, BR_DQT_STR_CTN))
  ) return true;

  if (
    (VLD[R_SQT_STR_CTN] && is_r && scn_sqt_str_cnt(lexer, R_SQT_STR_CTN))
    || (VLD[BR_SQT_STR_CTN] && is_br && scn_sqt_str_cnt(lexer, BR_SQT_STR_CTN))
  ) return true;

  // Indicator characters.
  if (LKA == '%') {
    if (VLD[S_DIR_YML_BGN] && is_s) return scn_dir_bgn(lexer);
  } else if (LKA == '*') {
    if (VLD[R_ALS] && is_r) {MAY_UPD_IMP_COL();return scn_als(lexer, R_ALS);}
    if (VLD[BR_ALS] && is_br) {MAY_UPD_IMP_COL();return scn_als(lexer, BR_ALS);}
    if (VLD[B_ALS] && is_b) {MAY_UPD_IMP_COL();return scn_als(lexer, B_ALS);}
  } else if (LKA == '&') {
    if (VLD[R_ACR] && is_r) {MAY_UPD_IMP_COL();return scn_acr(lexer, R_ACR);}
    if (VLD[BR_ACR] && is_br) {MAY_UPD_IMP_COL();return scn_acr(lexer, BR_ACR);}
    if (VLD[B_ACR] && is_b) {MAY_UPD_IMP_COL();return scn_acr(lexer, B_ACR);}
  } else if (LKA == '!') {
    if (VLD[R_TAG] && is_r) {MAY_UPD_IMP_COL();return scn_tag(lexer, R_TAG);}
    if (VLD[BR_TAG] && is_br) {MAY_UPD_IMP_COL();return scn_tag(lexer, BR_TAG);}
    if (VLD[B_TAG] && is_b) {MAY_UPD_IMP_COL();return scn_tag(lexer, B_TAG);}
  } else if (LKA == '[') {
    if (VLD[R_FLW_SEQ_BGN] && is_r) {MAY_UPD_IMP_COL();ADV();MRK_END();RET_SYM(R_FLW_SEQ_BGN)}
    if (VLD[BR_FLW_SEQ_BGN] && is_br) {MAY_UPD_IMP_COL();ADV();MRK_END();RET_SYM(BR_FLW_SEQ_BGN)}
    if (VLD[B_FLW_SEQ_BGN] && is_b) {MAY_UPD_IMP_COL();ADV();MRK_END();RET_SYM(B_FLW_SEQ_BGN)}
  } else if (LKA == ']') {
    if (VLD[R_FLW_SEQ_END] && is_r) {ADV();MRK_END();RET_SYM(R_FLW_SEQ_END)}
    if (VLD[BR_FLW_SEQ_END] && is_br) {ADV();MRK_END();RET_SYM(BR_FLW_SEQ_END)}
  } else if (LKA == '{') {
    if (VLD[R_FLW_MAP_BGN] && is_r) {MAY_UPD_IMP_COL();ADV();MRK_END();RET_SYM(R_FLW_MAP_BGN)}
    if (VLD[BR_FLW_MAP_BGN] && is_br) {MAY_UPD_IMP_COL();ADV();MRK_END();RET_SYM(BR_FLW_MAP_BGN)}
    if (VLD[B_FLW_MAP_BGN] && is_b) {MAY_UPD_IMP_COL();ADV();MRK_END();RET_SYM(B_FLW_MAP_BGN)}
  } else if (LKA == '}') {
    if (VLD[R_FLW_MAP_END] && is_r) {ADV();MRK_END();RET_SYM(R_FLW_MAP_END)}
    if (VLD[BR_FLW_MAP_END] && is_br) {ADV();MRK_END();RET_SYM(BR_FLW_MAP_END)}
  } else if (LKA == ',') {
    if (VLD[R_FLW_SEP_BGN] && is_r) {ADV();MRK_END();RET_SYM(R_FLW_SEP_BGN)}
    if (VLD[BR_FLW_SEP_BGN] && is_br) {ADV();MRK_END();RET_SYM(BR_FLW_SEP_BGN)}
  } else if (LKA == '"') {
    if (VLD[R_DQT_STR_BGN] && is_r) {MAY_UPD_IMP_COL();ADV();MRK_END();RET_SYM(R_DQT_STR_BGN)}
    if (VLD[BR_DQT_STR_BGN] && is_br) {MAY_UPD_IMP_COL();ADV();MRK_END();RET_SYM(BR_DQT_STR_BGN)}
    if (VLD[B_DQT_STR_BGN] && is_b) {MAY_UPD_IMP_COL();ADV();MRK_END();RET_SYM(B_DQT_STR_BGN)}
    if (VLD[R_DQT_STR_END] && is_r) {ADV();MRK_END();RET_SYM(R_DQT_STR_END)}
    if (VLD[BR_DQT_STR_END] && is_br) {ADV();MRK_END();RET_SYM(BR_DQT_STR_END)}
  } else if (LKA == '\'') {
    if (VLD[R_SQT_STR_BGN] && is_r) {MAY_UPD_IMP_COL();ADV();MRK_END();RET_SYM(R_SQT_STR_BGN)}
    if (VLD[BR_SQT_STR_BGN] && is_br) {MAY_UPD_IMP_COL();ADV();MRK_END();RET_SYM(BR_SQT_STR_BGN)}
    if (VLD[B_SQT_STR_BGN] && is_b) {MAY_UPD_IMP_COL();ADV();MRK_END();RET_SYM(B_SQT_STR_BGN)}
    // Inside a single-quoted string `''` is an escaped quote; a lone `'`
    // closes the string.
    if (VLD[R_SQT_STR_END] && is_r) {
      ADV();
      if (LKA == '\'') {ADV();MRK_END();RET_SYM(R_SQT_ESC_SQT)}
      else {MRK_END();RET_SYM(R_SQT_STR_END)}
    }
    if (VLD[BR_SQT_STR_END] && is_br) {
      ADV();
      if (LKA == '\'') {ADV();MRK_END();RET_SYM(BR_SQT_ESC_SQT)}
      else {MRK_END();RET_SYM(BR_SQT_STR_END)}
    }
  } else if (LKA == '?') {
    bool is_r_blk_key_bgn = VLD[R_BLK_KEY_BGN] && is_r;
    bool is_br_blk_key_bgn = VLD[BR_BLK_KEY_BGN] && is_br;
    bool is_b_blk_key_bgn = VLD[B_BLK_KEY_BGN] && is_b;
    bool is_r_flw_key_bgn = VLD[R_FLW_KEY_BGN] && is_r;
    bool is_br_flw_key_bgn = VLD[BR_FLW_KEY_BGN] && is_br;
    if (is_r_blk_key_bgn || is_br_blk_key_bgn || is_b_blk_key_bgn || is_r_flw_key_bgn || is_br_flw_key_bgn) {
      ADV();
      // `?` is only an indicator when followed by whitespace or end of input.
      if (is_wht(LKA)) {
        MRK_END();
        if (is_r_blk_key_bgn) {PUSH_BGN_IND(IND_MAP);RET_SYM(R_BLK_KEY_BGN);}
        if (is_br_blk_key_bgn) {PUSH_BGN_IND(IND_MAP);RET_SYM(BR_BLK_KEY_BGN);}
        if (is_b_blk_key_bgn) RET_SYM(B_BLK_KEY_BGN);
        if (is_r_flw_key_bgn) RET_SYM(R_FLW_KEY_BGN);
        if (is_br_flw_key_bgn) RET_SYM(BR_FLW_KEY_BGN);
      }
    }
  } else if (LKA == ':') {
    if (VLD[R_FLW_JSV_BGN] && is_r) {ADV();MRK_END();RET_SYM(R_FLW_JSV_BGN);}
    if (VLD[BR_FLW_JSV_BGN] && is_br) {ADV();MRK_END();RET_SYM(BR_FLW_JSV_BGN);}
    bool is_r_blk_val_bgn = VLD[R_BLK_VAL_BGN] && is_r;
    bool is_br_blk_val_bgn = VLD[BR_BLK_VAL_BGN] && is_br;
    bool is_b_blk_val_bgn = VLD[B_BLK_VAL_BGN] && is_b;
    bool is_r_blk_imp_bgn = VLD[R_BLK_IMP_BGN] && is_r;
    bool is_r_flw_njv_bgn = VLD[R_FLW_NJV_BGN] && is_r;
    bool is_br_flw_njv_bgn = VLD[BR_FLW_NJV_BGN] && is_br;
    if (is_r_blk_val_bgn || is_br_blk_val_bgn || is_b_blk_val_bgn || is_r_blk_imp_bgn || is_r_flw_njv_bgn || is_br_flw_njv_bgn) {
      ADV();
      bool is_lka_wht = is_wht(LKA);
      if (is_lka_wht) {
        if (is_r_blk_val_bgn) {PUSH_BGN_IND(IND_MAP);MRK_END();RET_SYM(R_BLK_VAL_BGN);}
        if (is_br_blk_val_bgn) {PUSH_BGN_IND(IND_MAP);MRK_END();RET_SYM(BR_BLK_VAL_BGN);}
        if (is_b_blk_val_bgn) {MRK_END();RET_SYM(B_BLK_VAL_BGN);}
        if (is_r_blk_imp_bgn) {MAY_PUSH_IMP_IND();MRK_END();RET_SYM(R_BLK_IMP_BGN);}
      }
      if (is_lka_wht || LKA == ',' || LKA == ']' || LKA == '}') {
        if (is_r_flw_njv_bgn) {MRK_END();RET_SYM(R_FLW_NJV_BGN);}
        if (is_br_flw_njv_bgn) {MRK_END();RET_SYM(BR_FLW_NJV_BGN);}
      }
    }
  } else if (LKA == '-') {
    bool is_r_blk_seq_bgn = VLD[R_BLK_SEQ_BGN] && is_r;
    bool is_br_blk_seq_bgn = VLD[BR_BLK_SEQ_BGN] && is_br;
    bool is_b_blk_seq_bgn = VLD[B_BLK_SEQ_BGN] && is_b;
    bool is_s_drs_end = is_s;
    if (is_r_blk_seq_bgn || is_br_blk_seq_bgn || is_b_blk_seq_bgn || is_s_drs_end) {
      ADV();
      if (is_wht(LKA)) {
        if (is_r_blk_seq_bgn) {PUSH_BGN_IND(IND_SEQ);MRK_END();RET_SYM(R_BLK_SEQ_BGN)}
        if (is_br_blk_seq_bgn) {PUSH_BGN_IND(IND_SEQ);MRK_END();RET_SYM(BR_BLK_SEQ_BGN)}
        if (is_b_blk_seq_bgn) {MAY_PUSH_SPC_SEQ_IND();MRK_END();RET_SYM(B_BLK_SEQ_BGN)}
      } else if (LKA == '-' && is_s_drs_end) {
        // `---` at column 0: close open indentation levels one BL at a time
        // before emitting the marker itself.
        ADV();
        if (LKA == '-') {
          ADV();
          if (is_wht(LKA)) {
            if (VLD[BL]) {POP_IND();RET_SYM(BL);}
            MRK_END();
            RET_SYM(S_DRS_END);
          }
        }
      }
    }
  } else if (LKA == '.') {
    if (is_s) {
      // `...` at column 0, handled like `---` above.
      ADV();
      if (LKA == '.') {
        ADV();
        if (LKA == '.') {
          ADV();
          if (is_wht(LKA)) {
            if (VLD[BL]) {POP_IND();RET_SYM(BL);}
            MRK_END();
            RET_SYM(S_DOC_END);
          }
        }
      }
    }
  } else if (LKA == '\\') {
    bool is_r_dqt_esc_nwl = VLD[R_DQT_ESC_NWL] && is_r;
    bool is_br_dqt_esc_nwl = VLD[BR_DQT_ESC_NWL] && is_br;
    bool is_r_dqt_esc_seq = VLD[R_DQT_ESC_SEQ] && is_r;
    bool is_br_dqt_esc_seq = VLD[BR_DQT_ESC_SEQ] && is_br;
    if (is_r_dqt_esc_nwl || is_br_dqt_esc_nwl || is_r_dqt_esc_seq || is_br_dqt_esc_seq) {
      ADV();
      if (is_nwl(LKA)) {
        if (is_r_dqt_esc_nwl) {MRK_END();RET_SYM(R_DQT_ESC_NWL)}
        if (is_br_dqt_esc_nwl) {MRK_END();RET_SYM(BR_DQT_ESC_NWL)}
      }
      if (is_r_dqt_esc_seq) return scn_dqt_esc_seq(lexer, R_DQT_ESC_SEQ);
      if (is_br_dqt_esc_seq) return scn_dqt_esc_seq(lexer, BR_DQT_ESC_SEQ);
      return false;
    }
  } else if (LKA == '|' || LKA == '>') {
    if (VLD[R_BLK_STR_BGN] && is_r) return scn_blk_str_bgn(lexer, R_BLK_STR_BGN);
    if (VLD[BR_BLK_STR_BGN] && is_br) return scn_blk_str_bgn(lexer, BR_BLK_STR_BGN);
  }

  // Plain scalars. Reached when no indicator branch above returned; those
  // branches may already have consumed characters.
  bool maybe_sgl_pln_blk = (VLD[R_SGL_PLN_BLK] && is_r) || (VLD[BR_SGL_PLN_BLK] && is_br) || (VLD[B_SGL_PLN_BLK] && is_b);
  bool maybe_sgl_pln_flw = (VLD[R_SGL_PLN_FLW] && is_r) || (VLD[BR_SGL_PLN_FLW] && is_br);
  bool maybe_mtl_pln_blk = (VLD[R_MTL_PLN_BLK] && is_r) || (VLD[BR_MTL_PLN_BLK] && is_br);
  bool maybe_mtl_pln_flw = (VLD[R_MTL_PLN_FLW] && is_r) || (VLD[BR_MTL_PLN_FLW] && is_br);

  if (maybe_sgl_pln_blk || maybe_sgl_pln_flw || maybe_mtl_pln_blk || maybe_mtl_pln_flw) {
    bool is_in_blk = maybe_sgl_pln_blk || maybe_mtl_pln_blk;
    bool (Scanner::*is_plain_safe)(int32_t) = is_in_blk ? &Scanner::is_plain_safe_in_block : &Scanner::is_plain_safe_in_flow;
    if (cur_col - bgn_col == 0) ADV();
    if (cur_col - bgn_col == 1) {
      bool is_plain_first =
        (is_ns_char(bgn_chr) && !is_c_indicator(bgn_chr))
        || ((bgn_chr == '-' || bgn_chr == '?' || bgn_chr == ':')
          && (this->*is_plain_safe)(LKA));
      if (!is_plain_first) return false;
    } else {
      // no need to check the following cases:
      // ..X
      // ...X
      // --X
      // ---X
      // X: lookahead
    }
    MRK_END();
    // Consume the scalar line by line; continuation lines must be indented
    // deeper than the current level and must not start a document marker.
    for (;;) {
      if (!is_nwl(LKA)) {
        if (scn_pln_cnt(lexer, is_plain_safe) != SCN_SUCC) break;
      }
      if (LKA == 0 || !is_nwl(LKA)) break;
      for (;;) {
        if (is_nwl(LKA)) ADV_NWL();
        else if (is_wsp(LKA)) ADV();
        else break;
      }
      if (LKA == 0 || cur_col <= cur_ind) break;
      if (cur_col == 0 && scn_drs_doc_end(lexer)) break;
    }
    // The row of the marked end decides between single- and multi-line.
    if (end_row == bgn_row) {
      if (maybe_sgl_pln_blk) {MAY_UPD_IMP_COL();RET_SYM(is_r ? R_SGL_PLN_BLK : is_br ? BR_SGL_PLN_BLK : B_SGL_PLN_BLK);}
      if (maybe_sgl_pln_flw) RET_SYM(is_r ? R_SGL_PLN_FLW : BR_SGL_PLN_FLW);
    } else {
      if (maybe_mtl_pln_blk) {MAY_UPD_IMP_COL();RET_SYM(is_r ? R_MTL_PLN_BLK : BR_MTL_PLN_BLK);}
      if (maybe_mtl_pln_flw) RET_SYM(is_r ? R_MTL_PLN_FLW : BR_MTL_PLN_FLW);
    }
    return false;
  }

  return false;
}
};
}
extern "C" {
void *tree_sitter_yaml_external_scanner_create() {
return new Scanner();
}
void tree_sitter_yaml_external_scanner_destroy(void *payload) {
Scanner *scanner = static_cast<Scanner *>(payload);
delete scanner;
}
unsigned tree_sitter_yaml_external_scanner_serialize(void *payload, char *buffer) {
Scanner *scanner = static_cast<Scanner *>(payload);
return scanner->serialize(buffer);
}
void tree_sitter_yaml_external_scanner_deserialize(void *payload, const char *buffer, unsigned length) {
Scanner *scanner = static_cast<Scanner *>(payload);
scanner->deserialize(buffer, length);
}
bool tree_sitter_yaml_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) {
Scanner *scanner = static_cast<Scanner *>(payload);
return scanner->scan(lexer, valid_symbols);
}
}

215
src/tree_sitter/parser.h generated Normal file
View file

@ -0,0 +1,215 @@
#ifndef TREE_SITTER_PARSER_H_
#define TREE_SITTER_PARSER_H_
#ifdef __cplusplus
extern "C" {
#endif
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
/*
 * Built-in symbol ids: the error symbol is -1 converted to TSSymbol (i.e. the
 * largest TSSymbol value, since TSSymbol is unsigned), the end symbol is 0.
 */
#define ts_builtin_sym_error ((TSSymbol)-1)
#define ts_builtin_sym_end 0
/*
 * NOTE(review): presumably the capacity of the `char *` buffer handed to the
 * external scanner's serialize callback -- confirm against the runtime.
 */
#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024
/*
 * Basic id types and the TSLanguage forward declaration. They are declared
 * here only when TREE_SITTER_API_H_ is not already defined; presumably the
 * header owning that guard declares the same names, so this avoids duplicate
 * typedefs -- TODO confirm.
 */
#ifndef TREE_SITTER_API_H_
typedef uint16_t TSSymbol;
typedef uint16_t TSFieldId;
typedef struct TSLanguage TSLanguage;
#endif
/*
 * One entry of the field map (TSLanguage.field_map_entries points at an array
 * of these). It pairs a field id with an 8-bit child index.
 * NOTE(review): the exact meaning of `inherited` is not visible in this
 * header -- confirm against the runtime before relying on it.
 */
typedef struct {
TSFieldId field_id;
uint8_t child_index;
bool inherited;
} TSFieldMapEntry;
/*
 * An (index, length) pair (TSLanguage.field_map_slices points at an array of
 * these). Presumably it selects a contiguous run of TSFieldMapEntry values in
 * TSLanguage.field_map_entries -- TODO confirm.
 */
typedef struct {
uint16_t index;
uint16_t length;
} TSFieldMapSlice;
/* 16-bit state id; used for shift targets (TSParseAction) and as the state argument of the lex functions. */
typedef uint16_t TSStateId;
/*
 * Per-symbol flags, each stored in a 1-bit bitfield
 * (TSLanguage.symbol_metadata points at an array of these).
 */
typedef struct {
bool visible : 1;
bool named : 1;
} TSSymbolMetadata;
/*
 * Lexer handle passed to the lex functions and to the external scanner's
 * scan callback (see the `lex_fn` and `external_scanner.scan` signatures in
 * TSLanguage). Every callback takes the lexer itself as its first argument.
 */
typedef struct TSLexer TSLexer;
struct TSLexer {
/* Current lookahead value; START_LEXER re-reads it after every advance. */
int32_t lookahead;
/* Symbol of the recognised token; written by ACCEPT_TOKEN. */
TSSymbol result_symbol;
/* Moves to the next input position; the lexer macros pass their `skip` flag as the bool. */
void (*advance)(TSLexer *, bool);
/* Called by ACCEPT_TOKEN right after the result symbol is recorded. */
void (*mark_end)(TSLexer *);
/* NOTE(review): not used in this header; presumably returns the current column -- confirm. */
uint32_t (*get_column)(TSLexer *);
/* NOTE(review): not used in this header; semantics known only from the name -- confirm. */
bool (*is_at_included_range_start)(TSLexer *);
};
typedef enum {
TSParseActionTypeShift,
TSParseActionTypeReduce,
TSParseActionTypeAccept,
TSParseActionTypeRecover,
} TSParseActionType;
/*
 * A single parse action: a kind tag plus kind-specific parameters.
 * `params` is a union of two anonymous structs:
 *   - state / extra / repetition: the fields the SHIFT, SHIFT_REPEAT and
 *     SHIFT_EXTRA macros initialise;
 *   - symbol / dynamic_precedence / child_count / production_id: the fields
 *     the REDUCE macro initialises (symbol and child_count always, the rest
 *     only when supplied through its variadic arguments).
 * Because the two structs share storage, only the one matching `type` is
 * meaningful. `type` is stored in a 4-bit bitfield.
 */
typedef struct {
union {
struct {
TSStateId state;
bool extra : 1;
bool repetition : 1;
};
struct {
TSSymbol symbol;
int16_t dynamic_precedence;
uint8_t child_count;
uint8_t production_id;
};
} params;
TSParseActionType type : 4;
} TSParseAction;
/*
 * A pair of 16-bit lex state ids (TSLanguage.lex_modes points at an array of
 * these). NOTE(review): presumably one entry per parse state, selecting the
 * internal lexer state and the external scanner state -- confirm.
 */
typedef struct {
uint16_t lex_state;
uint16_t external_lex_state;
} TSLexMode;
/*
 * Element type of TSLanguage.parse_actions. Each element is either a
 * TSParseAction or a small (count, reusable) record sharing the same storage.
 * NOTE(review): presumably the (count, reusable) form is a header that
 * precedes `count` action elements -- confirm against the runtime.
 */
typedef union {
TSParseAction action;
struct {
uint8_t count;
bool reusable : 1;
};
} TSParseActionEntry;
/*
 * Description of a generated language: counts, symbol tables, parse/lex
 * tables, lexing entry points, external scanner hooks and field tables.
 * NOTE(review): field order and types are presumably part of the ABI shared
 * with the runtime (see `version`) -- do not reorder without confirming.
 */
struct TSLanguage {
uint32_t version;
uint32_t symbol_count;
uint32_t alias_count;
uint32_t token_count;
uint32_t external_token_count;
const char **symbol_names;
const TSSymbolMetadata *symbol_metadata;
const uint16_t *parse_table;
const TSParseActionEntry *parse_actions;
const TSLexMode *lex_modes;
const TSSymbol *alias_sequences;
uint16_t max_alias_sequence_length;
/* Lexing entry points: each takes the lexer and a state id and returns bool. */
bool (*lex_fn)(TSLexer *, TSStateId);
bool (*keyword_lex_fn)(TSLexer *, TSStateId);
TSSymbol keyword_capture_token;
/*
 * External scanner hooks. `create` returns an opaque payload pointer that is
 * passed back as the first argument of every other hook.
 */
struct {
const bool *states;
const TSSymbol *symbol_map;
void *(*create)(void);
void (*destroy)(void *);
bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist);
unsigned (*serialize)(void *, char *);
void (*deserialize)(void *, const char *, unsigned);
} external_scanner;
/* Field tables; see TSFieldMapSlice and TSFieldMapEntry. */
uint32_t field_count;
const TSFieldMapSlice *field_map_slices;
const TSFieldMapEntry *field_map_entries;
const char **field_names;
};
/*
* Lexer Macros
*/
/*
 * START_LEXER: prologue of a lex function. Declares `result`, `skip` and
 * `lookahead`, then jumps to `start`, which clears `skip` and loads
 * `lookahead` from the lexer. The `next_state` label sits just before
 * `start`: jumping to it first calls lexer->advance(lexer, skip) and then
 * falls through into `start`. Expects a `lexer` pointer to be in scope.
 */
#define START_LEXER() \
bool result = false; \
bool skip = false; \
int32_t lookahead; \
goto start; \
next_state: \
lexer->advance(lexer, skip); \
start: \
skip = false; \
lookahead = lexer->lookahead;
/*
 * ADVANCE: set `state` to the given value and jump to `next_state`, which
 * advances the lexer with `skip` still false. Expects a `state` variable in
 * scope.
 */
#define ADVANCE(state_value) \
{ \
state = state_value; \
goto next_state; \
}
/*
 * SKIP: like ADVANCE, but sets `skip` to true first, so the advance call
 * performed at `next_state` receives true as its second argument.
 */
#define SKIP(state_value) \
{ \
skip = true; \
state = state_value; \
goto next_state; \
}
/*
 * ACCEPT_TOKEN: record a successful match -- sets `result`, stores the symbol
 * in lexer->result_symbol and calls lexer->mark_end.
 * Note: this expands to three statements with no enclosing braces, so using
 * it as the body of an unbraced `if`/`else` would guard only the first one.
 */
#define ACCEPT_TOKEN(symbol_value) \
result = true; \
lexer->result_symbol = symbol_value; \
lexer->mark_end(lexer);
/* END_STATE: return the `result` flag declared by START_LEXER. */
#define END_STATE() return result;
/*
* Parse Table Macros
*/
/* STATE / ACTIONS: identity macros; they expand to their argument unchanged. */
#define STATE(id) id
#define ACTIONS(id) id
/*
 * The macros below expand to brace-enclosed initializers using C99
 * designated initializers. The outer braces have no designator, so they
 * initialise the first member of a TSParseActionEntry (`action`); the inner
 * braces initialise that TSParseAction. They are presumably used as elements
 * of the array behind TSLanguage.parse_actions -- TODO confirm.
 */
/* SHIFT: shift action whose target state is `state_value`. */
#define SHIFT(state_value) \
{ \
{ \
.type = TSParseActionTypeShift, \
.params = {.state = state_value}, \
} \
}
/* SHIFT_REPEAT: shift action to `state_value` with the `repetition` flag set. */
#define SHIFT_REPEAT(state_value) \
{ \
{ \
.type = TSParseActionTypeShift, \
.params = { \
.state = state_value, \
.repetition = true \
}, \
} \
}
/* RECOVER: recover action; no parameters are initialised. */
#define RECOVER() \
{ \
{ .type = TSParseActionTypeRecover } \
}
/* SHIFT_EXTRA: shift action with the `extra` flag set and no explicit target state. */
#define SHIFT_EXTRA() \
{ \
{ \
.type = TSParseActionTypeShift, \
.params = {.extra = true} \
} \
}
/*
 * REDUCE: reduce action for `symbol_val` with `child_count_val` children.
 * Any further arguments are pasted into the `.params` initializer after
 * those two fields, so they must themselves be designated initializers
 * (the remaining fields of that struct are dynamic_precedence and
 * production_id).
 */
#define REDUCE(symbol_val, child_count_val, ...) \
{ \
{ \
.type = TSParseActionTypeReduce, \
.params = { \
.symbol = symbol_val, \
.child_count = child_count_val, \
__VA_ARGS__ \
} \
} \
}
/* ACCEPT_INPUT: accept action; no parameters are initialised. */
#define ACCEPT_INPUT() \
{ \
{ .type = TSParseActionTypeAccept } \
}
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_PARSER_H_

1
yaml-test-suite Submodule

@ -0,0 +1 @@
Subproject commit 053b73a9c12c0cd76da797fdc2ffbd4bb5264c12

1319
yarn.lock Normal file

File diff suppressed because it is too large Load diff