tree-sitter-toml/grammar.js
Ika 7df9880bf8
feat: upgrade to TOML spec v1.0.0-rc.1 (#11)
* chore: add toml-spec
* feat: upgrade to TOML spec v1.0.0-rc.1
* fix: accurate position for multiline string end
2020-06-29 00:08:52 +08:00

212 lines
5.4 KiB
JavaScript

const { Charset } = require("regexp-util");
/**
 * Builds a regular expression matching any single character that is NOT a
 * member of the given character set.
 *
 * @param {{ toString(): string }} charset - Character set whose string form
 *   is wrapped in one leading and one trailing character (the code assumes a
 *   bracketed class such as `[a-z]` and strips exactly those two characters).
 * @returns {RegExp} A flag-less regex of the form `[^…]`.
 */
const getInverseRegex = (charset) => {
  const bracketedClass = charset.toString();
  // Strip the first and last characters so the members can be re-wrapped
  // inside a negated class.
  const classMembers = bracketedClass.slice(1, -1);
  return new RegExp("[^" + classMembers + "]");
};
// Character set used below as the basis for "characters not allowed in
// comments/strings". NOTE(review): presumably the range U+0000–U+001F plus
// the single code point U+007F — confirm against the regexp-util `Charset`
// constructor semantics.
const control_chars = new Charset(
  [0x00, 0x1f],
  0x7f,
);
// Line break: LF, optionally preceded by CR.
const newline = /\r?\n/;
// Decimal integer: optional sign, then either a lone `0` or a digit run that
// starts with 1-9; a single `_` may separate adjacent digits.
const decimal_integer = /[+-]?(0|[1-9](_?[0-9])*)/;
// Same as above but without the "no leading zero" restriction; used only for
// the exponent of a float.
const decimal_integer_in_float_exponent_part = /[+-]?[0-9](_?[0-9])*/; // allow leading zeros
// Prefixed integers (`0x`, `0o`, `0b`): no sign, lowercase prefix, at least
// one digit, single `_` allowed between digits.
const hexadecimal_integer = /0x[0-9a-fA-F](_?[0-9a-fA-F])*/;
const octal_integer = /0o[0-7](_?[0-7])*/;
const binary_integer = /0b[01](_?[01])*/;
// Fractional part of a float: `.` followed by at least one digit.
const float_fractional_part = /[.][0-9](_?[0-9])*/;
// Exponent part of a float: `e`/`E` followed by a (possibly signed,
// possibly zero-prefixed) integer.
const float_exponent_part = seq(/[eE]/, decimal_integer_in_float_exponent_part);
// RFC 3339 building blocks shared by the date/time rules of the grammar.

// Full date `YYYY-MM-DD`. RFC 3339 defines `date-fullyear` as exactly four
// digits, so the year is matched with `{4}` rather than an open-ended digit
// run (which would accept inputs such as `1-01-01` or `19790-05-27`).
const rfc3339_date = /([0-9]{4})-(0[1-9]|1[012])-(0[1-9]|[12][0-9]|3[01])/;
// Separator between the date and the time: a space or `t`/`T`.
const rfc3339_delimiter = /[ tT]/;
// Time `HH:MM:SS` with optional fractional seconds; `60` is accepted for the
// seconds field (leap second).
const rfc3339_time = /([01][0-9]|2[0-3]):([0-5][0-9]):([0-5][0-9]|60)([.][0-9]+)?/;
// UTC designator `z`/`Z` or a numeric offset `±HH:MM`.
const rfc3339_offset = /([zZ])|([+-]([01][0-9]|2[0-3]):[0-5][0-9])/;
module.exports = grammar({
name: "toml",
externals: $ => [
$._line_ending_or_eof,
$._multiline_basic_string_content,
$._multiline_basic_string_end,
$._multiline_literal_string_content,
$._multiline_literal_string_end,
],
extras: $ => [$.comment, /[ \t]/],
rules: {
document: $ =>
seq(
repeat(choice($.pair, newline)),
repeat(choice($.table, $.table_array_element)),
),
comment: $ =>
token(seq("#", repeat(getInverseRegex(control_chars.subtract("\t"))))),
table: $ =>
seq(
"[",
choice($.dotted_key, $.key),
"]",
$._line_ending_or_eof,
repeat(choice($.pair, newline)),
),
table_array_element: $ =>
seq(
"[[",
choice($.dotted_key, $.key),
"]]",
$._line_ending_or_eof,
repeat(choice($.pair, newline)),
),
pair: $ => seq($._inline_pair, $._line_ending_or_eof),
_inline_pair: $ => seq(choice($.dotted_key, $.key), "=", $._inline_value),
key: $ => choice($._bare_key, $._quoted_key),
dotted_key: $ => seq(choice($.dotted_key, $.key), ".", $.key),
_bare_key: $ => /[A-Za-z0-9_-]+/,
_quoted_key: $ => choice($._basic_string, $._literal_string),
_inline_value: $ =>
choice(
$.string,
$.integer,
$.float,
$.boolean,
$.offset_date_time,
$.local_date_time,
$.local_date,
$.local_time,
$.array,
$.inline_table,
),
string: $ =>
choice(
$._basic_string,
$._multiline_basic_string,
$._literal_string,
$._multiline_literal_string,
),
_basic_string: $ =>
seq(
'"',
repeat(
choice(
token.immediate(
repeat1(
getInverseRegex(control_chars.subtract("\t").union('"', "\\")),
),
),
$.escape_sequence,
),
),
token.immediate('"'),
),
_multiline_basic_string: $ =>
seq(
'"""',
repeat(
choice(
token.immediate(
repeat1(
getInverseRegex(control_chars.subtract("\t").union('"', "\\")),
),
),
$._multiline_basic_string_content,
token.immediate(newline),
$.escape_sequence,
alias($._escape_line_ending, $.escape_sequence),
),
),
$._multiline_basic_string_end,
),
escape_sequence: $ =>
token.immediate(/\\([btnfr"\\]|u[0-9a-fA-F]{4}|U[0-9a-fA-F]{8})/),
_escape_line_ending: $ => token.immediate(seq(/\\/, newline)),
_literal_string: $ =>
seq(
"'",
optional(
token.immediate(
repeat1(getInverseRegex(control_chars.union("'").subtract("\t"))),
),
),
token.immediate("'"),
),
_multiline_literal_string: $ =>
seq(
"'''",
repeat(
choice(
token.immediate(
repeat1(getInverseRegex(control_chars.union("'").subtract("\t"))),
),
$._multiline_literal_string_content,
token.immediate(newline),
),
),
$._multiline_literal_string_end,
),
integer: $ =>
choice(
decimal_integer,
hexadecimal_integer,
octal_integer,
binary_integer,
),
float: $ =>
choice(
token(
seq(
decimal_integer,
choice(
float_fractional_part,
seq(optional(float_fractional_part), float_exponent_part),
),
),
),
/[+-]?(inf|nan)/,
),
boolean: $ => /true|false/,
offset_date_time: $ =>
token(seq(rfc3339_date, rfc3339_delimiter, rfc3339_time, rfc3339_offset)),
local_date_time: $ =>
token(seq(rfc3339_date, rfc3339_delimiter, rfc3339_time)),
local_date: $ => rfc3339_date,
local_time: $ => rfc3339_time,
array: $ =>
seq(
"[",
repeat(newline),
optional(
seq(
$._inline_value,
repeat(newline),
repeat(seq(",", repeat(newline), $._inline_value, repeat(newline))),
optional(seq(",", repeat(newline))),
),
),
"]",
),
inline_table: $ =>
seq(
"{",
optional(
seq(
alias($._inline_pair, $.pair),
repeat(seq(",", alias($._inline_pair, $.pair))),
),
),
"}",
),
},
});