feat: upgrade to TOML spec v1.0.0-rc.1 (#11)

* chore: add toml-spec
* feat: upgrade to TOML spec v1.0.0-rc.1
* fix: accurate position for multiline string end
This commit is contained in:
Ika 2020-06-29 00:08:52 +08:00 committed by GitHub
parent 470dc1e3bc
commit 7df9880bf8
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 2424 additions and 957 deletions

View file

@ -3,7 +3,7 @@
[![npm](https://img.shields.io/npm/v/tree-sitter-toml.svg)](https://www.npmjs.com/package/tree-sitter-toml)
[![build](https://img.shields.io/travis/com/ikatyang/tree-sitter-toml/master.svg)](https://travis-ci.com/ikatyang/tree-sitter-toml/builds)
TOML ([TOML Spec v0.5.0](https://github.com/toml-lang/toml/blob/master/versions/en/toml-v0.5.0.md)) grammar for [tree-sitter](https://github.com/tree-sitter/tree-sitter)
TOML ([TOML Spec v1.0.0-rc.1](https://github.com/toml-lang/toml/blob/v1.0.0-rc.1/versions/en/toml-v1.0.0-rc.1.md)) grammar for [tree-sitter](https://github.com/tree-sitter/tree-sitter)
[Changelog](https://github.com/ikatyang/tree-sitter-toml/blob/master/CHANGELOG.md)

View file

@ -166,7 +166,6 @@ key
(document
(ERROR
(key)
(key)))
================================================================================

View file

@ -40,6 +40,24 @@ key = # INVALID
(key)
(comment)))
================================================================================
INVALID - key/value pair - no newline between pairs
================================================================================
first = "Tom" last = "Preston-Werner" # INVALID
--------------------------------------------------------------------------------
(document
(pair
(key)
(string)
(MISSING _line_ending_or_eof))
(pair
(key)
(string)
(comment)))
================================================================================
VALID - keys - bare keys
================================================================================
@ -173,19 +191,24 @@ name = "Pradyun"
VALID - keys - directly defined nested keys
================================================================================
a.b.c = 1
a.d = 2
# This makes the key "fruit" into a table.
fruit.apple.smooth = true
# So then you can add to the table "fruit" like so:
fruit.orange = 2
--------------------------------------------------------------------------------
(document
(comment)
(pair
(dotted_key
(dotted_key
(key)
(key))
(key))
(integer))
(boolean))
(comment)
(pair
(dotted_key
(key)
@ -196,9 +219,59 @@ a.d = 2
VALID - keys - overlapped keys (semantically INVALID)
================================================================================
# THIS IS INVALID
a.b = 1
a.b.c = 2
# THE FOLLOWING IS INVALID
# This defines the value of fruit.apple to be an integer.
fruit.apple = 1
# But then this treats fruit.apple like it's a table.
# You can't turn an integer into a table.
fruit.apple.smooth = true
--------------------------------------------------------------------------------
(document
(comment)
(comment)
(pair
(dotted_key
(key)
(key))
(integer))
(comment)
(comment)
(pair
(dotted_key
(dotted_key
(key)
(key))
(key))
(boolean)))
================================================================================
VALID - keys - order
================================================================================
# VALID BUT DISCOURAGED
apple.type = "fruit"
orange.type = "fruit"
apple.skin = "thin"
orange.skin = "thick"
apple.color = "red"
orange.color = "orange"
# RECOMMENDED
apple.type = "fruit"
apple.skin = "thin"
apple.color = "red"
orange.type = "fruit"
orange.skin = "thick"
orange.color = "orange"
--------------------------------------------------------------------------------
@ -208,14 +281,63 @@ a.b.c = 2
(dotted_key
(key)
(key))
(integer))
(string))
(pair
(dotted_key
(dotted_key
(key)
(key))
(key)
(key))
(integer)))
(string))
(pair
(dotted_key
(key)
(key))
(string))
(pair
(dotted_key
(key)
(key))
(string))
(pair
(dotted_key
(key)
(key))
(string))
(pair
(dotted_key
(key)
(key))
(string))
(comment)
(pair
(dotted_key
(key)
(key))
(string))
(pair
(dotted_key
(key)
(key))
(string))
(pair
(dotted_key
(key)
(key))
(string))
(pair
(dotted_key
(key)
(key))
(string))
(pair
(dotted_key
(key)
(key))
(string))
(pair
(dotted_key
(key)
(key))
(string)))
================================================================================
VALID - string - basic strings
@ -291,6 +413,42 @@ str3 = """\
(escape_sequence)
(escape_sequence))))
================================================================================
VALID - string - multi-line basic strings with double quotes
================================================================================
str4 = """Here are two quotation marks: "". Simple enough."""
# str5 = """Here are three quotation marks: """.""" # INVALID
str5 = """Here are three quotation marks: ""\"."""
str6 = """Here are fifteen quotation marks: ""\"""\"""\"""\"""\"."""
# "This," she said, "is just a pointless statement."
str7 = """"This," she said, "is just a pointless statement.""""
--------------------------------------------------------------------------------
(document
(pair
(key)
(string))
(comment)
(pair
(key)
(string
(escape_sequence)))
(pair
(key)
(string
(escape_sequence)
(escape_sequence)
(escape_sequence)
(escape_sequence)
(escape_sequence)))
(comment)
(pair
(key)
(string)))
================================================================================
VALID - string - literal strings
================================================================================
@ -340,6 +498,33 @@ trimmed in raw strings.
(key)
(string)))
================================================================================
VALID - string - multi-line literal strings with single quotes
================================================================================
quot15 = '''Here are fifteen quotation marks: """""""""""""""'''
# apos15 = '''Here are fifteen apostrophes: '''''''''''''''''' # INVALID
apos15 = "Here are fifteen apostrophes: '''''''''''''''"
# 'That's still pointless', she said.
str = ''''That's still pointless', she said.'''
--------------------------------------------------------------------------------
(document
(pair
(key)
(string))
(comment)
(pair
(key)
(string))
(comment)
(pair
(key)
(string)))
================================================================================
VALID - integer - signed/unsigned decimal integer
================================================================================
@ -440,7 +625,7 @@ flt3 = -0.01
# exponent
flt4 = 5e+22
flt5 = 1e6
flt5 = 1e06
flt6 = -2E-2
# both
@ -478,7 +663,7 @@ flt7 = 6.626e-34
VALID - float - float with underscores
================================================================================
flt8 = 9_224_617.445_991_228_313
flt8 = 224_617.445_991_228
--------------------------------------------------------------------------------
@ -630,16 +815,21 @@ lt2 = 00:32:00.999999
(local_time)))
================================================================================
VALID - array - basic (semantically INVALID for children with mixed types)
VALID - array - basic
================================================================================
arr1 = [ 1, 2, 3 ]
arr2 = [ "red", "yellow", "green" ]
arr3 = [ [ 1, 2 ], [3, 4, 5] ]
arr4 = [ "all", 'strings', """are the same""", '''type''']
arr5 = [ [ 1, 2 ], ["a", "b", "c"] ]
integers = [ 1, 2, 3 ]
colors = [ "red", "yellow", "green" ]
nested_array_of_int = [ [ 1, 2 ], [3, 4, 5] ]
nested_mixed_array = [ [ 1, 2 ], ["a", "b", "c"] ]
string_array = [ "all", 'strings', """are the same""", '''type''' ]
arr6 = [ 1, 2.0 ] # INVALID
# Mixed-type arrays are allowed
numbers = [ 0.1, 0.2, 0.5, 1, 2, 5 ]
contributors = [
"Foo Bar <foo@example.com>",
{ name = "Baz Qux", email = "bazqux@example.com", url = "https://example.com/bazqux" }
]
--------------------------------------------------------------------------------
@ -666,13 +856,6 @@ arr6 = [ 1, 2.0 ] # INVALID
(integer)
(integer)
(integer))))
(pair
(key)
(array
(string)
(string)
(string)
(string)))
(pair
(key)
(array
@ -686,19 +869,44 @@ arr6 = [ 1, 2.0 ] # INVALID
(pair
(key)
(array
(string)
(string)
(string)
(string)))
(comment)
(pair
(key)
(array
(float)
(float)
(float)
(integer)
(float))
(comment)))
(integer)
(integer)))
(pair
(key)
(array
(string)
(inline_table
(pair
(key)
(string))
(pair
(key)
(string))
(pair
(key)
(string))))))
================================================================================
VALID - array - allow newlines
================================================================================
arr7 = [
integers2 = [
1, 2, 3
]
arr8 = [
integers3 = [
1,
2, # this is ok
]
@ -833,6 +1041,8 @@ VALID - table - directly defined nested header key
# [x.y.z] need these
[x.y.z.w] # for this to work
[x] # defining a super-table afterwards is ok
--------------------------------------------------------------------------------
(document
@ -847,6 +1057,9 @@ VALID - table - directly defined nested header key
(key))
(key))
(key))
(comment))
(table
(key)
(comment)))
================================================================================
@ -855,11 +1068,11 @@ VALID - table - duplicate header key (semantically INVALID)
# DO NOT DO THIS
[a]
b = 1
[fruit]
apple = "red"
[a]
c = 2
[fruit]
orange = "orange"
--------------------------------------------------------------------------------
@ -869,12 +1082,12 @@ c = 2
(key)
(pair
(key)
(integer)))
(string)))
(table
(key)
(pair
(key)
(integer))))
(string))))
================================================================================
VALID - table - overlapped header key (semantically INVALID)
@ -882,11 +1095,11 @@ VALID - table - overlapped header key (semantically INVALID)
# DO NOT DO THIS EITHER
[a]
b = 1
[fruit]
apple = "red"
[a.b]
c = 2
[fruit.apple]
texture = "smooth"
--------------------------------------------------------------------------------
@ -896,14 +1109,98 @@ c = 2
(key)
(pair
(key)
(integer)))
(string)))
(table
(dotted_key
(key)
(key))
(pair
(key)
(integer))))
(string))))
================================================================================
VALID - table - order
================================================================================
# VALID BUT DISCOURAGED
[fruit.apple]
[animal]
[fruit.orange]
# RECOMMENDED
[fruit.apple]
[fruit.orange]
[animal]
--------------------------------------------------------------------------------
(document
(comment)
(table
(dotted_key
(key)
(key)))
(table
(key))
(table
(dotted_key
(key)
(key))
(comment))
(table
(dotted_key
(key)
(key)))
(table
(dotted_key
(key)
(key)))
(table
(key)))
================================================================================
VALID - table - sub-table
================================================================================
[fruit]
apple.color = "red"
apple.taste.sweet = true
# [fruit.apple] # INVALID
# [fruit.apple.taste] # INVALID
[fruit.apple.texture] # you can add sub-tables
smooth = true
--------------------------------------------------------------------------------
(document
(table
(key)
(pair
(dotted_key
(key)
(key))
(string))
(pair
(dotted_key
(dotted_key
(key)
(key))
(key))
(boolean))
(comment)
(comment))
(table
(dotted_key
(dotted_key
(key)
(key))
(key))
(comment)
(pair
(key)
(boolean))))
================================================================================
VALID - inline table - basic
@ -943,6 +1240,39 @@ animal = { type.name = "pug" }
(key))
(string)))))
================================================================================
VALID - inline table - overlapped key (semantically INVALID)
================================================================================
[product]
type = { name = "Nail" }
# type.edible = false # INVALID
[product]
type.name = "Nail"
# type = { edible = false } # INVALID
--------------------------------------------------------------------------------
(document
(table
(key)
(pair
(key)
(inline_table
(pair
(key)
(string))))
(comment))
(table
(key)
(pair
(dotted_key
(key)
(key))
(string))
(comment)))
================================================================================
VALID - array of tables - basic
================================================================================
@ -956,6 +1286,7 @@ sku = 738594937
[[products]]
name = "Nail"
sku = 284758393
color = "gray"
--------------------------------------------------------------------------------
@ -990,11 +1321,11 @@ VALID - array of tables - nested arrays of tables
[[fruit]]
name = "apple"
[fruit.physical]
[fruit.physical] # subtable
color = "red"
shape = "round"
[[fruit.variety]]
[[fruit.variety]] # nested array of tables
name = "red delicious"
[[fruit.variety]]
@ -1018,6 +1349,7 @@ VALID - array of tables - nested arrays of tables
(dotted_key
(key)
(key))
(comment)
(pair
(key)
(string))
@ -1028,6 +1360,7 @@ VALID - array of tables - nested arrays of tables
(dotted_key
(key)
(key))
(comment)
(pair
(key)
(string)))
@ -1051,6 +1384,42 @@ VALID - array of tables - nested arrays of tables
(key)
(string))))
================================================================================
VALID - array of tables - append to array in undefined table (semantically INVALID)
================================================================================
# INVALID TOML DOC
[fruit.physical] # subtable, but to which parent element should it belong?
color = "red"
shape = "round"
[[fruit]] # parser must throw an error upon discovering that "fruit" is
# an array rather than a table
name = "apple"
--------------------------------------------------------------------------------
(document
(comment)
(table
(dotted_key
(key)
(key))
(comment)
(pair
(key)
(string))
(pair
(key)
(string)))
(table_array_element
(key)
(comment)
(comment)
(pair
(key)
(string))))
================================================================================
VALID - array of tables - append to statically defined array (semantically INVALID)
================================================================================
@ -1082,10 +1451,18 @@ VALID - array of tables - append to table (semantically INVALID)
[[fruit.variety]]
name = "red delicious"
# This table conflicts with the previous table
# INVALID: This table conflicts with the previous array of tables
[fruit.variety]
name = "granny smith"
[fruit.physical]
color = "red"
shape = "round"
# INVALID: This array of tables conflicts with the previous table
[[fruit.physical]]
color = "green"
--------------------------------------------------------------------------------
(document
@ -1104,6 +1481,24 @@ VALID - array of tables - append to table (semantically INVALID)
(string))
(comment))
(table
(dotted_key
(key)
(key))
(pair
(key)
(string)))
(table
(dotted_key
(key)
(key))
(pair
(key)
(string))
(pair
(key)
(string))
(comment))
(table_array_element
(dotted_key
(key)
(key))

View file

@ -7,11 +7,12 @@ const control_chars = new Charset([0x0, 0x1f], 0x7f);
const newline = /\r?\n/;
const decimal_integer = /[+-]?(0|[1-9](_?[0-9])*)/;
const decimal_integer_in_float_exponent_part = /[+-]?[0-9](_?[0-9])*/; // allow leading zeros
const hexadecimal_integer = /0x[0-9a-fA-F](_?[0-9a-fA-F])*/;
const octal_integer = /0o[0-7](_?[0-7])*/;
const binary_integer = /0b[01](_?[01])*/;
const float_fractional_part = /[.][0-9](_?[0-9])*/;
const float_exponent_part = seq(/[eE]/, decimal_integer);
const float_exponent_part = seq(/[eE]/, decimal_integer_in_float_exponent_part);
const rfc3339_date = /([0-9]+)-(0[1-9]|1[012])-(0[1-9]|[12][0-9]|3[01])/;
const rfc3339_delimiter = /[ tT]/;
@ -21,7 +22,13 @@ const rfc3339_offset = /([zZ])|([+-]([01][0-9]|2[0-3]):[0-5][0-9])/;
module.exports = grammar({
name: "toml",
externals: $ => [$._line_ending_or_eof],
externals: $ => [
$._line_ending_or_eof,
$._multiline_basic_string_content,
$._multiline_basic_string_end,
$._multiline_literal_string_content,
$._multiline_literal_string_end,
],
extras: $ => [$.comment, /[ \t]/],
@ -32,7 +39,8 @@ module.exports = grammar({
repeat(choice($.table, $.table_array_element)),
),
comment: $ => /#.*/,
comment: $ =>
token(seq("#", repeat(getInverseRegex(control_chars.subtract("\t"))))),
table: $ =>
seq(
@ -87,7 +95,9 @@ module.exports = grammar({
repeat(
choice(
token.immediate(
repeat1(getInverseRegex(control_chars.union('"', "\\"))),
repeat1(
getInverseRegex(control_chars.subtract("\t").union('"', "\\")),
),
),
$.escape_sequence,
),
@ -100,15 +110,17 @@ module.exports = grammar({
repeat(
choice(
token.immediate(
repeat1(getInverseRegex(control_chars.union('"', "\\"))),
repeat1(
getInverseRegex(control_chars.subtract("\t").union('"', "\\")),
),
),
token.immediate(/"{1,2}/),
$._multiline_basic_string_content,
token.immediate(newline),
$.escape_sequence,
alias($._escape_line_ending, $.escape_sequence),
),
),
token.immediate('"""'),
$._multiline_basic_string_end,
),
escape_sequence: $ =>
token.immediate(/\\([btnfr"\\]|u[0-9a-fA-F]{4}|U[0-9a-fA-F]{8})/),
@ -131,11 +143,11 @@ module.exports = grammar({
token.immediate(
repeat1(getInverseRegex(control_chars.union("'").subtract("\t"))),
),
token.immediate(/'{1,2}/),
$._multiline_literal_string_content,
token.immediate(newline),
),
),
token.immediate("'''"),
$._multiline_literal_string_end,
),
integer: $ =>

69
src/grammar.json generated
View file

@ -39,8 +39,23 @@
]
},
"comment": {
"type": "PATTERN",
"value": "#.*"
"type": "TOKEN",
"content": {
"type": "SEQ",
"members": [
{
"type": "STRING",
"value": "#"
},
{
"type": "REPEAT",
"content": {
"type": "PATTERN",
"value": "[^\\u0000-\\u0008\\u000a-\\u001f\\u007f]"
}
}
]
}
},
"table": {
"type": "SEQ",
@ -313,7 +328,7 @@
"type": "REPEAT1",
"content": {
"type": "PATTERN",
"value": "[^\\u0000-\\u001f\\u0022\\u005c\\u007f]"
"value": "[^\\u0000-\\u0008\\u000a-\\u001f\\u0022\\u005c\\u007f]"
}
}
},
@ -351,16 +366,13 @@
"type": "REPEAT1",
"content": {
"type": "PATTERN",
"value": "[^\\u0000-\\u001f\\u0022\\u005c\\u007f]"
"value": "[^\\u0000-\\u0008\\u000a-\\u001f\\u0022\\u005c\\u007f]"
}
}
},
{
"type": "IMMEDIATE_TOKEN",
"content": {
"type": "PATTERN",
"value": "\"{1,2}"
}
"type": "SYMBOL",
"name": "_multiline_basic_string_content"
},
{
"type": "IMMEDIATE_TOKEN",
@ -386,11 +398,8 @@
}
},
{
"type": "IMMEDIATE_TOKEN",
"content": {
"type": "STRING",
"value": "\"\"\""
}
"type": "SYMBOL",
"name": "_multiline_basic_string_end"
}
]
},
@ -474,11 +483,8 @@
}
},
{
"type": "IMMEDIATE_TOKEN",
"content": {
"type": "PATTERN",
"value": "'{1,2}"
}
"type": "SYMBOL",
"name": "_multiline_literal_string_content"
},
{
"type": "IMMEDIATE_TOKEN",
@ -491,11 +497,8 @@
}
},
{
"type": "IMMEDIATE_TOKEN",
"content": {
"type": "STRING",
"value": "'''"
}
"type": "SYMBOL",
"name": "_multiline_literal_string_end"
}
]
},
@ -563,7 +566,7 @@
},
{
"type": "PATTERN",
"value": "[+-]?(0|[1-9](_?[0-9])*)"
"value": "[+-]?[0-9](_?[0-9])*"
}
]
}
@ -806,6 +809,22 @@
{
"type": "SYMBOL",
"name": "_line_ending_or_eof"
},
{
"type": "SYMBOL",
"name": "_multiline_basic_string_content"
},
{
"type": "SYMBOL",
"name": "_multiline_basic_string_end"
},
{
"type": "SYMBOL",
"name": "_multiline_literal_string_content"
},
{
"type": "SYMBOL",
"name": "_multiline_literal_string_end"
}
],
"inline": [],

1692
src/parser.c generated

File diff suppressed because it is too large Load diff

View file

@ -6,7 +6,11 @@
#endif
enum TokenType {
LINE_ENDING_OR_EOF
LINE_ENDING_OR_EOF,
MULTILINE_BASIC_STRING_CONTENT,
MULTILINE_BASIC_STRING_END,
MULTILINE_LITERAL_STRING_CONTENT,
MULTILINE_LITERAL_STRING_END,
};
/*
 * Creates the payload for the external scanner.
 *
 * This scanner keeps no state, so no payload is allocated and NULL is
 * returned. The parameter list is spelled `(void)` so the definition is a
 * proper prototype rather than an old-style unprototyped declarator.
 */
void *tree_sitter_toml_external_scanner_create(void) {
  return NULL;
}
@ -14,28 +18,71 @@ void tree_sitter_toml_external_scanner_destroy(void *payload) {}
/*
 * Serialization hook of the external scanner.
 *
 * Neither argument is used: nothing is written to `buffer` and the function
 * always returns 0.
 */
unsigned tree_sitter_toml_external_scanner_serialize(void *payload, char *buffer) {
  (void)payload;
  (void)buffer;
  return 0;
}
/*
 * Deserialization hook of the external scanner.
 *
 * All three arguments are ignored and the function has no effect.
 */
void tree_sitter_toml_external_scanner_deserialize(void *payload, const char *buffer, unsigned length) {
  (void)payload;
  (void)buffer;
  (void)length;
}
/*
 * Scans a run of `delimiter` characters and classifies it as either string
 * content or the closing delimiter of a multi-line string.
 *
 * Returns false, without consuming anything, when `end_symbol` is not in
 * `valid_symbols` or the lookahead is not `delimiter`. Otherwise it always
 * returns true with `lexer->result_symbol` set according to how many
 * consecutive delimiters start at the current position:
 *
 *   1 delimiter   -> content_symbol, token covers that one delimiter
 *   2 delimiters  -> content_symbol, token covers both delimiters
 *   3 delimiters  -> end_symbol, token covers all three delimiters
 *   4 or more     -> content_symbol, token covers only the FIRST delimiter
 *
 * In the last case three delimiters have been advanced over, but the token
 * end stays where it was marked after the first one.
 */
bool tree_sitter_toml_external_scanner_scan_multiline_string_end(TSLexer *lexer, const bool *valid_symbols, int32_t delimiter, enum TokenType content_symbol, enum TokenType end_symbol) {
  if (!(valid_symbols[end_symbol] && lexer->lookahead == delimiter)) {
    return false;
  }

  /* Consume the first delimiter and provisionally end the token after it. */
  lexer->advance(lexer, false);
  lexer->mark_end(lexer);

  /* Consume up to two more delimiters (never more than three in total). */
  unsigned run_length = 1;
  while (run_length < 3 && lexer->lookahead == delimiter) {
    lexer->advance(lexer, false);
    run_length++;
  }

  if (lexer->lookahead == delimiter) {
    /*
     * Only reachable with three delimiters consumed and a fourth one ahead:
     * keep the token end after the first delimiter and report content.
     */
    lexer->result_symbol = content_symbol;
    return true;
  }

  /* The run is exactly `run_length` long; extend the token over all of it. */
  if (run_length > 1) {
    lexer->mark_end(lexer);
  }
  lexer->result_symbol = (run_length == 3) ? end_symbol : content_symbol;
  return true;
}
bool tree_sitter_toml_external_scanner_scan(
void *payload,
TSLexer *lexer,
const bool *valid_symbols
) {
lexer->result_symbol = LINE_ENDING_OR_EOF;
while (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
lexer->advance(lexer, true);
}
if (lexer->lookahead == 0 || lexer->lookahead == '\n') {
if (
tree_sitter_toml_external_scanner_scan_multiline_string_end(lexer, valid_symbols, '"', MULTILINE_BASIC_STRING_CONTENT, MULTILINE_BASIC_STRING_END)
|| tree_sitter_toml_external_scanner_scan_multiline_string_end(lexer, valid_symbols, '\'', MULTILINE_LITERAL_STRING_CONTENT, MULTILINE_LITERAL_STRING_END)
) {
return true;
}
if (lexer->lookahead == '\r') {
lexer->advance(lexer, true);
if (lexer->lookahead == '\n') {
if (valid_symbols[LINE_ENDING_OR_EOF]) {
lexer->result_symbol = LINE_ENDING_OR_EOF;
while (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
lexer->advance(lexer, true);
}
if (lexer->lookahead == 0 || lexer->lookahead == '\n') {
return true;
}
if (lexer->lookahead == '\r') {
lexer->advance(lexer, true);
if (lexer->lookahead == '\n') {
return true;
}
}
}
return false;
}

1031
toml-spec/spec.md Normal file

File diff suppressed because it is too large Load diff