fix: disallow invalid whitespaces between scalar components (#1)
This commit is contained in:
parent
46de5998d4
commit
e2b28db714
162
corpus/custom.txt
Normal file
162
corpus/custom.txt
Normal file
|
@ -0,0 +1,162 @@
|
|||
================================================================================
|
||||
VALID - empty file
|
||||
================================================================================
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
(root)
|
||||
|
||||
================================================================================
|
||||
INVALID - key/value pair - multiline string for keys are not allowed
|
||||
================================================================================
|
||||
|
||||
"""
|
||||
invalid
|
||||
multiline
|
||||
basic
|
||||
key
|
||||
""" = false
|
||||
|
||||
'''
|
||||
invalid
|
||||
multiline
|
||||
literal
|
||||
key
|
||||
''' = false
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
(root
|
||||
(pair
|
||||
(key) (ERROR) (boolean)
|
||||
)
|
||||
(pair
|
||||
(key) (ERROR) (boolean)
|
||||
)
|
||||
)
|
||||
|
||||
================================================================================
|
||||
INVALID - float - whitespaces between its components are not allowed
|
||||
================================================================================
|
||||
|
||||
invalid_fractional = 1 .0
|
||||
invalid_exponent = 1 e 2
|
||||
invalid_both = 1 .0 e 2
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
(ERROR
|
||||
(dotted_key
|
||||
(key) (ERROR) (key)
|
||||
)
|
||||
)
|
||||
|
||||
================================================================================
|
||||
INVALID - offset date time - whitespaces between its components are not allowed
|
||||
================================================================================
|
||||
|
||||
invalid1 = 1979-05-27 07:32:00 Z
|
||||
invalid2 = 1979-05-27 T 07:32:00 Z
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
(ERROR
|
||||
(key) (ERROR) (local_time) (local_time)
|
||||
)
|
||||
|
||||
================================================================================
|
||||
INVALID - local date time - whitespaces between its components are not allowed
|
||||
================================================================================
|
||||
|
||||
invalid1 = 1979-05-27 07:32:00
|
||||
invalid2 = 1979-05-27 T 07:32:00
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
(root
|
||||
(pair
|
||||
(key) (ERROR) (local_time)
|
||||
)
|
||||
(pair
|
||||
(key) (ERROR) (local_time)
|
||||
)
|
||||
)
|
||||
|
||||
================================================================================
|
||||
INVALID - table - multiline string for header keys are not allowed
|
||||
================================================================================
|
||||
|
||||
["""
|
||||
invalid
|
||||
multiline
|
||||
basic
|
||||
key
|
||||
"""]
|
||||
|
||||
['''
|
||||
invalid
|
||||
multiline
|
||||
basic
|
||||
key
|
||||
''']
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
(root
|
||||
(table
|
||||
(key) (ERROR)
|
||||
)
|
||||
(table
|
||||
(key) (ERROR)
|
||||
)
|
||||
)
|
||||
|
||||
================================================================================
|
||||
INVALID - inline table - newlines outside of pairs are not allowed
|
||||
================================================================================
|
||||
|
||||
key = {
|
||||
newline = true
|
||||
}
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
(root
|
||||
(pair
|
||||
(key)
|
||||
(inline_table
|
||||
(MISSING "}")
|
||||
)
|
||||
)
|
||||
(pair
|
||||
(key) (boolean)
|
||||
)
|
||||
(ERROR)
|
||||
)
|
||||
|
||||
================================================================================
|
||||
INVALID - array of tables - multiline string for header keys are not allowed
|
||||
================================================================================
|
||||
|
||||
[["""
|
||||
invalid
|
||||
multiline
|
||||
basic
|
||||
key
|
||||
"""]]
|
||||
|
||||
[['''
|
||||
invalid
|
||||
multiline
|
||||
basic
|
||||
key
|
||||
''']]
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
(ERROR
|
||||
(ERROR
|
||||
(key)
|
||||
)
|
||||
(key)
|
||||
)
|
|
@ -41,7 +41,7 @@ key = # INVALID
|
|||
(key)
|
||||
(comment)
|
||||
(integer
|
||||
(MISSING _decimal_integer)
|
||||
(MISSING "integer_token1")
|
||||
)
|
||||
)
|
||||
)
|
||||
|
|
80
grammar.js
80
grammar.js
|
@ -2,9 +2,23 @@ const { Charset } = require("regexp-util");
|
|||
|
||||
const getInverseRegex = charset =>
|
||||
new RegExp(`[^${charset.toString().slice(1, -1)}]`);
|
||||
const concatRegex = (...regexes) =>
|
||||
new RegExp(regexes.reduce((a, b) => a.concat(`(${b.source})`), []).join(""));
|
||||
|
||||
const control_chars = new Charset([0x0, 0x1f], 0x7f);
|
||||
const newline_regex = /(\r?\n)+/;
|
||||
const newline = /(\r?\n)+/;
|
||||
|
||||
const decimal_integer = /[+-]?(0|[1-9](_?[0-9])*)/;
|
||||
const hexadecimal_integer = /0x[0-9a-fA-F](_?[0-9a-fA-F])*/;
|
||||
const octal_integer = /0o[0-7](_?[0-7])*/;
|
||||
const binary_integer = /0b[01](_?[01])*/;
|
||||
const float_fractional_part = /[.][0-9](_?[0-9])*/;
|
||||
const float_exponent_part = concatRegex(/[eE]/, decimal_integer);
|
||||
|
||||
const rfc3339_date = /([0-9]+)-(0[1-9]|1[012])-(0[1-9]|[12][0-9]|3[01])/;
|
||||
const rfc3339_delimiter = /[ tT]/;
|
||||
const rfc3339_time = /([01][0-9]|2[0-3]):([0-5][0-9]):([0-5][0-9]|60)([.][0-9]+)?/;
|
||||
const rfc3339_offset = /([zZ])|([+-]([01][0-9]|2[0-3]):[0-5][0-9])/;
|
||||
|
||||
module.exports = grammar({
|
||||
name: "toml",
|
||||
|
@ -27,7 +41,7 @@ module.exports = grammar({
|
|||
),
|
||||
|
||||
comment: $ => /#.*/,
|
||||
_newline: $ => newline_regex,
|
||||
_newline: $ => newline,
|
||||
_newline_or_eof: $ => choice($._newline, $._eof),
|
||||
|
||||
...table_like("table", "[", "]"),
|
||||
|
@ -45,8 +59,7 @@ module.exports = grammar({
|
|||
key: $ => choice($._bare_key, $._quoted_key),
|
||||
dotted_key: $ => seq(choice($.dotted_key, $.key), ".", $.key),
|
||||
_bare_key: $ => /[A-Za-z0-9_-]+/,
|
||||
_quoted_key: $ =>
|
||||
choice($._singleline_basic_string, $._singleline_literal_string),
|
||||
_quoted_key: $ => choice($._basic_string, $._literal_string),
|
||||
|
||||
_inline_value: $ =>
|
||||
choice(
|
||||
|
@ -64,12 +77,12 @@ module.exports = grammar({
|
|||
|
||||
string: $ =>
|
||||
choice(
|
||||
$._singleline_basic_string,
|
||||
$._basic_string,
|
||||
$._multiline_basic_string,
|
||||
$._singleline_literal_string,
|
||||
$._literal_string,
|
||||
$._multiline_literal_string
|
||||
),
|
||||
_singleline_basic_string: $ =>
|
||||
_basic_string: $ =>
|
||||
seq(
|
||||
'"',
|
||||
repeat(
|
||||
|
@ -91,7 +104,7 @@ module.exports = grammar({
|
|||
repeat1(getInverseRegex(control_chars.union('"', "\\")))
|
||||
),
|
||||
token.immediate(/"{1,2}/),
|
||||
token.immediate(newline_regex),
|
||||
token.immediate(newline),
|
||||
$.escape_sequence,
|
||||
alias($._escape_line_ending, $.escape_sequence)
|
||||
)
|
||||
|
@ -103,7 +116,7 @@ module.exports = grammar({
|
|||
seq("\\", choice(/[btnfr"\\]/, /u[0-9a-fA-F]{4}/, /U[0-9a-fA-F]{8}/))
|
||||
),
|
||||
_escape_line_ending: $ => token.immediate(seq("\\", /\r?\n/)),
|
||||
_singleline_literal_string: $ =>
|
||||
_literal_string: $ =>
|
||||
seq(
|
||||
"'",
|
||||
optional(
|
||||
|
@ -122,7 +135,7 @@ module.exports = grammar({
|
|||
repeat1(getInverseRegex(control_chars.union("'").subtract("\t")))
|
||||
),
|
||||
token.immediate(/'{1,2}/),
|
||||
token.immediate(newline_regex)
|
||||
token.immediate(newline)
|
||||
)
|
||||
),
|
||||
token.immediate("'''")
|
||||
|
@ -130,49 +143,44 @@ module.exports = grammar({
|
|||
|
||||
integer: $ =>
|
||||
choice(
|
||||
$._decimal_integer,
|
||||
$._hexadecimal_integer,
|
||||
$._octal_integer,
|
||||
$._binary_integer
|
||||
decimal_integer,
|
||||
hexadecimal_integer,
|
||||
octal_integer,
|
||||
binary_integer
|
||||
),
|
||||
_decimal_integer: $ => /[+-]?(0|[1-9](_?[0-9])*)/,
|
||||
_hexadecimal_integer: $ => /0x[0-9a-fA-F](_?[0-9a-fA-F])*/,
|
||||
_octal_integer: $ => /0o[0-7](_?[0-7])*/,
|
||||
_binary_integer: $ => /0b[01](_?[01])*/,
|
||||
|
||||
float: $ =>
|
||||
choice(
|
||||
seq(
|
||||
$._decimal_integer,
|
||||
decimal_integer,
|
||||
choice(
|
||||
seq($._float_fractional_part, optional($._float_exponent_part)),
|
||||
$._float_exponent_part
|
||||
seq(
|
||||
token.immediate(float_fractional_part),
|
||||
optional(token.immediate(float_exponent_part))
|
||||
),
|
||||
token.immediate(float_exponent_part)
|
||||
)
|
||||
),
|
||||
/[+-]?(inf|nan)/
|
||||
),
|
||||
_float_fractional_part: $ => /[.][0-9](_?[0-9])*/,
|
||||
_float_exponent_part: $ => seq(/[eE]/, $._decimal_integer),
|
||||
|
||||
boolean: $ => /true|false/,
|
||||
|
||||
offset_date_time: $ =>
|
||||
seq(
|
||||
$._rfc3339_date,
|
||||
$._rfc3339_delimiter,
|
||||
$._rfc3339_time,
|
||||
$._rfc3339_offset
|
||||
rfc3339_date,
|
||||
token.immediate(rfc3339_delimiter),
|
||||
token.immediate(rfc3339_time),
|
||||
token.immediate(rfc3339_offset)
|
||||
),
|
||||
local_date_time: $ =>
|
||||
seq($._rfc3339_date, $._rfc3339_delimiter, $._rfc3339_time),
|
||||
local_date: $ => $._rfc3339_date,
|
||||
local_time: $ => $._rfc3339_time,
|
||||
|
||||
_rfc3339_date: $ => /([0-9]+)-(0[1-9]|1[012])-(0[1-9]|[12][0-9]|3[01])/,
|
||||
_rfc3339_delimiter: $ => /[ tT]/,
|
||||
_rfc3339_time: $ =>
|
||||
/([01][0-9]|2[0-3]):([0-5][0-9]):([0-5][0-9]|60)([.][0-9]+)?/,
|
||||
_rfc3339_offset: $ => /([zZ])|([+-]([01][0-9]|2[0-3]):[0-5][0-9])/,
|
||||
seq(
|
||||
rfc3339_date,
|
||||
token.immediate(rfc3339_delimiter),
|
||||
token.immediate(rfc3339_time)
|
||||
),
|
||||
local_date: $ => rfc3339_date,
|
||||
local_time: $ => rfc3339_time,
|
||||
|
||||
array: $ =>
|
||||
seq(
|
||||
|
|
141
src/grammar.json
generated
141
src/grammar.json
generated
|
@ -423,11 +423,11 @@
|
|||
"members": [
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "_singleline_basic_string"
|
||||
"name": "_basic_string"
|
||||
},
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "_singleline_literal_string"
|
||||
"name": "_literal_string"
|
||||
}
|
||||
]
|
||||
},
|
||||
|
@ -481,7 +481,7 @@
|
|||
"members": [
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "_singleline_basic_string"
|
||||
"name": "_basic_string"
|
||||
},
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
|
@ -489,7 +489,7 @@
|
|||
},
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "_singleline_literal_string"
|
||||
"name": "_literal_string"
|
||||
},
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
|
@ -497,7 +497,7 @@
|
|||
}
|
||||
]
|
||||
},
|
||||
"_singleline_basic_string": {
|
||||
"_basic_string": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{
|
||||
|
@ -641,7 +641,7 @@
|
|||
]
|
||||
}
|
||||
},
|
||||
"_singleline_literal_string": {
|
||||
"_literal_string": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{
|
||||
|
@ -727,38 +727,22 @@
|
|||
"type": "CHOICE",
|
||||
"members": [
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "_decimal_integer"
|
||||
},
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "_hexadecimal_integer"
|
||||
},
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "_octal_integer"
|
||||
},
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "_binary_integer"
|
||||
}
|
||||
]
|
||||
},
|
||||
"_decimal_integer": {
|
||||
"type": "PATTERN",
|
||||
"value": "[+-]?(0|[1-9](_?[0-9])*)"
|
||||
},
|
||||
"_hexadecimal_integer": {
|
||||
{
|
||||
"type": "PATTERN",
|
||||
"value": "0x[0-9a-fA-F](_?[0-9a-fA-F])*"
|
||||
},
|
||||
"_octal_integer": {
|
||||
{
|
||||
"type": "PATTERN",
|
||||
"value": "0o[0-7](_?[0-7])*"
|
||||
},
|
||||
"_binary_integer": {
|
||||
{
|
||||
"type": "PATTERN",
|
||||
"value": "0b[01](_?[01])*"
|
||||
}
|
||||
]
|
||||
},
|
||||
"float": {
|
||||
"type": "CHOICE",
|
||||
|
@ -767,8 +751,8 @@
|
|||
"type": "SEQ",
|
||||
"members": [
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "_decimal_integer"
|
||||
"type": "PATTERN",
|
||||
"value": "[+-]?(0|[1-9](_?[0-9])*)"
|
||||
},
|
||||
{
|
||||
"type": "CHOICE",
|
||||
|
@ -777,15 +761,21 @@
|
|||
"type": "SEQ",
|
||||
"members": [
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "_float_fractional_part"
|
||||
"type": "IMMEDIATE_TOKEN",
|
||||
"content": {
|
||||
"type": "PATTERN",
|
||||
"value": "[.][0-9](_?[0-9])*"
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "_float_exponent_part"
|
||||
"type": "IMMEDIATE_TOKEN",
|
||||
"content": {
|
||||
"type": "PATTERN",
|
||||
"value": "([eE])([+-]?(0|[1-9](_?[0-9])*))"
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "BLANK"
|
||||
|
@ -795,8 +785,11 @@
|
|||
]
|
||||
},
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "_float_exponent_part"
|
||||
"type": "IMMEDIATE_TOKEN",
|
||||
"content": {
|
||||
"type": "PATTERN",
|
||||
"value": "([eE])([+-]?(0|[1-9](_?[0-9])*))"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
|
@ -808,23 +801,6 @@
|
|||
}
|
||||
]
|
||||
},
|
||||
"_float_fractional_part": {
|
||||
"type": "PATTERN",
|
||||
"value": "[.][0-9](_?[0-9])*"
|
||||
},
|
||||
"_float_exponent_part": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{
|
||||
"type": "PATTERN",
|
||||
"value": "[eE]"
|
||||
},
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "_decimal_integer"
|
||||
}
|
||||
]
|
||||
},
|
||||
"boolean": {
|
||||
"type": "PATTERN",
|
||||
"value": "true|false"
|
||||
|
@ -833,20 +809,29 @@
|
|||
"type": "SEQ",
|
||||
"members": [
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "_rfc3339_date"
|
||||
"type": "PATTERN",
|
||||
"value": "([0-9]+)-(0[1-9]|1[012])-(0[1-9]|[12][0-9]|3[01])"
|
||||
},
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "_rfc3339_delimiter"
|
||||
"type": "IMMEDIATE_TOKEN",
|
||||
"content": {
|
||||
"type": "PATTERN",
|
||||
"value": "[ tT]"
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "_rfc3339_time"
|
||||
"type": "IMMEDIATE_TOKEN",
|
||||
"content": {
|
||||
"type": "PATTERN",
|
||||
"value": "([01][0-9]|2[0-3]):([0-5][0-9]):([0-5][0-9]|60)([.][0-9]+)?"
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "_rfc3339_offset"
|
||||
"type": "IMMEDIATE_TOKEN",
|
||||
"content": {
|
||||
"type": "PATTERN",
|
||||
"value": "([zZ])|([+-]([01][0-9]|2[0-3]):[0-5][0-9])"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
|
@ -854,43 +839,33 @@
|
|||
"type": "SEQ",
|
||||
"members": [
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "_rfc3339_date"
|
||||
"type": "PATTERN",
|
||||
"value": "([0-9]+)-(0[1-9]|1[012])-(0[1-9]|[12][0-9]|3[01])"
|
||||
},
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "_rfc3339_delimiter"
|
||||
"type": "IMMEDIATE_TOKEN",
|
||||
"content": {
|
||||
"type": "PATTERN",
|
||||
"value": "[ tT]"
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "_rfc3339_time"
|
||||
"type": "IMMEDIATE_TOKEN",
|
||||
"content": {
|
||||
"type": "PATTERN",
|
||||
"value": "([01][0-9]|2[0-3]):([0-5][0-9]):([0-5][0-9]|60)([.][0-9]+)?"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"local_date": {
|
||||
"type": "SYMBOL",
|
||||
"name": "_rfc3339_date"
|
||||
},
|
||||
"local_time": {
|
||||
"type": "SYMBOL",
|
||||
"name": "_rfc3339_time"
|
||||
},
|
||||
"_rfc3339_date": {
|
||||
"type": "PATTERN",
|
||||
"value": "([0-9]+)-(0[1-9]|1[012])-(0[1-9]|[12][0-9]|3[01])"
|
||||
},
|
||||
"_rfc3339_delimiter": {
|
||||
"type": "PATTERN",
|
||||
"value": "[ tT]"
|
||||
},
|
||||
"_rfc3339_time": {
|
||||
"local_time": {
|
||||
"type": "PATTERN",
|
||||
"value": "([01][0-9]|2[0-3]):([0-5][0-9]):([0-5][0-9]|60)([.][0-9]+)?"
|
||||
},
|
||||
"_rfc3339_offset": {
|
||||
"type": "PATTERN",
|
||||
"value": "([zZ])|([+-]([01][0-9]|2[0-3]):[0-5][0-9])"
|
||||
},
|
||||
"array": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
|
|
9
src/node-types.json
generated
9
src/node-types.json
generated
|
@ -119,11 +119,6 @@
|
|||
"named": true,
|
||||
"fields": {}
|
||||
},
|
||||
{
|
||||
"type": "local_time",
|
||||
"named": true,
|
||||
"fields": {}
|
||||
},
|
||||
{
|
||||
"type": "offset_date_time",
|
||||
"named": true,
|
||||
|
@ -340,6 +335,10 @@
|
|||
"type": "boolean",
|
||||
"named": true
|
||||
},
|
||||
{
|
||||
"type": "local_time",
|
||||
"named": true
|
||||
},
|
||||
{
|
||||
"type": ",",
|
||||
"named": false
|
||||
|
|
5188
src/parser.c
generated
5188
src/parser.c
generated
File diff suppressed because it is too large
Load diff
Loading…
Reference in a new issue