Disallow whitespace in some multi-fragment constructs.

This is more correct and stays robust when grammar rules are reordered. See
https://github.com/camdencheek/tree-sitter-dockerfile/issues/18 and
https://github.com/camdencheek/tree-sitter-dockerfile/issues/19 for details.
This commit is contained in:
Martin Jambon 2022-04-04 19:08:03 -07:00
parent 189b6b1f86
commit d8defd6796
4 changed files with 226 additions and 14 deletions

View file

@ -86,3 +86,111 @@ FROM --platform=linux/arm64 alpine-${VERSION}-z
(image_name (image_name
(expansion (expansion
(variable)))))) (variable))))))
======================================================
No spaces in image name
======================================================
FROM a b c
---
(source_file
(ERROR
(from_instruction
(image_spec
(image_name)))))
======================================================
No spaces in image name before expansion
======================================================
FROM a $b
---
(source_file
(from_instruction
(ERROR)
(image_spec
(image_name
(expansion
(variable))))))
======================================================
No spaces in image name expansion
======================================================
FROM $ X
---
(source_file
(ERROR))
======================================================
No spaces in image tag expansion
======================================================
FROM a: $B
---
(source_file
(from_instruction
(ERROR
(image_name))
(image_spec
(image_name
(expansion
(variable))))))
======================================================
No spaces in image digest
======================================================
FROM a:b@ $c
---
(source_file
(from_instruction
(ERROR
(image_name)
(image_tag))
(image_spec
(image_name
(expansion
(variable))))))
======================================================
No spaces in image alias
======================================================
FROM a AS b c
---
(source_file
(ERROR
(from_instruction
(image_spec
(image_name))
(image_alias))))
======================================================
No spaces in image alias expansion
======================================================
FROM a AS b $C
---
(source_file
(from_instruction
(image_spec
(image_name))
(ERROR)
(image_alias
(expansion
(variable)))))

View file

@ -19,3 +19,55 @@ STOPSIGNAL SIGKILL
(source_file (source_file
(stopsignal_instruction)) (stopsignal_instruction))
=========================================================
Signal value with expansions
=========================================================
STOPSIGNAL A$BC${DE}F
---
(source_file
(stopsignal_instruction
(expansion
(variable))
(expansion
(variable))))
=========================================================
No space in signal value
=========================================================
STOPSIGNAL A B
---
(source_file
(ERROR
(stopsignal_instruction)))
=========================================================
No space in signal value before expansion
=========================================================
STOPSIGNAL A $B
---
(source_file
(stopsignal_instruction
(ERROR)
(expansion
(variable))))
=========================================================
No space in signal value within expansion
=========================================================
STOPSIGNAL A$ B
---
(source_file
(ERROR))

View file

@ -35,3 +35,15 @@ USER 1004:1004
(user_instruction (user_instruction
user: (unquoted_string) user: (unquoted_string)
group: (unquoted_string))) group: (unquoted_string)))
===========================
No spaces in user:group
===========================
USER a: b
---
(source_file
(ERROR
(unquoted_string)))

View file

@ -106,13 +106,27 @@ module.exports = grammar({
optional( optional(
seq( seq(
token.immediate(":"), token.immediate(":"),
field("group", alias($._user_name_or_group, $.unquoted_string)) field("group",
alias($._immediate_user_name_or_group, $.unquoted_string))
) )
) )
), ),
_user_name_or_group: ($) => _user_name_or_group: ($) =>
repeat1(choice(/([a-z][-a-z0-9_]*|[0-9]+)/, $.expansion)), seq(
choice(/([a-z][-a-z0-9_]*|[0-9]+)/, $.expansion),
repeat($._immediate_user_name_or_group_fragment)
),
// same as _user_name_or_group but sticks to previous token
_immediate_user_name_or_group: ($) =>
repeat1($._immediate_user_name_or_group_fragment),
_immediate_user_name_or_group_fragment: ($) =>
choice(
token.immediate(/([a-z][-a-z0-9_]*|[0-9]+)/),
$._immediate_expansion
),
workdir_instruction: ($) => workdir_instruction: ($) =>
seq(alias(/[wW][oO][rR][kK][dD][iI][rR]/, "WORKDIR"), $.path), seq(alias(/[wW][oO][rR][kK][dD][iI][rR]/, "WORKDIR"), $.path),
@ -138,7 +152,11 @@ module.exports = grammar({
$._stopsignal_value $._stopsignal_value
), ),
_stopsignal_value: ($) => repeat1(choice(/[A-Z0-9]+/, $.expansion)), _stopsignal_value: ($) =>
seq(
choice(/[A-Z0-9]+/, $.expansion),
repeat(choice(token.immediate(/[A-Z0-9]+/), $._immediate_expansion))
),
healthcheck_instruction: ($) => healthcheck_instruction: ($) =>
seq( seq(
@ -170,13 +188,28 @@ module.exports = grammar({
/[^-\s\$]/, // cannot start with a '-' to avoid conflicts with params /[^-\s\$]/, // cannot start with a '-' to avoid conflicts with params
$.expansion $.expansion
), ),
repeat(choice(/[^\s\$]+/, $.expansion)) repeat(choice(token.immediate(/[^\s\$]+/), $._immediate_expansion))
), ),
expansion: ($) => expansion: $ =>
seq("$", choice($.variable, seq("{", alias(/[^\}]+/, $.variable), "}"))), seq("$", $._expansion_body),
variable: ($) => /[a-zA-Z][a-zA-Z0-9_]*/, // we have 2 rules b/c aliases don't work as expected on seq() directly
_immediate_expansion: $ => alias($._imm_expansion, $.expansion),
_imm_expansion: $ =>
seq(token.immediate("$"), $._expansion_body),
_expansion_body: $ =>
choice(
$.variable,
seq(
token.immediate("{"),
alias(token.immediate(/[^\}]+/), $.variable),
token.immediate("}")
)
),
variable: ($) => token.immediate(/[a-zA-Z][a-zA-Z0-9_]*/),
env_pair: ($) => env_pair: ($) =>
seq( seq(
@ -218,19 +251,19 @@ module.exports = grammar({
image_name: ($) => image_name: ($) =>
seq( seq(
choice(/[^@:\s\$-]/, $.expansion), choice(/[^@:\s\$-]/, $.expansion),
repeat(choice(/[^@:\s\$]+/, $.expansion)) repeat(choice(token.immediate(/[^@:\s\$]+/), $._immediate_expansion))
), ),
image_tag: ($) => image_tag: ($) =>
seq( seq(
token.immediate(":"), token.immediate(":"),
repeat1(choice(token.immediate(/[^@\s\$]+/), $.expansion)) repeat1(choice(token.immediate(/[^@\s\$]+/), $._immediate_expansion))
), ),
image_digest: ($) => image_digest: ($) =>
seq( seq(
token.immediate("@"), token.immediate("@"),
repeat1(choice(token.immediate(/[a-zA-Z0-9:]+/), $.expansion)) repeat1(choice(token.immediate(/[a-zA-Z0-9:]+/), $._immediate_expansion))
), ),
param: ($) => param: ($) =>
@ -241,7 +274,10 @@ module.exports = grammar({
field("value", token.immediate(/[^\s]+/)) field("value", token.immediate(/[^\s]+/))
), ),
image_alias: ($) => repeat1(choice(/[-a-zA-Z0-9_]+/, $.expansion)), image_alias: ($) => seq(
choice(/[-a-zA-Z0-9_]+/, $.expansion),
repeat(choice(token.immediate(/[-a-zA-Z0-9_]+/), $._immediate_expansion))
),
string_array: ($) => string_array: ($) =>
seq( seq(
@ -278,7 +314,11 @@ module.exports = grammar({
seq( seq(
'"', '"',
repeat( repeat(
choice(token.immediate(/[^"\n\\\$]+/), $.escape_sequence, $.expansion) choice(
token.immediate(/[^"\n\\\$]+/),
$.escape_sequence,
$._immediate_expansion
)
), ),
'"' '"'
), ),
@ -288,7 +328,7 @@ module.exports = grammar({
choice( choice(
token.immediate(/[^\s\n\"\\\$]+/), token.immediate(/[^\s\n\"\\\$]+/),
token.immediate("\\ "), token.immediate("\\ "),
$.expansion $._immediate_expansion
) )
), ),