Merge pull request #37 from returntocorp/mj-single-quoted-strings

Distinguish 3 kinds of quoted strings (single, double, JSON)
This commit is contained in:
Camden Cheek 2023-07-06 08:25:44 -06:00 committed by GitHub
commit c0a9d694d9
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
12 changed files with 5111 additions and 4718 deletions

View file

@ -41,7 +41,7 @@ cmd []
(source_file
(cmd_instruction
(string_array)))
(json_string_array)))
==================
Run with shell array
@ -52,8 +52,8 @@ cmd ["echo", "test"]
---
(source_file
(cmd_instruction
(string_array
(double_quoted_string)
(double_quoted_string))))
(cmd_instruction
(json_string_array
(json_string)
(json_string))))

View file

@ -41,7 +41,7 @@ ENTRYPOINT []
(source_file
(entrypoint_instruction
(string_array)))
(json_string_array)))
==================
Run with shell array
@ -52,8 +52,7 @@ ENTRYPOINT ["echo", "test"]
---
(source_file
(entrypoint_instruction
(string_array
(double_quoted_string)
(double_quoted_string))))
(entrypoint_instruction
(json_string_array
(json_string)
(json_string))))

View file

@ -41,7 +41,7 @@ run []
(source_file
(run_instruction
(string_array)))
(json_string_array)))
==================
Run with shell array
@ -52,10 +52,10 @@ run ["echo", "test"]
---
(source_file
(run_instruction
(string_array
(double_quoted_string)
(double_quoted_string))))
(run_instruction
(json_string_array
(json_string)
(json_string))))
==================
Run with options

View file

@ -8,7 +8,7 @@ SHELL ["powershell","-command"]
(source_file
(shell_instruction
(string_array
(double_quoted_string)
(double_quoted_string))))
(json_string_array
(json_string)
(json_string))))

151
corpus/strings.txt Normal file
View file

@ -0,0 +1,151 @@
==========================
JSON string arrays
==========================
CMD ["a", "bcd"]
CMD ["\"\\"]
CMD ["\b\f\n\r\t\u0ABC\u12Cd"]
---
(source_file
(cmd_instruction
(json_string_array
(json_string)
(json_string)))
(cmd_instruction
(json_string_array
(json_string
(escape_sequence)
(escape_sequence))))
(cmd_instruction
(json_string_array
(json_string
(escape_sequence)
(escape_sequence)
(escape_sequence)
(escape_sequence)
(escape_sequence)
(escape_sequence)
(escape_sequence)))))
==========================
Double-quoted strings
==========================
ENV KEY "value"
ENV KEY "conti\
nue"
ENV KEY "quotes: \" '"
ENV KEY "backslash: \\"
ENV KEY "not escape sequences: \a \n"
ENV KEY "expansions: $FOO ${BAR}"
---
(source_file
(env_instruction
(env_pair
(unquoted_string)
(double_quoted_string)))
(env_instruction
(env_pair
(unquoted_string)
(double_quoted_string
(line_continuation))))
(env_instruction
(env_pair
(unquoted_string)
(double_quoted_string
(escape_sequence))))
(env_instruction
(env_pair
(unquoted_string)
(double_quoted_string
(escape_sequence))))
(env_instruction
(env_pair
(unquoted_string)
(double_quoted_string)))
(env_instruction
(env_pair
(unquoted_string)
(double_quoted_string
(expansion
(variable))
(expansion
(variable))))))
==========================
Single-quoted strings
==========================
ENV KEY 'value'
ENV KEY 'conti\
nue'
ENV KEY 'quotes: \' \''
ENV KEY 'backslash: \\'
ENV KEY 'not escape sequences: \a \n'
---
(source_file
(env_instruction
(env_pair
(unquoted_string)
(single_quoted_string)))
(env_instruction
(env_pair
(unquoted_string)
(single_quoted_string
(line_continuation))))
(env_instruction
(env_pair
(unquoted_string)
(single_quoted_string
(escape_sequence)
(escape_sequence))))
(env_instruction
(env_pair
(unquoted_string)
(single_quoted_string
(escape_sequence))))
(env_instruction
(env_pair
(unquoted_string)
(single_quoted_string))))
==========================
Unquoted strings
==========================
ENV KEY value
ENV KEY two\ words
ENV KEY conti\
nue
ENV KEY $FOO${BAR}baz
---
(source_file
(env_instruction
(env_pair
(unquoted_string)
(unquoted_string)))
(env_instruction
(env_pair
(unquoted_string)
(unquoted_string)))
(env_instruction
(env_pair
(unquoted_string)
(unquoted_string
(line_continuation))))
(env_instruction
(env_pair
(unquoted_string)
(unquoted_string
(expansion
(variable))
(expansion
(variable))))))

View file

@ -49,8 +49,8 @@ VOLUME ["/test/myvol"]
(source_file
(volume_instruction
(string_array
(double_quoted_string))))
(json_string_array
(json_string))))
==================
Multiline array
@ -63,7 +63,7 @@ VOLUME ["/test/myvol", \
(source_file
(volume_instruction
(string_array
(double_quoted_string)
(json_string_array
(json_string)
(line_continuation)
(double_quoted_string))))
(json_string))))

View file

@ -46,13 +46,13 @@ module.exports = grammar({
$.mount_param
)
),
choice($.string_array, $.shell_command)
choice($.json_string_array, $.shell_command)
),
cmd_instruction: ($) =>
seq(
alias(/[cC][mM][dD]/, "CMD"),
choice($.string_array, $.shell_command)
choice($.json_string_array, $.shell_command)
),
label_instruction: ($) =>
@ -93,14 +93,14 @@ module.exports = grammar({
entrypoint_instruction: ($) =>
seq(
alias(/[eE][nN][tT][rR][yY][pP][oO][iI][nN][tT]/, "ENTRYPOINT"),
choice($.string_array, $.shell_command)
choice($.json_string_array, $.shell_command)
),
volume_instruction: ($) =>
seq(
alias(/[vV][oO][lL][uU][mM][eE]/, "VOLUME"),
choice(
$.string_array,
$.json_string_array,
seq($.path, repeat(seq($._non_newline_whitespace, $.path)))
)
),
@ -144,7 +144,12 @@ module.exports = grammar({
optional(
seq(
token.immediate("="),
field("default", choice($.double_quoted_string, $.unquoted_string))
field("default",
choice(
$.double_quoted_string,
$.single_quoted_string,
$.unquoted_string
))
)
)
),
@ -171,7 +176,7 @@ module.exports = grammar({
),
shell_instruction: ($) =>
seq(alias(/[sS][hH][eE][lL][lL]/, "SHELL"), $.string_array),
seq(alias(/[sS][hH][eE][lL][lL]/, "SHELL"), $.json_string_array),
maintainer_instruction: ($) =>
seq(
@ -222,7 +227,12 @@ module.exports = grammar({
field("name", $._env_key),
token.immediate("="),
optional(
field("value", choice($.double_quoted_string, $.unquoted_string))
field("value",
choice(
$.double_quoted_string,
$.single_quoted_string,
$.unquoted_string
))
)
),
@ -230,7 +240,12 @@ module.exports = grammar({
seq(
field("name", $._env_key),
token.immediate(/\s+/),
field("value", choice($.double_quoted_string, $.unquoted_string))
field("value",
choice(
$.double_quoted_string,
$.single_quoted_string,
$.unquoted_string
))
),
_env_key: ($) =>
@ -242,7 +257,12 @@ module.exports = grammar({
seq(
field("key", alias(/[-a-zA-Z0-9\._]+/, $.unquoted_string)),
token.immediate("="),
field("value", choice($.double_quoted_string, $.unquoted_string))
field("value",
choice(
$.double_quoted_string,
$.single_quoted_string,
$.unquoted_string
))
),
image_spec: ($) =>
@ -311,15 +331,6 @@ module.exports = grammar({
repeat(choice(token.immediate(/[-a-zA-Z0-9_]+/), $._immediate_expansion))
),
string_array: ($) =>
seq(
"[",
optional(
seq($.double_quoted_string, repeat(seq(",", $.double_quoted_string)))
),
"]"
),
shell_command: ($) =>
seq(
repeat($._comment_line),
@ -359,41 +370,83 @@ module.exports = grammar({
_anon_comment: ($) => seq("#", /.*/),
// A JSON-style array of strings: "[", an optional comma-separated list of
// json_string nodes, then "]". Because the list is wrapped in optional(),
// an empty array (`[]`) is accepted.
json_string_array: ($) =>
seq(
"[",
optional(
seq($.json_string, repeat(seq(",", $.json_string)))
),
"]"
),
// Note that JSON strings are different from the other double-quoted
// strings. They don't support $-expansions.
// Convenient reference: https://www.json.org/
//
// The body is any mix of (a) runs of characters other than '"' and '\',
// and (b) JSON escape sequences, which alias() exposes in the tree under
// the shared node name `escape_sequence`.
// NOTE(review): unlike double_quoted_string, the character class here does
// not exclude "\n", so a raw newline between the quotes is accepted. JSON
// itself forbids unescaped control characters -- confirm this is intended.
json_string: ($) => seq(
'"',
repeat(
choice(
token.immediate(/[^"\\]+/),
alias($.json_escape_sequence, $.escape_sequence)
)
),
'"'
),
// One JSON escape, lexed as a single immediate token: a backslash followed
// either by one of `"`, `\`, `/`, `b`, `f`, `n`, `r`, `t`, or by `u` and
// exactly four hexadecimal digits (upper or lower case).
json_escape_sequence: ($) => token.immediate(
/\\(?:["\\/bfnrt]|u[0-9A-Fa-f]{4})/
),
double_quoted_string: ($) =>
seq(
'"',
repeat(
choice(
token.immediate(/[^"\n\\\$]+/),
$.escape_sequence,
alias($.double_quoted_escape_sequence, $.escape_sequence),
"\\",
$._immediate_expansion
)
),
'"'
),
// Same as double_quoted_string but without $-expansions.
//
// Body alternatives, in order:
//   - a run of characters other than "'", newline and backslash;
//   - a recognized escape (single_quoted_escape_sequence), surfaced in the
//     tree as `escape_sequence`;
//   - a lone backslash, so a backslash that does not start a recognized
//     escape (e.g. `\a`) stays ordinary string content instead of
//     producing an escape_sequence node.
single_quoted_string: ($) =>
seq(
"'",
repeat(
choice(
token.immediate(/[^'\n\\]+/),
alias($.single_quoted_escape_sequence, $.escape_sequence),
"\\",
)
),
"'"
),
unquoted_string: ($) =>
repeat1(
choice(
token.immediate(/[^\s\n\"\\\$]+/),
token.immediate(/[^\s\n\"'\\\$]+/),
token.immediate("\\ "),
$._immediate_expansion
)
),
escape_sequence: ($) =>
token.immediate(
seq(
"\\",
choice(
/[^xuU]/,
/\d{2,3}/,
/x[0-9a-fA-F]{2,}/,
/u[0-9a-fA-F]{4}/,
/U[0-9a-fA-F]{8}/
)
)
),
// The only two escapes recognized as such in this rule: an escaped
// backslash (`\\`) or an escaped double quote (`\"`). Lexed as one
// immediate token.
double_quoted_escape_sequence: ($) => token.immediate(
choice(
"\\\\",
"\\\""
)
),
// The only two escapes recognized as such in this rule: an escaped
// backslash (`\\`) or an escaped single quote (`\'`). Lexed as one
// immediate token.
single_quoted_escape_sequence: ($) => token.immediate(
choice(
"\\\\",
"\\'"
)
),
_non_newline_whitespace: ($) => /[\t ]+/,

View file

@ -13,5 +13,19 @@
},
"devDependencies": {
"tree-sitter-cli": "^0.20.1"
}
},
"tree-sitter": [
{
"file-types": [
"Dockerfile",
"dockerfile",
"docker",
"Containerfile",
"container"
],
"highlights": [
"queries/highlights.scm"
]
}
]
}

View file

@ -35,7 +35,11 @@
(image_digest
"@" @punctuation.special))
(double_quoted_string) @string
[
(double_quoted_string)
(single_quoted_string)
(json_string)
] @string
(expansion
[

View file

@ -200,7 +200,7 @@
"members": [
{
"type": "SYMBOL",
"name": "string_array"
"name": "json_string_array"
},
{
"type": "SYMBOL",
@ -227,7 +227,7 @@
"members": [
{
"type": "SYMBOL",
"name": "string_array"
"name": "json_string_array"
},
{
"type": "SYMBOL",
@ -432,7 +432,7 @@
"members": [
{
"type": "SYMBOL",
"name": "string_array"
"name": "json_string_array"
},
{
"type": "SYMBOL",
@ -459,7 +459,7 @@
"members": [
{
"type": "SYMBOL",
"name": "string_array"
"name": "json_string_array"
},
{
"type": "SEQ",
@ -664,6 +664,10 @@
"type": "SYMBOL",
"name": "double_quoted_string"
},
{
"type": "SYMBOL",
"name": "single_quoted_string"
},
{
"type": "SYMBOL",
"name": "unquoted_string"
@ -806,7 +810,7 @@
},
{
"type": "SYMBOL",
"name": "string_array"
"name": "json_string_array"
}
]
},
@ -999,6 +1003,10 @@
"type": "SYMBOL",
"name": "double_quoted_string"
},
{
"type": "SYMBOL",
"name": "single_quoted_string"
},
{
"type": "SYMBOL",
"name": "unquoted_string"
@ -1041,6 +1049,10 @@
"type": "SYMBOL",
"name": "double_quoted_string"
},
{
"type": "SYMBOL",
"name": "single_quoted_string"
},
{
"type": "SYMBOL",
"name": "unquoted_string"
@ -1122,6 +1134,10 @@
"type": "SYMBOL",
"name": "double_quoted_string"
},
{
"type": "SYMBOL",
"name": "single_quoted_string"
},
{
"type": "SYMBOL",
"name": "unquoted_string"
@ -1440,52 +1456,6 @@
}
]
},
"string_array": {
"type": "SEQ",
"members": [
{
"type": "STRING",
"value": "["
},
{
"type": "CHOICE",
"members": [
{
"type": "SEQ",
"members": [
{
"type": "SYMBOL",
"name": "double_quoted_string"
},
{
"type": "REPEAT",
"content": {
"type": "SEQ",
"members": [
{
"type": "STRING",
"value": ","
},
{
"type": "SYMBOL",
"name": "double_quoted_string"
}
]
}
}
]
},
{
"type": "BLANK"
}
]
},
{
"type": "STRING",
"value": "]"
}
]
},
"shell_command": {
"type": "SEQ",
"members": [
@ -1589,6 +1559,96 @@
}
]
},
"json_string_array": {
"type": "SEQ",
"members": [
{
"type": "STRING",
"value": "["
},
{
"type": "CHOICE",
"members": [
{
"type": "SEQ",
"members": [
{
"type": "SYMBOL",
"name": "json_string"
},
{
"type": "REPEAT",
"content": {
"type": "SEQ",
"members": [
{
"type": "STRING",
"value": ","
},
{
"type": "SYMBOL",
"name": "json_string"
}
]
}
}
]
},
{
"type": "BLANK"
}
]
},
{
"type": "STRING",
"value": "]"
}
]
},
"json_string": {
"type": "SEQ",
"members": [
{
"type": "STRING",
"value": "\""
},
{
"type": "REPEAT",
"content": {
"type": "CHOICE",
"members": [
{
"type": "IMMEDIATE_TOKEN",
"content": {
"type": "PATTERN",
"value": "[^\"\\\\]+"
}
},
{
"type": "ALIAS",
"content": {
"type": "SYMBOL",
"name": "json_escape_sequence"
},
"named": true,
"value": "escape_sequence"
}
]
}
},
{
"type": "STRING",
"value": "\""
}
]
},
"json_escape_sequence": {
"type": "IMMEDIATE_TOKEN",
"content": {
"type": "PATTERN",
"value": "\\\\(?:[\"\\\\/bfnrt]|u[0-9A-Fa-f]{4})"
}
},
"double_quoted_string": {
"type": "SEQ",
"members": [
@ -1609,8 +1669,17 @@
}
},
{
"type": "SYMBOL",
"name": "escape_sequence"
"type": "ALIAS",
"content": {
"type": "SYMBOL",
"name": "double_quoted_escape_sequence"
},
"named": true,
"value": "escape_sequence"
},
{
"type": "STRING",
"value": "\\"
},
{
"type": "SYMBOL",
@ -1625,6 +1694,47 @@
}
]
},
"single_quoted_string": {
"type": "SEQ",
"members": [
{
"type": "STRING",
"value": "'"
},
{
"type": "REPEAT",
"content": {
"type": "CHOICE",
"members": [
{
"type": "IMMEDIATE_TOKEN",
"content": {
"type": "PATTERN",
"value": "[^'\\n\\\\]+"
}
},
{
"type": "ALIAS",
"content": {
"type": "SYMBOL",
"name": "single_quoted_escape_sequence"
},
"named": true,
"value": "escape_sequence"
},
{
"type": "STRING",
"value": "\\"
}
]
}
},
{
"type": "STRING",
"value": "'"
}
]
},
"unquoted_string": {
"type": "REPEAT1",
"content": {
@ -1634,7 +1744,7 @@
"type": "IMMEDIATE_TOKEN",
"content": {
"type": "PATTERN",
"value": "[^\\s\\n\\\"\\\\\\$]+"
"value": "[^\\s\\n\\\"'\\\\\\$]+"
}
},
{
@ -1651,39 +1761,34 @@
]
}
},
"escape_sequence": {
"double_quoted_escape_sequence": {
"type": "IMMEDIATE_TOKEN",
"content": {
"type": "SEQ",
"type": "CHOICE",
"members": [
{
"type": "STRING",
"value": "\\"
"value": "\\\\"
},
{
"type": "CHOICE",
"members": [
{
"type": "PATTERN",
"value": "[^xuU]"
},
{
"type": "PATTERN",
"value": "\\d{2,3}"
},
{
"type": "PATTERN",
"value": "x[0-9a-fA-F]{2,}"
},
{
"type": "PATTERN",
"value": "u[0-9a-fA-F]{4}"
},
{
"type": "PATTERN",
"value": "U[0-9a-fA-F]{8}"
}
]
"type": "STRING",
"value": "\\\""
}
]
}
},
"single_quoted_escape_sequence": {
"type": "IMMEDIATE_TOKEN",
"content": {
"type": "CHOICE",
"members": [
{
"type": "STRING",
"value": "\\\\"
},
{
"type": "STRING",
"value": "\\'"
}
]
}

View file

@ -30,6 +30,10 @@
"type": "double_quoted_string",
"named": true
},
{
"type": "single_quoted_string",
"named": true
},
{
"type": "unquoted_string",
"named": true
@ -57,11 +61,11 @@
"required": true,
"types": [
{
"type": "shell_command",
"type": "json_string_array",
"named": true
},
{
"type": "string_array",
"type": "shell_command",
"named": true
}
]
@ -124,11 +128,11 @@
"required": true,
"types": [
{
"type": "shell_command",
"type": "json_string_array",
"named": true
},
{
"type": "string_array",
"type": "shell_command",
"named": true
}
]
@ -171,6 +175,10 @@
"type": "double_quoted_string",
"named": true
},
{
"type": "single_quoted_string",
"named": true
},
{
"type": "unquoted_string",
"named": true
@ -363,6 +371,36 @@
]
}
},
{
"type": "json_string",
"named": true,
"fields": {},
"children": {
"multiple": true,
"required": false,
"types": [
{
"type": "escape_sequence",
"named": true
}
]
}
},
{
"type": "json_string_array",
"named": true,
"fields": {},
"children": {
"multiple": true,
"required": false,
"types": [
{
"type": "json_string",
"named": true
}
]
}
},
{
"type": "label_instruction",
"named": true,
@ -400,6 +438,10 @@
"type": "double_quoted_string",
"named": true
},
{
"type": "single_quoted_string",
"named": true
},
{
"type": "unquoted_string",
"named": true
@ -568,6 +610,10 @@
"multiple": true,
"required": true,
"types": [
{
"type": "json_string_array",
"named": true
},
{
"type": "mount_param",
"named": true
@ -579,10 +625,6 @@
{
"type": "shell_command",
"named": true
},
{
"type": "string_array",
"named": true
}
]
}
@ -624,7 +666,22 @@
"required": true,
"types": [
{
"type": "string_array",
"type": "json_string_array",
"named": true
}
]
}
},
{
"type": "single_quoted_string",
"named": true,
"fields": {},
"children": {
"multiple": true,
"required": false,
"types": [
{
"type": "escape_sequence",
"named": true
}
]
@ -736,21 +793,6 @@
]
}
},
{
"type": "string_array",
"named": true,
"fields": {},
"children": {
"multiple": true,
"required": false,
"types": [
{
"type": "double_quoted_string",
"named": true
}
]
}
},
{
"type": "unquoted_string",
"named": true,
@ -801,11 +843,11 @@
"required": true,
"types": [
{
"type": "path",
"type": "json_string_array",
"named": true
},
{
"type": "string_array",
"type": "path",
"named": true
}
]
@ -842,6 +884,10 @@
"type": "$",
"named": false
},
{
"type": "'",
"named": false
},
{
"type": ",",
"named": false
@ -958,6 +1004,10 @@
"type": "[",
"named": false
},
{
"type": "\\",
"named": false
},
{
"type": "\\\n",
"named": false

File diff suppressed because it is too large Load diff