Merge pull request #37 from returntocorp/mj-single-quoted-strings

Distinguish 3 kinds of quoted strings (single, double, JSON)
This commit is contained in:
Camden Cheek 2023-07-06 08:25:44 -06:00 committed by GitHub
commit c0a9d694d9
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
12 changed files with 5111 additions and 4718 deletions

View file

@ -41,7 +41,7 @@ cmd []
(source_file (source_file
(cmd_instruction (cmd_instruction
(string_array))) (json_string_array)))
================== ==================
Run with shell array Run with shell array
@ -52,8 +52,8 @@ cmd ["echo", "test"]
--- ---
(source_file (source_file
(cmd_instruction (cmd_instruction
(string_array (json_string_array
(double_quoted_string) (json_string)
(double_quoted_string)))) (json_string))))

View file

@ -41,7 +41,7 @@ ENTRYPOINT []
(source_file (source_file
(entrypoint_instruction (entrypoint_instruction
(string_array))) (json_string_array)))
================== ==================
Run with shell array Run with shell array
@ -52,8 +52,7 @@ ENTRYPOINT ["echo", "test"]
--- ---
(source_file (source_file
(entrypoint_instruction (entrypoint_instruction
(string_array (json_string_array
(double_quoted_string) (json_string)
(double_quoted_string)))) (json_string))))

View file

@ -41,7 +41,7 @@ run []
(source_file (source_file
(run_instruction (run_instruction
(string_array))) (json_string_array)))
================== ==================
Run with shell array Run with shell array
@ -52,10 +52,10 @@ run ["echo", "test"]
--- ---
(source_file (source_file
(run_instruction (run_instruction
(string_array (json_string_array
(double_quoted_string) (json_string)
(double_quoted_string)))) (json_string))))
================== ==================
Run with options Run with options

View file

@ -8,7 +8,7 @@ SHELL ["powershell","-command"]
(source_file (source_file
(shell_instruction (shell_instruction
(string_array (json_string_array
(double_quoted_string) (json_string)
(double_quoted_string)))) (json_string))))

151
corpus/strings.txt Normal file
View file

@ -0,0 +1,151 @@
==========================
JSON string arrays
==========================
CMD ["a", "bcd"]
CMD ["\"\\"]
CMD ["\b\f\n\r\t\u0ABC\u12Cd"]
---
(source_file
(cmd_instruction
(json_string_array
(json_string)
(json_string)))
(cmd_instruction
(json_string_array
(json_string
(escape_sequence)
(escape_sequence))))
(cmd_instruction
(json_string_array
(json_string
(escape_sequence)
(escape_sequence)
(escape_sequence)
(escape_sequence)
(escape_sequence)
(escape_sequence)
(escape_sequence)))))
==========================
Double-quoted strings
==========================
ENV KEY "value"
ENV KEY "conti\
nue"
ENV KEY "quotes: \" '"
ENV KEY "backslash: \\"
ENV KEY "not escape sequences: \a \n"
ENV KEY "expansions: $FOO ${BAR}"
---
(source_file
(env_instruction
(env_pair
(unquoted_string)
(double_quoted_string)))
(env_instruction
(env_pair
(unquoted_string)
(double_quoted_string
(line_continuation))))
(env_instruction
(env_pair
(unquoted_string)
(double_quoted_string
(escape_sequence))))
(env_instruction
(env_pair
(unquoted_string)
(double_quoted_string
(escape_sequence))))
(env_instruction
(env_pair
(unquoted_string)
(double_quoted_string)))
(env_instruction
(env_pair
(unquoted_string)
(double_quoted_string
(expansion
(variable))
(expansion
(variable))))))
==========================
Single-quoted strings
==========================
ENV KEY 'value'
ENV KEY 'conti\
nue'
ENV KEY 'quotes: \' \''
ENV KEY 'backslash: \\'
ENV KEY 'not escape sequences: \a \n'
---
(source_file
(env_instruction
(env_pair
(unquoted_string)
(single_quoted_string)))
(env_instruction
(env_pair
(unquoted_string)
(single_quoted_string
(line_continuation))))
(env_instruction
(env_pair
(unquoted_string)
(single_quoted_string
(escape_sequence)
(escape_sequence))))
(env_instruction
(env_pair
(unquoted_string)
(single_quoted_string
(escape_sequence))))
(env_instruction
(env_pair
(unquoted_string)
(single_quoted_string))))
==========================
Unquoted strings
==========================
ENV KEY value
ENV KEY two\ words
ENV KEY conti\
nue
ENV KEY $FOO${BAR}baz
---
(source_file
(env_instruction
(env_pair
(unquoted_string)
(unquoted_string)))
(env_instruction
(env_pair
(unquoted_string)
(unquoted_string)))
(env_instruction
(env_pair
(unquoted_string)
(unquoted_string
(line_continuation))))
(env_instruction
(env_pair
(unquoted_string)
(unquoted_string
(expansion
(variable))
(expansion
(variable))))))

View file

@ -49,8 +49,8 @@ VOLUME ["/test/myvol"]
(source_file (source_file
(volume_instruction (volume_instruction
(string_array (json_string_array
(double_quoted_string)))) (json_string))))
================== ==================
Multiline array Multiline array
@ -63,7 +63,7 @@ VOLUME ["/test/myvol", \
(source_file (source_file
(volume_instruction (volume_instruction
(string_array (json_string_array
(double_quoted_string) (json_string)
(line_continuation) (line_continuation)
(double_quoted_string)))) (json_string))))

View file

@ -46,13 +46,13 @@ module.exports = grammar({
$.mount_param $.mount_param
) )
), ),
choice($.string_array, $.shell_command) choice($.json_string_array, $.shell_command)
), ),
cmd_instruction: ($) => cmd_instruction: ($) =>
seq( seq(
alias(/[cC][mM][dD]/, "CMD"), alias(/[cC][mM][dD]/, "CMD"),
choice($.string_array, $.shell_command) choice($.json_string_array, $.shell_command)
), ),
label_instruction: ($) => label_instruction: ($) =>
@ -93,14 +93,14 @@ module.exports = grammar({
entrypoint_instruction: ($) => entrypoint_instruction: ($) =>
seq( seq(
alias(/[eE][nN][tT][rR][yY][pP][oO][iI][nN][tT]/, "ENTRYPOINT"), alias(/[eE][nN][tT][rR][yY][pP][oO][iI][nN][tT]/, "ENTRYPOINT"),
choice($.string_array, $.shell_command) choice($.json_string_array, $.shell_command)
), ),
volume_instruction: ($) => volume_instruction: ($) =>
seq( seq(
alias(/[vV][oO][lL][uU][mM][eE]/, "VOLUME"), alias(/[vV][oO][lL][uU][mM][eE]/, "VOLUME"),
choice( choice(
$.string_array, $.json_string_array,
seq($.path, repeat(seq($._non_newline_whitespace, $.path))) seq($.path, repeat(seq($._non_newline_whitespace, $.path)))
) )
), ),
@ -144,7 +144,12 @@ module.exports = grammar({
optional( optional(
seq( seq(
token.immediate("="), token.immediate("="),
field("default", choice($.double_quoted_string, $.unquoted_string)) field("default",
choice(
$.double_quoted_string,
$.single_quoted_string,
$.unquoted_string
))
) )
) )
), ),
@ -171,7 +176,7 @@ module.exports = grammar({
), ),
shell_instruction: ($) => shell_instruction: ($) =>
seq(alias(/[sS][hH][eE][lL][lL]/, "SHELL"), $.string_array), seq(alias(/[sS][hH][eE][lL][lL]/, "SHELL"), $.json_string_array),
maintainer_instruction: ($) => maintainer_instruction: ($) =>
seq( seq(
@ -222,7 +227,12 @@ module.exports = grammar({
field("name", $._env_key), field("name", $._env_key),
token.immediate("="), token.immediate("="),
optional( optional(
field("value", choice($.double_quoted_string, $.unquoted_string)) field("value",
choice(
$.double_quoted_string,
$.single_quoted_string,
$.unquoted_string
))
) )
), ),
@ -230,7 +240,12 @@ module.exports = grammar({
seq( seq(
field("name", $._env_key), field("name", $._env_key),
token.immediate(/\s+/), token.immediate(/\s+/),
field("value", choice($.double_quoted_string, $.unquoted_string)) field("value",
choice(
$.double_quoted_string,
$.single_quoted_string,
$.unquoted_string
))
), ),
_env_key: ($) => _env_key: ($) =>
@ -242,7 +257,12 @@ module.exports = grammar({
seq( seq(
field("key", alias(/[-a-zA-Z0-9\._]+/, $.unquoted_string)), field("key", alias(/[-a-zA-Z0-9\._]+/, $.unquoted_string)),
token.immediate("="), token.immediate("="),
field("value", choice($.double_quoted_string, $.unquoted_string)) field("value",
choice(
$.double_quoted_string,
$.single_quoted_string,
$.unquoted_string
))
), ),
image_spec: ($) => image_spec: ($) =>
@ -311,15 +331,6 @@ module.exports = grammar({
repeat(choice(token.immediate(/[-a-zA-Z0-9_]+/), $._immediate_expansion)) repeat(choice(token.immediate(/[-a-zA-Z0-9_]+/), $._immediate_expansion))
), ),
string_array: ($) =>
seq(
"[",
optional(
seq($.double_quoted_string, repeat(seq(",", $.double_quoted_string)))
),
"]"
),
shell_command: ($) => shell_command: ($) =>
seq( seq(
repeat($._comment_line), repeat($._comment_line),
@ -359,41 +370,83 @@ module.exports = grammar({
_anon_comment: ($) => seq("#", /.*/), _anon_comment: ($) => seq("#", /.*/),
// Bracketed, comma-separated list of json_string elements, referenced by
// the cmd, entrypoint, volume and shell instruction rules in this grammar.
// The optional() wrapper makes the empty array `[]` valid. A trailing
// comma is not accepted, because every "," must be followed by another
// json_string.
json_string_array: ($) =>
seq(
"[",
optional(
seq($.json_string, repeat(seq(",", $.json_string)))
),
"]"
),
// A JSON string literal, used only as an element of json_string_array.
// Note that JSON strings are different from the other double-quoted
// strings: they don't support $-expansions, so `$` is ordinary content.
// Convenient reference: https://www.json.org/
//
// The body is any mix of:
//   - runs of characters other than `"` and backslash, and
//   - json_escape_sequence tokens, aliased to `escape_sequence` so the
//     syntax tree uses the same node name as the other quoted strings.
// There is no bare-backslash alternative here, so a backslash that does
// not start a valid JSON escape is not matched by this rule.
// NOTE(review): the character class does not exclude newlines (the
// double_quoted_string rule uses /[^"\n\\\$]+/), while JSON requires
// control characters to be escaped. Confirm whether this is intentional.
json_string: ($) => seq(
'"',
repeat(
choice(
token.immediate(/[^"\\]+/),
alias($.json_escape_sequence, $.escape_sequence)
)
),
'"'
),
// One JSON escape: a backslash followed by either a single character out
// of `"`, `\`, `/`, `b`, `f`, `n`, `r`, `t`, or by `u` and exactly four
// hexadecimal digits (upper or lower case). Declared with token.immediate
// so it is lexed as a single token directly after the preceding string
// content.
json_escape_sequence: ($) => token.immediate(
/\\(?:["\\/bfnrt]|u[0-9A-Fa-f]{4})/
),
double_quoted_string: ($) => double_quoted_string: ($) =>
seq( seq(
'"', '"',
repeat( repeat(
choice( choice(
token.immediate(/[^"\n\\\$]+/), token.immediate(/[^"\n\\\$]+/),
$.escape_sequence, alias($.double_quoted_escape_sequence, $.escape_sequence),
"\\",
$._immediate_expansion $._immediate_expansion
) )
), ),
'"' '"'
), ),
// Single-quoted string. Same shape as double_quoted_string but without
// $-expansions: `$` is ordinary content here.
//
// The body is any mix of:
//   - runs of characters other than `'`, newline and backslash,
//   - single_quoted_escape_sequence tokens, aliased to `escape_sequence`
//     so the syntax tree uses one node name for escapes in every string
//     kind, and
//   - a bare backslash, kept as an anonymous token so that a backslash
//     which does not begin a recognised escape (e.g. `\a`) is still
//     accepted as plain content instead of producing an escape_sequence
//     node.
single_quoted_string: ($) =>
seq(
"'",
repeat(
choice(
token.immediate(/[^'\n\\]+/),
alias($.single_quoted_escape_sequence, $.escape_sequence),
"\\",
)
),
"'"
),
unquoted_string: ($) => unquoted_string: ($) =>
repeat1( repeat1(
choice( choice(
token.immediate(/[^\s\n\"\\\$]+/), token.immediate(/[^\s\n\"'\\\$]+/),
token.immediate("\\ "), token.immediate("\\ "),
$._immediate_expansion $._immediate_expansion
) )
), ),
escape_sequence: ($) => double_quoted_escape_sequence: ($) => token.immediate(
token.immediate( choice(
seq( "\\\\",
"\\", "\\\""
choice( )
/[^xuU]/, ),
/\d{2,3}/,
/x[0-9a-fA-F]{2,}/, single_quoted_escape_sequence: ($) => token.immediate(
/u[0-9a-fA-F]{4}/, choice(
/U[0-9a-fA-F]{8}/ "\\\\",
) "\\'"
) )
), ),
_non_newline_whitespace: ($) => /[\t ]+/, _non_newline_whitespace: ($) => /[\t ]+/,

View file

@ -13,5 +13,19 @@
}, },
"devDependencies": { "devDependencies": {
"tree-sitter-cli": "^0.20.1" "tree-sitter-cli": "^0.20.1"
} },
"tree-sitter": [
{
"file-types": [
"Dockerfile",
"dockerfile",
"docker",
"Containerfile",
"container"
],
"highlights": [
"queries/highlights.scm"
]
}
]
} }

View file

@ -35,7 +35,11 @@
(image_digest (image_digest
"@" @punctuation.special)) "@" @punctuation.special))
(double_quoted_string) @string [
(double_quoted_string)
(single_quoted_string)
(json_string)
] @string
(expansion (expansion
[ [

View file

@ -200,7 +200,7 @@
"members": [ "members": [
{ {
"type": "SYMBOL", "type": "SYMBOL",
"name": "string_array" "name": "json_string_array"
}, },
{ {
"type": "SYMBOL", "type": "SYMBOL",
@ -227,7 +227,7 @@
"members": [ "members": [
{ {
"type": "SYMBOL", "type": "SYMBOL",
"name": "string_array" "name": "json_string_array"
}, },
{ {
"type": "SYMBOL", "type": "SYMBOL",
@ -432,7 +432,7 @@
"members": [ "members": [
{ {
"type": "SYMBOL", "type": "SYMBOL",
"name": "string_array" "name": "json_string_array"
}, },
{ {
"type": "SYMBOL", "type": "SYMBOL",
@ -459,7 +459,7 @@
"members": [ "members": [
{ {
"type": "SYMBOL", "type": "SYMBOL",
"name": "string_array" "name": "json_string_array"
}, },
{ {
"type": "SEQ", "type": "SEQ",
@ -664,6 +664,10 @@
"type": "SYMBOL", "type": "SYMBOL",
"name": "double_quoted_string" "name": "double_quoted_string"
}, },
{
"type": "SYMBOL",
"name": "single_quoted_string"
},
{ {
"type": "SYMBOL", "type": "SYMBOL",
"name": "unquoted_string" "name": "unquoted_string"
@ -806,7 +810,7 @@
}, },
{ {
"type": "SYMBOL", "type": "SYMBOL",
"name": "string_array" "name": "json_string_array"
} }
] ]
}, },
@ -999,6 +1003,10 @@
"type": "SYMBOL", "type": "SYMBOL",
"name": "double_quoted_string" "name": "double_quoted_string"
}, },
{
"type": "SYMBOL",
"name": "single_quoted_string"
},
{ {
"type": "SYMBOL", "type": "SYMBOL",
"name": "unquoted_string" "name": "unquoted_string"
@ -1041,6 +1049,10 @@
"type": "SYMBOL", "type": "SYMBOL",
"name": "double_quoted_string" "name": "double_quoted_string"
}, },
{
"type": "SYMBOL",
"name": "single_quoted_string"
},
{ {
"type": "SYMBOL", "type": "SYMBOL",
"name": "unquoted_string" "name": "unquoted_string"
@ -1122,6 +1134,10 @@
"type": "SYMBOL", "type": "SYMBOL",
"name": "double_quoted_string" "name": "double_quoted_string"
}, },
{
"type": "SYMBOL",
"name": "single_quoted_string"
},
{ {
"type": "SYMBOL", "type": "SYMBOL",
"name": "unquoted_string" "name": "unquoted_string"
@ -1440,52 +1456,6 @@
} }
] ]
}, },
"string_array": {
"type": "SEQ",
"members": [
{
"type": "STRING",
"value": "["
},
{
"type": "CHOICE",
"members": [
{
"type": "SEQ",
"members": [
{
"type": "SYMBOL",
"name": "double_quoted_string"
},
{
"type": "REPEAT",
"content": {
"type": "SEQ",
"members": [
{
"type": "STRING",
"value": ","
},
{
"type": "SYMBOL",
"name": "double_quoted_string"
}
]
}
}
]
},
{
"type": "BLANK"
}
]
},
{
"type": "STRING",
"value": "]"
}
]
},
"shell_command": { "shell_command": {
"type": "SEQ", "type": "SEQ",
"members": [ "members": [
@ -1589,6 +1559,96 @@
} }
] ]
}, },
"json_string_array": {
"type": "SEQ",
"members": [
{
"type": "STRING",
"value": "["
},
{
"type": "CHOICE",
"members": [
{
"type": "SEQ",
"members": [
{
"type": "SYMBOL",
"name": "json_string"
},
{
"type": "REPEAT",
"content": {
"type": "SEQ",
"members": [
{
"type": "STRING",
"value": ","
},
{
"type": "SYMBOL",
"name": "json_string"
}
]
}
}
]
},
{
"type": "BLANK"
}
]
},
{
"type": "STRING",
"value": "]"
}
]
},
"json_string": {
"type": "SEQ",
"members": [
{
"type": "STRING",
"value": "\""
},
{
"type": "REPEAT",
"content": {
"type": "CHOICE",
"members": [
{
"type": "IMMEDIATE_TOKEN",
"content": {
"type": "PATTERN",
"value": "[^\"\\\\]+"
}
},
{
"type": "ALIAS",
"content": {
"type": "SYMBOL",
"name": "json_escape_sequence"
},
"named": true,
"value": "escape_sequence"
}
]
}
},
{
"type": "STRING",
"value": "\""
}
]
},
"json_escape_sequence": {
"type": "IMMEDIATE_TOKEN",
"content": {
"type": "PATTERN",
"value": "\\\\(?:[\"\\\\/bfnrt]|u[0-9A-Fa-f]{4})"
}
},
"double_quoted_string": { "double_quoted_string": {
"type": "SEQ", "type": "SEQ",
"members": [ "members": [
@ -1609,8 +1669,17 @@
} }
}, },
{ {
"type": "SYMBOL", "type": "ALIAS",
"name": "escape_sequence" "content": {
"type": "SYMBOL",
"name": "double_quoted_escape_sequence"
},
"named": true,
"value": "escape_sequence"
},
{
"type": "STRING",
"value": "\\"
}, },
{ {
"type": "SYMBOL", "type": "SYMBOL",
@ -1625,6 +1694,47 @@
} }
] ]
}, },
"single_quoted_string": {
"type": "SEQ",
"members": [
{
"type": "STRING",
"value": "'"
},
{
"type": "REPEAT",
"content": {
"type": "CHOICE",
"members": [
{
"type": "IMMEDIATE_TOKEN",
"content": {
"type": "PATTERN",
"value": "[^'\\n\\\\]+"
}
},
{
"type": "ALIAS",
"content": {
"type": "SYMBOL",
"name": "single_quoted_escape_sequence"
},
"named": true,
"value": "escape_sequence"
},
{
"type": "STRING",
"value": "\\"
}
]
}
},
{
"type": "STRING",
"value": "'"
}
]
},
"unquoted_string": { "unquoted_string": {
"type": "REPEAT1", "type": "REPEAT1",
"content": { "content": {
@ -1634,7 +1744,7 @@
"type": "IMMEDIATE_TOKEN", "type": "IMMEDIATE_TOKEN",
"content": { "content": {
"type": "PATTERN", "type": "PATTERN",
"value": "[^\\s\\n\\\"\\\\\\$]+" "value": "[^\\s\\n\\\"'\\\\\\$]+"
} }
}, },
{ {
@ -1651,39 +1761,34 @@
] ]
} }
}, },
"escape_sequence": { "double_quoted_escape_sequence": {
"type": "IMMEDIATE_TOKEN", "type": "IMMEDIATE_TOKEN",
"content": { "content": {
"type": "SEQ", "type": "CHOICE",
"members": [ "members": [
{ {
"type": "STRING", "type": "STRING",
"value": "\\" "value": "\\\\"
}, },
{ {
"type": "CHOICE", "type": "STRING",
"members": [ "value": "\\\""
{ }
"type": "PATTERN", ]
"value": "[^xuU]" }
}, },
{ "single_quoted_escape_sequence": {
"type": "PATTERN", "type": "IMMEDIATE_TOKEN",
"value": "\\d{2,3}" "content": {
}, "type": "CHOICE",
{ "members": [
"type": "PATTERN", {
"value": "x[0-9a-fA-F]{2,}" "type": "STRING",
}, "value": "\\\\"
{ },
"type": "PATTERN", {
"value": "u[0-9a-fA-F]{4}" "type": "STRING",
}, "value": "\\'"
{
"type": "PATTERN",
"value": "U[0-9a-fA-F]{8}"
}
]
} }
] ]
} }

View file

@ -30,6 +30,10 @@
"type": "double_quoted_string", "type": "double_quoted_string",
"named": true "named": true
}, },
{
"type": "single_quoted_string",
"named": true
},
{ {
"type": "unquoted_string", "type": "unquoted_string",
"named": true "named": true
@ -57,11 +61,11 @@
"required": true, "required": true,
"types": [ "types": [
{ {
"type": "shell_command", "type": "json_string_array",
"named": true "named": true
}, },
{ {
"type": "string_array", "type": "shell_command",
"named": true "named": true
} }
] ]
@ -124,11 +128,11 @@
"required": true, "required": true,
"types": [ "types": [
{ {
"type": "shell_command", "type": "json_string_array",
"named": true "named": true
}, },
{ {
"type": "string_array", "type": "shell_command",
"named": true "named": true
} }
] ]
@ -171,6 +175,10 @@
"type": "double_quoted_string", "type": "double_quoted_string",
"named": true "named": true
}, },
{
"type": "single_quoted_string",
"named": true
},
{ {
"type": "unquoted_string", "type": "unquoted_string",
"named": true "named": true
@ -363,6 +371,36 @@
] ]
} }
}, },
{
"type": "json_string",
"named": true,
"fields": {},
"children": {
"multiple": true,
"required": false,
"types": [
{
"type": "escape_sequence",
"named": true
}
]
}
},
{
"type": "json_string_array",
"named": true,
"fields": {},
"children": {
"multiple": true,
"required": false,
"types": [
{
"type": "json_string",
"named": true
}
]
}
},
{ {
"type": "label_instruction", "type": "label_instruction",
"named": true, "named": true,
@ -400,6 +438,10 @@
"type": "double_quoted_string", "type": "double_quoted_string",
"named": true "named": true
}, },
{
"type": "single_quoted_string",
"named": true
},
{ {
"type": "unquoted_string", "type": "unquoted_string",
"named": true "named": true
@ -568,6 +610,10 @@
"multiple": true, "multiple": true,
"required": true, "required": true,
"types": [ "types": [
{
"type": "json_string_array",
"named": true
},
{ {
"type": "mount_param", "type": "mount_param",
"named": true "named": true
@ -579,10 +625,6 @@
{ {
"type": "shell_command", "type": "shell_command",
"named": true "named": true
},
{
"type": "string_array",
"named": true
} }
] ]
} }
@ -624,7 +666,22 @@
"required": true, "required": true,
"types": [ "types": [
{ {
"type": "string_array", "type": "json_string_array",
"named": true
}
]
}
},
{
"type": "single_quoted_string",
"named": true,
"fields": {},
"children": {
"multiple": true,
"required": false,
"types": [
{
"type": "escape_sequence",
"named": true "named": true
} }
] ]
@ -736,21 +793,6 @@
] ]
} }
}, },
{
"type": "string_array",
"named": true,
"fields": {},
"children": {
"multiple": true,
"required": false,
"types": [
{
"type": "double_quoted_string",
"named": true
}
]
}
},
{ {
"type": "unquoted_string", "type": "unquoted_string",
"named": true, "named": true,
@ -801,11 +843,11 @@
"required": true, "required": true,
"types": [ "types": [
{ {
"type": "path", "type": "json_string_array",
"named": true "named": true
}, },
{ {
"type": "string_array", "type": "path",
"named": true "named": true
} }
] ]
@ -842,6 +884,10 @@
"type": "$", "type": "$",
"named": false "named": false
}, },
{
"type": "'",
"named": false
},
{ {
"type": ",", "type": ",",
"named": false "named": false
@ -958,6 +1004,10 @@
"type": "[", "type": "[",
"named": false "named": false
}, },
{
"type": "\\",
"named": false
},
{ {
"type": "\\\n", "type": "\\\n",
"named": false "named": false

File diff suppressed because it is too large Load diff