Merge pull request #56 from finalfantasia/master

[clojure] Improve the regular expressions for various symbols
pull/2748/head
Alexandru Dima 6 years ago committed by GitHub
commit f9595842a3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@@ -8,7 +8,7 @@ import { registerLanguage } from '../_.contribution';
registerLanguage({ registerLanguage({
id: 'clojure', id: 'clojure',
extensions: ['.clj', '.clojure'], extensions: ['.clj', '.cljs', '.cljc', '.edn'],
aliases: ['clojure', 'Clojure'], aliases: ['clojure', 'Clojure'],
loader: () => import('./clojure') loader: () => import('./clojure')
}); });

@@ -754,13 +754,72 @@ testTokenization('clojure', [
], 'string'), ], 'string'),
// strings // strings
createTestCases([ [
'\"I\'m a little teapot.\"', {
'\"I\'m a \\\"little\\\" teapot.\"', line: '"I\'m a little teapot."',
'\"I\'m', // this is tokens: [
'a little', // a multi-line {startIndex: 0, type: 'string.clj'},
'teapot.\"' // string ]
], 'string'), },
{
line: '"I\'m a \\"little\\" teapot."',
tokens: [
{startIndex: 0, type: 'string.clj'},
{startIndex: 7, type: 'string.escape.clj'},
{startIndex: 9, type: 'string.clj'},
{startIndex: 15, type: 'string.escape.clj'},
{startIndex: 17, type: 'string.clj'},
]
}
],
// multi-line strings
[
{
line: '"I\'m',
tokens: [
{startIndex: 0, type: 'string.clj'},
]
},
{
line: '\\"a little\\"',
tokens: [
{startIndex: 0, type: 'string.escape.clj'},
{startIndex: 2, type: 'string.clj'},
{startIndex: 10, type: 'string.escape.clj'},
]
},
{
line: 'teapot."',
tokens: [
{startIndex: 0, type: 'string.clj'},
]
}
],
// strings with other escapes in them (\" \' \\ \b \f \n \r \t)
[{
line: '"the escape \\" \\\' \\\\ \\b \\f \\n \\r \\t characters"',
tokens: [
{startIndex: 0, type: 'string.clj'},
{startIndex: 12, type: 'string.escape.clj'},
{startIndex: 14, type: 'string.clj'},
{startIndex: 15, type: 'string.escape.clj'},
{startIndex: 17, type: 'string.clj'},
{startIndex: 18, type: 'string.escape.clj'},
{startIndex: 20, type: 'string.clj'},
{startIndex: 21, type: 'string.escape.clj'},
{startIndex: 23, type: 'string.clj'},
{startIndex: 24, type: 'string.escape.clj'},
{startIndex: 26, type: 'string.clj'},
{startIndex: 27, type: 'string.escape.clj'},
{startIndex: 29, type: 'string.clj'},
{startIndex: 30, type: 'string.escape.clj'},
{startIndex: 32, type: 'string.clj'},
{startIndex: 33, type: 'string.escape.clj'},
{startIndex: 35, type: 'string.clj'},
]
}],
// comments // comments
createTestCases([ createTestCases([
@@ -792,6 +851,30 @@ testTokenization('clojure', [
{startIndex: 0, type: 'comment.clj'}, {startIndex: 0, type: 'comment.clj'},
], ],
}, },
{
line: '(comments foo bar)',
tokens: [
{startIndex: 0, type: 'delimiter.parenthesis.clj'},
{startIndex: 1, type: 'identifier.clj'},
{startIndex: 9, type: 'white.clj'},
{startIndex: 10, type: 'identifier.clj'},
{startIndex: 13, type: 'white.clj'},
{startIndex: 14, type: 'identifier.clj'},
{startIndex: 17, type: 'delimiter.parenthesis.clj'},
]
},
{
line: '(comment6 foo bar)',
tokens: [
{startIndex: 0, type: 'delimiter.parenthesis.clj'},
{startIndex: 1, type: 'identifier.clj'},
{startIndex: 9, type: 'white.clj'},
{startIndex: 10, type: 'identifier.clj'},
{startIndex: 13, type: 'white.clj'},
{startIndex: 14, type: 'identifier.clj'},
{startIndex: 17, type: 'delimiter.parenthesis.clj'},
]
},
{ {
line: '(comment foo', line: '(comment foo',
tokens: [ tokens: [

@@ -13,20 +13,24 @@ export const conf: IRichLanguageConfiguration = {
lineComment: ';;', lineComment: ';;',
}, },
brackets: [['(', ')'], ['[', ']'], ['{', '}']], brackets: [
['[', ']'],
['(', ')'],
['{', '}']
],
autoClosingPairs: [ autoClosingPairs: [
{open: '(', close: ')'},
{open: '[', close: ']'}, {open: '[', close: ']'},
{open: '{', close: '}'},
{open: '"', close: '"'}, {open: '"', close: '"'},
{open: '(', close: ')'},
{open: '{', close: '}'},
], ],
surroundingPairs: [ surroundingPairs: [
{open: '(', close: ')'},
{open: '[', close: ']'}, {open: '[', close: ']'},
{open: '{', close: '}'},
{open: '"', close: '"'}, {open: '"', close: '"'},
{open: '(', close: ')'},
{open: '{', close: '}'},
], ],
}; };
@@ -36,11 +40,26 @@ export const language = <ILanguage>{
tokenPostfix: '.clj', tokenPostfix: '.clj',
brackets: [ brackets: [
{open: '[', close: ']', token: 'delimiter.square'},
{open: '(', close: ')', token: 'delimiter.parenthesis'}, {open: '(', close: ')', token: 'delimiter.parenthesis'},
{open: '{', close: '}', token: 'delimiter.curly'}, {open: '{', close: '}', token: 'delimiter.curly'},
{open: '[', close: ']', token: 'delimiter.square'},
], ],
constants: ['true', 'false', 'nil'],
// delimiters: /[\\\[\]\s"#'(),;@^`{}~]|$/,
numbers: /^(?:[+\-]?\d+(?:(?:N|(?:[eE][+\-]?\d+))|(?:\.?\d*(?:M|(?:[eE][+\-]?\d+))?)|\/\d+|[xX][0-9a-fA-F]+|r[0-9a-zA-Z]+)?(?=[\\\[\]\s"#'(),;@^`{}~]|$))/,
characters: /^(?:\\(?:backspace|formfeed|newline|return|space|tab|o[0-7]{3}|u[0-9A-Fa-f]{4}|x[0-9A-Fa-f]{4}|.)?(?=[\\\[\]\s"(),;@^`{}~]|$))/,
escapes: /^\\(?:["'\\bfnrt]|x[0-9A-Fa-f]{1,4}|u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})/,
// simple-namespace := /^[^\\\/\[\]\d\s"#'(),;@^`{}~][^\\\[\]\s"(),;@^`{}~]*/
// simple-symbol := /^(?:\/|[^\\\/\[\]\d\s"#'(),;@^`{}~][^\\\[\]\s"(),;@^`{}~]*)/
// qualified-symbol := (<simple-namespace>(<.><simple-namespace>)*</>)?<simple-symbol>
qualifiedSymbols: /^(?:(?:[^\\\/\[\]\d\s"#'(),;@^`{}~][^\\\[\]\s"(),;@^`{}~]*(?:\.[^\\\/\[\]\d\s"#'(),;@^`{}~][^\\\[\]\s"(),;@^`{}~]*)*\/)?(?:\/|[^\\\/\[\]\d\s"#'(),;@^`{}~][^\\\[\]\s"(),;@^`{}~]*)*(?=[\\\[\]\s"(),;@^`{}~]|$))/,
specialForms: [ specialForms: [
'.', '.',
'catch', 'catch',
@@ -712,14 +731,6 @@ export const language = <ILanguage>{
'zipmap', 'zipmap',
], ],
constants: ['true', 'false', 'nil'],
symbolCharacter: /[!#'*+\-.\/:<=>?_\w\xa1-\uffff]/,
numbers: /[+\-]?\d+(?:(?:N|(?:[eE][+\-]?\d+))|(?:\.?\d*(?:M|(?:[eE][+\-]?\d+))?)|\/\d+|[xX][0-9a-fA-F]+|r[0-9a-zA-Z]+)?/,
characters: /\\(?:backspace|formfeed|newline|return|space|tab|x[0-9A-Fa-f]{4}|u[0-9A-Fa-f]{4}|o[0-7]{3}|@symbolCharacter|[\\"()\[\]{}])/,
tokenizer: { tokenizer: {
root: [ root: [
// whitespaces and comments // whitespaces and comments
@@ -743,32 +754,29 @@ export const language = <ILanguage>{
// reader macro characters // reader macro characters
[/[#'@^`~]/, 'meta'], [/[#'@^`~]/, 'meta'],
// keywords
[/:@symbolCharacter+/, 'constant'],
// symbols // symbols
[/@symbolCharacter+/, { [/@qualifiedSymbols/, {
cases: { cases: {
'@specialForms': 'keyword', '^:.+$': 'constant', // Clojure keywords (e.g., `:foo/bar`)
'@coreSymbols': 'keyword', '@specialForms': 'keyword',
'@constants': 'constant', '@coreSymbols': 'keyword',
'@default': 'identifier', '@constants': 'constant',
'@default': 'identifier',
},
}, },
},
], ],
], ],
whitespace: [ whitespace: [
[/\s+/, 'white'], [/\s+/, 'white'],
[/;.*$/, 'comment'], [/;.*$/, 'comment'],
[/\(comment/, 'comment', '@comment'], [/\(comment\b/, 'comment', '@comment'],
], ],
comment: [ comment: [
[/\(/, 'comment', '@push'], [/\(/, 'comment', '@push'],
[/\)/, 'comment', '@pop'], [/\)/, 'comment', '@pop'],
[/[^)]/, 'comment'], [/[^()]/, 'comment'],
], ],
string: [ string: [
@@ -776,9 +784,9 @@ export const language = <ILanguage>{
], ],
multiLineString: [ multiLineString: [
[/[^\\"]+/, 'string'], [/"/, 'string', '@popall'],
[/@characters/, 'string'], [/@escapes/, 'string.escape'],
[/"/, 'string', '@pop'] [/./, 'string']
], ],
}, },
}; };

Loading…
Cancel
Save