[clojure] Improve Clojure syntax highlighting.

This commit:
- Updates the keyword list with public symbols from `clojure.core` for
Clojure 1.9.
- Improves syntax highlighting of symbols, strings, character literals,
and number literals.
- Adds rules for syntax highlighting Clojure keywords and reader macro
characters.
- Fixes an issue where forms following a single semicolon (`;`) were not
marked as comments.
- Fixes an issue where parts of multi-line strings were not correctly
syntax highlighted.
- Adds additional test coverage for tokenization code.
Abdussalam Abdurrahman 7 years ago
parent b276d3ea83
commit 073f212f63

@ -5,86 +5,775 @@
'use strict';
import { testTokenization } from '../test/testRunner';
import {ITestItem, testTokenization} from '../test/testRunner';
testTokenization('clojure', [
// Keywords
line: 'defmacro some',
tokens: [
{ startIndex: 0, type: 'keyword.clj' },
{ startIndex: 8, type: 'white.clj' },
{ startIndex: 9, type: 'variable.clj' },
const specialForms = [
line: 'comment "text comment"',
tokens: [
{ startIndex: 0, type: 'keyword.clj' },
{ startIndex: 7, type: 'white.clj' },
{ startIndex: 8, type: 'string.clj' },
line: 'in-ns "user',
tokens: [
{ startIndex: 0, type: 'keyword.clj' },
{ startIndex: 5, type: 'white.clj' },
{ startIndex: 6, type: 'string.clj' },
const coreSymbols = [
// comments
line: ';; comment',
tokens: [{ startIndex: 0, type: 'comment.clj' }],
function createTestCases(specialForms: string[], type: string): ITestItem[] {
const testCases = [];
// strings
line: '"\\n string "',
for (const specialForm of specialForms) {
line: `${specialForm}`,
tokens: [
{ startIndex: 0, type: 'string.clj' },
{ startIndex: 1, type: 'string.escape.clj' },
{ startIndex: 3, type: 'string.clj' },
{startIndex: 0, type: `${type}.clj`},
line: '" string \\',
tokens: [{ startIndex: 0, type: 'string.clj' }],
line: 'multiline',
tokens: [{ startIndex: 0, type: 'string.clj' }],
line: ' ',
tokens: [
// previous line needs to be terminated with \
{ startIndex: 0, type: 'white.clj' },
return testCases;
testTokenization('clojure', [
// special forms
createTestCases(specialForms, 'keyword'),
// core symbols
createTestCases(coreSymbols, 'keyword'),
// atoms
createTestCases(['false', 'nil', 'true'], 'constant'),
// keywords
createTestCases([':foo', '::bar', ':foo/bar', ':foo.bar/baz'], 'constant'),
// numbers
line: '1e2',
tokens: [{ startIndex: 0, type: 'number.float.clj' }],
line: '0xff',
tokens: [{ startIndex: 0, type: 'number.hex.clj' }],
'42', '+42', '-421',
'42N', '+42N', '-42N',
'0.42', '+0.42', '-0.42',
'42M', '+42M', '-42M',
'42.42M', '+42.42M', '-42.42M',
'1/42', '+1/42', '-1/42',
'0x42af', '+0x42af', '-0x42af',
'0x42AF', '+0x42AF', '-0x42AF',
'1e2', '1e+2', '1e-2',
'+1e2', '+1e+2', '+1e-2',
'-1e2', '-1e+2', '-1e-2',
'-1.0e2', '-0.1e+2', '-1.01e-2',
'1E2', '1E+2', '1E-2',
'+1E2', '+1E+2', '+1E-2',
'-1E2', '-1E+2', '-1E-2',
'-1.0E2', '-0.1E+2', '-1.01E-2',
'2r101010', '+2r101010', '-2r101010',
'2r101010', '+2r101010', '-2r101010',
'8r52', '+8r52', '-8r52',
'36rhello', '+36rhello', '-36rhello',
'36rz', '+36rz', '-36rz',
'36rZ', '+36rZ', '-36rZ',
], 'number'),
// characters
], 'string'),
// strings
'\"I\'m a little teapot.\"',
'\"I\'m a \\\"little\\\" teapot.\"',
'\"I\'m', // this is
'a little', // a multi-line
'teapot.\"' // string
], 'string'),
// comments
'; this is an in-line comment.',
';; this is a line comment.',
], 'comment'),
// reader macro characters
], 'meta')

@ -13,20 +13,20 @@ export const conf: IRichLanguageConfiguration = {
lineComment: ';;',
brackets: [['(', ')'], ['{', '}'], ['[', ']']],
brackets: [['(', ')'], ['[', ']'], ['{', '}']],
autoClosingPairs: [
{ open: '{', close: '}' },
{ open: '[', close: ']' },
{ open: '(', close: ')' },
{ open: '"', close: '"' },
{open: '(', close: ')'},
{open: '[', close: ']'},
{open: '{', close: '}'},
{open: '"', close: '"'},
surroundingPairs: [
{ open: '{', close: '}' },
{ open: '[', close: ']' },
{ open: '(', close: ')' },
{ open: '"', close: '"' },
{open: '(', close: ')'},
{open: '[', close: ']'},
{open: '{', close: '}'},
{open: '"', close: '"'},
@ -36,192 +36,740 @@ export const language = <ILanguage>{
tokenPostfix: '.clj',
brackets: [
{ open: '(', close: ')', token: 'delimiter.parenthesis' },
{ open: '{', close: '}', token: 'delimiter.curly' },
{ open: '[', close: ']', token: 'delimiter.square' },
{open: '(', close: ')', token: 'delimiter.parenthesis'},
{open: '{', close: '}', token: 'delimiter.curly'},
{open: '[', close: ']', token: 'delimiter.square'},
keywords: [
specialForms: [
coreSymbols: [
constants: ['true', 'false', 'nil'],
operators: [
symbolCharacter: /[!#'*+\-.\/:<=>?_\w\xa1-\uffff]/,
numbers: /^[+\-]?\d+(?:(?:N|(?:[eE][+\-]?\d+))|(?:\.?\d*(?:M|(?:[eE][+\-]?\d+))?)|\/\d+|[xX][0-9a-fA-F]+|r[0-9a-zA-Z]+)?/,
characters: /\\(?:@symbolCharacter+|[\\"()\[\]{}]|x[0-9A-Fa-f]{4}|u[0-9A-Fa-f]{4}|o[0-7]{3})/,
tokenizer: {
root: [
[/0[xX][0-9a-fA-F]+/, 'number.hex'],
[/[+-]?\d+(?:(?:\.\d*)?(?:[eE][+-]?\d+)?)?/, 'number.float'],
// numbers
[/@numbers/, 'number'],
['keyword', 'white', 'variable'],
// characters
[/@characters/, 'string'],
cases: {
'@keywords': 'keyword',
'@constants': 'constant',
'@operators': 'operators',
'@default': 'identifier',
// brackets
[/[()\[\]{}]/, '@brackets'],
// regular expressions
[/\/#"(?:\.|(?:")|[^"\n])*"\/g/, 'regexp'],
// inline comments
[/;.*$/, 'comment'],
// reader macro characters
[/[#'@^`~]/, 'meta'],
// keywords
[/:@symbolCharacter+/, 'constant'],
// symbols
[/@symbolCharacter+/, {
cases: {
'@specialForms': 'keyword',
'@coreSymbols': 'keyword',
'@constants': 'constant',
'@default': 'identifier',
[/\/#"(?:\.|(?:\")|[^""\n])*"\/g/, 'regexp'],
{ include: '@whitespace' },
{ include: '@strings' },
{include: '@whitespace'},
{include: '@string'},
whitespace: [[/[ \t\r\n]+/, 'white'], [/;;.*$/, 'comment']],
whitespace: [
[/[ \t\r\n]+/, 'white'],
[/;;.*$/, 'comment']],
strings: [
[/"$/, 'string', '@popall'],
[/"(?=.)/, 'string', '@multiLineString'],
string: [
[/"/, 'string', '@multiLineString'],
multiLineString: [
[/\\./, 'string.escape'],
[/"/, 'string', '@popall'],
[/.(?=.*")/, 'string'],
[/.*\\$/, 'string'],
[/.*$/, 'string', '@popall'],
[/[^\\"$]+/, 'string'],
[/@characters/, 'string'],
[/"/, 'string', '@pop']
