Skip to content

Commit

Permalink
fix: align identifier and string parsing and rendering with CSS stand…
Browse files Browse the repository at this point in the history
…ards, closes #36, closes #37
  • Loading branch information
mdevils committed Dec 15, 2023
1 parent 33413e8 commit ac0dbc0
Show file tree
Hide file tree
Showing 5 changed files with 117 additions and 45 deletions.
47 changes: 37 additions & 10 deletions src/parser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ import {
getXmlOptions,
SyntaxDefinition
} from './syntax-definitions.js';
import {digitsChars, isHex, isIdent, isIdentStart, quoteChars, stringEscapeChars, whitespaceChars} from './utils.js';
import {digitsChars, isHex, isIdent, isIdentStart, maxHexLength, quoteChars, whitespaceChars} from './utils.js';

/**
* This error is thrown when parser encounters problems in CSS string.
Expand Down Expand Up @@ -225,17 +225,23 @@ export function createParser(
}
}

/**
 * Reads the hex digits of an escape sequence starting at the current
 * character and returns the character they encode.
 *
 * NOTE(review): this listing shows two loop headers and two
 * trailing-whitespace skips with unbalanced braces, which looks like the
 * pre-change and post-change lines of the commit displayed together.
 * Confirm against the repository file before relying on the exact statement
 * order here.
 *
 * @see https://www.w3.org/TR/css-syntax/#hex-digit-diagram
 * @returns The result of `String.fromCharCode` applied to the parsed hex value.
 */
function parseHex() {
// First hex digit; presumably readAndNext() returns the current character and
// advances — verify against its definition. parseString only calls parseHex
// after `isHex(chr)` has succeeded.
let hex = readAndNext();
while (isHex(chr)) {
// Number of hex digits collected so far (the first one was read above).
let count = 1;
// Keep collecting hex digits, but stop once `maxHexLength` have been read
// even if more hex characters follow.
while (isHex(chr) && count < maxHexLength) {
hex += readAndNext();
count++;
}
if (is(' ')) {
next();
}
// Skip at most one whitespace unit following the digits (CR LF counts as one).
skipSingleWhitespace();
// NOTE(review): String.fromCharCode keeps only the low 16 bits of its
// argument, so an escape above U+FFFF (e.g. \1F600) does not produce the
// intended code point. String.fromCodePoint looks like the intended call, but
// it throws RangeError above 0x10FFFF, so it would need a range guard — confirm.
return String.fromCharCode(parseInt(hex, 16));
}

/**
* @see https://www.w3.org/TR/css-syntax/#string-token-diagram
*/
function parseString(quote: string): string {
let result = '';
pass(quote);
Expand All @@ -245,25 +251,33 @@ export function createParser(
return result;
} else if (is('\\')) {
next();
let esc;
if (is(quote)) {
result += quote;
} else if ((esc = stringEscapeChars[chr]) !== undefined) {
result += esc;
next();
} else if (chr === '\n' || chr === '\f') {
next();
} else if (chr === '\r') {
next();
if (is('\n')) {
next();
}
} else if (isHex(chr)) {
result += parseHex();
continue;
} else {
result += chr;
next();
}
} else {
result += chr;
next();
}
next();
}
return result;
}

/**
* @see https://www.w3.org/TR/css-syntax/#ident-token-diagram
*/
function parseIdentifier(): string | null {
if (!isIdentStart(chr)) {
return null;
Expand Down Expand Up @@ -321,6 +335,19 @@ export function createParser(
return result.trim();
}

/**
 * Advances past at most one whitespace unit at the current position.
 *
 * A space, tab, form feed or line feed is consumed on its own. A carriage
 * return is consumed together with a line feed that directly follows it, so
 * CR LF counts as a single unit. Any other character leaves the position
 * untouched.
 */
function skipSingleWhitespace() {
    switch (chr) {
        case '\r':
            // Consume the CR; a directly following LF is handled after the switch.
            next();
            break;
        case ' ':
        case '\t':
        case '\f':
        case '\n':
            next();
            return;
    }
    // Only reachable with an LF when it came right after a CR consumed above.
    if (chr === '\n') {
        next();
    }
}

function skipWhitespace() {
while (whitespaceChars[chr]) {
next();
Expand Down
4 changes: 2 additions & 2 deletions src/render.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import {AstEntity, AstNamespaceName, AstNoNamespace, AstSubstitution, AstWildcardNamespace} from './ast.js';
import {escapeIdentifier, escapeStr} from './utils.js';
import {escapeIdentifier, escapeString} from './utils.js';

const errorPrefix = `css-selector-parser render error: `;

Expand Down Expand Up @@ -96,7 +96,7 @@ export function render(entity: AstEntity): string {
if (operator && value) {
result += operator;
if (value.type === 'String') {
result += escapeStr(value.value);
result += escapeString(value.value);
} else if (value.type === 'Substitution') {
result += renderSubstitution(value);
} else {
Expand Down
49 changes: 20 additions & 29 deletions src/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ export function isHex(c: string) {
return (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F') || (c >= '0' && c <= '9');
}

export const identSpecialChars: Record<string, boolean> = {
export const identEscapeChars: Record<string, boolean> = {
'!': true,
'"': true,
'#': true,
Expand Down Expand Up @@ -49,20 +49,12 @@ export const identSpecialChars: Record<string, boolean> = {
'~': true
};

export const strReplacementsRev: Record<string, string> = {
'\n': '\\n',
'\r': '\\r',
'\t': '\\t',
'\f': '\\f',
'\v': '\\v'
};

export const stringEscapeChars: Record<string, string> = {
n: '\n',
r: '\r',
t: '\t',
f: '\f',
'\\': '\\'
/**
 * Lookup table of the control characters that are flagged for escaping when a
 * string value is rendered: line feed, carriage return, tab, form feed and
 * vertical tab. Each listed character maps to `true`; any other key is absent.
 */
export const stringRenderEscapeChars: Record<string, boolean> = ['\n', '\r', '\t', '\f', '\v'].reduce<
    Record<string, boolean>
>((table, controlChar) => {
    table[controlChar] = true;
    return table;
}, {});

export const whitespaceChars: Record<string, boolean> = {
Expand Down Expand Up @@ -91,24 +83,26 @@ export const digitsChars: Record<string, boolean> = {
9: true
};

/**
 * Upper bound on the number of hex digits read for a single escape sequence.
 * The parser's `parseHex` digit loop runs while `count < maxHexLength`, with
 * `count` starting at 1 for the first digit, so at most this many digits are
 * consumed.
 *
 * @see https://www.w3.org/TR/css-syntax/#hex-digit-diagram
 */
export const maxHexLength = 6;

export function escapeIdentifier(s: string) {
const len = s.length;
let result = '';
let i = 0;
while (i < len) {
const chr = s.charAt(i);
if (identSpecialChars[chr]) {
if (identEscapeChars[chr] || (chr === '-' && i === 1 && s.charAt(0) === '-')) {
result += '\\' + chr;
} else {
if (
!(
chr === '_' ||
chr === '-' ||
(chr >= 'A' && chr <= 'Z') ||
(chr >= 'a' && chr <= 'z') ||
(i !== 0 && chr >= '0' && chr <= '9')
)
chr === '-' ||
chr === '_' ||
(chr >= 'A' && chr <= 'Z') ||
(chr >= 'a' && chr <= 'z') ||
(chr >= '0' && chr <= '9' && i !== 0 && !(i === 1 && s.charAt(0) === '-'))
) {
result += chr;
} else {
let charCode = chr.charCodeAt(0);
if ((charCode & 0xf800) === 0xd800) {
const extraCharCode = s.charCodeAt(i++);
Expand All @@ -118,28 +112,25 @@ export function escapeIdentifier(s: string) {
charCode = ((charCode & 0x3ff) << 10) + (extraCharCode & 0x3ff) + 0x10000;
}
result += '\\' + charCode.toString(16) + ' ';
} else {
result += chr;
}
}
i++;
}
return result.trim();
}

export function escapeStr(s: string) {
export function escapeString(s: string) {
const len = s.length;
let result = '';
let i = 0;
let replacement: string;
while (i < len) {
let chr = s.charAt(i);
if (chr === '"') {
chr = '\\"';
} else if (chr === '\\') {
chr = '\\\\';
} else if ((replacement = strReplacementsRev[chr]) !== undefined) {
chr = replacement;
} else if (stringRenderEscapeChars[chr]) {
chr = '\\' + chr.charCodeAt(0).toString(16) + (i === len - 1 ? '' : ' ');
}
result += chr;
i++;
Expand Down
46 changes: 46 additions & 0 deletions test/parser.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,24 @@ describe('parse()', () => {
})
);
});
it('should parse an identifier with hex-encoded characters', () => {
    // Every input below spells the id "hello<LF>world" with the line feed
    // written as the hex escape \a (or its zero-padded form).
    const astSelector = ast.selector({
        rules: [
            ast.rule({
                items: [ast.id({name: 'hello\nworld'})]
            })
        ]
    });
    // The escape ends at the first non-hex character ("w").
    expect(parse('#hello\\aworld')).toEqual(astSelector);
    // A single whitespace right after the escape is consumed with it:
    // space, tab, form feed, LF, CR and CR LF respectively.
    expect(parse('#hello\\a world')).toEqual(astSelector);
    expect(parse('#hello\\a\tworld')).toEqual(astSelector);
    expect(parse('#hello\\a\fworld')).toEqual(astSelector);
    expect(parse('#hello\\a\nworld')).toEqual(astSelector);
    expect(parse('#hello\\a\rworld')).toEqual(astSelector);
    expect(parse('#hello\\a\r\nworld')).toEqual(astSelector);
    // Zero-padded to six hex digits.
    expect(parse('#hello\\00000aworld')).toEqual(astSelector);
});
it('should fail on an identifier starting with multiple hyphens', () => {
expect(() => parse('#--id')).toThrow('Identifiers cannot start with two hyphens with strict mode on.');
});
Expand Down Expand Up @@ -584,6 +602,34 @@ describe('parse()', () => {
})
);
});
it('should properly parse escapes', () => {
    // Every input below is an attribute string whose value is
    // "hello<LF>world", with the line feed written as a hex escape.
    const astSelector = ast.selector({
        rules: [
            ast.rule({
                items: [
                    ast.attribute({
                        name: 'attr',
                        operator: '=',
                        value: ast.string({
                            value: 'hello\nworld'
                        })
                    })
                ]
            })
        ]
    });
    // The escape ends at the first non-hex character ("w").
    expect(parse('[attr="hello\\aworld"]')).toEqual(astSelector);
    // A backslash before a non-hex character yields that character itself.
    expect(parse('[attr="hell\\o\\aworld"]')).toEqual(astSelector);
    // A backslash directly before a newline contributes nothing to the value.
    expect(parse('[attr="hell\\\no\\aworld"]')).toEqual(astSelector);
    // A single whitespace right after the escape is consumed with it:
    // space, tab, form feed, LF, CR and CR LF respectively.
    expect(parse('[attr="hello\\a world"]')).toEqual(astSelector);
    expect(parse('[attr="hello\\a\tworld"]')).toEqual(astSelector);
    expect(parse('[attr="hello\\a\fworld"]')).toEqual(astSelector);
    expect(parse('[attr="hello\\a\nworld"]')).toEqual(astSelector);
    expect(parse('[attr="hello\\a\rworld"]')).toEqual(astSelector);
    expect(parse('[attr="hello\\a\r\nworld"]')).toEqual(astSelector);
    // Zero-padded to six hex digits.
    expect(parse('[attr="hello\\00000aworld"]')).toEqual(astSelector);
});
it('should properly parse single quotes', () => {
expect(parse("[ attr = 'val\\'\\ue\\20' i ]")).toEqual(
ast.selector({
Expand Down
16 changes: 12 additions & 4 deletions test/render.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ const testCases = {
'tagname[x="y \\""]': 'tagname[x="y \\""]',
'tagname[x="y\'"]': 'tagname[x="y\'"]',
"tagname[x='y \\'']": 'tagname[x="y \'"]',
"div[role='a\\00000ab']": 'div[role="a\\a b"]',
"div[role='\\a']": 'div[role="\\a"]',
'tag1 tag2': 'tag1 tag2',
'ns1|tag1': 'ns1|tag1',
'|tag1': '|tag1',
Expand Down Expand Up @@ -85,7 +87,7 @@ const testCases = {
'.cls\\n\\\\name\\.\\[': '.clsn\\\\name\\.\\[',
'[attr\\n\\\\name\\.\\[=a1]': '[attrn\\\\name\\.\\[="a1"]',
':pseudo\\n\\\\name\\.\\[\\((123)': ':pseudon\\\\name\\.\\[\\((\\31 23)',
'[attr="val\nval"]': '[attr="val\\nval"]',
'[attr="val\nval"]': '[attr="val\\a val"]',
'[attr="val\\"val"]': '[attr="val\\"val"]',
'[attr="val\\00a0val"]': '[attr="val val"]',
'tag\\00a0 tag': 'tag\\a0 tag',
Expand All @@ -97,9 +99,15 @@ const testCases = {
'#\\3123': '#\\3123',
'#\\31 23': '#\\31 23',
'#\\00031 23': '#\\31 23',
'#\\0003123': '#\\3123',
'#\\0004123': '#\\4123',
'#\\0o': '#\\0 o'
'#\\003123': '#\\3123',
'#\\0003123': '#\\312 3',
'#\\004123': '#\\4123',
'#\\0004123': '#\\412 3',
'#\\0o': '#\\0 o',
'#--a': '#-\\-a',
'#--\\30': '#-\\-0',
'#-\\30': '#-\\30',
'#\\30': '#\\30'
};

describe('render()', () => {
Expand Down

0 comments on commit ac0dbc0

Please sign in to comment.