Skip to content

Commit a63143a

Browse files
Copilotjogibear9988
andcommitted
perf: use charCodeAt comparisons and sticky regexes for fast lexing
Inspired by the es-module-shims fast lexer approach:
- Replace string character comparisons with charCodeAt() numeric comparisons
- Convert all ^-anchored regexes to y-flag (sticky) regexes to avoid creating temporary substring slices on every matchRegex call
- Define character code constants (Ch_*) for readability
- Use charCodeAt in stringSearch utilities for hot-path comparisons
- Add _advanceRange() method to avoid intermediate string allocation

Co-authored-by: jogibear9988 <364896+jogibear9988@users.noreply.github.com>
1 parent 9d9cdd4 commit a63143a

3 files changed

Lines changed: 178 additions & 86 deletions

File tree

src/parse/index.ts

Lines changed: 68 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -36,14 +36,40 @@ import {
3636
indexOfArrayWithBracketAndQuoteSupport,
3737
splitWithBracketAndQuoteSupport,
3838
} from '../utils/stringSearch';
39-
import { Lexer } from './lexer';
39+
import { Ch_AT, Ch_CLOSE, Ch_SLASH, Ch_STAR, Lexer } from './lexer';
4040

4141
// http://www.w3.org/TR/CSS21/grammar.html
4242
// https://github.com/visionmedia/css-parse/pull/49#issuecomment-30088027
4343
// New rule => https://www.w3.org/TR/CSS22/syndata.html#comments
4444
// [^] matches any character, including line terminators (equivalent to [\s\S])
4545
const commentRegex = /\/\*[^]*?(?:\*\/|$)/g;
4646

47+
// ─── Sticky regexes (y-flag) ────────────────────────────────────────────────
48+
// Using sticky regexes matched against the full input string avoids
49+
// creating a temporary substring on every `matchRegex` call.
50+
const re_comment = /\/\*[^]*?\*\//y;
51+
const re_propName = /(\*?[-#/*\\\w]+(\[[0-9a-z_-]+\])?)\s*/y;
52+
const re_keyframeValue = /((\d+\.\d+|\.\d+|\d+)%?|[a-z]+)\s*/y;
53+
const re_keyframesName = /@([-\w]+)?keyframes\s*/y;
54+
const re_identifier = /([-\w]+)\s*/y;
55+
const re_supports = /@supports *([^{]+)/y;
56+
const re_host = /@host\s*/y;
57+
const re_container = /@container *([^{]+)/y;
58+
const re_layer = /@layer *([^{;@]+)/y;
59+
const re_media = /@media *([^{]+)/y;
60+
const re_customMedia = /@custom-media\s+(--\S+)\s+([^{;\s][^{;]*);/y;
61+
const re_page = /@page */y;
62+
const re_document = /@([-\w]+)?document *([^{]+)/y;
63+
const re_fontFace = /@font-face\s*/y;
64+
const re_property = /@property\s+(--[-\w]+)\s*/y;
65+
const re_counterStyle = /@counter-style\s+([-\w]+)\s*/y;
66+
const re_fontFeatureValues = /@font-feature-values\s+([^{]+)/y;
67+
const re_scope = /@scope\s*([^{]*)/y;
68+
const re_viewTransition = /@view-transition\s*/y;
69+
const re_positionTry = /@position-try\s+(--[-\w]+)\s*/y;
70+
const re_startingStyle = /@starting-style\s*/y;
71+
const re_genericAtRule = /@([-\w]+)\s*/y;
72+
4773
export const parse = (
4874
css: string,
4975
options?: { source?: string; silent?: boolean },
@@ -132,7 +158,7 @@ export const parse = (
132158
lexer.skipWhitespace();
133159
comments(rules);
134160
while (lexer.hasMore) {
135-
if (lexer.charAt() === '}') {
161+
if (lexer.charCodeAt() === Ch_CLOSE) {
136162
if (options?.silent) {
137163
// Skip stray closing braces at top level
138164
error("extra '}'");
@@ -188,11 +214,11 @@ export const parse = (
188214
*/
189215
function comment(): CssCommentAST | undefined {
190216
const pos = position();
191-
if ('/' !== lexer.charAt() || '*' !== lexer.charAt(1)) {
217+
if (lexer.charCodeAt() !== Ch_SLASH || lexer.charCodeAt(1) !== Ch_STAR) {
192218
return;
193219
}
194220

195-
const m = lexer.matchRegex(/^\/\*[^]*?\*\//);
221+
const m = lexer.matchRegex(re_comment);
196222
if (!m) {
197223
return error('End of comment missing');
198224
}
@@ -228,9 +254,7 @@ export const parse = (
228254
const pos = position();
229255

230256
// prop
231-
const propMatch = lexer.matchRegex(
232-
/^(\*?[-#/*\\\w]+(\[[0-9a-z_-]+\])?)\s*/,
233-
);
257+
const propMatch = lexer.matchRegex(re_propName);
234258
if (!propMatch) {
235259
return;
236260
}
@@ -284,7 +308,11 @@ export const parse = (
284308
decl = declaration();
285309
}
286310
// In silent mode, try to recover from errors by skipping to next semicolon
287-
while (options?.silent && lexer.hasMore && lexer.charAt() !== '}') {
311+
while (
312+
options?.silent &&
313+
lexer.hasMore &&
314+
lexer.charCodeAt() !== Ch_CLOSE
315+
) {
288316
const remaining = lexer.remaining;
289317
const semiPos = remaining.indexOf(';');
290318
const bracePos = remaining.indexOf('}');
@@ -345,9 +373,9 @@ export const parse = (
345373
}
346374
comments(items);
347375

348-
while (lexer.hasMore && lexer.charAt() !== '}') {
376+
while (lexer.hasMore && lexer.charCodeAt() !== Ch_CLOSE) {
349377
// nested at-rule
350-
if (lexer.charAt() === '@') {
378+
if (lexer.charCodeAt() === Ch_AT) {
351379
const ar = atRule();
352380
if (ar) {
353381
items.push(ar);
@@ -404,9 +432,9 @@ export const parse = (
404432
const items: Array<CssAtRuleAST | CssDeclarationAST | CssCommentAST> = [];
405433
whitespace();
406434
comments(items);
407-
while (lexer.hasMore && lexer.charAt() !== '}') {
435+
while (lexer.hasMore && lexer.charCodeAt() !== Ch_CLOSE) {
408436
// at-rule
409-
if (lexer.charAt() === '@') {
437+
if (lexer.charCodeAt() === Ch_AT) {
410438
const ar = atRule();
411439
if (ar) {
412440
items.push(ar);
@@ -457,11 +485,11 @@ export const parse = (
457485
const vals = [];
458486
const pos = position();
459487

460-
let m = lexer.matchRegex(/^((\d+\.\d+|\.\d+|\d+)%?|[a-z]+)\s*/);
488+
let m = lexer.matchRegex(re_keyframeValue);
461489
while (m) {
462490
vals.push(m[1]);
463491
lexer.tryCommaAndWhitespace();
464-
m = lexer.matchRegex(/^((\d+\.\d+|\.\d+|\d+)%?|[a-z]+)\s*/);
492+
m = lexer.matchRegex(re_keyframeValue);
465493
}
466494

467495
if (!vals.length) {
@@ -480,15 +508,15 @@ export const parse = (
480508
*/
481509
function atKeyframes(): CssKeyframesAST | undefined {
482510
const pos = position();
483-
const m1 = lexer.matchRegex(/^@([-\w]+)?keyframes\s*/);
511+
const m1 = lexer.matchRegex(re_keyframesName);
484512

485513
if (!m1) {
486514
return;
487515
}
488516
const vendor = m1[1];
489517

490518
// identifier
491-
const m2 = lexer.matchRegex(/^([-\w]+)\s*/);
519+
const m2 = lexer.matchRegex(re_identifier);
492520
if (!m2) {
493521
return error('@keyframes missing name');
494522
}
@@ -523,7 +551,7 @@ export const parse = (
523551
*/
524552
function atSupports(): CssSupportsAST | undefined {
525553
const pos = position();
526-
const m = lexer.matchRegex(/^@supports *([^{]+)/);
554+
const m = lexer.matchRegex(re_supports);
527555

528556
if (!m) {
529557
return;
@@ -552,7 +580,7 @@ export const parse = (
552580
*/
553581
function atHost() {
554582
const pos = position();
555-
const m = lexer.matchRegex(/^@host\s*/);
583+
const m = lexer.matchRegex(re_host);
556584

557585
if (!m) {
558586
return;
@@ -579,7 +607,7 @@ export const parse = (
579607
*/
580608
function atContainer(): CssContainerAST | undefined {
581609
const pos = position();
582-
const m = lexer.matchRegex(/^@container *([^{]+)/);
610+
const m = lexer.matchRegex(re_container);
583611

584612
if (!m) {
585613
return;
@@ -608,7 +636,7 @@ export const parse = (
608636
*/
609637
function atLayer(): CssLayerAST | undefined {
610638
const pos = position();
611-
const m = lexer.matchRegex(/^@layer *([^{;@]+)/);
639+
const m = lexer.matchRegex(re_layer);
612640

613641
if (!m) {
614642
return;
@@ -641,7 +669,7 @@ export const parse = (
641669
*/
642670
function atMedia(): CssMediaAST | undefined {
643671
const pos = position();
644-
const m = lexer.matchRegex(/^@media *([^{]+)/);
672+
const m = lexer.matchRegex(re_media);
645673

646674
if (!m) {
647675
return;
@@ -670,7 +698,7 @@ export const parse = (
670698
*/
671699
function atCustomMedia(): CssCustomMediaAST | undefined {
672700
const pos = position();
673-
const m = lexer.matchRegex(/^@custom-media\s+(--\S+)\s+([^{;\s][^{;]*);/);
701+
const m = lexer.matchRegex(re_customMedia);
674702
if (!m) {
675703
return;
676704
}
@@ -704,7 +732,8 @@ export const parse = (
704732
'right-bottom',
705733
];
706734
const pageMarginBoxRegex = new RegExp(
707-
`^@(${pageMarginBoxNames.join('|')})(?![\\w-])\\s*`,
735+
`@(${pageMarginBoxNames.join('|')})(?![\\w-])\\s*`,
736+
'y',
708737
);
709738

710739
function atPageMarginBox(): CssPageMarginBoxAST | undefined {
@@ -741,7 +770,7 @@ export const parse = (
741770
*/
742771
function atPage(): CssPageAST | undefined {
743772
const pos = position();
744-
const m = lexer.matchRegex(/^@page */);
773+
const m = lexer.matchRegex(re_page);
745774
if (!m) {
746775
return;
747776
}
@@ -755,8 +784,8 @@ export const parse = (
755784
comments(decls);
756785

757786
// declarations and nested at-rules (margin boxes)
758-
while (lexer.hasMore && lexer.charAt() !== '}') {
759-
if (lexer.charAt() === '@') {
787+
while (lexer.hasMore && lexer.charCodeAt() !== Ch_CLOSE) {
788+
if (lexer.charCodeAt() === Ch_AT) {
760789
const ar = atRule();
761790
if (ar) {
762791
decls.push(ar);
@@ -789,7 +818,7 @@ export const parse = (
789818
*/
790819
function atDocument(): CssDocumentAST | undefined {
791820
const pos = position();
792-
const m = lexer.matchRegex(/^@([-\w]+)?document *([^{]+)/);
821+
const m = lexer.matchRegex(re_document);
793822
if (!m) {
794823
return;
795824
}
@@ -820,7 +849,7 @@ export const parse = (
820849
*/
821850
function atFontFace(): CssFontFaceAST | undefined {
822851
const pos = position();
823-
const m = lexer.matchRegex(/^@font-face\s*/);
852+
const m = lexer.matchRegex(re_fontFace);
824853
if (!m) {
825854
return;
826855
}
@@ -853,7 +882,7 @@ export const parse = (
853882
*/
854883
function atProperty(): CssPropertyAST | undefined {
855884
const pos = position();
856-
const m = lexer.matchRegex(/^@property\s+(--[-\w]+)\s*/);
885+
const m = lexer.matchRegex(re_property);
857886
if (!m) {
858887
return;
859888
}
@@ -885,7 +914,7 @@ export const parse = (
885914
*/
886915
function atCounterStyle(): CssCounterStyleAST | undefined {
887916
const pos = position();
888-
const m = lexer.matchRegex(/^@counter-style\s+([-\w]+)\s*/);
917+
const m = lexer.matchRegex(re_counterStyle);
889918
if (!m) {
890919
return;
891920
}
@@ -917,7 +946,7 @@ export const parse = (
917946
*/
918947
function atFontFeatureValues(): CssFontFeatureValuesAST | undefined {
919948
const pos = position();
920-
const m = lexer.matchRegex(/^@font-feature-values\s+([^{]+)/);
949+
const m = lexer.matchRegex(re_fontFeatureValues);
921950
if (!m) {
922951
return;
923952
}
@@ -945,7 +974,7 @@ export const parse = (
945974
*/
946975
function atScope(): CssScopeAST | undefined {
947976
const pos = position();
948-
const m = lexer.matchRegex(/^@scope\s*([^{]*)/);
977+
const m = lexer.matchRegex(re_scope);
949978
if (!m) {
950979
return;
951980
}
@@ -973,7 +1002,7 @@ export const parse = (
9731002
*/
9741003
function atViewTransition(): CssViewTransitionAST | undefined {
9751004
const pos = position();
976-
const m = lexer.matchRegex(/^@view-transition\s*/);
1005+
const m = lexer.matchRegex(re_viewTransition);
9771006
if (!m) {
9781007
return;
9791008
}
@@ -1003,7 +1032,7 @@ export const parse = (
10031032
*/
10041033
function atPositionTry(): CssPositionTryAST | undefined {
10051034
const pos = position();
1006-
const m = lexer.matchRegex(/^@position-try\s+(--[-\w]+)\s*/);
1035+
const m = lexer.matchRegex(re_positionTry);
10071036
if (!m) {
10081037
return;
10091038
}
@@ -1035,7 +1064,7 @@ export const parse = (
10351064
*/
10361065
function atStartingStyle(): CssStartingStyleAST | undefined {
10371066
const pos = position();
1038-
const m = lexer.matchRegex(/^@starting-style\s*/);
1067+
const m = lexer.matchRegex(re_startingStyle);
10391068
if (!m) {
10401069
return;
10411070
}
@@ -1077,9 +1106,10 @@ export const parse = (
10771106
name: string,
10781107
): () => T1 | undefined {
10791108
const re = new RegExp(
1080-
'^@' +
1109+
'@' +
10811110
name +
10821111
'\\s*((?::?[^;\'"]|"(?:\\\\"|[^"])*?"|\'(?:\\\\\'|[^\'])*?\')+)(?:;|$)',
1112+
'y',
10831113
);
10841114

10851115
return (): T1 | undefined => {
@@ -1100,7 +1130,7 @@ export const parse = (
11001130
*/
11011131
function atGeneric(): CssGenericAtRuleAST | undefined {
11021132
const pos = position();
1103-
const m = lexer.matchRegex(/^@([-\w]+)\s*/);
1133+
const m = lexer.matchRegex(re_genericAtRule);
11041134
if (!m) {
11051135
return;
11061136
}
@@ -1146,7 +1176,7 @@ export const parse = (
11461176
* Parse at rule.
11471177
*/
11481178
function atRule(): CssAtRuleAST | undefined {
1149-
if (lexer.charAt() !== '@') {
1179+
if (lexer.charCodeAt() !== Ch_AT) {
11501180
return;
11511181
}
11521182

0 commit comments

Comments
 (0)