From 0e3e54eac847e1d1bea2648723213cf906d8f20c Mon Sep 17 00:00:00 2001 From: lmasroca Date: Tue, 26 May 2026 18:56:45 -0300 Subject: [PATCH 01/17] Java regex added char class intersection, union (nesting) & subtraction. --- .../org/evomaster/core/parser/RegexJava.g4 | 20 +++- .../core/parser/GeneRegexJavaVisitor.kt | 40 +++++++- .../core/utils/MultiCharacterRange.kt | 91 ++++++++++++++++++- .../core/parser/GeneRegexJavaVisitorTest.kt | 22 +++++ 4 files changed, 164 insertions(+), 9 deletions(-) diff --git a/core/src/main/antlr4/org/evomaster/core/parser/RegexJava.g4 b/core/src/main/antlr4/org/evomaster/core/parser/RegexJava.g4 index 6eeee8a209..5a5fc8d8a0 100644 --- a/core/src/main/antlr4/org/evomaster/core/parser/RegexJava.g4 +++ b/core/src/main/antlr4/org/evomaster/core/parser/RegexJava.g4 @@ -189,15 +189,25 @@ patternCharacter | BRACE_close | BRACKET_close | COLON + | INTERSECTION ; +INTERSECTION : '&&' ; + characterClass - //TODO check if lookahead needed, or implicit in rule order resoution - //[ [lookahead ∉ {^}] ClassRanges ] - : BRACKET_open CARET classRanges BRACKET_close - | BRACKET_open classRanges BRACKET_close - ; + : BRACKET_open CARET classContents BRACKET_close + | BRACKET_open classContents BRACKET_close + ; + +classContents + : classUnion (INTERSECTION classUnion)* + ; + +classUnion + : characterClass+ // one or more nested classes = UNION + | classRanges // bare ranges + ; classRanges : diff --git a/core/src/main/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitor.kt b/core/src/main/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitor.kt index 8f3d784922..581fe2623a 100644 --- a/core/src/main/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitor.kt +++ b/core/src/main/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitor.kt @@ -3,6 +3,7 @@ package org.evomaster.core.parser import org.evomaster.core.search.gene.Gene import org.evomaster.core.search.gene.regex.* import org.evomaster.core.utils.CharacterRange +import org.evomaster.core.utils.MultiCharacterRange import org.evomaster.core.utils.ParsedFlagExpression import org.evomaster.core.utils.RegexFlags @@ -398,11 +399,44 @@ class GeneRegexJavaVisitor : RegexJavaBaseVisitor(){ val negated = ctx.CARET() != null - val ranges = ctx.classRanges().accept(this).data as List + val innerMultiCharRanges = ctx.classContents().accept(this).data as MultiCharacterRange - val gene = CharacterRangeRxGene(negated, ranges, currentFlags) + val multiCharRanges = MultiCharacterRange(negated, innerMultiCharRanges) - return VisitResult(gene) + return if (ctx.parent is RegexJavaParser.AtomContext){ + // top level character class, create gene + VisitResult(CharacterRangeRxGene(multiCharRanges, currentFlags)) + } else { + // nested char class, apply negation (if necessary) + VisitResult(data = multiCharRanges) + } + } + + override fun visitClassContents(ctx: RegexJavaParser.ClassContentsContext): VisitResult { + + // intersect the unions of ranges + val mcr = ctx.classUnion() + .map { it.accept(this).data as MultiCharacterRange } + .reduce { acc, item -> MultiCharacterRange.intersect(acc, item) } + + return VisitResult(data=mcr) + } + + override fun visitClassUnion(ctx: RegexJavaParser.ClassUnionContext): VisitResult { + + return if (ctx.characterClass().isNotEmpty()) { + // union of char classes + val mcr = ctx.characterClass() + .map { it.accept(this).data as MultiCharacterRange } + .reduce { acc, item -> MultiCharacterRange.union(acc, item) } + + VisitResult(data=mcr) + } else { + // single classRanges + val ranges = ctx.classRanges().accept(this).data as List + + VisitResult(data=MultiCharacterRange(false, ranges)) + } } override fun visitClassRanges(ctx: RegexJavaParser.ClassRangesContext): VisitResult { diff --git a/core/src/main/kotlin/org/evomaster/core/utils/MultiCharacterRange.kt b/core/src/main/kotlin/org/evomaster/core/utils/MultiCharacterRange.kt index 8ea9050bae..94b1fc1347 100644 --- a/core/src/main/kotlin/org/evomaster/core/utils/MultiCharacterRange.kt +++ b/core/src/main/kotlin/org/evomaster/core/utils/MultiCharacterRange.kt @@ -16,6 +16,14 @@ class MultiCharacterRange internal constructor(val ranges: List) return MultiCharacterRange(negated, characters.map { CharacterRange(it, it) }) } + operator fun invoke(negated: Boolean, multiCharRange: MultiCharacterRange): MultiCharacterRange { + return if (negated) { + MultiCharacterRange(true, multiCharRange.ranges) + } else { + multiCharRange + } + } + operator fun invoke(negated: Boolean, ranges: List): MultiCharacterRange { if (ranges.isEmpty()) { throw IllegalArgumentException("No defined ranges") @@ -26,7 +34,7 @@ class MultiCharacterRange internal constructor(val ranges: List) if (negated) { internalRanges.add(CharacterRange(Character.MIN_VALUE, Character.MAX_VALUE)) } - for (range in ranges) { + for (range in ranges.sortedBy { it.start.code }) { internalRanges = if (negated) { remove(internalRanges, CharacterRange(range.start, range.end)) } else { @@ -93,6 +101,87 @@ class MultiCharacterRange internal constructor(val ranges: List) } }.toMutableList() } + + /** + * Create an intersection from two [org.evomaster.core.utils.MultiCharacterRange] instances + * Used to allow character class intersections (e.g.: `[a-z0-9&&[0-9A-Z]]`). + */ + fun intersect(a: MultiCharacterRange, b: MultiCharacterRange): MultiCharacterRange { + val result = mutableListOf() + + var idxA = 0 + var idxB = 0 + + val lenA = a.size + val lenB = b.size + + while (idxA < lenA && idxB < lenB) { + val start = maxOf(a[idxA].start, b[idxB].start) + val end = minOf(a[idxA].end, b[idxB].end) + + if (start <= end) { + result.add(CharacterRange(start, end)) + } + + if ( a[idxA].end < b[idxB].end ) { + idxA++ + } else { + idxB++ + } + } + + return MultiCharacterRange(result) + } + + /** + * Creates a union from two [MultiCharacterRange] instances, merging overlapping + * and adjacent ranges into a single normalized [MultiCharacterRange]. + * Used to allow character class unions (e.g.: `[[a-c][x-z]]`). + */ + fun union(a: MultiCharacterRange, b: MultiCharacterRange): MultiCharacterRange { + val result = mutableListOf() + var idxA = 0 + var idxB = 0 + + while (idxA < a.size && idxB < b.size) { + // pick the range with the smaller start + val (start, end) = if (a[idxA].start <= b[idxB].start) { + a[idxA].start to a[idxA].end.also { idxA++ } + } else { + b[idxB].start to b[idxB].end.also { idxB++ } + } + + // merge with last range in result if overlapping or adjacent + if (result.isNotEmpty() && start.code <= result.last().end.code + 1) { + val last = result.removeLast() + result.add(CharacterRange(last.start, maxOf(last.end, end))) + } else { + result.add(CharacterRange(start, end)) + } + } + + // append remaining ranges from whichever list isn't exhausted + while (idxA < a.size) { + val curr = a[idxA++] + if (result.isNotEmpty() && curr.start.code <= result.last().end.code + 1) { + val last = result.removeLast() + result.add(CharacterRange(last.start, maxOf(last.end, curr.end))) + } else { + result.add(curr) + } + } + while (idxB < b.size) { + val curr = b[idxB++] + if (result.isNotEmpty() && curr.start.code <= result.last().end.code + 1) { + val last = result.removeLast() + result.add(CharacterRange(last.start, maxOf(last.end, curr.end))) + } else { + result.add(curr) + } + } + + return MultiCharacterRange(result) + } } /** diff --git a/core/src/test/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitorTest.kt b/core/src/test/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitorTest.kt index 73edc55d60..f1f4d9f383 100644 --- a/core/src/test/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitorTest.kt +++ b/core/src/test/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitorTest.kt @@ -251,4 +251,26 @@ class GeneRegexJavaVisitorTest : GeneRegexEcma262VisitorTest() { override fun testJSExclusiveEscapes() { // JS exclusive } + + @Test + fun testIntersection(){ + checkSameAsJava("&&") + checkSameAsJava("[abc-e[f-h]ij-l[m]n]") + checkSameAsJava("[a&&a][a&&a&&a]") + checkSameAsJava("[a-z&&[aeiou]]") + checkSameAsJava("[a-z&&[^aeiou]]") + checkSameAsJava("[a-z&&[a-p]&&[f-z]]") + checkSameAsJava("[ac-e&&[a-d]]") + checkSameAsJava("[\\w&&[a-z]]") + checkSameAsJava("[a-z&&[b-y]]") + checkSameAsJava("[a-z0-9&&[A-Z0-9]&&[2B4C]]") + checkSameAsJava("[[a-c][x-z]&&[b-y]]") + checkSameAsJava("[a-c&&[b-d]e-g]") + checkSameAsJava("[^a-z&&[^aeiou]]") + checkSameAsJava("[\\s&&[^\\n]]") + checkSameAsJava("[a-c&&[c-e]]") + checkSameAsJava("[a-z&&[a-z]]") + checkSameAsJava("[a-ce-g&&[b-f]]") + checkSameAsJava("[[a-z&&[a-p]]&&[f-z]]") + } } \ No newline at end of file From 0364ea7c5f9a923c9f39590866456c672e2eb8a8 Mon Sep 17 00:00:00 2001 From: lmasroca Date: Tue, 26 May 2026 21:31:42 -0300 Subject: [PATCH 02/17] Allowing empty MultiCharacterRange. --- .../search/gene/regex/CharacterClassEscapeRxGene.kt | 9 ++++++++- .../core/search/gene/regex/CharacterRangeRxGene.kt | 10 ++++++++-- .../org/evomaster/core/utils/MultiCharacterRange.kt | 12 +++--------- 3 files changed, 19 insertions(+), 12 deletions(-) diff --git a/core/src/main/kotlin/org/evomaster/core/search/gene/regex/CharacterClassEscapeRxGene.kt b/core/src/main/kotlin/org/evomaster/core/search/gene/regex/CharacterClassEscapeRxGene.kt index 06797e7932..aa50fd01fb 100644 --- a/core/src/main/kotlin/org/evomaster/core/search/gene/regex/CharacterClassEscapeRxGene.kt +++ b/core/src/main/kotlin/org/evomaster/core/search/gene/regex/CharacterClassEscapeRxGene.kt @@ -82,7 +82,7 @@ class CharacterClassEscapeRxGene( // create both normal and negated version for all .flatMap { (key, value) -> listOf( - key to MultiCharacterRange(value), + key to MultiCharacterRange(false, value), "^$key" to MultiCharacterRange(true, value) ) }.toMap() @@ -132,6 +132,10 @@ class CharacterClassEscapeRxGene( } } + override fun isMutable(): Boolean { + return multiCharRange.isNotEmpty + } + override fun checkForLocallyValidIgnoringChildren() : Boolean{ // we pass the same embedded flags to the regex to accurately match the expected behavior return value.matches(Regex("${flags.getScopeString()}\\$type")) @@ -193,6 +197,9 @@ class CharacterClassEscapeRxGene( } override fun getValueAsPrintableString(previousGenes: List, mode: GeneUtils.EscapeMode?, targetFormat: OutputFormat?, extraCheck: Boolean): String { + if (multiCharRange.isEmpty) { + throw IllegalStateException("Cannot get value from empty CharacterClassEscape") + } return if (!flags.isCaseable(value[0])) { value[0].toString() } diff --git a/core/src/main/kotlin/org/evomaster/core/search/gene/regex/CharacterRangeRxGene.kt b/core/src/main/kotlin/org/evomaster/core/search/gene/regex/CharacterRangeRxGene.kt index a921d8497f..1e6553ed7f 100644 --- a/core/src/main/kotlin/org/evomaster/core/search/gene/regex/CharacterRangeRxGene.kt +++ b/core/src/main/kotlin/org/evomaster/core/search/gene/regex/CharacterRangeRxGene.kt @@ -15,7 +15,7 @@ import org.evomaster.core.utils.MultiCharacterRange import org.evomaster.core.utils.RegexFlags import org.slf4j.LoggerFactory -class CharacterRangeRxGene private constructor( +class CharacterRangeRxGene( /** * this represents the valid ranges for a character class, removing overlaps and applying negation */ @@ -30,7 +30,7 @@ class CharacterRangeRxGene private constructor( private val log = LoggerFactory.getLogger(CharacterRangeRxGene::class.java) } - var value : Char = validRanges[0].start + var value : Char = if (validRanges.isEmpty) '\u0000' else validRanges[0].start /** * Whether to output the character in uppercase. @@ -49,6 +49,9 @@ class CharacterRangeRxGene private constructor( } override fun isMutable(): Boolean { + if (validRanges.isEmpty) { + return false + } // check if there is more than one character or if the character is caseable return validRanges.charCount > 1 || flags.isCaseable(value) } @@ -134,6 +137,9 @@ class CharacterRangeRxGene private constructor( TODO should \ be handled specially? In any case, would have same handling as AnyCharacterRxGene */ + if (validRanges.isEmpty) { + throw IllegalStateException("Cannot get value from empty CharacterRange") + } return if (!flags.isCaseable(value)) { value.toString() } diff --git a/core/src/main/kotlin/org/evomaster/core/utils/MultiCharacterRange.kt b/core/src/main/kotlin/org/evomaster/core/utils/MultiCharacterRange.kt index 94b1fc1347..3101f44d8f 100644 --- a/core/src/main/kotlin/org/evomaster/core/utils/MultiCharacterRange.kt +++ b/core/src/main/kotlin/org/evomaster/core/utils/MultiCharacterRange.kt @@ -5,10 +5,6 @@ import org.slf4j.LoggerFactory class MultiCharacterRange internal constructor(val ranges: List) { - init { - require(ranges.isNotEmpty()) { "MultiCharacterRange cannot be created with an empty list" } - } - companion object { private val log = LoggerFactory.getLogger(MultiCharacterRange::class.java) @@ -25,9 +21,6 @@ class MultiCharacterRange internal constructor(val ranges: List) } operator fun invoke(negated: Boolean, ranges: List): MultiCharacterRange { - if (ranges.isEmpty()) { - throw IllegalArgumentException("No defined ranges") - } var internalRanges = mutableListOf() @@ -204,10 +197,11 @@ class MultiCharacterRange internal constructor(val ranges: List) } currentRangeMinValue = currentRangeMaxValue } - assert(false) // internal ranges being empty should never happen - return '0' + throw IllegalStateException("Cannot sample characters from an empty char range") } + val isEmpty: Boolean get() = ranges.isEmpty() + val isNotEmpty: Boolean get() = ranges.isNotEmpty() val size: Int get() = ranges.size val charCount :Int = ranges.sumOf{ it.size } operator fun get(index: Int): CharacterRange = ranges[index] From 2b4638dcc7d513aac5090062e611435442dabfd7 Mon Sep 17 00:00:00 2001 From: lmasroca Date: Tue, 26 May 2026 21:39:11 -0300 Subject: [PATCH 03/17] Allowing invalid back refs as empty regex. --- .../core/parser/GeneRegexJavaVisitor.kt | 16 +++++++++------- .../search/gene/regex/BackReferenceRxGene.kt | 9 +++++++-- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/core/src/main/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitor.kt b/core/src/main/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitor.kt index 581fe2623a..5f879e3abb 100644 --- a/core/src/main/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitor.kt +++ b/core/src/main/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitor.kt @@ -47,7 +47,7 @@ class GeneRegexJavaVisitor : RegexJavaBaseVisitor(){ * Same as [captureGroups] but for named backreferences, which can be accessed * with their name or number. */ - private val namedCaptureGroups = mutableMapOf() + private val namedCaptureGroups = mutableMapOf() /** * Tracks the flags active in the current lexical scope. @@ -599,15 +599,18 @@ class GeneRegexJavaVisitor : RegexJavaBaseVisitor(){ maxDigits > allDigits.length -> allDigits.length allDigits.take(maxDigits).toInt() <= captureGroups.size -> maxDigits maxDigits > 1 -> maxDigits - 1 - else -> throw IllegalStateException( - "Backreference ${txt.take(2)} refers to group ${allDigits[0]} but only ${captureGroups.size} " + - "capture group(s) have been defined so far" - ) + else -> 1 } val n = allDigits.take(backRefDigitCount).toInt() - val result = VisitResult(BackReferenceRxGene(n, captureGroups[n - 1]!!)) + val gene = if (captureGroups.size > n-1) { + BackReferenceRxGene(n, captureGroups[n - 1]) + } else { + BackReferenceRxGene(n, null) + } + + val result = VisitResult(gene) val remainingChars = allDigits.drop(backRefDigitCount) @@ -624,7 +627,6 @@ class GeneRegexJavaVisitor : RegexJavaBaseVisitor(){ // strip "\k<" and ">" val name = txt.drop(3).dropLast(1) val group = namedCaptureGroups[name] - ?: throw IllegalStateException("Named backreference \\k<$name> refers to unknown group '$name'") val groupIndex = captureGroups.indexOf(group) + 1 // 1-based, for the gene name return VisitResult(BackReferenceRxGene(groupIndex, group)) } diff --git a/core/src/main/kotlin/org/evomaster/core/search/gene/regex/BackReferenceRxGene.kt b/core/src/main/kotlin/org/evomaster/core/search/gene/regex/BackReferenceRxGene.kt index 4c79744212..2b77094039 100644 --- a/core/src/main/kotlin/org/evomaster/core/search/gene/regex/BackReferenceRxGene.kt +++ b/core/src/main/kotlin/org/evomaster/core/search/gene/regex/BackReferenceRxGene.kt @@ -17,7 +17,7 @@ import org.evomaster.core.search.service.mutator.genemutation.SubsetGeneMutation */ class BackReferenceRxGene( val groupIndex: Int, - val captureGroup: DisjunctionListRxGene + val captureGroup: DisjunctionListRxGene? ) : RxAtom, SimpleGene("\\$groupIndex") { override fun checkForLocallyValidIgnoringChildren(): Boolean = true @@ -59,7 +59,12 @@ class BackReferenceRxGene( mode: GeneUtils.EscapeMode?, targetFormat: OutputFormat?, extraCheck: Boolean - ): String = captureGroup.getValueAsPrintableString(targetFormat = null) + ): String { + if (captureGroup == null) { + throw IllegalStateException("Cannot get value from invalid backreference \\$groupIndex") + } + return captureGroup.getValueAsPrintableString(previousGenes, mode, targetFormat) + } override fun containsSameValueAs(other: Gene): Boolean { if (other !is BackReferenceRxGene) return false From ab958618175e782cf3d334fea82047dc4ebc19b3 Mon Sep 17 00:00:00 2001 From: lmasroca Date: Tue, 26 May 2026 21:58:39 -0300 Subject: [PATCH 04/17] Identifying potential empty regex for later filtering. --- .../core/search/gene/regex/BackReferenceRxGene.kt | 4 ++++ .../core/search/gene/regex/CharacterRangeRxGene.kt | 2 ++ .../search/gene/regex/DisjunctionListRxGene.kt | 2 ++ .../core/search/gene/regex/DisjunctionRxGene.kt | 2 ++ .../core/search/gene/regex/QuantifierRxGene.kt | 14 +++++++++++++- .../org/evomaster/core/search/gene/regex/RxTerm.kt | 9 ++++++++- 6 files changed, 31 insertions(+), 2 deletions(-) diff --git a/core/src/main/kotlin/org/evomaster/core/search/gene/regex/BackReferenceRxGene.kt b/core/src/main/kotlin/org/evomaster/core/search/gene/regex/BackReferenceRxGene.kt index 2b77094039..c2ab9d6132 100644 --- a/core/src/main/kotlin/org/evomaster/core/search/gene/regex/BackReferenceRxGene.kt +++ b/core/src/main/kotlin/org/evomaster/core/search/gene/regex/BackReferenceRxGene.kt @@ -20,6 +20,10 @@ class BackReferenceRxGene( val captureGroup: DisjunctionListRxGene? ) : RxAtom, SimpleGene("\\$groupIndex") { + override fun isEffectivelyEmpty(): Boolean { + return captureGroup == null || captureGroup.isEffectivelyEmpty() + } + override fun checkForLocallyValidIgnoringChildren(): Boolean = true /** diff --git a/core/src/main/kotlin/org/evomaster/core/search/gene/regex/CharacterRangeRxGene.kt b/core/src/main/kotlin/org/evomaster/core/search/gene/regex/CharacterRangeRxGene.kt index 1e6553ed7f..6538718832 100644 --- a/core/src/main/kotlin/org/evomaster/core/search/gene/regex/CharacterRangeRxGene.kt +++ b/core/src/main/kotlin/org/evomaster/core/search/gene/regex/CharacterRangeRxGene.kt @@ -38,6 +38,8 @@ class CharacterRangeRxGene( */ var useUpperCase: Boolean = false + override fun isEffectivelyEmpty(): Boolean = validRanges.isEmpty + override fun checkForLocallyValidIgnoringChildren() : Boolean{ return validRanges.any { value in it || diff --git a/core/src/main/kotlin/org/evomaster/core/search/gene/regex/DisjunctionListRxGene.kt b/core/src/main/kotlin/org/evomaster/core/search/gene/regex/DisjunctionListRxGene.kt index 43676bcfb4..6907b4caf9 100644 --- a/core/src/main/kotlin/org/evomaster/core/search/gene/regex/DisjunctionListRxGene.kt +++ b/core/src/main/kotlin/org/evomaster/core/search/gene/regex/DisjunctionListRxGene.kt @@ -29,6 +29,8 @@ class DisjunctionListRxGene( private val log: Logger = LoggerFactory.getLogger(DisjunctionListRxGene::class.java) } + override fun isEffectivelyEmpty(): Boolean = disjunctions.all { it.isEffectivelyEmpty() } + override fun checkForLocallyValidIgnoringChildren(): Boolean { return activeDisjunction >= 0 && activeDisjunction < disjunctions.size } diff --git a/core/src/main/kotlin/org/evomaster/core/search/gene/regex/DisjunctionRxGene.kt b/core/src/main/kotlin/org/evomaster/core/search/gene/regex/DisjunctionRxGene.kt index b753e46a9a..19a47a2961 100644 --- a/core/src/main/kotlin/org/evomaster/core/search/gene/regex/DisjunctionRxGene.kt +++ b/core/src/main/kotlin/org/evomaster/core/search/gene/regex/DisjunctionRxGene.kt @@ -47,6 +47,8 @@ class DisjunctionRxGene( private val log : Logger = LoggerFactory.getLogger(DisjunctionRxGene::class.java) } + override fun isEffectivelyEmpty(): Boolean = + terms.isNotEmpty() && terms.any { (it as? RxTerm)?.isEffectivelyEmpty() == true } override fun checkForLocallyValidIgnoringChildren() : Boolean{ return true diff --git a/core/src/main/kotlin/org/evomaster/core/search/gene/regex/QuantifierRxGene.kt b/core/src/main/kotlin/org/evomaster/core/search/gene/regex/QuantifierRxGene.kt index d6d5469404..955944a7fe 100644 --- a/core/src/main/kotlin/org/evomaster/core/search/gene/regex/QuantifierRxGene.kt +++ b/core/src/main/kotlin/org/evomaster/core/search/gene/regex/QuantifierRxGene.kt @@ -62,7 +62,7 @@ class QuantifierRxGene( max } - if(min == limitedMax && !template.isMutable()){ + if(!isTemplateEffectivelyEmpty() && min == limitedMax && !template.isMutable()){ /* this means this whole gene is immutable. still need to initialize it */ @@ -74,6 +74,10 @@ class QuantifierRxGene( } } + private fun isTemplateEffectivelyEmpty() : Boolean = (template as? RxTerm)?.isEffectivelyEmpty() == true + + override fun isEffectivelyEmpty(): Boolean = min > 0 && isTemplateEffectivelyEmpty() + override fun checkForLocallyValidIgnoringChildren() : Boolean{ val n = getViewOfChildren().size return n in min..limitedMax @@ -116,6 +120,9 @@ class QuantifierRxGene( } override fun isMutable(): Boolean { + if (isTemplateEffectivelyEmpty()) { + return false + } return min != limitedMax || template.isMutable() } @@ -176,6 +183,11 @@ class QuantifierRxGene( } fun addNewAtom(randomness: Randomness, forceNewValue: Boolean){ + + if (isTemplateEffectivelyEmpty()) { + return + } + val base = template.copy() base.resetLocalIdRecursively() base.doInitialize(randomness) diff --git a/core/src/main/kotlin/org/evomaster/core/search/gene/regex/RxTerm.kt b/core/src/main/kotlin/org/evomaster/core/search/gene/regex/RxTerm.kt index 274bfb5adb..bf5af37879 100644 --- a/core/src/main/kotlin/org/evomaster/core/search/gene/regex/RxTerm.kt +++ b/core/src/main/kotlin/org/evomaster/core/search/gene/regex/RxTerm.kt @@ -4,4 +4,11 @@ import org.evomaster.core.search.StructuralElement import org.evomaster.core.search.gene.Gene -interface RxTerm \ No newline at end of file +interface RxTerm { + /** + * Returns true if this gene can never produce a valid value, + * for example an empty character class intersection like [a&&b]. + * Used at construction time to filter unsatisfiable branches from disjunctions. + */ + fun isEffectivelyEmpty(): Boolean = false +} \ No newline at end of file From 0de98aafb0ccb16ad2a863321e122beb8c74e8ee Mon Sep 17 00:00:00 2001 From: lmasroca Date: Tue, 26 May 2026 21:59:23 -0300 Subject: [PATCH 05/17] quick fix --- .../org/evomaster/core/search/gene/regex/QuantifierRxGene.kt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/src/main/kotlin/org/evomaster/core/search/gene/regex/QuantifierRxGene.kt b/core/src/main/kotlin/org/evomaster/core/search/gene/regex/QuantifierRxGene.kt index 955944a7fe..0615846bb2 100644 --- a/core/src/main/kotlin/org/evomaster/core/search/gene/regex/QuantifierRxGene.kt +++ b/core/src/main/kotlin/org/evomaster/core/search/gene/regex/QuantifierRxGene.kt @@ -49,8 +49,8 @@ class QuantifierRxGene( if (min < 0) { throw IllegalArgumentException("Invalid min value '$min': should be positive") } - if (max < 1) { - throw IllegalArgumentException("Invalid max value '$max': should be at least 1") + if (max < 0) { + throw IllegalArgumentException("Invalid max value '$max': should be positive") } if (min > max) { throw IllegalArgumentException("Invalid min-max values '$min-$max': min is greater than max") From 218807078d2e74e288a418093fa488f5b641548b Mon Sep 17 00:00:00 2001 From: lmasroca Date: Tue, 26 May 2026 22:20:01 -0300 Subject: [PATCH 06/17] Filtering empty regex alternatives from patterns, allowing for sampling from regex with at least one non-empty alternative. --- .../core/parser/GeneRegexJavaVisitor.kt | 102 +++++++++++++----- .../org/evomaster/core/parser/VisitResult.kt | 3 +- 2 files changed, 80 insertions(+), 25 deletions(-) diff --git a/core/src/main/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitor.kt b/core/src/main/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitor.kt index 5f879e3abb..17b26756a4 100644 --- a/core/src/main/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitor.kt +++ b/core/src/main/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitor.kt @@ -73,6 +73,25 @@ class GeneRegexJavaVisitor : RegexJavaBaseVisitor(){ ) } + private fun buildDisjunctionList(ctx: RegexJavaParser.DisjunctionContext): DisjunctionListRxGene? { + val res = ctx.accept(this) + val validDisjunctions = res.genes.map { it as DisjunctionRxGene } + + if (validDisjunctions.isEmpty()) { + return null + } + + val disjList = DisjunctionListRxGene(validDisjunctions) + + //TODO tmp hack until full handling of ^$. Assume full match when nested disjunctions + for (gene in disjList.disjunctions) { + gene.extraPrefix = false + gene.extraPostfix = false + gene.matchStart = true + gene.matchEnd = true + } + return disjList + } override fun visitPattern(ctx: RegexJavaParser.PatternContext): VisitResult { @@ -80,6 +99,10 @@ class GeneRegexJavaVisitor : RegexJavaBaseVisitor(){ val text = RegexUtils.getRegexExpByParserRuleContext(ctx) + if (res.genes.isEmpty()) { + throw IllegalStateException("Regex is unsatisfiable: no valid strings can be generated: $text") + } + val disjList = DisjunctionListRxGene(res.genes.map { it as DisjunctionRxGene }) // we remove the token from end of the string to store as sourceRegex @@ -101,9 +124,16 @@ class GeneRegexJavaVisitor : RegexJavaBaseVisitor(){ val matchStart = assertionMatches.first val matchEnd = assertionMatches.second - val disj = DisjunctionRxGene("disj", altRes.genes.map { it }, matchStart, matchEnd) + val res = VisitResult() - val res = VisitResult(disj) + if (!altRes.hadFilteredContent) { + val disj = DisjunctionRxGene("disj", altRes.genes.map { it }, matchStart, matchEnd) + // add if genuinely empty (matches "") OR has non-empty terms + if (disj.terms.isEmpty() || !disj.isEffectivelyEmpty()) { + res.genes.add(disj) + } + // else: terms exist but all effectively empty ([a&&b], \1 etc), skip + } if(ctx.disjunction() != null){ val disjRes = ctx.disjunction().accept(this) @@ -142,6 +172,14 @@ class GeneRegexJavaVisitor : RegexJavaBaseVisitor(){ for (j in i + 1 until ctx.term().size) { val resTerm = ctx.term()[j].accept(this) + if (resTerm.hadFilteredContent) { + currentFlags = previous + val r = VisitResult() + r.hadFilteredContent = true + r.data = Pair(false, false) + return r + } + // this condition isolates the back ref case, preserving original behavior otherwise. if (ctx.term()[j].atom()?.atomEscape()?.BackReference() != null){ // if term is a BackReference we addAll genes from result as there may be more than one if digits are dropped @@ -161,6 +199,13 @@ class GeneRegexJavaVisitor : RegexJavaBaseVisitor(){ val resTerm = ctx.term()[i].accept(this) val gene = resTerm.genes.firstOrNull() + if (resTerm.hadFilteredContent) { + val r = VisitResult() + r.hadFilteredContent = true + r.data = Pair(false, false) + return r + } + // this condition isolates the back ref case, preserving original behavior otherwise. if (ctx.term()[i].atom()?.atomEscape()?.BackReference() != null){ // if term is a BackReference we addAll genes from result as there may be more than one if digits are dropped @@ -169,7 +214,7 @@ class GeneRegexJavaVisitor : RegexJavaBaseVisitor(){ // term is not a back ref: we use the default behavior, term results may only have 0-1 genes // if there is a gene, we add it to result res.genes.add(gene) - } else { + } else if (resTerm.data is String) { val assertion = resTerm.data as String if(i==0 && assertion == "^"){ @@ -202,6 +247,23 @@ class GeneRegexJavaVisitor : RegexJavaBaseVisitor(){ } val resAtom = ctx.atom().accept(this) + + if (resAtom.hadFilteredContent) { + // check if there's a quantifier that allows zero matches + // if so, the term is valid and produces "", don't propagate hadFilteredContent + if (ctx.quantifier() != null) { + val limits = ctx.quantifier().accept(this).data as Pair + if (limits.first == 0) { + // min=0, so zero matches is valid, term produces "", not unsatisfiable + return res // empty genes, no hadFilteredContent + } + } + // no quantifier or min>0, propagate + + res.hadFilteredContent = true + return res + } + val atom = resAtom.genes.firstOrNull() ?: return res @@ -306,21 +368,15 @@ class GeneRegexJavaVisitor : RegexJavaBaseVisitor(){ currentFlags = merged - val res = ctx.disjunction().accept(this) + val disjList = buildDisjunctionList(ctx.disjunction()) currentFlags = previous - val disjList = DisjunctionListRxGene(res.genes.map { it as DisjunctionRxGene }) - - //TODO tmp hack until full handling of ^$. Assume full match when nested disjunctions - for (gene in disjList.disjunctions) { - gene.extraPrefix = false - gene.extraPostfix = false - gene.matchStart = true - gene.matchEnd = true + return if (disjList != null) { + VisitResult(disjList) + } else { + VisitResult().also{ it.hadFilteredContent = true } } - - return VisitResult(disjList) } if(ctx.quote() != null){ @@ -356,15 +412,7 @@ class GeneRegexJavaVisitor : RegexJavaBaseVisitor(){ val res = ctx.disjunction().accept(this) - val disjList = DisjunctionListRxGene(res.genes.map { it as DisjunctionRxGene }) - - //TODO tmp hack until full handling of ^$. Assume full match when nested disjunctions - for(gene in disjList.disjunctions){ - gene.extraPrefix = false - gene.extraPostfix = false - gene.matchStart = true - gene.matchEnd = true - } + val disjList = buildDisjunctionList(ctx.disjunction()) val isCapturingGroup = !ctx.text.startsWith("(?:") val isNamedCaptureGroup = ctx.NAMED_CAPTURE_GROUP_OPEN() != null @@ -380,7 +428,13 @@ class GeneRegexJavaVisitor : RegexJavaBaseVisitor(){ namedCaptureGroups[name] = disjList } - return VisitResult(disjList) + return if (disjList != null) { + VisitResult(disjList) + } else { + VisitResult().also{ + it.hadFilteredContent = true + } + } } if(ctx.DOT() != null){ diff --git a/core/src/main/kotlin/org/evomaster/core/parser/VisitResult.kt b/core/src/main/kotlin/org/evomaster/core/parser/VisitResult.kt index 61f6bd4987..d780d5f44b 100644 --- a/core/src/main/kotlin/org/evomaster/core/parser/VisitResult.kt +++ b/core/src/main/kotlin/org/evomaster/core/parser/VisitResult.kt @@ -7,7 +7,8 @@ import org.evomaster.core.search.gene.Gene */ class VisitResult( val genes: MutableList = mutableListOf(), - var data: Any? = null + var data: Any? = null, + var hadFilteredContent: Boolean = false ){ constructor(gene: Gene) : this() { From 45db61deaa5ee21c5fa5ecddbd9bbb9a56590416 Mon Sep 17 00:00:00 2001 From: lmasroca Date: Tue, 26 May 2026 22:51:08 -0300 Subject: [PATCH 07/17] Added some tests regarding empty regex alternatives --- .../core/parser/GeneRegexJavaVisitorTest.kt | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/core/src/test/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitorTest.kt b/core/src/test/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitorTest.kt index f1f4d9f383..e57ff7cb70 100644 --- a/core/src/test/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitorTest.kt +++ b/core/src/test/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitorTest.kt @@ -2,6 +2,7 @@ package org.evomaster.core.parser import org.evomaster.core.search.gene.regex.RegexGene import org.junit.jupiter.api.Test +import org.junit.jupiter.api.assertThrows /** * Created by arcuri82 on 11-Sep-19. @@ -272,5 +273,37 @@ class GeneRegexJavaVisitorTest : GeneRegexEcma262VisitorTest() { checkSameAsJava("[a-z&&[a-z]]") checkSameAsJava("[a-ce-g&&[b-f]]") checkSameAsJava("[[a-z&&[a-p]]&&[f-z]]") + checkSameAsJava("[[a-c&&[d-f]][x-z]]") + checkSameAsJava("[a-c&&[b-d]]|[x&&y]") + } + + @Test + fun testEmptyAlternatives() { + assertThrows{ checkSameAsJava("[a&&b]") } + checkSameAsJava("[a&&b]|c") + checkSameAsJava("0|[a&&b]|c") + assertThrows { checkSameAsJava("[a&&b]|[c&&d]") } + assertThrows { checkSameAsJava("[a&&b]|[c&&d]|[e&&f]") } + checkSameAsJava("a|[b&&c]|d") + checkSameAsJava("([a&&b]|c)d") + } + + @Test + fun testInvalidBackRefs() { + assertThrows { checkSameAsJava("\\1") } + checkSameAsJava("\\1|c") + assertThrows { checkSameAsJava("(a)\\2") } + checkSameAsJava("(a)\\2|b") + assertThrows { checkSameAsJava("(\\1)") } + checkSameAsJava("(\\1|a)") + assertThrows { checkSameAsJava("\\1(a)") } + checkSameAsJava("\\1(a)|b") + checkSameAsJava("(a)(\\1|\\2|c)") + assertThrows { checkSameAsJava("\\1|\\2|\\3") } + checkSameAsJava("(\\2|a)|b") + checkSameAsJava("\\1|[a&&b]|c") + assertThrows { checkSameAsJava("\\1|[a&&b]") } + checkSameAsJava("([a&b])|b\\1") + assertThrows { checkSameAsJava("([a&&b])|b\\1") } } } \ No newline at end of file From a9b26b4553fcc6bf03be845b354aa9ed62fdd402 Mon Sep 17 00:00:00 2001 From: lmasroca Date: Tue, 26 May 2026 22:54:32 -0300 Subject: [PATCH 08/17] Added more tests regarding empty regex alternatives --- .../core/parser/GeneRegexJavaVisitorTest.kt | 85 +++++++++++++++++++ 1 file changed, 85 insertions(+) diff --git a/core/src/test/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitorTest.kt b/core/src/test/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitorTest.kt index e57ff7cb70..b1af8b0807 100644 --- a/core/src/test/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitorTest.kt +++ b/core/src/test/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitorTest.kt @@ -305,5 +305,90 @@ class GeneRegexJavaVisitorTest : GeneRegexEcma262VisitorTest() { assertThrows { checkSameAsJava("\\1|[a&&b]") } checkSameAsJava("([a&b])|b\\1") assertThrows { checkSameAsJava("([a&&b])|b\\1") } + assertThrows { checkSameAsJava("\\k") } + } + + @Test + fun testEmptyWithFlagGroup() { + checkSameAsJava("(?i:)") + checkSameAsJava("(?i:)|c") + assertThrows { checkSameAsJava("(?i:[a&&b])") } + checkSameAsJava("(?i:[a&&b])|c") + checkSameAsJava("(?i:[a&&b]|c)") + assertThrows { checkSameAsJava("(?i:(?u:[a&&b]))") } + checkSameAsJava("(?i:(?u:[a&&b])|c)") + } + + @Test + fun testEmptyWithFlagScope() { + checkSameAsJava("(?iu)") + checkSameAsJava("^(?iu)") + assertThrows { checkSameAsJava("(?iu)[a&&b]") } + checkSameAsJava("(?iu)[a&&b]|c") + assertThrows { checkSameAsJava("^(?iu)[a&&b]$") } + checkSameAsJava("^(?iu)[a&&b]$|c") + } + + @Test + fun testEmptyWithAnchors() { + checkSameAsJava("^$") + assertThrows { checkSameAsJava("^[a&&b]$") } + checkSameAsJava("^[a&&b]$|c") + checkSameAsJava("^(?i:abc)$") + assertThrows { checkSameAsJava("^([a&&b])$") } + checkSameAsJava("^([a&&b]|c)$") + } + + @Test + fun testEmptyWithQuantifiers() { + checkSameAsJava("[a&&b]*") + checkSameAsJava("[a&&b]*c") + checkSameAsJava("[a&&b]?") + checkSameAsJava("[a&&b]?c") + checkSameAsJava("[a&&b]{0,}") + checkSameAsJava("[a&&b]{0}") + checkSameAsJava("([a&&b])*") + checkSameAsJava("([a&&b])*c") + assertThrows { checkSameAsJava("[a&&b]+") } + checkSameAsJava("[a&&b]+|c") + assertThrows { checkSameAsJava("[a&&b]{1,}") } + checkSameAsJava("[a&&b]{1,}|c") + assertThrows { checkSameAsJava("[a&&b]{1}") } + checkSameAsJava("[a&&b]{1}|c") + assertThrows { checkSameAsJava("[a&&b]{2,4}") } + checkSameAsJava("[a&&b]{2,4}|c") + assertThrows { checkSameAsJava("([a&&b])+") } + checkSameAsJava("([a&&b])+|c") + checkSameAsJava("[a&&b]{3}|c") + checkSameAsJava("[a&&b]{3,3}|c") + checkSameAsJava("[a&&b]{0,0}|c") + } + + @Test + fun testEmptyWithBackRefsAndQuantifiers() { + checkSameAsJava("(a)\\1*") + checkSameAsJava("\\1*c") + checkSameAsJava("\\1?c") + checkSameAsJava("(\\1*)") + assertThrows { checkSameAsJava("\\1+") } + checkSameAsJava("\\1+|c") + assertThrows { checkSameAsJava("(\\1+)") } + checkSameAsJava("(\\1+)|c") + } + + @Test + fun testEmptyNestedGroups() { + checkSameAsJava("(?:)") + checkSameAsJava("(?:)|c") + assertThrows { checkSameAsJava("(?:[a&&b])") } + checkSameAsJava("(?:[a&&b])|c") + checkSameAsJava("([a&&b])|c") + checkSameAsJava("([a&&b]|[c&&d])|e") + checkSameAsJava("(([a&&b])|([c&&d]))|e") + checkSameAsJava("((([a&&b]|[c&&d])|[e&&f])|g)") + checkSameAsJava("(g|(([a&&b]|[c&&d])|[e&&f]))") + assertThrows { checkSameAsJava("(?[a&&b])") } + checkSameAsJava("(?[a&&b])|c") + assertThrows { checkSameAsJava("(?[a&&b])|c\\k") } } } \ No newline at end of file From 466ee5490c83612359bc3f20d1ff06c259214a27 Mon Sep 17 00:00:00 2001 From: lmasroca Date: Wed, 3 Jun 2026 15:54:19 -0300 Subject: [PATCH 09/17] Removed bool from VisitResult used to represent unsat. regexes. --- .../core/parser/GeneRegexJavaVisitor.kt | 87 ++++++++----------- .../org/evomaster/core/parser/VisitResult.kt | 3 +- 2 files changed, 35 insertions(+), 55 deletions(-) diff --git a/core/src/main/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitor.kt b/core/src/main/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitor.kt index 17b26756a4..9169957860 100644 --- a/core/src/main/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitor.kt +++ b/core/src/main/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitor.kt @@ -77,11 +77,13 @@ class GeneRegexJavaVisitor : RegexJavaBaseVisitor(){ val res = ctx.accept(this) val validDisjunctions = res.genes.map { it as DisjunctionRxGene } - if (validDisjunctions.isEmpty()) { + val nonEmptyDisj = validDisjunctions.filter{ !it.isEffectivelyEmpty() } + + if(nonEmptyDisj.isEmpty()){ return null } - val disjList = DisjunctionListRxGene(validDisjunctions) + val disjList = DisjunctionListRxGene(nonEmptyDisj) //TODO tmp hack until full handling of ^$. Assume full match when nested disjunctions for (gene in disjList.disjunctions) { @@ -99,11 +101,11 @@ class GeneRegexJavaVisitor : RegexJavaBaseVisitor(){ val text = RegexUtils.getRegexExpByParserRuleContext(ctx) - if (res.genes.isEmpty()) { - throw IllegalStateException("Regex is unsatisfiable: no valid strings can be generated: $text") - } + val nonEmptyDisj = res.genes + .map { it as DisjunctionRxGene } + .filter{ !it.isEffectivelyEmpty() } - val disjList = DisjunctionListRxGene(res.genes.map { it as DisjunctionRxGene }) + val disjList = DisjunctionListRxGene(nonEmptyDisj) // we remove the token from end of the string to store as sourceRegex val gene = RegexGene( @@ -126,14 +128,17 @@ class GeneRegexJavaVisitor : RegexJavaBaseVisitor(){ val res = VisitResult() - if (!altRes.hadFilteredContent) { + // add disjunction if it has genes, OR if the alternative was purely assertions (^$) or flag scopes + // in that case altRes.genes is empty but the alternative is valid (matches "") + val hasOnlyAssertionsOrFlagScopes = ctx.alternative().term().isNotEmpty() && + ctx.alternative().term().all { it.assertion() != null || it.FLAG_SCOPE_OPEN() != null } + + if (altRes.genes.isNotEmpty() || hasOnlyAssertionsOrFlagScopes || ctx.alternative().term().isEmpty()) { val disj = DisjunctionRxGene("disj", altRes.genes.map { it }, matchStart, matchEnd) - // add if genuinely empty (matches "") OR has non-empty terms - if (disj.terms.isEmpty() || !disj.isEffectivelyEmpty()) { - res.genes.add(disj) - } - // else: terms exist but all effectively empty ([a&&b], \1 etc), skip + + res.genes.add(disj) } + // else: had non-assertion terms but all produced nothing (empty char class etc.), skip if(ctx.disjunction() != null){ val disjRes = ctx.disjunction().accept(this) @@ -172,14 +177,6 @@ class GeneRegexJavaVisitor : RegexJavaBaseVisitor(){ for (j in i + 1 until ctx.term().size) { val resTerm = ctx.term()[j].accept(this) - if (resTerm.hadFilteredContent) { - currentFlags = previous - val r = VisitResult() - r.hadFilteredContent = true - r.data = Pair(false, false) - return r - } - // this condition isolates the back ref case, preserving original behavior otherwise. if (ctx.term()[j].atom()?.atomEscape()?.BackReference() != null){ // if term is a BackReference we addAll genes from result as there may be more than one if digits are dropped @@ -199,13 +196,6 @@ class GeneRegexJavaVisitor : RegexJavaBaseVisitor(){ val resTerm = ctx.term()[i].accept(this) val gene = resTerm.genes.firstOrNull() - if (resTerm.hadFilteredContent) { - val r = VisitResult() - r.hadFilteredContent = true - r.data = Pair(false, false) - return r - } - // this condition isolates the back ref case, preserving original behavior otherwise. if (ctx.term()[i].atom()?.atomEscape()?.BackReference() != null){ // if term is a BackReference we addAll genes from result as there may be more than one if digits are dropped @@ -247,30 +237,23 @@ class GeneRegexJavaVisitor : RegexJavaBaseVisitor(){ } val resAtom = ctx.atom().accept(this) - - if (resAtom.hadFilteredContent) { - // check if there's a quantifier that allows zero matches - // if so, the term is valid and produces "", don't propagate hadFilteredContent - if (ctx.quantifier() != null) { - val limits = ctx.quantifier().accept(this).data as Pair - if (limits.first == 0) { - // min=0, so zero matches is valid, term produces "", not unsatisfiable - return res // empty genes, no hadFilteredContent - } - } - // no quantifier or min>0, propagate - - res.hadFilteredContent = true - return res - } - val atom = resAtom.genes.firstOrNull() - ?: return res if(ctx.quantifier() != null){ val limits = ctx.quantifier().accept(this).data as Pair + // if quantified atom is unsatisfiable we must then check the limits + if (atom == null || (atom as? RxTerm)?.isEffectivelyEmpty() == true) { + return if (limits.first == 0) { + // if 0 appearances is allowed then the regex is satisfiable only with empty string + VisitResult(PatternCharacterBlockGene("0_QuantifierOnEmptyRegex", "")) + } else { + // if not then unsatisfiable, return with no genes + res + } + } + // if atom is not a back ref then we use the default behavior, results may only have one gene var template: Gene = atom @@ -283,7 +266,9 @@ class GeneRegexJavaVisitor : RegexJavaBaseVisitor(){ res.genes.addAll(resAtom.genes.dropLast(1)) // the last gene gets wrapped with the quantifier gene, then that gets added to result - template = resAtom.genes.last() + if (resAtom.genes.isNotEmpty()) { + template = resAtom.genes.last() + } } val q = QuantifierRxGene("q", template, limits.first, limits.second) @@ -295,7 +280,7 @@ class GeneRegexJavaVisitor : RegexJavaBaseVisitor(){ if (ctx.atom()?.atomEscape()?.BackReference() != null){ // if atom is a BackReference we addAll genes from result as there may be more than one if digits are dropped res.genes.addAll(resAtom.genes) - } else { + } else if (atom != null) { // if atom is not a back ref we fall back to the default behavior, results only have one gene res.genes.add(atom) } @@ -375,7 +360,7 @@ class GeneRegexJavaVisitor : RegexJavaBaseVisitor(){ return if (disjList != null) { VisitResult(disjList) } else { - VisitResult().also{ it.hadFilteredContent = true } + VisitResult() } } @@ -410,8 +395,6 @@ class GeneRegexJavaVisitor : RegexJavaBaseVisitor(){ val groupIndex = captureGroups.size captureGroups.add(null) // add placeholder for the gene - val res = ctx.disjunction().accept(this) - val disjList = buildDisjunctionList(ctx.disjunction()) val isCapturingGroup = !ctx.text.startsWith("(?:") @@ -431,9 +414,7 @@ class GeneRegexJavaVisitor : RegexJavaBaseVisitor(){ return if (disjList != null) { VisitResult(disjList) } else { - VisitResult().also{ - it.hadFilteredContent = true - } + VisitResult() } } diff --git a/core/src/main/kotlin/org/evomaster/core/parser/VisitResult.kt b/core/src/main/kotlin/org/evomaster/core/parser/VisitResult.kt index d780d5f44b..61f6bd4987 100644 --- a/core/src/main/kotlin/org/evomaster/core/parser/VisitResult.kt +++ b/core/src/main/kotlin/org/evomaster/core/parser/VisitResult.kt @@ -7,8 +7,7 @@ import org.evomaster.core.search.gene.Gene */ class VisitResult( val genes: MutableList = mutableListOf(), - var data: Any? = null, - var hadFilteredContent: Boolean = false + var data: Any? = null ){ constructor(gene: Gene) : this() { From 160bc29fdc5ec38bbdd0147db6e1a4e2a9ae5099 Mon Sep 17 00:00:00 2001 From: lmasroca Date: Wed, 3 Jun 2026 15:56:19 -0300 Subject: [PATCH 10/17] Removed redundant overrides for isEffectivelyEmpty method. --- .../gene/regex/CharacterClassEscapeRxGene.kt | 6 ++++-- .../core/search/gene/regex/CharacterRangeRxGene.kt | 6 +++--- .../search/gene/regex/DisjunctionListRxGene.kt | 2 -- .../core/search/gene/regex/QuantifierRxGene.kt | 14 +------------- .../core/parser/GeneRegexJavaVisitorTest.kt | 6 +++++- 5 files changed, 13 insertions(+), 21 deletions(-) diff --git a/core/src/main/kotlin/org/evomaster/core/search/gene/regex/CharacterClassEscapeRxGene.kt b/core/src/main/kotlin/org/evomaster/core/search/gene/regex/CharacterClassEscapeRxGene.kt index aa50fd01fb..133f1f4fc0 100644 --- a/core/src/main/kotlin/org/evomaster/core/search/gene/regex/CharacterClassEscapeRxGene.kt +++ b/core/src/main/kotlin/org/evomaster/core/search/gene/regex/CharacterClassEscapeRxGene.kt @@ -132,8 +132,10 @@ class CharacterClassEscapeRxGene( } } + override fun isEffectivelyEmpty(): Boolean = multiCharRange.isEmpty + override fun isMutable(): Boolean { - return multiCharRange.isNotEmpty + return !isEffectivelyEmpty() } override fun checkForLocallyValidIgnoringChildren() : Boolean{ @@ -197,7 +199,7 @@ class CharacterClassEscapeRxGene( } override fun getValueAsPrintableString(previousGenes: List, mode: GeneUtils.EscapeMode?, targetFormat: OutputFormat?, extraCheck: Boolean): String { - if (multiCharRange.isEmpty) { + if (isEffectivelyEmpty()) { throw IllegalStateException("Cannot get value from empty CharacterClassEscape") } return if (!flags.isCaseable(value[0])) { diff --git a/core/src/main/kotlin/org/evomaster/core/search/gene/regex/CharacterRangeRxGene.kt b/core/src/main/kotlin/org/evomaster/core/search/gene/regex/CharacterRangeRxGene.kt index 6538718832..08fa1fdf52 100644 --- a/core/src/main/kotlin/org/evomaster/core/search/gene/regex/CharacterRangeRxGene.kt +++ b/core/src/main/kotlin/org/evomaster/core/search/gene/regex/CharacterRangeRxGene.kt @@ -30,7 +30,7 @@ class CharacterRangeRxGene( private val log = LoggerFactory.getLogger(CharacterRangeRxGene::class.java) } - var value : Char = if (validRanges.isEmpty) '\u0000' else validRanges[0].start + var value : Char = if (isEffectivelyEmpty()) '\u0000' else validRanges[0].start /** * Whether to output the character in uppercase. @@ -51,7 +51,7 @@ class CharacterRangeRxGene( } override fun isMutable(): Boolean { - if (validRanges.isEmpty) { + if (isEffectivelyEmpty()) { return false } // check if there is more than one character or if the character is caseable @@ -139,7 +139,7 @@ class CharacterRangeRxGene( TODO should \ be handled specially? In any case, would have same handling as AnyCharacterRxGene */ - if (validRanges.isEmpty) { + if (isEffectivelyEmpty()) { throw IllegalStateException("Cannot get value from empty CharacterRange") } return if (!flags.isCaseable(value)) { diff --git a/core/src/main/kotlin/org/evomaster/core/search/gene/regex/DisjunctionListRxGene.kt b/core/src/main/kotlin/org/evomaster/core/search/gene/regex/DisjunctionListRxGene.kt index 6907b4caf9..43676bcfb4 100644 --- a/core/src/main/kotlin/org/evomaster/core/search/gene/regex/DisjunctionListRxGene.kt +++ b/core/src/main/kotlin/org/evomaster/core/search/gene/regex/DisjunctionListRxGene.kt @@ -29,8 +29,6 @@ class DisjunctionListRxGene( private val log: Logger = LoggerFactory.getLogger(DisjunctionListRxGene::class.java) } - override fun isEffectivelyEmpty(): Boolean = disjunctions.all { it.isEffectivelyEmpty() } - override fun checkForLocallyValidIgnoringChildren(): Boolean { return activeDisjunction >= 0 && activeDisjunction < disjunctions.size } diff --git a/core/src/main/kotlin/org/evomaster/core/search/gene/regex/QuantifierRxGene.kt b/core/src/main/kotlin/org/evomaster/core/search/gene/regex/QuantifierRxGene.kt index 0615846bb2..1ed5edf059 100644 --- a/core/src/main/kotlin/org/evomaster/core/search/gene/regex/QuantifierRxGene.kt +++ b/core/src/main/kotlin/org/evomaster/core/search/gene/regex/QuantifierRxGene.kt @@ -62,7 +62,7 @@ class QuantifierRxGene( max } - if(!isTemplateEffectivelyEmpty() && min == limitedMax && !template.isMutable()){ + if(min == limitedMax && !template.isMutable()){ /* this means this whole gene is immutable. still need to initialize it */ @@ -74,10 +74,6 @@ class QuantifierRxGene( } } - private fun isTemplateEffectivelyEmpty() : Boolean = (template as? RxTerm)?.isEffectivelyEmpty() == true - - override fun isEffectivelyEmpty(): Boolean = min > 0 && isTemplateEffectivelyEmpty() - override fun checkForLocallyValidIgnoringChildren() : Boolean{ val n = getViewOfChildren().size return n in min..limitedMax @@ -120,9 +116,6 @@ class QuantifierRxGene( } override fun isMutable(): Boolean { - if (isTemplateEffectivelyEmpty()) { - return false - } return min != limitedMax || template.isMutable() } @@ -183,11 +176,6 @@ class QuantifierRxGene( } fun addNewAtom(randomness: Randomness, forceNewValue: Boolean){ - - if (isTemplateEffectivelyEmpty()) { - return - } - val base = template.copy() base.resetLocalIdRecursively() base.doInitialize(randomness) diff --git a/core/src/test/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitorTest.kt b/core/src/test/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitorTest.kt index b1af8b0807..2e68d030ee 100644 --- a/core/src/test/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitorTest.kt +++ b/core/src/test/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitorTest.kt @@ -255,7 +255,6 @@ class GeneRegexJavaVisitorTest : GeneRegexEcma262VisitorTest() { @Test fun testIntersection(){ - checkSameAsJava("&&") checkSameAsJava("[abc-e[f-h]ij-l[m]n]") checkSameAsJava("[a&&a][a&&a&&a]") checkSameAsJava("[a-z&&[aeiou]]") @@ -273,6 +272,8 @@ class GeneRegexJavaVisitorTest : GeneRegexEcma262VisitorTest() { checkSameAsJava("[a-z&&[a-z]]") checkSameAsJava("[a-ce-g&&[b-f]]") checkSameAsJava("[[a-z&&[a-p]]&&[f-z]]") + checkSameAsJava("[a[b[c[d&&[\\w]]]][0-7&&\\d&&[0-5]&&1-5]]") + checkSameAsJava("&&") checkSameAsJava("[[a-c&&[d-f]][x-z]]") checkSameAsJava("[a-c&&[b-d]]|[x&&y]") } @@ -306,6 +307,8 @@ class GeneRegexJavaVisitorTest : GeneRegexEcma262VisitorTest() { checkSameAsJava("([a&b])|b\\1") assertThrows { checkSameAsJava("([a&&b])|b\\1") } assertThrows { checkSameAsJava("\\k") } + assertThrows { checkSameAsJava("((\\1|\\2)+)") } + checkSameAsJava("((\\1|\\2)*)") } @Test @@ -327,6 +330,7 @@ class GeneRegexJavaVisitorTest : GeneRegexEcma262VisitorTest() { checkSameAsJava("(?iu)[a&&b]|c") assertThrows { checkSameAsJava("^(?iu)[a&&b]$") } checkSameAsJava("^(?iu)[a&&b]$|c") + checkSameAsJava("^(?iu)([a&&b]$|c)") } @Test From e717bae9b51668ad32513819ee93bcbb3dc4a547 Mon Sep 17 00:00:00 2001 From: lmasroca Date: Wed, 3 Jun 2026 17:30:37 -0300 Subject: [PATCH 11/17] Edge case fixed. --- .../main/antlr4/org/evomaster/core/parser/RegexJava.g4 | 10 ++++++---- .../org/evomaster/core/parser/GeneRegexJavaVisitor.kt | 3 ++- .../evomaster/core/parser/GeneRegexJavaVisitorTest.kt | 4 +++- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/core/src/main/antlr4/org/evomaster/core/parser/RegexJava.g4 b/core/src/main/antlr4/org/evomaster/core/parser/RegexJava.g4 index 5a5fc8d8a0..7abdcbdeae 100644 --- a/core/src/main/antlr4/org/evomaster/core/parser/RegexJava.g4 +++ b/core/src/main/antlr4/org/evomaster/core/parser/RegexJava.g4 @@ -189,19 +189,17 @@ patternCharacter | BRACE_close | BRACKET_close | COLON - | INTERSECTION + | DOUBLE_AMPERSAND ; -INTERSECTION : '&&' ; - characterClass : BRACKET_open CARET classContents BRACKET_close | BRACKET_open classContents BRACKET_close ; classContents - : classUnion (INTERSECTION classUnion)* + : classUnion (DOUBLE_AMPERSAND classUnion)* ; classUnion @@ -271,6 +269,10 @@ atomEscape //------ LEXER ------------------------------ // Lexer rules have first letter in upper-case +DOUBLE_AMPERSAND + : '&&' + ; + DecimalDigit : [0-9] ; diff --git a/core/src/main/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitor.kt b/core/src/main/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitor.kt index 9169957860..6b1c0343e6 100644 --- a/core/src/main/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitor.kt +++ b/core/src/main/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitor.kt @@ -244,7 +244,8 @@ class GeneRegexJavaVisitor : RegexJavaBaseVisitor(){ val limits = ctx.quantifier().accept(this).data as Pair // if quantified atom is unsatisfiable we must then check the limits - if (atom == null || (atom as? RxTerm)?.isEffectivelyEmpty() == true) { + if (atom == null || + ((atom as? RxTerm)?.isEffectivelyEmpty() == true) && resAtom.genes.size == 1) { return if (limits.first == 0) { // if 0 appearances is allowed then the regex is satisfiable only with empty string VisitResult(PatternCharacterBlockGene("0_QuantifierOnEmptyRegex", "")) diff --git a/core/src/test/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitorTest.kt b/core/src/test/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitorTest.kt index 2e68d030ee..790c864101 100644 --- a/core/src/test/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitorTest.kt +++ b/core/src/test/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitorTest.kt @@ -254,7 +254,7 @@ class GeneRegexJavaVisitorTest : GeneRegexEcma262VisitorTest() { } @Test - fun testIntersection(){ + fun testCharClassIntersectionSubtractionAndNesting(){ checkSameAsJava("[abc-e[f-h]ij-l[m]n]") checkSameAsJava("[a&&a][a&&a&&a]") checkSameAsJava("[a-z&&[aeiou]]") @@ -309,6 +309,8 @@ class GeneRegexJavaVisitorTest : GeneRegexEcma262VisitorTest() { assertThrows { checkSameAsJava("\\k") } assertThrows { checkSameAsJava("((\\1|\\2)+)") } checkSameAsJava("((\\1|\\2)*)") + checkSameAsJava("(\\12)*") + assertThrows { checkSameAsJava("\\12*") } } @Test From a00f566457cef858a812bfac842ae25b8deec66b Mon Sep 17 00:00:00 2001 From: lmasroca Date: Sun, 7 Jun 2026 20:45:06 -0300 Subject: [PATCH 12/17] Added explicit throw when regex is unsatisfiable. --- .../kotlin/org/evomaster/core/parser/GeneRegexJavaVisitor.kt | 4 ++++ .../kotlin/org/evomaster/core/utils/MultiCharacterRange.kt | 4 ++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/core/src/main/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitor.kt b/core/src/main/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitor.kt index a83971f7c2..a61f23a1a3 100644 --- a/core/src/main/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitor.kt +++ b/core/src/main/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitor.kt @@ -105,6 +105,10 @@ class GeneRegexJavaVisitor : RegexJavaBaseVisitor(){ .map { it as DisjunctionRxGene } .filter{ !it.isEffectivelyEmpty() } + if (nonEmptyDisj.isEmpty()) { + throw IllegalStateException("Regex is unsatisfiable.") + } + val disjList = DisjunctionListRxGene(nonEmptyDisj) // we remove the token from end of the string to store as sourceRegex diff --git a/core/src/main/kotlin/org/evomaster/core/utils/MultiCharacterRange.kt b/core/src/main/kotlin/org/evomaster/core/utils/MultiCharacterRange.kt index c5e3c18582..3056323358 100644 --- a/core/src/main/kotlin/org/evomaster/core/utils/MultiCharacterRange.kt +++ b/core/src/main/kotlin/org/evomaster/core/utils/MultiCharacterRange.kt @@ -27,7 +27,7 @@ class MultiCharacterRange internal constructor(val ranges: List) if (negated) { internalRanges.add(CharacterRange(Character.MIN_VALUE, Character.MAX_VALUE)) } - for (range in ranges) { + for (range in ranges.sortedBy { it.start }) { internalRanges = if (negated) { remove(internalRanges, CharacterRange(range.start, range.end)) } else { @@ -51,7 +51,7 @@ class MultiCharacterRange internal constructor(val ranges: List) var currentEnd = toAdd.end var merged = false - for ((start, end) in internalRanges.sortedBy { it.start }) { + for ((start, end) in internalRanges) { when { end.code < currentStart.code - 1 -> newInternalRanges += CharacterRange(start, end) start.code > currentEnd.code + 1 -> { From c0a7587198d36628446278430499e4f7d8a41359 Mon Sep 17 00:00:00 2001 From: lmasroca Date: Mon, 8 Jun 2026 13:39:40 -0300 Subject: [PATCH 13/17] Made some adjustments and added some comments. --- .../core/parser/GeneRegexJavaVisitor.kt | 19 +++++++++---------- .../core/parser/GeneRegexJavaVisitorTest.kt | 2 ++ 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/core/src/main/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitor.kt b/core/src/main/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitor.kt index a61f23a1a3..bb041a9a0f 100644 --- a/core/src/main/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitor.kt +++ b/core/src/main/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitor.kt @@ -223,6 +223,9 @@ class GeneRegexJavaVisitor : RegexJavaBaseVisitor(){ */ throw IllegalStateException("Cannot support $assertion at position $i") } + } else { + // unsatisfiable term, return with no genes + return VisitResult(data=Pair(false, false)) } } @@ -271,9 +274,7 @@ class GeneRegexJavaVisitor : RegexJavaBaseVisitor(){ res.genes.addAll(resAtom.genes.dropLast(1)) // the last gene gets wrapped with the quantifier gene, then that gets added to result - if (resAtom.genes.isNotEmpty()) { - template = resAtom.genes.last() - } + template = resAtom.genes.last() } val q = QuantifierRxGene("q", template, limits.first, limits.second) @@ -289,6 +290,7 @@ class GeneRegexJavaVisitor : RegexJavaBaseVisitor(){ // if atom is not a back ref we fall back to the default behavior, results only have one gene res.genes.add(atom) } + // else atom is unsatisfiable, return no genes } return res @@ -644,13 +646,7 @@ class GeneRegexJavaVisitor : RegexJavaBaseVisitor(){ val n = allDigits.take(backRefDigitCount).toInt() - val gene = if (captureGroups.size > n-1) { - BackReferenceRxGene(n, captureGroups[n - 1]) - } else { - BackReferenceRxGene(n, null) - } - - val result = VisitResult(gene) + val result = VisitResult(BackReferenceRxGene(n, captureGroups.getOrNull(n - 1))) val remainingChars = allDigits.drop(backRefDigitCount) @@ -666,6 +662,9 @@ class GeneRegexJavaVisitor : RegexJavaBaseVisitor(){ if (ctx.NamedBackReference() != null) { // strip "\k<" and ">" val name = txt.drop(3).dropLast(1) + if(name !in namedCaptureGroups){ + throw IllegalStateException("Named backreference \\k<$name> refers to unknown group '$name'") + } val group = namedCaptureGroups[name] val groupIndex = captureGroups.indexOf(group) + 1 // 1-based, for the gene name return VisitResult(BackReferenceRxGene(groupIndex, group)) diff --git a/core/src/test/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitorTest.kt b/core/src/test/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitorTest.kt index 790c864101..e7ea3ed26d 100644 --- a/core/src/test/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitorTest.kt +++ b/core/src/test/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitorTest.kt @@ -396,5 +396,7 @@ class GeneRegexJavaVisitorTest : GeneRegexEcma262VisitorTest() { assertThrows { checkSameAsJava("(?[a&&b])") } checkSameAsJava("(?[a&&b])|c") assertThrows { checkSameAsJava("(?[a&&b])|c\\k") } + assertThrows { checkSameAsJava("a([b&&c])d") } + assertThrows { checkSameAsJava("abc|\\k") } } } \ No newline at end of file From 7a26ffbaa9bd538eefb042d447a9a41e7b681bd1 Mon Sep 17 00:00:00 2001 From: lmasroca Date: Tue, 9 Jun 2026 13:26:21 -0300 Subject: [PATCH 14/17] Added some comments. --- .../org/evomaster/core/parser/GeneRegexJavaVisitor.kt | 7 +++++++ .../core/search/gene/regex/BackReferenceRxGene.kt | 2 ++ 2 files changed, 9 insertions(+) diff --git a/core/src/main/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitor.kt b/core/src/main/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitor.kt index bb041a9a0f..f58173f879 100644 --- a/core/src/main/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitor.kt +++ b/core/src/main/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitor.kt @@ -40,12 +40,14 @@ class GeneRegexJavaVisitor : RegexJavaBaseVisitor(){ * Capture groups in order of appearance (1-based index -> list index 0). * Populated as the tree is walked. A backreference is only valid if it * appears after the group it references, which Java regex requires anyway. + * Nullable DisjunctionListRxGene to represent unsatisfiable backreference. */ private val captureGroups = mutableListOf() /** * Same as [captureGroups] but for named backreferences, which can be accessed * with their name or number. + * Nullable DisjunctionListRxGene to represent unsatisfiable backreference. */ private val namedCaptureGroups = mutableMapOf() @@ -73,6 +75,9 @@ class GeneRegexJavaVisitor : RegexJavaBaseVisitor(){ ) } + /** + * Builds DisjunctionListRxGenes from a disjunction context, returns null if disjunction is unsatisfiable. + */ private fun buildDisjunctionList(ctx: RegexJavaParser.DisjunctionContext): DisjunctionListRxGene? { val res = ctx.accept(this) val validDisjunctions = res.genes.map { it as DisjunctionRxGene } @@ -367,6 +372,7 @@ class GeneRegexJavaVisitor : RegexJavaBaseVisitor(){ return if (disjList != null) { VisitResult(disjList) } else { + // unsatisfiable, return with no genes. VisitResult() } } @@ -421,6 +427,7 @@ class GeneRegexJavaVisitor : RegexJavaBaseVisitor(){ return if (disjList != null) { VisitResult(disjList) } else { + // unsatisfiable, return with no genes. VisitResult() } } diff --git a/core/src/main/kotlin/org/evomaster/core/search/gene/regex/BackReferenceRxGene.kt b/core/src/main/kotlin/org/evomaster/core/search/gene/regex/BackReferenceRxGene.kt index c2ab9d6132..6cc88fb127 100644 --- a/core/src/main/kotlin/org/evomaster/core/search/gene/regex/BackReferenceRxGene.kt +++ b/core/src/main/kotlin/org/evomaster/core/search/gene/regex/BackReferenceRxGene.kt @@ -14,6 +14,8 @@ import org.evomaster.core.search.service.mutator.genemutation.SubsetGeneMutation * Represents a backreference \N in a regex (N being a number). * Its value is always identical to the current value of its [captureGroup]. * It has no independent state and is therefore immutable. + * If capture group is null then the referenced group was unsatisfiable, + * in which case the same is true for the backreference to it. */ class BackReferenceRxGene( val groupIndex: Int, From a7e6dd77799918e383b4c979a64af09581927c67 Mon Sep 17 00:00:00 2001 From: lmasroca Date: Tue, 9 Jun 2026 13:31:14 -0300 Subject: [PATCH 15/17] Added another comment. --- .../kotlin/org/evomaster/core/parser/GeneRegexJavaVisitor.kt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/core/src/main/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitor.kt b/core/src/main/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitor.kt index f58173f879..68d37f6d6a 100644 --- a/core/src/main/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitor.kt +++ b/core/src/main/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitor.kt @@ -85,6 +85,8 @@ class GeneRegexJavaVisitor : RegexJavaBaseVisitor(){ val nonEmptyDisj = validDisjunctions.filter{ !it.isEffectivelyEmpty() } if(nonEmptyDisj.isEmpty()){ + // As DisjunctionListRxGene extends CompositeFixedGene, its disjunctions list cannot be empty. + // In this case we return null to represent an unsatisfiable DisjunctionListRxGene. return null } From 7291f0b775f00d282143fba27c55ea063eaea96a Mon Sep 17 00:00:00 2001 From: lmasroca Date: Tue, 9 Jun 2026 18:15:25 -0300 Subject: [PATCH 16/17] Added comment explaining nullable DisjunctionListRxGene in capture group maps. --- .../org/evomaster/core/parser/GeneRegexJavaVisitor.kt | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/core/src/main/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitor.kt b/core/src/main/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitor.kt index 68d37f6d6a..697d0a2260 100644 --- a/core/src/main/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitor.kt +++ b/core/src/main/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitor.kt @@ -40,14 +40,20 @@ class GeneRegexJavaVisitor : RegexJavaBaseVisitor(){ * Capture groups in order of appearance (1-based index -> list index 0). * Populated as the tree is walked. A backreference is only valid if it * appears after the group it references, which Java regex requires anyway. - * Nullable DisjunctionListRxGene to represent unsatisfiable backreference. + * The value is nullable to represent a captured group that is unsatisfiable, + * for example when the group contains an empty character class like `([a&&b])`. + * In that case the map holds null instead of a DisjunctionListRxGene. + * @see buildDisjunctionList */ private val captureGroups = mutableListOf() /** * Same as [captureGroups] but for named backreferences, which can be accessed * with their name or number. - * Nullable DisjunctionListRxGene to represent unsatisfiable backreference. + * The value is nullable to represent a captured group that is unsatisfiable, + * for example when the group contains an empty character class like `([a&&b])`. + * In that case the map holds null instead of a DisjunctionListRxGene. + * @see buildDisjunctionList */ private val namedCaptureGroups = mutableMapOf() From 194cc9636d6714bd997de3f6295667963c0689bc Mon Sep 17 00:00:00 2001 From: lmasroca Date: Tue, 16 Jun 2026 14:08:30 -0300 Subject: [PATCH 17/17] Refactor: renamed RxTerm.isEffectivelyEmpty to isUnsatisfiable --- .../core/parser/GeneRegexJavaVisitor.kt | 16 ++++++++-------- .../search/gene/regex/BackReferenceRxGene.kt | 4 ++-- .../gene/regex/CharacterClassEscapeRxGene.kt | 6 +++--- .../search/gene/regex/CharacterRangeRxGene.kt | 8 ++++---- .../core/search/gene/regex/DisjunctionRxGene.kt | 4 ++-- .../evomaster/core/search/gene/regex/RxTerm.kt | 2 +- 6 files changed, 20 insertions(+), 20 deletions(-) diff --git a/core/src/main/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitor.kt b/core/src/main/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitor.kt index 697d0a2260..0944f2cf2d 100644 --- a/core/src/main/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitor.kt +++ b/core/src/main/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitor.kt @@ -88,15 +88,15 @@ class GeneRegexJavaVisitor : RegexJavaBaseVisitor(){ val res = ctx.accept(this) val validDisjunctions = res.genes.map { it as DisjunctionRxGene } - val nonEmptyDisj = validDisjunctions.filter{ !it.isEffectivelyEmpty() } + val satisfiableDisjunctions = validDisjunctions.filter{ !it.isUnsatisfiable() } - if(nonEmptyDisj.isEmpty()){ + if(satisfiableDisjunctions.isEmpty()){ // As DisjunctionListRxGene extends CompositeFixedGene, its disjunctions list cannot be empty. // In this case we return null to represent an unsatisfiable DisjunctionListRxGene. return null } - val disjList = DisjunctionListRxGene(nonEmptyDisj) + val disjList = DisjunctionListRxGene(satisfiableDisjunctions) //TODO tmp hack until full handling of ^$. Assume full match when nested disjunctions for (gene in disjList.disjunctions) { @@ -114,15 +114,15 @@ class GeneRegexJavaVisitor : RegexJavaBaseVisitor(){ val text = RegexUtils.getRegexExpByParserRuleContext(ctx) - val nonEmptyDisj = res.genes + val satisfiableDisjunctions = res.genes .map { it as DisjunctionRxGene } - .filter{ !it.isEffectivelyEmpty() } + .filter{ !it.isUnsatisfiable() } - if (nonEmptyDisj.isEmpty()) { + if (satisfiableDisjunctions.isEmpty()) { throw IllegalStateException("Regex is unsatisfiable.") } - val disjList = DisjunctionListRxGene(nonEmptyDisj) + val disjList = DisjunctionListRxGene(satisfiableDisjunctions) // we remove the token from end of the string to store as sourceRegex val gene = RegexGene( @@ -265,7 +265,7 @@ class GeneRegexJavaVisitor : RegexJavaBaseVisitor(){ // if quantified atom is unsatisfiable we must then check the limits if (atom == null || - ((atom as? RxTerm)?.isEffectivelyEmpty() == true) && resAtom.genes.size == 1) { + ((atom as? RxTerm)?.isUnsatisfiable() == true) && resAtom.genes.size == 1) { return if (limits.first == 0) { // if 0 appearances is allowed then the regex is satisfiable only with empty string VisitResult(PatternCharacterBlockGene("0_QuantifierOnEmptyRegex", "")) diff --git a/core/src/main/kotlin/org/evomaster/core/search/gene/regex/BackReferenceRxGene.kt b/core/src/main/kotlin/org/evomaster/core/search/gene/regex/BackReferenceRxGene.kt index 6cc88fb127..8a85458d28 100644 --- a/core/src/main/kotlin/org/evomaster/core/search/gene/regex/BackReferenceRxGene.kt +++ b/core/src/main/kotlin/org/evomaster/core/search/gene/regex/BackReferenceRxGene.kt @@ -22,8 +22,8 @@ class BackReferenceRxGene( val captureGroup: DisjunctionListRxGene? ) : RxAtom, SimpleGene("\\$groupIndex") { - override fun isEffectivelyEmpty(): Boolean { - return captureGroup == null || captureGroup.isEffectivelyEmpty() + override fun isUnsatisfiable(): Boolean { + return captureGroup == null || captureGroup.isUnsatisfiable() } override fun checkForLocallyValidIgnoringChildren(): Boolean = true diff --git a/core/src/main/kotlin/org/evomaster/core/search/gene/regex/CharacterClassEscapeRxGene.kt b/core/src/main/kotlin/org/evomaster/core/search/gene/regex/CharacterClassEscapeRxGene.kt index 133f1f4fc0..5abc86b9ba 100644 --- a/core/src/main/kotlin/org/evomaster/core/search/gene/regex/CharacterClassEscapeRxGene.kt +++ b/core/src/main/kotlin/org/evomaster/core/search/gene/regex/CharacterClassEscapeRxGene.kt @@ -132,10 +132,10 @@ class CharacterClassEscapeRxGene( } } - override fun isEffectivelyEmpty(): Boolean = multiCharRange.isEmpty + override fun isUnsatisfiable(): Boolean = multiCharRange.isEmpty override fun isMutable(): Boolean { - return !isEffectivelyEmpty() + return !isUnsatisfiable() } override fun checkForLocallyValidIgnoringChildren() : Boolean{ @@ -199,7 +199,7 @@ class CharacterClassEscapeRxGene( } override fun getValueAsPrintableString(previousGenes: List, mode: GeneUtils.EscapeMode?, targetFormat: OutputFormat?, extraCheck: Boolean): String { - if (isEffectivelyEmpty()) { + if (isUnsatisfiable()) { throw IllegalStateException("Cannot get value from empty CharacterClassEscape") } return if (!flags.isCaseable(value[0])) { diff --git a/core/src/main/kotlin/org/evomaster/core/search/gene/regex/CharacterRangeRxGene.kt b/core/src/main/kotlin/org/evomaster/core/search/gene/regex/CharacterRangeRxGene.kt index 08fa1fdf52..ee7b930316 100644 --- a/core/src/main/kotlin/org/evomaster/core/search/gene/regex/CharacterRangeRxGene.kt +++ b/core/src/main/kotlin/org/evomaster/core/search/gene/regex/CharacterRangeRxGene.kt @@ -30,7 +30,7 @@ class CharacterRangeRxGene( private val log = LoggerFactory.getLogger(CharacterRangeRxGene::class.java) } - var value : Char = if (isEffectivelyEmpty()) '\u0000' else validRanges[0].start + var value : Char = if (isUnsatisfiable()) '\u0000' else validRanges[0].start /** * Whether to output the character in uppercase. @@ -38,7 +38,7 @@ class CharacterRangeRxGene( */ var useUpperCase: Boolean = false - override fun isEffectivelyEmpty(): Boolean = validRanges.isEmpty + override fun isUnsatisfiable(): Boolean = validRanges.isEmpty override fun checkForLocallyValidIgnoringChildren() : Boolean{ return validRanges.any { @@ -51,7 +51,7 @@ class CharacterRangeRxGene( } override fun isMutable(): Boolean { - if (isEffectivelyEmpty()) { + if (isUnsatisfiable()) { return false } // check if there is more than one character or if the character is caseable @@ -139,7 +139,7 @@ class CharacterRangeRxGene( TODO should \ be handled specially? In any case, would have same handling as AnyCharacterRxGene */ - if (isEffectivelyEmpty()) { + if (isUnsatisfiable()) { throw IllegalStateException("Cannot get value from empty CharacterRange") } return if (!flags.isCaseable(value)) { diff --git a/core/src/main/kotlin/org/evomaster/core/search/gene/regex/DisjunctionRxGene.kt b/core/src/main/kotlin/org/evomaster/core/search/gene/regex/DisjunctionRxGene.kt index 19a47a2961..4582495b4e 100644 --- a/core/src/main/kotlin/org/evomaster/core/search/gene/regex/DisjunctionRxGene.kt +++ b/core/src/main/kotlin/org/evomaster/core/search/gene/regex/DisjunctionRxGene.kt @@ -47,8 +47,8 @@ class DisjunctionRxGene( private val log : Logger = LoggerFactory.getLogger(DisjunctionRxGene::class.java) } - override fun isEffectivelyEmpty(): Boolean = - terms.isNotEmpty() && terms.any { (it as? RxTerm)?.isEffectivelyEmpty() == true } + override fun isUnsatisfiable(): Boolean = + terms.isNotEmpty() && terms.any { (it as? RxTerm)?.isUnsatisfiable() == true } override fun checkForLocallyValidIgnoringChildren() : Boolean{ return true diff --git a/core/src/main/kotlin/org/evomaster/core/search/gene/regex/RxTerm.kt b/core/src/main/kotlin/org/evomaster/core/search/gene/regex/RxTerm.kt index bf5af37879..d6ccdf3574 100644 --- a/core/src/main/kotlin/org/evomaster/core/search/gene/regex/RxTerm.kt +++ b/core/src/main/kotlin/org/evomaster/core/search/gene/regex/RxTerm.kt @@ -10,5 +10,5 @@ interface RxTerm { * for example an empty character class intersection like [a&&b]. * Used at construction time to filter unsatisfiable branches from disjunctions. */ - fun isEffectivelyEmpty(): Boolean = false + fun isUnsatisfiable(): Boolean = false } \ No newline at end of file