diff --git a/core/src/main/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitor.kt b/core/src/main/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitor.kt index dfecde83db..0944f2cf2d 100644 --- a/core/src/main/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitor.kt +++ b/core/src/main/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitor.kt @@ -40,14 +40,22 @@ class GeneRegexJavaVisitor : RegexJavaBaseVisitor(){ * Capture groups in order of appearance (1-based index -> list index 0). * Populated as the tree is walked. A backreference is only valid if it * appears after the group it references, which Java regex requires anyway. + * Each element is nullable to represent a captured group that is unsatisfiable, + * for example when the group contains an empty character class like `([a&&b])`. + * In that case the list holds null instead of a DisjunctionListRxGene. + * @see buildDisjunctionList */ private val captureGroups = mutableListOf() /** * Same as [captureGroups] but for named backreferences, which can be accessed * with their name or number. + * The value is nullable to represent a captured group that is unsatisfiable, + * for example when the group contains an empty character class like `([a&&b])`. + * In that case the map holds null instead of a DisjunctionListRxGene. + * @see buildDisjunctionList */ - private val namedCaptureGroups = mutableMapOf() + private val namedCaptureGroups = mutableMapOf() /** * Tracks the flags active in the current lexical scope. @@ -73,6 +81,32 @@ class GeneRegexJavaVisitor : RegexJavaBaseVisitor(){ ) } + /** + * Builds a DisjunctionListRxGene from a disjunction context; returns null if the disjunction is unsatisfiable. + */ + private fun buildDisjunctionList(ctx: RegexJavaParser.DisjunctionContext): DisjunctionListRxGene? 
{ + val res = ctx.accept(this) + val validDisjunctions = res.genes.map { it as DisjunctionRxGene } + + val satisfiableDisjunctions = validDisjunctions.filter{ !it.isUnsatisfiable() } + + if(satisfiableDisjunctions.isEmpty()){ + // As DisjunctionListRxGene extends CompositeFixedGene, its disjunctions list cannot be empty. + // In this case we return null to represent an unsatisfiable DisjunctionListRxGene. + return null + } + + val disjList = DisjunctionListRxGene(satisfiableDisjunctions) + + //TODO tmp hack until full handling of ^$. Assume full match when nested disjunctions + for (gene in disjList.disjunctions) { + gene.extraPrefix = false + gene.extraPostfix = false + gene.matchStart = true + gene.matchEnd = true + } + return disjList + } override fun visitPattern(ctx: RegexJavaParser.PatternContext): VisitResult { @@ -80,7 +114,15 @@ class GeneRegexJavaVisitor : RegexJavaBaseVisitor(){ val text = RegexUtils.getRegexExpByParserRuleContext(ctx) - val disjList = DisjunctionListRxGene(res.genes.map { it as DisjunctionRxGene }) + val satisfiableDisjunctions = res.genes + .map { it as DisjunctionRxGene } + .filter{ !it.isUnsatisfiable() } + + if (satisfiableDisjunctions.isEmpty()) { + throw IllegalStateException("Regex is unsatisfiable.") + } + + val disjList = DisjunctionListRxGene(satisfiableDisjunctions) // we remove the token from end of the string to store as sourceRegex val gene = RegexGene( @@ -101,9 +143,19 @@ class GeneRegexJavaVisitor : RegexJavaBaseVisitor(){ val matchStart = assertionMatches.first val matchEnd = assertionMatches.second - val disj = DisjunctionRxGene("disj", altRes.genes.map { it }, matchStart, matchEnd) + val res = VisitResult() + + // add disjunction if it has genes, OR if the alternative was purely assertions (^$) or flag scopes + // in that case altRes.genes is empty but the alternative is valid (matches "") + val hasOnlyAssertionsOrFlagScopes = ctx.alternative().term().isNotEmpty() && + ctx.alternative().term().all { it.assertion() != 
null || it.FLAG_SCOPE_OPEN() != null } + + if (altRes.genes.isNotEmpty() || hasOnlyAssertionsOrFlagScopes || ctx.alternative().term().isEmpty()) { + val disj = DisjunctionRxGene("disj", altRes.genes.map { it }, matchStart, matchEnd) - val res = VisitResult(disj) + res.genes.add(disj) + } + // else: had non-assertion terms but all produced nothing (empty char class etc.), skip if(ctx.disjunction() != null){ val disjRes = ctx.disjunction().accept(this) @@ -169,7 +221,7 @@ class GeneRegexJavaVisitor : RegexJavaBaseVisitor(){ // term is not a back ref: we use the default behavior, term results may only have 0-1 genes // if there is a gene, we add it to result res.genes.add(gene) - } else { + } else if (resTerm.data is String) { val assertion = resTerm.data as String if(i==0 && assertion == "^"){ @@ -184,6 +236,9 @@ class GeneRegexJavaVisitor : RegexJavaBaseVisitor(){ */ throw IllegalStateException("Cannot support $assertion at position $i") } + } else { + // unsatisfiable term, return with no genes + return VisitResult(data=Pair(false, false)) } } @@ -203,12 +258,23 @@ class GeneRegexJavaVisitor : RegexJavaBaseVisitor(){ val resAtom = ctx.atom().accept(this) val atom = resAtom.genes.firstOrNull() - ?: return res if(ctx.quantifier() != null){ val limits = ctx.quantifier().accept(this).data as Pair + // if quantified atom is unsatisfiable we must then check the limits + if (atom == null || + ((atom as? 
RxTerm)?.isUnsatisfiable() == true) && resAtom.genes.size == 1) { + return if (limits.first == 0) { + // if 0 appearances is allowed then the regex is satisfiable only with empty string + VisitResult(PatternCharacterBlockGene("0_QuantifierOnEmptyRegex", "")) + } else { + // if not then unsatisfiable, return with no genes + res + } + } + // if atom is not a back ref then we use the default behavior, results may only have one gene var template: Gene = atom @@ -233,10 +299,11 @@ class GeneRegexJavaVisitor : RegexJavaBaseVisitor(){ if (ctx.atom()?.atomEscape()?.BackReference() != null){ // if atom is a BackReference we addAll genes from result as there may be more than one if digits are dropped res.genes.addAll(resAtom.genes) - } else { + } else if (atom != null) { // if atom is not a back ref we fall back to the default behavior, results only have one gene res.genes.add(atom) } + // else atom is unsatisfiable, return no genes } return res @@ -306,21 +373,16 @@ class GeneRegexJavaVisitor : RegexJavaBaseVisitor(){ currentFlags = merged - val res = ctx.disjunction().accept(this) + val disjList = buildDisjunctionList(ctx.disjunction()) currentFlags = previous - val disjList = DisjunctionListRxGene(res.genes.map { it as DisjunctionRxGene }) - - //TODO tmp hack until full handling of ^$. Assume full match when nested disjunctions - for (gene in disjList.disjunctions) { - gene.extraPrefix = false - gene.extraPostfix = false - gene.matchStart = true - gene.matchEnd = true + return if (disjList != null) { + VisitResult(disjList) + } else { + // unsatisfiable, return with no genes. 
+ VisitResult() } - - return VisitResult(disjList) } if(ctx.quote() != null){ @@ -354,17 +416,7 @@ class GeneRegexJavaVisitor : RegexJavaBaseVisitor(){ val groupIndex = captureGroups.size captureGroups.add(null) // add placeholder for the gene - val res = ctx.disjunction().accept(this) - - val disjList = DisjunctionListRxGene(res.genes.map { it as DisjunctionRxGene }) - - //TODO tmp hack until full handling of ^$. Assume full match when nested disjunctions - for(gene in disjList.disjunctions){ - gene.extraPrefix = false - gene.extraPostfix = false - gene.matchStart = true - gene.matchEnd = true - } + val disjList = buildDisjunctionList(ctx.disjunction()) val isCapturingGroup = !ctx.text.startsWith("(?:") val isNamedCaptureGroup = ctx.NAMED_CAPTURE_GROUP_OPEN() != null @@ -380,7 +432,12 @@ class GeneRegexJavaVisitor : RegexJavaBaseVisitor(){ namedCaptureGroups[name] = disjList } - return VisitResult(disjList) + return if (disjList != null) { + VisitResult(disjList) + } else { + // unsatisfiable, return with no genes. 
+ VisitResult() + } } if(ctx.DOT() != null){ @@ -599,15 +656,12 @@ class GeneRegexJavaVisitor : RegexJavaBaseVisitor(){ maxDigits > allDigits.length -> allDigits.length allDigits.take(maxDigits).toInt() <= captureGroups.size -> maxDigits maxDigits > 1 -> maxDigits - 1 - else -> throw IllegalStateException( - "Backreference ${txt.take(2)} refers to group ${allDigits[0]} but only ${captureGroups.size} " + - "capture group(s) have been defined so far" - ) + else -> 1 } val n = allDigits.take(backRefDigitCount).toInt() - val result = VisitResult(BackReferenceRxGene(n, captureGroups[n - 1]!!)) + val result = VisitResult(BackReferenceRxGene(n, captureGroups.getOrNull(n - 1))) val remainingChars = allDigits.drop(backRefDigitCount) @@ -623,8 +677,10 @@ class GeneRegexJavaVisitor : RegexJavaBaseVisitor(){ if (ctx.NamedBackReference() != null) { // strip "\k<" and ">" val name = txt.drop(3).dropLast(1) + if(name !in namedCaptureGroups){ + throw IllegalStateException("Named backreference \\k<$name> refers to unknown group '$name'") + } val group = namedCaptureGroups[name] - ?: throw IllegalStateException("Named backreference \\k<$name> refers to unknown group '$name'") val groupIndex = captureGroups.indexOf(group) + 1 // 1-based, for the gene name return VisitResult(BackReferenceRxGene(groupIndex, group)) } diff --git a/core/src/main/kotlin/org/evomaster/core/search/gene/regex/BackReferenceRxGene.kt b/core/src/main/kotlin/org/evomaster/core/search/gene/regex/BackReferenceRxGene.kt index 4c79744212..8a85458d28 100644 --- a/core/src/main/kotlin/org/evomaster/core/search/gene/regex/BackReferenceRxGene.kt +++ b/core/src/main/kotlin/org/evomaster/core/search/gene/regex/BackReferenceRxGene.kt @@ -14,12 +14,18 @@ import org.evomaster.core.search.service.mutator.genemutation.SubsetGeneMutation * Represents a backreference \N in a regex (N being a number). * Its value is always identical to the current value of its [captureGroup]. 
* It has no independent state and is therefore immutable. + * If capture group is null then the referenced group was unsatisfiable, + * in which case the same is true for the backreference to it. */ class BackReferenceRxGene( val groupIndex: Int, - val captureGroup: DisjunctionListRxGene + val captureGroup: DisjunctionListRxGene? ) : RxAtom, SimpleGene("\\$groupIndex") { + override fun isUnsatisfiable(): Boolean { + return captureGroup == null || captureGroup.isUnsatisfiable() + } + override fun checkForLocallyValidIgnoringChildren(): Boolean = true /** @@ -59,7 +65,12 @@ class BackReferenceRxGene( mode: GeneUtils.EscapeMode?, targetFormat: OutputFormat?, extraCheck: Boolean - ): String = captureGroup.getValueAsPrintableString(targetFormat = null) + ): String { + if (captureGroup == null) { + throw IllegalStateException("Cannot get value from invalid backreference \\$groupIndex") + } + return captureGroup.getValueAsPrintableString(previousGenes, mode, targetFormat) + } override fun containsSameValueAs(other: Gene): Boolean { if (other !is BackReferenceRxGene) return false diff --git a/core/src/main/kotlin/org/evomaster/core/search/gene/regex/CharacterClassEscapeRxGene.kt b/core/src/main/kotlin/org/evomaster/core/search/gene/regex/CharacterClassEscapeRxGene.kt index 06797e7932..5abc86b9ba 100644 --- a/core/src/main/kotlin/org/evomaster/core/search/gene/regex/CharacterClassEscapeRxGene.kt +++ b/core/src/main/kotlin/org/evomaster/core/search/gene/regex/CharacterClassEscapeRxGene.kt @@ -82,7 +82,7 @@ class CharacterClassEscapeRxGene( // create both normal and negated version for all .flatMap { (key, value) -> listOf( - key to MultiCharacterRange(value), + key to MultiCharacterRange(false, value), "^$key" to MultiCharacterRange(true, value) ) }.toMap() @@ -132,6 +132,12 @@ class CharacterClassEscapeRxGene( } } + override fun isUnsatisfiable(): Boolean = multiCharRange.isEmpty + + override fun isMutable(): Boolean { + return !isUnsatisfiable() + } + override fun 
checkForLocallyValidIgnoringChildren() : Boolean{ // we pass the same embedded flags to the regex to accurately match the expected behavior return value.matches(Regex("${flags.getScopeString()}\\$type")) @@ -193,6 +199,9 @@ class CharacterClassEscapeRxGene( } override fun getValueAsPrintableString(previousGenes: List, mode: GeneUtils.EscapeMode?, targetFormat: OutputFormat?, extraCheck: Boolean): String { + if (isUnsatisfiable()) { + throw IllegalStateException("Cannot get value from empty CharacterClassEscape") + } return if (!flags.isCaseable(value[0])) { value[0].toString() } diff --git a/core/src/main/kotlin/org/evomaster/core/search/gene/regex/CharacterRangeRxGene.kt b/core/src/main/kotlin/org/evomaster/core/search/gene/regex/CharacterRangeRxGene.kt index c1401621bb..ee7b930316 100644 --- a/core/src/main/kotlin/org/evomaster/core/search/gene/regex/CharacterRangeRxGene.kt +++ b/core/src/main/kotlin/org/evomaster/core/search/gene/regex/CharacterRangeRxGene.kt @@ -30,7 +30,7 @@ class CharacterRangeRxGene( private val log = LoggerFactory.getLogger(CharacterRangeRxGene::class.java) } - var value : Char = validRanges[0].start + var value : Char = if (isUnsatisfiable()) '\u0000' else validRanges[0].start /** * Whether to output the character in uppercase. @@ -38,6 +38,8 @@ class CharacterRangeRxGene( */ var useUpperCase: Boolean = false + override fun isUnsatisfiable(): Boolean = validRanges.isEmpty + override fun checkForLocallyValidIgnoringChildren() : Boolean{ return validRanges.any { value in it || @@ -49,6 +51,9 @@ class CharacterRangeRxGene( } override fun isMutable(): Boolean { + if (isUnsatisfiable()) { + return false + } // check if there is more than one character or if the character is caseable return validRanges.charCount > 1 || flags.isCaseable(value) } @@ -134,6 +139,9 @@ class CharacterRangeRxGene( TODO should \ be handled specially? 
In any case, would have same handling as AnyCharacterRxGene */ + if (isUnsatisfiable()) { + throw IllegalStateException("Cannot get value from empty CharacterRange") + } return if (!flags.isCaseable(value)) { value.toString() } diff --git a/core/src/main/kotlin/org/evomaster/core/search/gene/regex/DisjunctionRxGene.kt b/core/src/main/kotlin/org/evomaster/core/search/gene/regex/DisjunctionRxGene.kt index b753e46a9a..4582495b4e 100644 --- a/core/src/main/kotlin/org/evomaster/core/search/gene/regex/DisjunctionRxGene.kt +++ b/core/src/main/kotlin/org/evomaster/core/search/gene/regex/DisjunctionRxGene.kt @@ -47,6 +47,8 @@ class DisjunctionRxGene( private val log : Logger = LoggerFactory.getLogger(DisjunctionRxGene::class.java) } + override fun isUnsatisfiable(): Boolean = + terms.isNotEmpty() && terms.any { (it as? RxTerm)?.isUnsatisfiable() == true } override fun checkForLocallyValidIgnoringChildren() : Boolean{ return true diff --git a/core/src/main/kotlin/org/evomaster/core/search/gene/regex/QuantifierRxGene.kt b/core/src/main/kotlin/org/evomaster/core/search/gene/regex/QuantifierRxGene.kt index d6d5469404..1ed5edf059 100644 --- a/core/src/main/kotlin/org/evomaster/core/search/gene/regex/QuantifierRxGene.kt +++ b/core/src/main/kotlin/org/evomaster/core/search/gene/regex/QuantifierRxGene.kt @@ -49,8 +49,8 @@ class QuantifierRxGene( if (min < 0) { throw IllegalArgumentException("Invalid min value '$min': should be positive") } - if (max < 1) { - throw IllegalArgumentException("Invalid max value '$max': should be at least 1") + if (max < 0) { + throw IllegalArgumentException("Invalid max value '$max': should be positive") } if (min > max) { throw IllegalArgumentException("Invalid min-max values '$min-$max': min is greater than max") diff --git a/core/src/main/kotlin/org/evomaster/core/search/gene/regex/RxTerm.kt b/core/src/main/kotlin/org/evomaster/core/search/gene/regex/RxTerm.kt index 274bfb5adb..d6ccdf3574 100644 --- 
a/core/src/main/kotlin/org/evomaster/core/search/gene/regex/RxTerm.kt +++ b/core/src/main/kotlin/org/evomaster/core/search/gene/regex/RxTerm.kt @@ -4,4 +4,11 @@ import org.evomaster.core.search.StructuralElement import org.evomaster.core.search.gene.Gene -interface RxTerm \ No newline at end of file +interface RxTerm { + /** + * Returns true if this gene can never produce a valid value, + * for example an empty character class intersection like [a&&b]. + * Used at construction time to filter unsatisfiable branches from disjunctions. + */ + fun isUnsatisfiable(): Boolean = false +} \ No newline at end of file diff --git a/core/src/main/kotlin/org/evomaster/core/utils/MultiCharacterRange.kt b/core/src/main/kotlin/org/evomaster/core/utils/MultiCharacterRange.kt index c13c99f08f..3056323358 100644 --- a/core/src/main/kotlin/org/evomaster/core/utils/MultiCharacterRange.kt +++ b/core/src/main/kotlin/org/evomaster/core/utils/MultiCharacterRange.kt @@ -5,10 +5,6 @@ import org.slf4j.LoggerFactory class MultiCharacterRange internal constructor(val ranges: List) { - init { - require(ranges.isNotEmpty()) { "MultiCharacterRange cannot be created with an empty list" } - } - companion object { private val log = LoggerFactory.getLogger(MultiCharacterRange::class.java) @@ -25,16 +21,13 @@ class MultiCharacterRange internal constructor(val ranges: List) } operator fun invoke(negated: Boolean, ranges: List): MultiCharacterRange { - if (ranges.isEmpty()) { - throw IllegalArgumentException("No defined ranges") - } var internalRanges = mutableListOf() if (negated) { internalRanges.add(CharacterRange(Character.MIN_VALUE, Character.MAX_VALUE)) } - for (range in ranges) { + for (range in ranges.sortedBy { it.start }) { internalRanges = if (negated) { remove(internalRanges, CharacterRange(range.start, range.end)) } else { @@ -58,7 +51,7 @@ class MultiCharacterRange internal constructor(val ranges: List) var currentEnd = toAdd.end var merged = false - for ((start, end) in 
internalRanges.sortedBy { it.start }) { + for ((start, end) in internalRanges) { when { end.code < currentStart.code - 1 -> newInternalRanges += CharacterRange(start, end) start.code > currentEnd.code + 1 -> { @@ -204,10 +197,11 @@ class MultiCharacterRange internal constructor(val ranges: List) } currentRangeMinValue = currentRangeMaxValue } - assert(false) // internal ranges being empty should never happen - return '0' + throw IllegalStateException("Cannot sample characters from an empty char range") } + val isEmpty: Boolean get() = ranges.isEmpty() + val isNotEmpty: Boolean get() = ranges.isNotEmpty() val size: Int get() = ranges.size val charCount :Int = ranges.sumOf{ it.size } operator fun get(index: Int): CharacterRange = ranges[index] diff --git a/core/src/test/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitorTest.kt b/core/src/test/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitorTest.kt index 489657b294..e7ea3ed26d 100644 --- a/core/src/test/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitorTest.kt +++ b/core/src/test/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitorTest.kt @@ -2,6 +2,7 @@ package org.evomaster.core.parser import org.evomaster.core.search.gene.regex.RegexGene import org.junit.jupiter.api.Test +import org.junit.jupiter.api.assertThrows /** * Created by arcuri82 on 11-Sep-19. 
@@ -273,5 +274,129 @@ class GeneRegexJavaVisitorTest : GeneRegexEcma262VisitorTest() { checkSameAsJava("[[a-z&&[a-p]]&&[f-z]]") checkSameAsJava("[a[b[c[d&&[\\w]]]][0-7&&\\d&&[0-5]&&1-5]]") checkSameAsJava("&&") + checkSameAsJava("[[a-c&&[d-f]][x-z]]") + checkSameAsJava("[a-c&&[b-d]]|[x&&y]") + } + + @Test + fun testEmptyAlternatives() { + assertThrows{ checkSameAsJava("[a&&b]") } + checkSameAsJava("[a&&b]|c") + checkSameAsJava("0|[a&&b]|c") + assertThrows { checkSameAsJava("[a&&b]|[c&&d]") } + assertThrows { checkSameAsJava("[a&&b]|[c&&d]|[e&&f]") } + checkSameAsJava("a|[b&&c]|d") + checkSameAsJava("([a&&b]|c)d") + } + + @Test + fun testInvalidBackRefs() { + assertThrows { checkSameAsJava("\\1") } + checkSameAsJava("\\1|c") + assertThrows { checkSameAsJava("(a)\\2") } + checkSameAsJava("(a)\\2|b") + assertThrows { checkSameAsJava("(\\1)") } + checkSameAsJava("(\\1|a)") + assertThrows { checkSameAsJava("\\1(a)") } + checkSameAsJava("\\1(a)|b") + checkSameAsJava("(a)(\\1|\\2|c)") + assertThrows { checkSameAsJava("\\1|\\2|\\3") } + checkSameAsJava("(\\2|a)|b") + checkSameAsJava("\\1|[a&&b]|c") + assertThrows { checkSameAsJava("\\1|[a&&b]") } + checkSameAsJava("([a&b])|b\\1") + assertThrows { checkSameAsJava("([a&&b])|b\\1") } + assertThrows { checkSameAsJava("\\k") } + assertThrows { checkSameAsJava("((\\1|\\2)+)") } + checkSameAsJava("((\\1|\\2)*)") + checkSameAsJava("(\\12)*") + assertThrows { checkSameAsJava("\\12*") } + } + + @Test + fun testEmptyWithFlagGroup() { + checkSameAsJava("(?i:)") + checkSameAsJava("(?i:)|c") + assertThrows { checkSameAsJava("(?i:[a&&b])") } + checkSameAsJava("(?i:[a&&b])|c") + checkSameAsJava("(?i:[a&&b]|c)") + assertThrows { checkSameAsJava("(?i:(?u:[a&&b]))") } + checkSameAsJava("(?i:(?u:[a&&b])|c)") + } + + @Test + fun testEmptyWithFlagScope() { + checkSameAsJava("(?iu)") + checkSameAsJava("^(?iu)") + assertThrows { checkSameAsJava("(?iu)[a&&b]") } + checkSameAsJava("(?iu)[a&&b]|c") + assertThrows { checkSameAsJava("^(?iu)[a&&b]$") } 
+ checkSameAsJava("^(?iu)[a&&b]$|c") + checkSameAsJava("^(?iu)([a&&b]$|c)") + } + + @Test + fun testEmptyWithAnchors() { + checkSameAsJava("^$") + assertThrows { checkSameAsJava("^[a&&b]$") } + checkSameAsJava("^[a&&b]$|c") + checkSameAsJava("^(?i:abc)$") + assertThrows { checkSameAsJava("^([a&&b])$") } + checkSameAsJava("^([a&&b]|c)$") + } + + @Test + fun testEmptyWithQuantifiers() { + checkSameAsJava("[a&&b]*") + checkSameAsJava("[a&&b]*c") + checkSameAsJava("[a&&b]?") + checkSameAsJava("[a&&b]?c") + checkSameAsJava("[a&&b]{0,}") + checkSameAsJava("[a&&b]{0}") + checkSameAsJava("([a&&b])*") + checkSameAsJava("([a&&b])*c") + assertThrows { checkSameAsJava("[a&&b]+") } + checkSameAsJava("[a&&b]+|c") + assertThrows { checkSameAsJava("[a&&b]{1,}") } + checkSameAsJava("[a&&b]{1,}|c") + assertThrows { checkSameAsJava("[a&&b]{1}") } + checkSameAsJava("[a&&b]{1}|c") + assertThrows { checkSameAsJava("[a&&b]{2,4}") } + checkSameAsJava("[a&&b]{2,4}|c") + assertThrows { checkSameAsJava("([a&&b])+") } + checkSameAsJava("([a&&b])+|c") + checkSameAsJava("[a&&b]{3}|c") + checkSameAsJava("[a&&b]{3,3}|c") + checkSameAsJava("[a&&b]{0,0}|c") + } + + @Test + fun testEmptyWithBackRefsAndQuantifiers() { + checkSameAsJava("(a)\\1*") + checkSameAsJava("\\1*c") + checkSameAsJava("\\1?c") + checkSameAsJava("(\\1*)") + assertThrows { checkSameAsJava("\\1+") } + checkSameAsJava("\\1+|c") + assertThrows { checkSameAsJava("(\\1+)") } + checkSameAsJava("(\\1+)|c") + } + + @Test + fun testEmptyNestedGroups() { + checkSameAsJava("(?:)") + checkSameAsJava("(?:)|c") + assertThrows { checkSameAsJava("(?:[a&&b])") } + checkSameAsJava("(?:[a&&b])|c") + checkSameAsJava("([a&&b])|c") + checkSameAsJava("([a&&b]|[c&&d])|e") + checkSameAsJava("(([a&&b])|([c&&d]))|e") + checkSameAsJava("((([a&&b]|[c&&d])|[e&&f])|g)") + checkSameAsJava("(g|(([a&&b]|[c&&d])|[e&&f]))") + assertThrows { checkSameAsJava("(?[a&&b])") } + checkSameAsJava("(?[a&&b])|c") + assertThrows { checkSameAsJava("(?[a&&b])|c\\k") } + 
assertThrows { checkSameAsJava("a([b&&c])d") } + assertThrows { checkSameAsJava("abc|\\k") } } } \ No newline at end of file