@@ -310,7 +310,7 @@ def checkgroupname(self, name, offset):
310310 msg = "bad character in group name %r" % name
311311 raise self .error (msg , len (name ) + offset )
312312
313- def _property_escape (source , escape , in_set = False ):
313+ def _property_escape (source , escape ):
314314 # handle \p{...} and \P{...} (UTS #18 1.2.4, "Property Syntax")
315315 from . import _properties
316316 if not source .match ('{' ):
@@ -320,10 +320,6 @@ def _property_escape(source, escape, in_set=False):
320320 if code is None :
321321 raise source .error ("unknown property name %r" % name ,
322322 len (name ) + len (r'\p{}' ))
323- if in_set and code [1 ][0 ] == (NEGATE , None ):
324- # A negated multi-range property cannot be a member of a set.
325- raise source .error ("bad escape %s in character class" % escape ,
326- len (name ) + len (r'\p{}' ))
327323 return code
328324
329325def _class_escape (source , escape ):
@@ -369,7 +365,7 @@ def _class_escape(source, escape):
369365 len (charname ) + len (r'\N{}' )) from None
370366 return LITERAL , c
371367 elif c in "pP" and source .istext :
372- return _property_escape (source , escape , in_set = True )
368+ return _property_escape (source , escape )
373369 elif c in OCTDIGITS :
374370 # octal escape (up to three digits)
375371 escape += source .getwhile (2 , OCTDIGITS )
@@ -574,11 +570,15 @@ def _difference(left, right, state):
574570# with the next operand.
575571_SETOPS = {'||' : _union , '&&' : _intersect , '--' : _difference }
576572
577- def _operand_elements (set , compound ):
578- # The operand's elements: a standalone nested set, else the member union.
573+ def _operand_elements (set , compound , negated , state ):
574+ # The operand's elements: a standalone nested set as-is; otherwise the union
575+ # of its plain members with each negated property (collected by addmember).
579576 if compound is not None :
580577 return compound
581- return [_charset_node (_uniq (set ))]
578+ result = [_charset_node (_uniq (set ))] if set or not negated else None
579+ for neg in negated :
580+ result = [neg ] if result is None else _union (result , [neg ], state )
581+ return result
582582
583583def _parse_operand (source , state , nested , here , allow_nested ):
584584 # Read one operand, stopping at a set operator or the closing ']'. An
@@ -591,10 +591,15 @@ def _parse_operand(source, state, nested, here, allow_nested):
591591 sourcematch = source .match
592592 set = []
593593 setappend = set .append
594+ negated = [] # \P{...} negated-range props, combined with the members via _union
594595 def addmember (code ):
595- # Flatten a \p{...} property's IN into the member set.
596+ # Flatten a \p{...} property's IN into the member set. An IN that starts with
597+ # NEGATE is a complemented charset that cannot be flattened; keep it in 'negated'.
596598 if code [0 ] is IN :
597- set .extend (code [1 ])
599+ if code [1 ][0 ][0 ] is NEGATE :
600+ negated .append (code )
601+ else :
602+ set .extend (code [1 ])
598603 else :
599604 setappend (code )
600605 compound = None # elements of a standalone nested-set operand
@@ -607,9 +612,9 @@ def addmember(code):
607612 if this is None :
608613 raise source .error ("unterminated character set" ,
609614 source .tell () - here )
610- if set or compound is not None :
615+ if set or compound is not None or negated :
611616 if this == "]" :
612- return _operand_elements (set , compound ), None
617+ return _operand_elements (set , compound , negated , state ), None
613618 if this in '-&|~' and source .next == this :
614619 if this == '~' :
615620 import warnings
@@ -621,7 +626,7 @@ def addmember(code):
621626 else :
622627 # '--', '&&' or '||' ends this operand and starts the next.
623628 sourceget () # consume the second operator character
624- return _operand_elements (set , compound ), this + this
629+ return _operand_elements (set , compound , negated , state ), this + this
625630 if this [0 ] == "\\ " :
626631 code1 = _class_escape (source , this )
627632 else :
@@ -641,12 +646,12 @@ def addmember(code):
641646 # A trailing '-' is a literal.
642647 addmember (code1 )
643648 setappend ((LITERAL , _ord ("-" )))
644- return [ _charset_node ( _uniq ( set ))] , None
649+ return _operand_elements ( set , None , negated , state ) , None
645650 if that == "-" :
646651 # 'X--': difference, not a range. '--' after a single member
647652 # lands here because the range probe consumed the first '-'.
648653 addmember (code1 )
649- return [ _charset_node ( _uniq ( set ))] , "--"
654+ return _operand_elements ( set , None , negated , state ) , "--"
650655 if that [0 ] == "\\ " :
651656 code2 = _class_escape (source , that )
652657 else :
0 commit comments