@ -961,6 +961,8 @@ tre_parse(tre_parse_ctx_t *ctx)
tre_stack_t * stack = ctx - > stack ;
tre_stack_t * stack = ctx - > stack ;
int bottom = tre_stack_num_objects ( stack ) ;
int bottom = tre_stack_num_objects ( stack ) ;
int depth = 0 ;
int depth = 0 ;
wchar_t wc ;
int clen ;
if ( ! ctx - > nofirstsub )
if ( ! ctx - > nofirstsub )
{
{
@ -1155,10 +1157,9 @@ tre_parse(tre_parse_ctx_t *ctx)
{
{
case CHAR_LPAREN : /* parenthesized subexpression */
case CHAR_LPAREN : /* parenthesized subexpression */
if ( ctx - > cflags & REG_EXTENDED
if ( ctx - > cflags & REG_EXTENDED )
| | ( ctx - > re > ctx - > re_start
& & * ( ctx - > re - 1 ) = = CHAR_BACKSLASH ) )
{
{
lparen :
depth + + ;
depth + + ;
{
{
ctx - > re + + ;
ctx - > re + + ;
@ -1174,25 +1175,6 @@ tre_parse(tre_parse_ctx_t *ctx)
goto parse_literal ;
goto parse_literal ;
break ;
break ;
case CHAR_RPAREN : /* end of current subexpression */
if ( ( ctx - > cflags & REG_EXTENDED & & depth > 0 )
| | ( ctx - > re > ctx - > re_start
& & * ( ctx - > re - 1 ) = = CHAR_BACKSLASH ) )
{
/* We were expecting an atom, but instead the current
subexpression was closed . POSIX leaves the meaning of
this to be implementation - defined . We interpret this as
an empty expression ( which matches an empty string ) . */
result = tre_ast_new_literal ( ctx - > mem , EMPTY , - 1 , - 1 ) ;
if ( result = = NULL )
return REG_ESPACE ;
if ( ! ( ctx - > cflags & REG_EXTENDED ) )
ctx - > re - - ;
}
else
goto parse_literal ;
break ;
case CHAR_LBRACKET : /* bracket expression */
case CHAR_LBRACKET : /* bracket expression */
ctx - > re + + ;
ctx - > re + + ;
status = tre_parse_bracket ( ctx , & result ) ;
status = tre_parse_bracket ( ctx , & result ) ;
@ -1203,13 +1185,14 @@ tre_parse(tre_parse_ctx_t *ctx)
case CHAR_BACKSLASH :
case CHAR_BACKSLASH :
/* If this is "\(" or "\)" chew off the backslash and
/* If this is "\(" or "\)" chew off the backslash and
try again . */
try again . */
if ( ! ( ctx - > cflags & REG_EXTENDED )
if ( ! ( ctx - > cflags & REG_EXTENDED ) & & * ( ctx - > re + 1 ) = = CHAR_LPAREN )
& & ( * ( ctx - > re + 1 ) = = CHAR_LPAREN
| | * ( ctx - > re + 1 ) = = CHAR_RPAREN ) )
{
{
ctx - > re + + ;
ctx - > re + + ;
STACK_PUSHX ( stack , int , PARSE_ATOM ) ;
goto lparen ;
break ;
}
if ( ! ( ctx - > cflags & REG_EXTENDED ) & & * ( ctx - > re + 1 ) = = CHAR_LPAREN )
{
goto empty_atom ;
}
}
/* If a macro is used, parse the expanded macro recursively. */
/* If a macro is used, parse the expanded macro recursively. */
@ -1369,12 +1352,9 @@ tre_parse(tre_parse_ctx_t *ctx)
break ;
break ;
case CHAR_CARET : /* beginning of line assertion */
case CHAR_CARET : /* beginning of line assertion */
/* '^' has a special meaning everywhere in EREs, and in the
/* '^' has a special meaning everywhere in EREs, and at
beginning of the RE and after \ ( is BREs . */
beginning of BRE . */
if ( ctx - > cflags & REG_EXTENDED
if ( ctx - > cflags & REG_EXTENDED
| | ( ctx - > re - 2 > = ctx - > re_start
& & * ( ctx - > re - 2 ) = = CHAR_BACKSLASH
& & * ( ctx - > re - 1 ) = = CHAR_LPAREN )
| | ctx - > re = = ctx - > re_start )
| | ctx - > re = = ctx - > re_start )
{
{
result = tre_ast_new_literal ( ctx - > mem , ASSERTION ,
result = tre_ast_new_literal ( ctx - > mem , ASSERTION ,
@ -1389,10 +1369,8 @@ tre_parse(tre_parse_ctx_t *ctx)
case CHAR_DOLLAR : /* end of line assertion. */
case CHAR_DOLLAR : /* end of line assertion. */
/* '$' is special everywhere in EREs, and in the end of the
/* '$' is special everywhere in EREs, and in the end of the
string and before \ ) is BREs . */
string in BREs . */
if ( ctx - > cflags & REG_EXTENDED
if ( ctx - > cflags & REG_EXTENDED
| | ( * ( ctx - > re + 1 ) = = CHAR_BACKSLASH
& & * ( ctx - > re + 2 ) = = CHAR_RPAREN )
| | ! * ( ctx - > re + 1 ) )
| | ! * ( ctx - > re + 1 ) )
{
{
result = tre_ast_new_literal ( ctx - > mem , ASSERTION ,
result = tre_ast_new_literal ( ctx - > mem , ASSERTION ,
@ -1405,34 +1383,27 @@ tre_parse(tre_parse_ctx_t *ctx)
goto parse_literal ;
goto parse_literal ;
break ;
break ;
case CHAR_RPAREN :
if ( ! depth )
goto parse_literal ;
case CHAR_STAR :
case CHAR_PIPE :
case CHAR_LBRACE :
case CHAR_PLUS :
case CHAR_QUESTIONMARK :
if ( ! ( ctx - > cflags & REG_EXTENDED ) )
goto parse_literal ;
empty_atom :
result = tre_ast_new_literal ( ctx - > mem , EMPTY , - 1 , - 1 ) ;
if ( ! result )
return REG_ESPACE ;
break ;
default :
default :
parse_literal :
parse_literal :
/* We are expecting an atom. If the subexpression (or the whole
clen = mbtowc ( & wc , ctx - > re , - 1 ) ;
regexp ends here , we interpret it as an empty expression
( which matches an empty string ) . */
if (
( ! * ctx - > re
| | * ctx - > re = = CHAR_STAR
| | ( ctx - > cflags & REG_EXTENDED
& & ( * ctx - > re = = CHAR_PIPE
| | * ctx - > re = = CHAR_LBRACE
| | * ctx - > re = = CHAR_PLUS
| | * ctx - > re = = CHAR_QUESTIONMARK ) )
/* Test for "\)" in BRE mode. */
| | ( ! ( ctx - > cflags & REG_EXTENDED )
& & ! * ( ctx - > re + 1 )
& & * ctx - > re = = CHAR_BACKSLASH
& & * ( ctx - > re + 1 ) = = CHAR_LBRACE ) ) )
{
result = tre_ast_new_literal ( ctx - > mem , EMPTY , - 1 , - 1 ) ;
if ( ! result )
return REG_ESPACE ;
break ;
}
wchar_t wc ;
int clen = mbtowc ( & wc , ctx - > re , - 1 ) ;
if ( clen < 0 ) clen = 1 , wc = WEOF ;
if ( clen < 0 ) clen = 1 , wc = WEOF ;
/* Note that we can't use an tre_isalpha() test here, since there
/* Note that we can't use an tre_isalpha() test here, since there