/*----------------------------------------------------------------------------*/ /* Regular expression expander */ /* Sten Morten Andersen */ /* www.stenmorten.com */ /* */ /* Grammar: */ /* */ /* input ::= expression '.' */ /* expression ::= term [ '|' term ] */ /* term ::= factor [ factor ] */ /* factor ::= (atom | '(' expression ')') [ '+' | '?' | '*' ] */ /* atom ::= 'a' | 'b' | .. | 'z' | '0' | '1' | '\' any-character */ /*----------------------------------------------------------------------------*/ #include #include #include #define MAX_BUFF 10 typedef enum { false, true } BOOL; typedef struct { char op; int left; int right; } node; /*----------------------------------------------------------------------------*/ /* Globals */ /*----------------------------------------------------------------------------*/ node m[1000]; int cx; // code index char buf[60]; // output buffer int bufindex; char ch; // The next character BOOL cont, escape; jmp_buf start; // On error: empty stack and go to start /*----------------------------------------------------------------------------*/ /* Function declarations */ /*----------------------------------------------------------------------------*/ void factor(); void term(); void expression(); void gen( char o, int l, int r ); void get_token(); void get_char(); void error( char* e ); /*----------------------------------------------------------------------------*/ /* Input */ /*----------------------------------------------------------------------------*/ void get_char() { // Gets char into ch and skips blanks ch = getchar(); if ( ch < ' ' ) ch = ' '; } void get_token() { do { ch = getchar(); } while ( ch <= ' ' ); if ( ch == '\\' ) { escape = true; get_char(); } } /*----------------------------------------------------------------------------*/ /* Output */ /*----------------------------------------------------------------------------*/ void show() { int i; for ( i = 0; i <= bufindex; i++ ) printf( "%c", buf[ i ] ); printf( "\n" ); } /*----------------------------------------------------------------------------*/ /* Application logic */ /*----------------------------------------------------------------------------*/ void expand( int n, void (*cont) (void) ) { void expandright() { expand( m[ n ].right, cont ); } void expandleft() { cont(); expand( m[ n ].left, expandleft ); } if ( m[ n ].op >= ' ' ) { if ( m[ n ].left == -1 && m[ n ].right == -1 ) { // not an operator if ( bufindex > MAX_BUFF-2 ) return; // kill large inputs bufindex++; buf[ bufindex ] = m[ n ].op; cont(); bufindex--; } } if ( m[ n ].op == '|' ) { expand( m[ n ].left, cont ); expand( m[ n ].right, cont ); } else if ( m[ n ].op == '_' ) { expand( m[ n ].left, expandright ); } else if ( m[ n ].op == '?' ) { // zero or once cont(); expand( m[ n ].left, cont ); } else if ( m[ n ].op == '+' ) { // once or more expand( m[ n ].left, expandleft ); } else if ( m[ n ].op == '*' ) { // zero or more cont(); expand( m[ n ].left, expandleft ); } } void gen( char o, int l, int r ) { cx++; m[ cx ].op = o; m[ cx ].left = l; m[ cx ].right = r; } void factor() {; if ( escape || ch >= 'a' && ch <= 'z' || ch == '0' || ch == '1' ) { escape = false; gen( ch, -1, -1 ); get_token(); } else if ( ch == '(' ) { get_token(); expression(); if ( ch != ')' ) error( "Expected ) " ); get_token(); } else error( "Error: Start of factor" ); while ( ch == '*' || ch == '+' || ch == '?' ) { gen( ch, cx, -1 ); get_token(); } } void term() { int left; factor(); while ( ch >= 'a' && ch <= 'z' || ch == '(' ) { left = cx; factor(); gen( '_', left, cx); // the _ is suppressed in input, but we gen it } } void expression() { int left; term(); while ( ch == '|' ) { // alternation left = cx; get_token(); term(); gen( '|', left, cx ); } } void init() { bufindex = -1; cx = -1; } /*----------------------------------------------------------------------------*/ /* Main */ /*----------------------------------------------------------------------------*/ int main(int argc, char **argv) { cont = true; setjmp( start ); init(); do { init(); get_token(); // one token look-ahead expression(); if ( ch != '.' ) error("Unexpected terminator. Expected ." ); int i; /* for ( i = 0; i <= cx; i++ ) printf( "i:%d %c %d %d\n", i, m[ i ].op, m[ i ].left, m[ i ].right ); */ expand( cx, &show ); printf("\n"); } while ( cont ); return (0); } /*----------------------------------------------------------------------------*/ /* Print out error */ /*----------------------------------------------------------------------------*/ void error(char* e) { printf( "\n%s\n\n", e ); longjmp( start, 0 ); }