/*----------------------------------------------------------------------------*/
/* Context free grammar parser                                                */
/* LOG32LLC Assignment 7                                                      */
/* Sten M. Andersen                                                           */
/*                                                                            */
/*----------------------------------------------------------------------------*/

#include <stdio.h>
#include <setjmp.h>
#include <string.h>
#include <ctype.h>

#define MAX_NONTERMINALS							  50
#define MAX_CODE                                                          1000
#define MAX_STRING								 100
#define ALFA_LENGTH								  25 /* 16 */

typedef enum 
{
	false,
	true
} BOOL;

typedef enum 
{ 
	badchar,query,nonterminal,quote,altern,equal,
    lparen,rparen,lbrack,rbrack,lbrace,rbrace,semcol,period
} symbol;	

typedef enum {
	tsy,nsy,alt,cat,rep,opt
} operator;

typedef struct {
	operator op;
	int left;
	int right;
} node;

/*----------------------------------------------------------------------------*/
/* Globals                                                                    */
/*----------------------------------------------------------------------------*/
const char* emptyalfa = "               "; //  spaces
struct { char name[ALFA_LENGTH]; int adr; } table[ MAX_NONTERMINALS ];
symbol sym;
int lasttable, location;
char ch;
char al[ ALFA_LENGTH ];
node code[ MAX_CODE ];
int lastcode;					
char instring[ MAX_STRING ];
int sp,sl;
BOOL tracing;
int i;
int num_parses;

BOOL cont;
jmp_buf start;				// On error: empty stack and go to start

/*----------------------------------------------------------------------------*/
/* Function declarations                                                      */
/*----------------------------------------------------------------------------*/
void factor();
void term();
void expression();
void gen( operator o, int l, int r );
void getysm();
void error( char* msg );

/*----------------------------------------------------------------------------*/
/* Input                                                                      */
/*----------------------------------------------------------------------------*/

void get_char() {
	ch = getchar();
	if ( ch <= ' ' ) ch = ' ';
}

void getsym() {
	int k;
	do get_char(); while ( ch <= ' ' );
	if ( isalpha( ch ) ) {
		sym = nonterminal; k = 0;
		strcpy(al, emptyalfa);
		
		// Read a symbol
		do {
			if ( k < ALFA_LENGTH ) {
				k++; al[ k ] = ch; 
			}
			get_char();
		} while ( isalnum( ch ) || ch == '_' );
		ungetc( ch, stdin );	// push back char
		
		strcpy( table[ 0 ].name, al);// Prepare sentinel search
    	location = lasttable;
    	while ( strcmp(table[ location ].name, al) ) location--;
	
        if ( !location ) {		// It wasn't in the table from before
        	lasttable++;
			// So insert it now
        	strcpy( table[ lasttable ].name, al);
			table[ lasttable ].adr = 0;
        	location = lasttable;
       }
	} else {
		switch ( ch ) {
			case '.': sym = period;                          break;
			case ';': sym = semcol;                          break;
			case '?': sym = query;                           break;
			case '=': sym = equal;                           break;
			case '\'':sym = quote;                           break;
			case '|': sym = altern;                          break;
			case '(': sym = lparen;                          break;
			case ')': sym = rparen;                          break;
			case '[': sym = lbrack;                          break;
			case ']': sym = rbrack;                          break;
			case '{': sym = lbrace;                          break;
			case '}': sym = rbrace;                          break;
			default:  sym = badchar; error("This char is illegal.");
		}	
		
	}
}

/*----------------------------------------------------------------------------*/
/* Application logic                                                          */
/*----------------------------------------------------------------------------*/

void gen( operator o, int l, int r ) {
	lastcode++;
	code[ lastcode ].op = o;
	code[ lastcode ].left = l;
	code[ lastcode ].right = r;
}

void factor() {;
	int left;
	switch ( sym ) {
		case nonterminal:
			gen( nsy, location, 0 /* fixed in main */ );
			getsym();
			break;
		case quote:
			get_char();
			if ( ch == ' ' ) error ( "no white space as terminal");
			if ( ch == '.' ) error ( "no . as terminal");
			gen( tsy, ch, 0 );
			getsym();
			break;
		case lparen:
			getsym();
			expression();
			if ( sym == rparen ) getsym(); else error(") expected");
			break;
		case lbrack:
			getsym();
			expression();
			if ( sym == rbrack ) getsym(); else error("] expected");
			gen( rep, 0, lastcode );
			break;
		case lbrace:
			getsym();
			expression();
			if ( sym == rbrace ) getsym(); else error("} expected");
			gen( opt, 0, lastcode );
			break;
		default:
			error("beginning of factor expected");
		
	}
}

void term() {
	int left;
	factor();
	if (    sym == nonterminal || sym == quote || sym == lparen
	     || sym == lbrack || sym == lbrace                        ) {
		left = lastcode;
		term();
		gen( cat, left, lastcode);	  
	}
}
	
void expression() {
	int left;
	term();
	if ( sym == altern ) { 				// alternation
		getsym();	
		left = lastcode;
		expression();
		gen( alt, left, lastcode );
	}	
}

/*----------------------------------------------------------------------------*/
/* Parse & show                                                               */
/*----------------------------------------------------------------------------*/

void parse(int t,  void (*cp) (void) ) {
	void alsoright() { parse( code[ t ].right, cp ); } // used for concat (cat)
	void sameagain() { parse( t, cp ); }               // used for rep [] / *
	switch ( code[ t ].op ) {
		case tsy:                                      // terminal symbol
			if ( instring[ sp ] == code[ t ].left ) {
				sp++; cp(); sp--;
			}
			break;
		case nsy:                                      // non-terminal symbol
			parse( code[ t ].right, cp ); break;
		case cat:                                      // concat
			parse( code[ t ].left, alsoright ); break;	
		case alt:                                      // alternation |
			parse( code[ t ].left,  cp );
			parse( code[ t ].right, cp );	  	  
			break;
		case rep:                                      // repetition 
			cp();
			parse( code[ t ].right, sameagain );
			break;
		case opt:                                      // optional
			cp();
			parse( code[ t ].right, cp );
			break;
	}
}

void show() {
	if ( num_parses == 0 ) printf(" ... well-formed -\n");
	num_parses++;	 
	printf("%d: %c", num_parses, 9 /* tab*/ );
	for ( i = 1; i < sp; i++) printf( "%c", instring[i] );
	printf("                ");
	for ( i = sp; i < sl; i++) printf( "%c", instring[i] );
	printf("\n");	 
}

/*----------------------------------------------------------------------------*/
/* Print out error                                                            */
/*----------------------------------------------------------------------------*/

void error(char* msg) {
	printf( "Error: Seen " );
	if ( sym == nonterminal )
		printf( "%s", al );
	else 
		printf ("%c", ch );
	printf( " when %s\n", msg );
	longjmp( start, 0 );
}

/*----------------------------------------------------------------------------*/
/* Main                                                                       */
/*----------------------------------------------------------------------------*/

int main() {
	setjmp( start );
	do { // read a grammar
		getsym();
		if ( sym != nonterminal ) error("non-terminal expected.");
		i = location;		// of last non-terminal
		getsym();
		if ( sym != equal )       error("= (equal) expected.");
		getsym();
		expression();
		table[ i ].adr = lastcode;
	} while ( sym == semcol );
	if ( sym == query)        tracing = true;
	else if ( sym == period ) tracing = false; 
	else                      error(". (period) expected at end of grammar.");
	for ( i = 1; i <= lasttable; i++ ) 
		if ( table[ i ].adr == 0 ) {	// this non-terminal is undefined
			printf("Undefined non-terminal %s. Panicking.", table[ i ].name );
			exit( 1 );
		}
	
	for ( i = 1; i <= lastcode; i++ ) 
		if ( code[ i ].op == nsy ) 
			code[ i ].right = table[ code[ i ].left ].adr;//fixup

	if ( tracing ) {						// output trace
		printf("non-terminals:\n");
		for ( i = 1; i <= lasttable; i++ ) 
			printf("%s : % d\n", table[ i ].name, table[ i ].adr);
		printf("code\n");
		for ( i = 1; i <= lastcode; i++ )
			printf("%d, op: %d l: %d r: %d\n", 
				i, code[i].op, code[i].left ,code[i].right);
		printf("\n");
	}
	cont = true;	// fix this to be more user friendly	
	do {									// parse sentences
		sl = 0;
		do {
			do get_char(); while ( ch == ' ' );
			sl++; instring[ sl ] = ch;
		} while ( ch != '.' );
		printf("\n");
		if ( sl > 1 ) {
			sp = 1; num_parses = 0;
	        parse( table[1].adr, show );
	        if ( num_parses == 0 ) printf(" ... ill-formed\n");
		}	 
	} while ( cont );
	return 0;
}


