/*
    Universal lexical analiser implementation
*/
#include <string.h>
#include <stdlib.h>
#include <limits.h>
#include "lexer.h"

//#define MY_DEBUG 1

#ifdef MY_DEBUG
#include <stdio.h>
#endif
/* Uppercase translation table for the Win1251 charset */
const char Win1251UpcaseTbl[] =
    "\000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017"
    "\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037"
    " !\042#$%&'()*+,-./0123456789:;<=>?"
    "@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_"
    "`ABCDEFGHIJKLMNOPQRSTUVWXYZ{|}~\177"
    "\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217"
    "\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237"
    "\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257"
    "\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277"
    "\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317"
    "\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337"
    "\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317"
    "\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337";

char Win1251RusLetters[] =
    "\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317"
    "\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337"
    "\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357"
    "\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377";


hqCharType Win1251NameTbl[256];
int win1251nametbl_initialized = 0;

int FindSymbol( SymbolRec **SymTable, const char *str, int *nchars );

// initializations

void InitCharTypeTable( hqCharType *CharTypeTable, int CharTypes )
{
    int ch;
#ifdef MY_DEBUG
    printf( "CharTypeTable = 0x%X; CharTypes = %d\n", (unsigned)CharTypeTable,
	    CharTypes );
#endif
    memset( CharTypeTable, CH_UNKNOWN, 256 * sizeof(hqCharType) );

    CharTypeTable[0] = CH_FINAL;

    if (CharTypes & CH_SEPARAT) {
	CharTypeTable[' '] = CH_SEPARAT;
	CharTypeTable[9]  = CH_SEPARAT;
	CharTypeTable[13]  = CH_SEPARAT;
	CharTypeTable[10]  = CH_SEPARAT;
    }

    if (CharTypes & CH_QUOTE) {
	CharTypeTable['\'']  = CH_QUOTE;
    }

    if (CharTypes & CH_LETTER) {
	for (ch='A'; ch<='Z'; ch++)
	    CharTypeTable[ch] = CH_LETTER;
	for (ch='a'; ch<='z'; ch++)
	    CharTypeTable[ch] = CH_LETTER;
	CharTypeTable['_'] = CH_LETTER;
    }

    if (CharTypes & CH_DIGIT) {
	for (ch='0'; ch<='9'; ch++)
	    CharTypeTable[ch] = CH_DIGIT;
    }
}

void TypeTableAddChars( hqCharType *CharTypeTable, char *Symbols,
			hqCharType CharType )
{
    while (*Symbols)
	CharTypeTable[ (uchar) *Symbols++] = CharType;
}

void UpcaseWin1251Str( char *Str )
{
    while (( *Str = Win1251UpcaseTbl[ (uchar) *Str ] ))
	++Str;
}


// hqLexer implementation

#define CHARTYPEPP lexer->CharTypeTable[ (uchar) *++(lexer->SS) ]
#define CHARTYPE lexer->CharTypeTable[ (uchar) *lexer->SS ]

int Lexer_SetParseString( hqLexer *lexer, const char *str )
{
    lexer->PrevTokenType = TOK_NONE;
    if ( !str || !*str )
	return 0;

    lexer->SS = str;
    lexer->CharType = CHARTYPE;
    return 1;
}

hqTokenType Lexer_GetNextToken( hqLexer *lexer )
{
    hqTokenType result = TOK_ERROR;

next_token:

    while ( lexer->CharType == CH_SEPARAT )
	lexer->CharType = CHARTYPEPP;
	
    switch ( lexer->CharType ) {
	case CH_FINAL:
	    result = TOK_FINAL;
	    break;
	case CH_LETTER:
	    lexer->Name = lexer->SS;
	    do {
		lexer->CharType = CHARTYPEPP;
	    } while (lexer->CharType <= CH_DIGIT);
	    lexer->NameLen = lexer->SS - lexer->Name;
	    result = TOK_NAME;
	    break;
	case CH_DIGIT: {
	    char *NewSS, ch = *lexer->SS, nch = *(lexer->SS+1);
	    int intreaded = 0;
	    // Readind hex number
	    if ( ch == '0' && nch == 'x' ) {
	    	lexer->IntValue = strtol( lexer->SS, &NewSS, 16 );
                intreaded = 1;
	    }
	    // Readind oct number
	    if ( ch == '0' && nch >= '0' && nch <= '9' ) {
	    	lexer->IntValue = strtol( lexer->SS, &NewSS, 8 );
                intreaded = 1;
	    }

	    if (intreaded == 1) {
                if ( lexer->SS != NewSS ) {
		    lexer->SS = NewSS;
		    if (lexer->NoIntegers) {
			lexer->ExtValue = lexer->IntValue;
			result = TOK_FLOAT;
		    } else
			result = TOK_INT;
		    lexer->CharType = CHARTYPE;
                }
                break;
	    }

            // Readind dec number
	    lexer->ExtValue = strtod( lexer->SS, &NewSS );
	    if ( lexer->SS != NewSS ) {;
		lexer->SS = NewSS;
		if ( !lexer->NoIntegers
		    && lexer->ExtValue<=INT_MAX
		    && lexer->ExtValue>=INT_MAX
		    && (double)( lexer->IntValue = (uchar) lexer->ExtValue )
			== lexer->ExtValue ) {
		    result = TOK_INT;
		} else
		    result = TOK_FLOAT;
		lexer->CharType = CHARTYPE;
	    }
	    break;
	}
	case CH_SYMBOL: {
	    int nchars;
	    int i = FindSymbol( lexer->SymTable, lexer->SS, &nchars );
	    if (i >= 0) {
		lexer->SS += nchars;
		if (i == lexer->cssn) {
		    char comend = *lexer->ComEnd;
		    char comendpp = *(lexer->ComEnd+1);
		    while ( *lexer->SS ) {
			if ( *lexer->SS == comend
			     && 
			     ( comendpp == '\0' || *(lexer->SS+1) == comendpp )
			   ) {
			    ++lexer->SS;
			    if (comendpp != '\0')
				++lexer->SS;
			    lexer->CharType = CHARTYPE;
			    goto next_token;
			}
			++lexer->SS;
		    }
		    break;
		}
		lexer->CharType = CHARTYPE;
		lexer->IntValue = i;
		result = TOK_SYMBOL;
	    }
	    break;
	}
	case CH_QUOTE:
	    lexer->Name = ++(lexer->SS);
	    while ( lexer->CharTypeTable[ (uchar)*lexer->SS ] != CH_QUOTE
		    && *(lexer->SS) != '\0' )
		++lexer->SS;
	    if ( CHARTYPE == CH_QUOTE ) {
		lexer->NameLen = lexer->SS - lexer->Name;
		++lexer->SS;
		lexer->CharType = CHARTYPE;
		result = TOK_STRING;
	    }
	    break;
	default:
	    break;
    }
    return lexer->PrevTokenType = result;
}

const char* Lexer_GetCurrentPos( hqLexer *lexer )
{
    return lexer->SS;
}

// misc functions

void PrepareSymTable( SymbolRec **SymTable, char *symbols )
{
    int i = 0, nchars = 1;
    memset( SymTable, 0, 256 * sizeof(void*) );
    while (*symbols) {
	if (*symbols=='\033') {
	    nchars = *++symbols;
	    ++symbols;
	} else {
	    SymbolRec **RecList = SymTable + *symbols;
	    SymbolRec *Rec = *RecList;
	    int count = 0;
	    while ( Rec ) {
		++count;
		if ( Rec->More )
		    ++Rec;
		else
		    break;
	    }
	    if ( Rec ) {
		*RecList = (SymbolRec*)
			realloc( *RecList, (count+1)*sizeof(SymbolRec) );
		Rec = *RecList + count;
		(Rec-1)->More = 1;
	    } else {
		*RecList = (SymbolRec*) malloc( sizeof(SymbolRec) );
		Rec = *RecList;
	    }
	    strncpy( Rec->Sym, symbols, 4 );
	    Rec->Len = (char) nchars;
	    Rec->Index = (char) i;
	    Rec->More = 0;
	    symbols += nchars;
	    ++i;
	}
    }
}

int FindSymbol( SymbolRec **SymTable, const char *str, int *nchars )
{
    SymbolRec *Rec = SymTable[ (int)*str ];
    while ( Rec ) {
	if ( (Rec->Len == 1 && Rec->Sym[0] == str[0])
	     ||
	     (Rec->Len == 2 && Rec->Sym[0] == str[0] && Rec->Sym[1] == str[1])
	     ||
	     (Rec->Len == 3 && Rec->Sym[0] == str[0] && Rec->Sym[1] == str[1]
	      && Rec->Sym[2] == str[2])
	   ) {
	    *nchars = Rec->Len;
	    return Rec->Index;
	}
	Rec = ( Rec->More ) ? Rec + 1 : NULL;
    }
    return -1;
}