/* ** This file contains code used to tokenize SGML. */ #include #include #include #include #include #include "sgmlparse.h" #define stricmp strcasecmp /* These four pointers define certain special handlers. All whitespace ** is sent to xSpaceHandler. Non-whitespace is given to xWordHandler. ** Any markup that isn't specifically directed elsewhere is given ** to xDefaultMarkupHandler. ** NOTE(review): xCommentHandler presumably receives comment text; its ** use is not visible in this part of the file -- confirm. */ static void (*xSpaceHandler)(const char*,void*); static void (*xWordHandler)(const char*,void*); static void (*xCommentHandler)(const char*,void*); static void (*xDefaultMarkupHandler)(int, const char**, void*); /* Each handler is stored in a hash table as an instance of the ** following structure. Entries whose names hash to the same slot are ** linked together through pCollide. */ typedef struct sgHandler Handler; struct sgHandler { char *zName; /* Name of markup to handle */ void (*xHandler)(int, const char**, void*); /* Routine to do the work */ Handler *pCollide; /* Next handler with same hash */ }; /* The size of the handler hash table. ** For best results, this should be a prime number which is larger than ** the number of markups in the hash table. ** NOTE(review): 203 = 7 * 29 is not prime; consider a nearby prime ** such as 211. */ #define SGML_HASH_SIZE 203 /* The handler hash table, indexed by the value returned from SgmlHash() */ static Handler *apHandler[SGML_HASH_SIZE]; /* Hash a handler name. ** ** Each character of zName is folded to lower case when isupper() reports ** it as upper case, so names that differ only in letter case hash to the ** same value. The accumulated value is made non-negative and reduced ** modulo SGML_HASH_SIZE to give an index into apHandler[]. ** ** NOTE(review): c is a plain char handed to isupper()/tolower(); where ** char is signed a negative value is undefined behavior -- cast to ** unsigned char. h<<5 on a signed int can overflow, and negating h ** overflows when h is INT_MIN, in which case the result of the modulo ** could be negative. An unsigned accumulator would avoid all of these. */ static int SgmlHash(const char *zName){ int h = 0; char c; while( (c=*zName)!=0 ){ if( isupper(c) ) c = tolower(c); h = h<<5 ^ h ^ c; zName++; } if( h<0 ) h = -h; return h % SGML_HASH_SIZE; } /* Given a pointer to an input file, read and parse that file ** as if it were SGML. ** ** This is not a true SGML parser because it handles some unusual ** cases differently, and ignores the & operator completely. */ void SgmlParse(FILE *in, void *pArg){ int c; int i, j; int argc; Handler *pHandler; char *argv[100]; char zBuf[10000]; c = getc(in); while( c!=EOF ){ if( isspace(c) ){ /* Case 1: spaces */ zBuf[0] = c; i = 1; while( i' ) c = getc(in); zBuf[i] = 0; if( strncmp(zBuf,"