Logo Search packages:      
Sourcecode: wine-unstable version File versions  Download package

mcl.c

/*
 * Wine Message Compiler lexical scanner
 *
 * Copyright 2000 Bertho A. Stultiens (BS)
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
 */

#include "config.h"

#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <assert.h>
#include <string.h>

#include "utils.h"
#include "wmc.h"
#include "lang.h"

#include "mcy.tab.h"

/*
 * Keywords are case insensitive. All normal input is treated as
 * being in codepage iso-8859-1 for ascii input files (unicode
 * page 0) and as equivalent unicode if unicode input is selected.
 * All normal input, which is not part of a message text, is
 * enforced to be unicode page 0. Otherwise an error will be
 * generated. The normal file data should only be ASCII because
 * that is the basic definition of the grammar.
 *
 * Byteorder of unicode input is determined automatically by
 * reading the first 8 bytes and checking them against unicode
 * page 0 byteorder (hibyte must be 0).
 * -- FIXME --
 * Alternatively, the input is checked against a special byte
 * sequence to identify the file.
 * -- FIXME --
 *
 *
 * Keywords:
 *    Codepages
 *    Facility
 *    FacilityNames
 *    Language
 *    LanguageNames
 *    MessageId
 *    MessageIdTypedef
 *    OutputBase
 *    Severity
 *    SeverityNames
 *    SymbolicName
 *
 * Default added identifiers for classes:
 * SeverityNames:
 *    Success           = 0x0
 *    Informational     = 0x1
 *    Warning           = 0x2
 *    Error       = 0x3
 * FacilityNames:
 *    System            = 0x0FF
 *    Application = 0xFFF
 *
 * The 'Codepages' keyword is a wmc extension.
 */

/*
 * Wide-character spellings of the built-in keywords and default
 * identifiers. Each array is explicitly 0-terminated. They are handed
 * to add_token() by mcy_lex() the first time the scanner runs;
 * ustr_msg00001 is passed as the alias of the default English language.
 */
static const WCHAR ustr_application[]     = { 'A', 'p', 'p', 'l', 'i', 'c', 'a', 't', 'i', 'o', 'n', 0 };
static const WCHAR ustr_codepages[] = { 'C', 'o', 'd', 'e', 'p', 'a', 'g', 'e', 's', 0 };
static const WCHAR ustr_english[]   = { 'E', 'n', 'g', 'l', 'i', 's', 'h', 0 };
static const WCHAR ustr_error[]           = { 'E', 'r', 'r', 'o', 'r', 0 };
static const WCHAR ustr_facility[]  = { 'F', 'a', 'c', 'i', 'l', 'i', 't', 'y', 0 };
static const WCHAR ustr_facilitynames[]   = { 'F', 'a', 'c', 'i', 'l', 'i', 't', 'y', 'N', 'a', 'm', 'e', 's', 0 };
static const WCHAR ustr_informational[]   = { 'I', 'n', 'f', 'o', 'r', 'm', 'a', 't', 'i', 'o', 'n', 'a', 'l', 0 };
static const WCHAR ustr_language[]  = { 'L', 'a', 'n', 'g', 'u', 'a', 'g', 'e', 0};
static const WCHAR ustr_languagenames[]   = { 'L', 'a', 'n', 'g', 'u', 'a', 'g', 'e', 'N', 'a', 'm', 'e', 's', 0};
static const WCHAR ustr_messageid[] = { 'M', 'e', 's', 's', 'a', 'g', 'e', 'I', 'd', 0 };
static const WCHAR ustr_messageidtypedef[] = { 'M', 'e', 's', 's', 'a', 'g', 'e', 'I', 'd', 'T', 'y', 'p', 'e', 'd', 'e', 'f', 0 };
static const WCHAR ustr_outputbase[]      = { 'O', 'u', 't', 'p', 'u', 't', 'B', 'a', 's', 'e', 0 };
static const WCHAR ustr_severity[]  = { 'S', 'e', 'v', 'e', 'r', 'i', 't', 'y', 0 };
static const WCHAR ustr_severitynames[]   = { 'S', 'e', 'v', 'e', 'r', 'i', 't', 'y', 'N', 'a', 'm', 'e', 's', 0 };
static const WCHAR ustr_success[]   = { 'S', 'u', 'c', 'c', 'e', 's', 's', 0 };
static const WCHAR ustr_symbolicname[]    = { 'S', 'y', 'm', 'b', 'o', 'l', 'i', 'c', 'N', 'a', 'm', 'e', 0 };
static const WCHAR ustr_system[]    = { 'S', 'y', 's', 't', 'e', 'm', 0 };
static const WCHAR ustr_warning[]   = { 'W', 'a', 'r', 'n', 'i', 'n', 'g', 0 };
static const WCHAR ustr_msg00001[]  = { 'm', 's', 'g', '0', '0', '0', '0', '1', 0 };
/*
 * This table is to beat any form of "expression building" to check for
 * correct filename characters. It is also used for ident checks.
 * FIXME: use it more consistently.
 */

/* Classification bits; every char_table entry is an OR of these. */
#define CH_SHORTNAME    0x01  /* required by mcy_lex() for each character of a tFILE name */
#define CH_LONGNAME     0x02  /* set in the table; not tested by the scanner code in this file */
#define CH_IDENT  0x04  /* may start or continue an identifier in mcy_lex() */
#define CH_NUMBER 0x08  /* may continue (but not start) an identifier in mcy_lex() */
/*#define CH_WILDCARD   0x10*/
/*#define CH_DOT  0x20*/
#define CH_PUNCT  0x40  /* stops tFILE name scanning in mcy_lex() */
#define CH_INVALID      0x80  /* set in the table; not tested by the scanner code in this file */

/*
 * Character classification table, indexed by a character value in the
 * range 0..255. Each entry is an OR of the CH_* bits. mcy_lex() only
 * indexes it after isisochar() has confirmed that the value fits.
 *
 * A few entries ('*', '.', '?') carry the bits 0x10 / 0x20, which match
 * the commented-out CH_WILDCARD / CH_DOT definitions; nothing in this
 * file tests those bits.
 *
 * NOTE(review): the element type is plain char while entries such as
 * 0x80 and 0xc0 do not fit a signed char. The users in this file only
 * test individual bits with '&' -- confirm this is intended before
 * comparing entries for equality.
 */
static const char char_table[256] = {
      0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, /* 0x00 - 0x07 */
      0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, /* 0x08 - 0x0F */
      0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, /* 0x10 - 0x17 */
      0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, /* 0x18 - 0x1F */
      0x80, 0x03, 0x80, 0x03, 0x03, 0x03, 0x03, 0x03, /* 0x20 - 0x27 " !"#$%&'" */
      0x43, 0x43, 0x10, 0x80, 0x03, 0x03, 0x22, 0x80, /* 0x28 - 0x2F "()*+,-./" */
      0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, /* 0x30 - 0x37 "01234567" */
      0x0b, 0x0b, 0xc0, 0x80, 0x80, 0x80, 0x80, 0x10, /* 0x38 - 0x3F "89:;<=>?" */
      0x03, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, /* 0x40 - 0x47 "@ABCDEFG" */
      0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, /* 0x48 - 0x4F "HIJKLMNO" */
      0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, /* 0x50 - 0x57 "PQRSTUVW" */
      0x07, 0x07, 0x07, 0x80, 0x80, 0x80, 0x80, 0x07, /* 0x58 - 0x5F "XYZ[\]^_" */
      0x03, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, /* 0x60 - 0x67 "`abcdefg" */
      0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, /* 0x68 - 0x6F "hijklmno" */
      0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, /* 0x70 - 0x77 "pqrstuvw" */
      0x07, 0x07, 0x07, 0x03, 0x80, 0x03, 0x03, 0x80, /* 0x78 - 0x7F "xyz{|}~ " */
      0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0x80 - 0x87 */
      0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0x88 - 0x8F */
      0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0x90 - 0x97 */
      0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0x98 - 0x9F */
      0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xA0 - 0xA7 */
      0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xA8 - 0xAF */
      0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xB0 - 0xB7 */
      0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xB8 - 0xBF */
      0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xC0 - 0xC7 */
      0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xC8 - 0xCF */
      0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xD0 - 0xD7 */
      0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xD8 - 0xDF */
      0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xE0 - 0xE7 */
      0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xE8 - 0xEF */
      0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xF0 - 0xF7 */
      0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x80, /* 0xF8 - 0xFF */
};

/*
 * Tell whether ch fits in the low eight bits (unicode page 0).
 * Returns 1 when no bit above 0xff is set, 0 otherwise; negative
 * values such as EOF therefore yield 0.
 */
static int isisochar(int ch)
{
      const int high_bits = ch & ~0xff;

      return high_bits == 0;
}

/* Currently selected input codepage number and its lookup result. */
static int codepage;
static const union cptable *codepage_def;

/*
 * Select codepage cp for subsequent input translation.
 * Stores the number and the table returned by find_codepage(). A
 * missing table is only tolerated for CP_UTF8; any other codepage
 * without a table is reported through xyyerror().
 */
void set_codepage(int cp)
{
      codepage = cp;
      codepage_def = find_codepage(cp);

      if(codepage_def)
            return;
      if(cp == CP_UTF8)
            return;

      xyyerror("Codepage %d not found; cannot process\n", codepage);
}

/*
 * Input functions
 */
/*
 * Pushback stack shared by get_unichar() and unget_unichar().
 * The elements are WCHAR, the same type as the input buffer, so that a
 * pushed-back character keeps every bit it had when it was read. With
 * a plain char element, characters above 0xff were truncated and, where
 * char is signed, 0x80..0xff came back as negative values (0xff even
 * compared equal to EOF).
 */
static int nungetstack = 0;         /* number of characters currently pushed back */
static int allocungetstack = 0;     /* allocated capacity of ungetstack, in elements */
static WCHAR *ungetstack = NULL;
/* Line buffer state used by fill_inputbuffer() and get_unichar(). */
static int ninputbuffer = 0;        /* characters in inputbuffer not yet consumed */
static WCHAR *inputbuffer = NULL;   /* one line of input as WCHARs */
static char *xlatebuffer = NULL;    /* raw bytes of a line before codepage translation */

#define INPUTBUFFER_SIZE      2048  /* Must be larger than 4 and approx. large enough to hold a line */

/*
 * Fill the input buffer with *one* line of input.
 * The line is '\n' terminated so that scanning
 * messages with translation works as expected
 * (otherwise we cannot pre-translate because the
 * language is first known one line before the
 * actual message).
 *
 * Non-unicode input is read with fgets() and translated to WCHARs with
 * the current codepage table, or as UTF-8 when there is no table.
 * Unicode input is read two bytes at a time; the very first call reads
 * 8 bytes to determine the byte order and returns just those four
 * characters.
 *
 * Returns 1 after adding the number of characters read to ninputbuffer,
 * or 0 when the end of the input is reached before any character could
 * be read.
 */
static int fill_inputbuffer(void)
{
      int n;
      static const char err_fatalread[] = "Fatal: reading input failed";
      static int endian = -1;       /* -1 until the byte order is known */

      /* Both buffers are allocated once, on the first call. */
      if(!inputbuffer)
      {
            inputbuffer = xmalloc(INPUTBUFFER_SIZE*sizeof(WCHAR));
            xlatebuffer = xmalloc(INPUTBUFFER_SIZE);
      }

try_again:
      if(!unicodein)
      {
            char *cptr;
            cptr = fgets(xlatebuffer, INPUTBUFFER_SIZE, yyin);
            if(!cptr && ferror(yyin))
                  xyyerror(err_fatalread);
            else if(!cptr)
                  return 0;
            /* The terminating '\0' is converted as well, hence strlen()+1 */
            if(codepage_def)
                  n = wine_cp_mbstowcs(codepage_def, 0, xlatebuffer, strlen(xlatebuffer)+1, inputbuffer, INPUTBUFFER_SIZE);
            else
                  n = wine_utf8_mbstowcs(0, xlatebuffer, strlen(xlatebuffer)+1, inputbuffer, INPUTBUFFER_SIZE);
            if(n < 0)
                  internal_error(__FILE__, __LINE__, "Could not translate to unicode (%d)\n", n);
            if(n <= 1)
                  goto try_again;   /* Should not happen */
            n--;  /* Strip added conversion '\0' from input length */
            /*
             * FIXME:
             * Detect UTF-8 in the first time we read some bytes by
             * checking the special sequence "FE..." or something like
             * that. I need to check www.unicode.org for details.
             */
      }
      else
      {
            if(endian == -1)
            {
                  /*
                   * First read: fetch 8 bytes and look at them as four
                   * 16-bit units (this assumes a 2-byte WCHAR).
                   */
                  n = fread(inputbuffer, 1, 8, yyin);
                  if(n != 8)
                  {
                        if(!n && ferror(yyin))
                              xyyerror(err_fatalread);
                        else
                              xyyerror("Fatal: file to short to determine byteorder (should never happen)\n");
                  }
                  /* All four units are page 0 as read: host byte order */
                  if(isisochar(inputbuffer[0]) &&
                        isisochar(inputbuffer[1]) &&
                        isisochar(inputbuffer[2]) &&
                        isisochar(inputbuffer[3]))
                  {
#ifdef WORDS_BIGENDIAN
                        endian = WMC_BO_BIG;
#else
                        endian = WMC_BO_LITTLE;
#endif
                  }
                  /* All four units are page 0 after swapping: opposite order */
                  else if(isisochar(BYTESWAP_WORD(inputbuffer[0])) &&
                        isisochar(BYTESWAP_WORD(inputbuffer[1])) &&
                        isisochar(BYTESWAP_WORD(inputbuffer[2])) &&
                        isisochar(BYTESWAP_WORD(inputbuffer[3])))
                  {
#ifdef WORDS_BIGENDIAN
                        endian = WMC_BO_LITTLE;
#else
                        endian = WMC_BO_BIG;
#endif
                  }
                  else
                        xyyerror("Fatal: cannot determine file's byteorder\n");
                  /* FIXME:
                   * Determine the file-endian with the leader-bytes
                   * "FF FE..."; can't remember the exact sequence.
                   */
                  n /= 2;     /* bytes -> characters */
#ifdef WORDS_BIGENDIAN
                  if(endian == WMC_BO_LITTLE)
#else
                  if(endian == WMC_BO_BIG)
#endif
                  {
                        inputbuffer[0] = BYTESWAP_WORD(inputbuffer[0]);
                        inputbuffer[1] = BYTESWAP_WORD(inputbuffer[1]);
                        inputbuffer[2] = BYTESWAP_WORD(inputbuffer[2]);
                        inputbuffer[3] = BYTESWAP_WORD(inputbuffer[3]);
                  }

            }
            else
            {
                  int i;
                  n = 0;
                  for(i = 0; i < INPUTBUFFER_SIZE; i++)
                  {
                        int t;
                        t = fread(&inputbuffer[i], 2, 1, yyin);
                        if(!t)
                        {
                              if(ferror(yyin))
                                    xyyerror(err_fatalread);
                              /*
                               * Nothing was stored in inputbuffer[i].
                               * With no character read at all this is
                               * the end of the input; it must not be
                               * counted as a character.
                               */
                              if(!n)
                                    return 0;
                              break;
                        }
                        n++;
#ifdef WORDS_BIGENDIAN
                        if(endian == WMC_BO_LITTLE)
#else
                        if(endian == WMC_BO_BIG)
#endif
                        {
                              if((inputbuffer[i] = BYTESWAP_WORD(inputbuffer[i])) == '\n')
                                    break;
                        }
                        else
                        {
                              if(inputbuffer[i] == '\n')
                                    break;
                        }
                  }
            }

      }

      if(!n)
      {
            mcy_warning("Re-read line (input was or converted to zilch)\n");
            goto try_again;   /* Should not happen, but could be due to stdin reading and a signal */
      }

      ninputbuffer += n;
      return 1;
}

/*
 * Return the next input character, or EOF when no more input exists.
 * Pushed-back characters are returned first, most recent first.
 * Otherwise the next character of the line buffer is returned, after
 * refilling the buffer when it is exhausted. char_number is advanced on
 * every call, including the one that returns EOF.
 */
static int get_unichar(void)
{
      static WCHAR *cursor = NULL;  /* next unread position in inputbuffer */
      int ch;

      char_number++;

      if(nungetstack != 0)
      {
            nungetstack--;
            return ungetstack[nungetstack];
      }

      if(ninputbuffer == 0)
      {
            if(fill_inputbuffer() == 0)
                  return EOF;
            cursor = inputbuffer;
      }

      ch = (int)(*cursor & 0xffff);
      cursor++;
      ninputbuffer--;
      return ch;
}

/*
 * Push ch back so that the next get_unichar() call returns it.
 * EOF is ignored. char_number is decremented to undo the increment
 * made when the character was read. The stack grows by 32 elements
 * whenever it is full.
 */
static void unget_unichar(int ch)
{
      if(ch != EOF)
      {
            char_number--;

            if(nungetstack == allocungetstack)
            {
                  allocungetstack += 32;
                  ungetstack = xrealloc(ungetstack, allocungetstack * sizeof(*ungetstack));
            }

            ungetstack[nungetstack] = (WCHAR)ch;
            nungetstack++;
      }
}


/*
 * Normal character stack.
 * Used for number scanning.
 */
static int ncharstack = 0;          /* characters currently on the stack */
static int alloccharstack = 0;      /* allocated capacity of charstack */
static char *charstack = NULL;      /* storage, grown by push_char() */

/* Discard the stack contents; the allocated storage is kept for reuse. */
static void empty_char_stack(void)
{
      ncharstack = 0;
}

/*
 * Append ch, converted to char, to the character stack.
 * The storage is enlarged by 32 bytes whenever it is full.
 */
static void push_char(int ch)
{
      const int is_full = (ncharstack == alloccharstack);

      if(is_full)
      {
            alloccharstack += 32;
            charstack = xrealloc(charstack, alloccharstack * sizeof(*charstack));
      }

      charstack[ncharstack] = (char)ch;
      ncharstack++;
}

/*
 * Return the most recently pushed character as a value in 0..255,
 * or 0 when the stack is empty. The stack is not modified.
 */
static int tos_char_stack(void)
{
      if(ncharstack == 0)
            return 0;

      return (int)(charstack[ncharstack-1] & 0xff);
}

/*
 * Return the start of the character stack storage.
 * No terminator is added here; the caller has to push a 0 first if it
 * wants to use the result as a C string.
 */
static char *get_char_stack(void)
{
      char *base = charstack;

      return base;
}

/*
 * Unicode character stack.
 * Used for general scanner.
 */
static int nunicharstack = 0;       /* characters currently on the stack */
static int allocunicharstack = 0;   /* allocated capacity of unicharstack */
static WCHAR *unicharstack = NULL;  /* storage, grown by push_unichar() */

/* Discard the stack contents; the allocated storage is kept for reuse. */
static void empty_unichar_stack(void)
{
      nunicharstack = 0;
}

/*
 * Append ch, converted to WCHAR, to the unicode character stack.
 * The storage is enlarged by 128 elements whenever it is full.
 */
static void push_unichar(int ch)
{
      const int is_full = (nunicharstack == allocunicharstack);

      if(is_full)
      {
            allocunicharstack += 128;
            unicharstack = xrealloc(unicharstack, allocunicharstack * sizeof(*unicharstack));
      }

      unicharstack[nunicharstack] = (WCHAR)ch;
      nunicharstack++;
}

/*
 * Disabled (compiled out by '#if 0'): unicode counterpart of
 * tos_char_stack(). It would return the most recently pushed
 * character, or 0 for an empty stack.
 */
#if 0
static int tos_unichar_stack(void)
{
      if(!nunicharstack)
            return 0;
      else
            return (int)(unicharstack[nunicharstack-1] & 0xffff);
}
#endif

/*
 * Return the start of the unicode character stack storage.
 * No terminator is added here; the caller has to push a 0 first if it
 * wants to use the result as a string.
 */
static WCHAR *get_unichar_stack(void)
{
      WCHAR *base = unicharstack;

      return base;
}

/*
 * Number scanner
 *
 * state |      ch         | next state
 * ------+-----------------+--------------------------
 *   0   | [0]             | 1
 *   0   | [1-9]           | 4
 *   0   | .               | error (should never occur)
 *   1   | [xX]            | 2
 *   1   | [0-7]           | 3
 *   1   | [89a-wyzA-WYZ_] | error invalid digit
 *   1   | .               | return 0
 *   2   | [0-9a-fA-F]     | 2
 *   2   | [g-zG-Z_]       | error invalid hex digit
 *   2   | .               | return (hex-number) if TOS != [xX] else error
 *   3   | [0-7]           | 3
 *   3   | [89a-zA-Z_]     | error invalid octal digit
 *   3   | .               | return (octal-number)
 *   4   | [0-9]           | 4
 *   4   | [a-zA-Z_]       | error invalid decimal digit
 *   4   | .               | return (decimal-number)
 *
 * All non-identifier characters [^a-zA-Z_0-9] terminate the scan
 * and return the value. This is not entirely correct, but close
 * enough (should check punctuators as trailing context, but the
 * char_table is not adapted to that and it is questionable whether
 * it is worth the trouble).
 * All non-iso-8859-1 characters are an error.
 *
 * ch is the first character of the number, already read by the caller;
 * further characters are fetched with get_unichar(). The terminating
 * character is pushed back with unget_unichar(). The value is stored
 * in mcy_lval.num and tNUMBER is returned.
 */
static int scan_number(int ch)
{
      int state = 0;
      int base = 10;
      empty_char_stack();

      while(1)
      {
            if(!isisochar(ch))
                  xyyerror("Invalid digit\n");

            switch(state)
            {
            case 0:
                  if(isdigit(ch))
                  {
                        push_char(ch);
                        if(ch == '0')
                              state = 1;
                        else
                              state = 4;
                  }
                  else
                        internal_error(__FILE__, __LINE__, "Non-digit in first number-scanner state\n");
                  break;
            case 1:
                  if(ch == 'x' || ch == 'X')
                  {
                        push_char(ch);
                        state = 2;
                  }
                  else if(ch >= '0' && ch <= '7')
                  {
                        push_char(ch);
                        state = 3;
                  }
                  /*
                   * isalnum() rather than isalpha(): '8' and '9' after a
                   * leading '0' are invalid digits as well. Otherwise
                   * "08" would be scanned as the two numbers 0 and 8.
                   */
                  else if(isalnum(ch) || ch == '_')
                        xyyerror("Invalid number digit\n");
                  else
                  {
                        /* A lone "0" */
                        unget_unichar(ch);
                        mcy_lval.num = 0;
                        return tNUMBER;
                  }
                  break;
            case 2:
                  if(isxdigit(ch))
                        push_char(ch);
                  /* The last test rejects "0x" without any hex digit (TOS is still [xX]) */
                  else if(isalpha(ch) || ch == '_' || !isxdigit(tos_char_stack()))
                        xyyerror("Invalid hex digit\n");
                  else
                  {
                        base = 16;
                        goto finish;
                  }
                  break;
            case 3:
                  if(ch >= '0' && ch <= '7')
                        push_char(ch);
                  else if(isalnum(ch) || ch == '_')
                        xyyerror("Invalid octal digit\n");
                  else
                  {
                        base = 8;
                        goto finish;
                  }
                  break;
            case 4:
                  if(isdigit(ch))
                        push_char(ch);
                  else if(isalnum(ch) || ch == '_')
                        xyyerror("Invalid decimal digit\n");
                  else
                  {
                        base = 10;
                        goto finish;
                  }
                  break;
            default:
                  internal_error(__FILE__, __LINE__, "Invalid state in number-scanner\n");
            }
            ch = get_unichar();
      }
finish:
      unget_unichar(ch);
      push_char(0);     /* Terminate the collected digits for strtoul() */
      mcy_lval.num = strtoul(get_char_stack(), NULL, base);
      return tNUMBER;
}

/* Account for a consumed newline: next line, character position 1. */
static void newline(void)
{
      char_number = 1;
      line_number++;
}

static int unisort(const void *p1, const void *p2)
{
      return unistricmp(((const token_t *)p1)->name, ((const token_t *)p2)->name);
}

static token_t *tokentable = NULL;
static int ntokentable = 0;

token_t *lookup_token(const WCHAR *s)
{
      token_t tok;

      tok.name = s;
      return (token_t *)bsearch(&tok, tokentable, ntokentable, sizeof(*tokentable), unisort);
}

/*
 * Append a token to the token table and re-sort the table.
 * The arguments are stored in the new entry's type, name, token,
 * codepage, alias and fixed fields respectively. The name and alias
 * pointers are stored as given; they are not copied.
 */
void add_token(tok_e type, const WCHAR *name, int tok, int cp, const WCHAR *alias, int fix)
{
      token_t *entry;

      ntokentable++;
      tokentable = xrealloc(tokentable, ntokentable * sizeof(*tokentable));

      entry = &tokentable[ntokentable-1];
      entry->type = type;
      entry->name = name;
      entry->token = tok;
      entry->codepage = cp;
      entry->alias = alias;
      entry->fixed = fix;

      /* Keep the table ordered so that lookup_token() can use bsearch() */
      qsort(tokentable, ntokentable, sizeof(*tokentable), unisort);
}

/*
 * Hand out the token table.
 * *tab receives the table itself (not a copy) and *len the number of
 * entries. Both pointers must be non-NULL.
 */
void get_tokentable(token_t **tab, int *len)
{
      assert(tab != NULL);
      assert(len != NULL);

      *len = ntokentable;
      *tab = tokentable;
}

/*
 * The scanner
 *
 * Returns the next token for the parser, or EOF at the end of input.
 * Token values are passed through mcy_lval (num, str or tok). The
 * first call selects the default codepage and registers the built-in
 * keywords and default identifiers.
 *
 * The parser steers the scanner through these flags:
 *   want_line  return the rest of the current line as tLINE, or
 *              tMSGEND for a line that consists of a single '.'
 *   want_nl    return the next newline as tNL instead of skipping it
 *   want_file  return the following characters as tFILE
 *
 * Strings returned through mcy_lval.str are allocated with
 * xunistrdup().
 */
int mcy_lex(void)
{
      static const WCHAR ustr_dot1[] = { '.', '\n', 0 };
      static const WCHAR ustr_dot2[] = { '.', '\r', '\n', 0 };
      static int isinit = 0;
      int ch;

      if(!isinit)
      {
            isinit++;
            set_codepage(WMC_DEFAULT_CODEPAGE);
            add_token(tok_keyword,  ustr_codepages,         tCODEPAGE,  0, NULL, 0);
            add_token(tok_keyword,  ustr_facility,          tFACILITY,  0, NULL, 1);
            add_token(tok_keyword,  ustr_facilitynames,     tFACNAMES,  0, NULL, 1);
            add_token(tok_keyword,  ustr_language,          tLANGUAGE,  0, NULL, 1);
            add_token(tok_keyword,  ustr_languagenames,     tLANNAMES,  0, NULL, 1);
            add_token(tok_keyword,  ustr_messageid,         tMSGID,           0, NULL, 1);
            add_token(tok_keyword,  ustr_messageidtypedef,  tTYPEDEF,   0, NULL, 1);
            add_token(tok_keyword,  ustr_outputbase,  tBASE,            0, NULL, 1);
            add_token(tok_keyword,  ustr_severity,          tSEVERITY,  0, NULL, 1);
            add_token(tok_keyword,  ustr_severitynames,     tSEVNAMES,  0, NULL, 1);
            add_token(tok_keyword,  ustr_symbolicname,      tSYMNAME,   0, NULL, 1);
            add_token(tok_severity, ustr_error,       0x03,       0, NULL, 0);
            add_token(tok_severity, ustr_warning,           0x02,       0, NULL, 0);
            add_token(tok_severity, ustr_informational,     0x01,       0, NULL, 0);
            add_token(tok_severity, ustr_success,           0x00,       0, NULL, 0);
            add_token(tok_facility, ustr_application, 0xFFF,            0, NULL, 0);
            add_token(tok_facility, ustr_system,            0x0FF,            0, NULL, 0);
            add_token(tok_language, ustr_english,           0x409,            437, ustr_msg00001, 0);
      }

      empty_unichar_stack();

      while(1)
      {
            if(want_line)
            {
                  /* Collect the line up to and including its newline */
                  while((ch = get_unichar()) != '\n')
                  {
                        if(ch == EOF)
                              xyyerror("Unexpected EOF\n");
                        push_unichar(ch);
                  }
                  newline();
                  push_unichar(ch);
                  push_unichar(0);
                  /* A line holding only '.' ends the message text */
                  if(!unistrcmp(ustr_dot1, get_unichar_stack()) || !unistrcmp(ustr_dot2, get_unichar_stack()))
                  {
                        want_line = 0;
                        /* Reset the codepage to our default after each message */
                        set_codepage(WMC_DEFAULT_CODEPAGE);
                        return tMSGEND;
                  }
                  mcy_lval.str = xunistrdup(get_unichar_stack());
                  return tLINE;
            }

            ch = get_unichar();

            if(ch == EOF)
                  return EOF;

            if(ch == '\n')
            {
                  newline();
                  if(want_nl)
                  {
                        want_nl = 0;
                        return tNL;
                  }
                  continue;
            }

            if(isisochar(ch))
            {
                  if(want_file)
                  {
                        /* At most 8 non-punctuation CH_SHORTNAME characters */
                        int n = 0;
                        while(n < 8 && isisochar(ch))
                        {
                              int t = char_table[ch];
                              if((t & CH_PUNCT) || !(t & CH_SHORTNAME))
                                    break;

                              push_unichar(ch);
                              n++;
                              ch = get_unichar();
                        }
                        unget_unichar(ch);
                        push_unichar(0);
                        want_file = 0;
                        mcy_lval.str = xunistrdup(get_unichar_stack());
                        return tFILE;
                  }

                  if(char_table[ch] & CH_IDENT)
                  {
                        token_t *tok;
                        while(isisochar(ch) && (char_table[ch] & (CH_IDENT|CH_NUMBER)))
                        {
                              push_unichar(ch);
                              ch = get_unichar();
                        }
                        unget_unichar(ch);
                        push_unichar(0);
                        if(!(tok = lookup_token(get_unichar_stack())))
                        {
                              mcy_lval.str = xunistrdup(get_unichar_stack());
                              return tIDENT;
                        }
                        switch(tok->type)
                        {
                        case tok_keyword:
                              return tok->token;

                        case tok_language:
                              /*
                               * NOTE(review): only the codepage number is
                               * assigned here; codepage_def is not refreshed
                               * as set_codepage() would do -- confirm that
                               * this is intended.
                               */
                              codepage = tok->codepage;
                              /* Fall through */
                        case tok_severity:
                        case tok_facility:
                              mcy_lval.tok = tok;
                              return tTOKEN;

                        default:
                              internal_error(__FILE__, __LINE__, "Invalid token type encountered\n");
                        }
                  }

                  if(isspace(ch))   /* Ignore space */
                        continue;

                  if(isdigit(ch))
                        return scan_number(ch);
            }

            switch(ch)
            {
            case ':':
            case '=':
            case '+':
            case '(':
            case ')':
                  return ch;
            case ';':
                  while(ch != '\n' && ch != EOF)
                  {
                        push_unichar(ch);
                        ch = get_unichar();
                  }
                  /*
                   * The loop also stops at EOF. Count a line only when a
                   * newline was really read, and always terminate the
                   * comment text with '\n' instead of storing EOF
                   * converted to a WCHAR.
                   */
                  if(ch == '\n')
                        newline();
                  push_unichar('\n');     /* Include the newline */
                  push_unichar(0);
                  mcy_lval.str = xunistrdup(get_unichar_stack());
                  return tCOMMENT;
            default:
                  xyyerror("Invalid character '%c' (0x%04x)\n", isisochar(ch) && isprint(ch) ? ch : '.', ch);
            }
      }
}

Generated by  Doxygen 1.6.0   Back to index