Logo Search packages:      
Sourcecode: kcc version File versions  Download package

kcc.c

/**** << kanji code converter >> ****
 *
 *  kcc.c
 *                                                  Aug 13 1992
 *                                      mod:        Jul  1 1994
 ************************************************** tonooka ***********/
/*
 *    Copyright (c) 1994 Yasuhiro Tonooka (tonooka@msi.co.jp)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
 * any later version.
 * 
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
#if !defined lint
/*
 * Identification strings kept in the object file; they are compiled out
 * when `lint' is defined.  The "@(#)" prefix looks like the SCCS what(1)
 * marker -- NOTE(review): confirm that is the intent.
 */
static char sccsid[] = "@(#)kcc.c 2.3 (Y.Tonooka) 7/1/94";
static char copyright[] = "@(#)Copyright (c) 1992 Yasuhiro Tonooka";
#endif

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>

/*-
 *  OPTIONS
 *    -O or -IO
 *          I specifies input code and O specifies output code.
 *              When input code is not specified, kcc guesses kanji
 *          code.
 *          I is one of:
 *                e     EUC (with 7-bit JIS)
 *                d     DEC (with 7-bit JIS)
 *                s     shift-JIS (with 7-bit JIS)
 *                j, 7 or k
 *                      7-bit JIS
 *                8     8-bit JIS
 *          O is one of:
 *                e     EUC
 *                d     DEC
 *                s     shift-JIS
 *                jXY, 7XY
 *                      7-bit JIS
 *                kXY   7-bit JIS using "ESC(I" for JIS katakana
 *                8XY   8-bit JIS
 *          where X is:
 *                B     "ESC$B" as kanji shift in
 *                @     "ESC$@" as kanji shift in
 *                +     "ESC&@ESC$B" as kanji shift in
 *          Y is:
 *                B     "ESC(B" as kanji shift out
 *                J     "ESC(J" as kanji shift out
 *                H     "ESC(H" as kanji shift out
 *
 *    -c      Check:  Only tells file type to stdout.  Overrides any
 *          other option except -x and -z.  In this mode, kcc
 *          reads all thru the input file to guess the file type.
 *
 *    -v    Verbose:  Prints to stderr which kanji code system
 *          kcc guessed the input code is in.
 *
 *    -x      Extended mode:  In this mode, recognized area of each
 *          code system is extended in guessing.
 *
 *      -z      Reduced mode:  In this mode, only zenkaku characters
 *          are recognized with EUC and shift-JIS in guessing.
 *          (Reduces ambiguity).
 *
 *      -n      Gaiji & undefined hankaku kana characters are replaced
 *          with padding character.
 *
 *    -h    Hiragana is used instead of katakana when converting
 *          hankaku kana to DEC zenkaku.
 *
 *    -b xxx      Specify hold buffer size to xxx (byte).
 */

/* Size of the line buffers declared in filter() and check(). */
#define LENLINE         (BUFSIZ * 4)
#define HOLDBUFSIZ      8192  /* default size of hold buffer */

/* Control bytes recognized in the input stream by guess() and out(). */
#define ESC       0x1b
#define SO        0x0e
#define SI        0x0f
#define SS2       0x8e  /* EUC single shift 2 */
#define SS3       0x8f  /* EUC single shift 3 */

#define ZENPAD          0x2222      /* padding char for zenkaku */
#define HANPAD          0x25  /* padding char for hankaku */

/* Local boolean type: plain int holding 0 or non-zero. */
typedef int bool;

/*
 * Map the final character of an escape sequence to one bit of an
 * unsigned long, with '@' as bit 0 (so 'B' is bit 2, 'J' is bit 10).
 * Binary `-' binds tighter than `<<', so this evaluates as
 * 1L << ((c) - '@').
 */
#define bitflag(c)      (1L << (c) - '@')

/*
 * Bits of the "code" word passed around by filter(), check(), guess(),
 * out() and showcode().
 */
#define NONASCII  0x01  /* non-ASCII character */
#define JIS       0x02  /* JIS */
#define ESCI            0x04  /* "ESC(I" */
#define ASSUME          0x08  /* assumed EUC (or DEC) */
#define EUC       0x10
#define DEC       0x20
#define SJIS            0x40
#define JIS8            0x80  /* 8-bit JIS */
/* All candidate 8-bit code systems; used as a mask when narrowing guesses. */
#define BIT8            (EUC | DEC | SJIS | JIS8)

enum mode {
    M_ASCII,
    M_KANJI,
    M_GAIJI,
    M_SO,               /* hankaku kana with SO */
    M_ESCI,             /* hankaku kana with "ESC(I" */
};

/* Program name (argv[0]); set at the top of main(), used by error(). */
char *progname;
/* Name of the input being processed; NULL when no file operand was given. */
char *filename = NULL;
/*
 * Escape sequences for 7-bit JIS output.  main() may overwrite them from
 * the "XY" suffix of the j/7/k/8 options; shiftin[7] is just large enough
 * for the longest form, "ESC&@ESC$B" plus the terminating NUL.
 */
char shiftin[7] = "\033$B";
char shiftout[4] = "\033(J";
/* Input code given on the command line; 0 means "guess". */
unsigned incode = 0;
/* Output code; defaults to 7-bit JIS. */
unsigned outcode = JIS;
bool verbose = 0;       /* -v */
bool docheck = 0;       /* -c */
bool extend = 0;        /* -x */
bool zenkaku = 0;       /* -z */
bool nogaiji = 0;       /* -n */

/* Kana tables defined elsewhere; -h switches kanatbl to hiragana. */
extern unsigned short katakana[];
extern unsigned short hiragana[];
unsigned short *kanatbl = katakana;

/* Print a formatted message on stderr and exit with status 1. */
void error(char *fmt, ...);

/**********************************************************************
 *                                                                    *
 *  Main Routines                                                     *
 *                                                                    *
 **********************************************************************/
/*---------------------------------------------------------------------
    NAME
      main
 ---------------------------------------------------------------------*/
/*
 * Program entry point.
 *
 * Parses the options, prepares the hold buffer and the output handlers
 * (unless only checking), then runs check() or filter() over every file
 * operand, or over stdin when there is none or the operand is "-".
 *
 * Returns 0 on success, 1 if any input could not be processed.  Fatal
 * conditions (bad options, output write error) exit through error().
 */
int main(c, v)
    register int c;
    register char *v[];
{
    register char *s;
    bool codeopt = 0;           /* code option is read */
    FILE *iop;
    int status;
    int n;                      /* signed value of the "-b" argument */
    bool outknown = 0;          /* dev/ino below describe stdout */
    dev_t dev = 0;
    ino_t ino = 0;
    struct stat sbuf;
    unsigned size = HOLDBUFSIZ;
    void filter();
    void check();
    void buffalloc();
    void setfunc();

    progname = *v++;
    /*
     * Process options here.
     */
    for (; --c; v++) {
        /*
         * With -, input is taken from stdin like cat(1).
         */
        if ((*v)[0] != '-' || (*v)[1] == '\0')
            break;
        /*
         * Size of hold buf can be changed with "-b size".
         */
        if (strcmp(*v, "-b") == 0) {
            if (--c == 0)
                error("%s option must have an argument", *v);
            v++;
            /*
             * Test the sign on a signed value; comparing the unsigned
             * `size' against zero can never catch a negative argument.
             */
            n = atoi(*v);
            if (n <= 0)
                error("bad buffer size");
            size = n;
            continue;
        }
        /*
         * Options:
         */
        for (s = *v + 1; *s; s++) {
            if (strchr("esdj7k8", *s)) {
                if (codeopt)
                    error("%s: duplicate code specification", *v);
                codeopt = 1;
                if (s[1] && strchr("esdj7k8", s[1])) {
                    /*
                     * Input code:  e, s, d, j, 7, k or 8.  It is only
                     * an input code when another code letter follows.
                     */
                    switch (*s++) {
                    case 'e':   /* EUC with JIS */
                        incode = EUC | NONASCII;
                        break;
                    case 's':   /* shift-JIS with JIS */
                        incode = SJIS | NONASCII;
                        break;
                    case 'd':   /* DEC with JIS */
                        incode = DEC | NONASCII;
                        break;
                    case 'j':   /* JIS */
                    case '7':   /* equivalent to 'j' */
                    case 'k':   /* JIS */
                        incode = JIS;
                        break;
                    case '8':   /* 8-bit JIS */
                        incode = JIS | JIS8 | NONASCII;
                        break;
                    }
                }
                /*
                 * Output code:  e, s, d, jXY, 7XY, kXY or 8XY.
                 * `continue' here resumes the option-letter loop.
                 */
                switch (*s) {
                case 'e':       /* EUC */
                    outcode = EUC;
                    continue;
                case 's':       /* shift-JIS */
                    outcode = SJIS;
                    continue;
                case 'd':       /* DEC */
                    outcode = DEC;
                    continue;
                case 'j':       /* 7-bit JIS using SO & SI */
                case '7':       /* equivalent to 'j' */
                    outcode = JIS;
                    break;
                case 'k':       /* 7-bit JIS using "ESC(I" */
                    outcode = JIS | ESCI;
                    break;
                case '8':       /* 8-bit JIS */
                    outcode = JIS | JIS8;
                    break;
                }
                /*
                 * Process "XY" part of options j, 7, k & 8.
                 */
                if ((s[1] == 'B' || s[1] == '@' || s[1] == '+') &&
                        (s[2] == 'B' || s[2] == 'J' || s[2] == 'H')) {
                    if (s[1] == '+')
                        sprintf(shiftin, "\033&@\033$B");
                    else
                        sprintf(shiftin, "\033$%c", s[1]);
                    sprintf(shiftout, "\033(%c", s[2]);
                    s += 2;
                }
                continue;
            }
            /*
             * Other one letter options:
             */
            switch (*s) {
            case 'c':           /* check */
                docheck = 1;
                break;
            case 'h':           /* hiragana for hankaku->DEC */
                kanatbl = hiragana;
                break;
            case 'n':           /* no gaiji */
                nogaiji = 1;
                break;
            case 'v':           /* verbose */
                verbose = 1;
                break;
            case 'x':           /* extended mode */
                extend = 1;
                break;
            case 'z':           /* reduced mode */
                zenkaku = 1;
                break;
            default:
                error("-%c: bad option", *s);
            }
        }
    }
    if (extend && zenkaku)
        error("-x and -z can't go together");
    if (!docheck) {
        buffalloc(size);        /* allocate hold buf */
        setfunc();
    }
    /*
     * Get some info on output file.  Device and inode are only recorded
     * (and later compared) when stdout is neither a character nor a
     * block device; `outknown' says whether they are valid.
     */
    if (fstat(fileno(stdout), &sbuf) == 0) {
        sbuf.st_mode &= S_IFMT;
        if (sbuf.st_mode != S_IFCHR && sbuf.st_mode != S_IFBLK) {
            dev = sbuf.st_dev;
            ino = sbuf.st_ino;
            outknown = 1;
        }
    }
    /*
     * Main loop.  `continue' in a do-while jumps to the loop condition,
     * so a failed operand simply advances to the next one.
     */
    status = 0;
    do {
        if (c == 0 || strcmp(*v, "-") == 0) {
            /*
             * Stdin:  If tty and at EOF, clear EOF.
             */
            if (isatty(fileno(stdin)) && feof(stdin))
                clearerr(stdin);
            iop = stdin;
        } else {
            /*
             * Open a file.
             */
            if ((iop = fopen(*v, "r")) == NULL) {
                perror(*v);
                status |= 1;
                continue;
            }
        }
        if (c)
            filename = *v;
        if (fstat(fileno(iop), &sbuf) == 0) {
            /*
             * Get some info on input file, and see if it is a
             * directory.
             */
            if ((sbuf.st_mode & S_IFMT) == S_IFDIR) {
                fprintf(stderr,
                        "%s: read error on %s: Is a directory\n",
                        progname, c ? *v : "standard input");
                if (iop != stdin)
                    fclose(iop);
                status |= 1;
                continue;
            }
            /*
             * Compare the info of input with that of output, and see
             * if they are identical.
             */
            if (outknown && (sbuf.st_mode & S_IFMT) == S_IFREG &&
                    sbuf.st_dev == dev && sbuf.st_ino == ino) {
                fprintf(stderr, "%s: input %s is output\n", progname,
                        c ? *v : "-");
                if (iop != stdin)
                    fclose(iop);
                status |= 1;
                continue;
            }
        }
        /*
         * Do the job here!
         */
        if (docheck)
            check(iop);
        else
            filter(iop);
        if (iop != stdin)
            fclose(iop);
    } while (v++, --c > 0);
    /*
     * Flush first so that errors on still-buffered output are seen.
     */
    if (fflush(stdout) == EOF || ferror(stdout))
        error("output write error");
    return (status);
}

/*---------------------------------------------------------------------
    NAME
      error - print formatted error message on stderr and die
 ---------------------------------------------------------------------*/
#include <stdarg.h>

/*
 * Report a fatal error: writes "<progname>: <formatted message>\n" to
 * stderr and terminates the process with exit status 1.  Never returns.
 */
void error(char *fmt, ...)
{
    va_list ap;

    fprintf(stderr, "%s: ", progname);

    va_start(ap, fmt);
    vfprintf(stderr, fmt, ap);
    va_end(ap);

    fputc('\n', stderr);
    exit(1);
}

/**********************************************************************
 *                                                                    *
 *  Filter                                                            *
 *                                                                    *
 **********************************************************************/
/*
 * Shift states.  filter() resets all three to M_ASCII for each input;
 * check() resets gsmode only.
 */
enum mode gsmode;       /* guess:  M_ASCII M_KANJI M_SO */
enum mode inmode;       /* input:  M_ASCII M_KANJI M_GAIJI
                         * M_SO M_ESCI */
enum mode outmode;            /* output: M_ASCII M_KANJI M_GAIJI
                         * M_SO M_ESCI */

/*
 * Escape sequences seen in the input, one bit per final character (see
 * bitflag()).  Set by guess() and out(), reported by showjis(), and
 * cleared at the start of filter() and check().
 */
unsigned long insi;           /* JIS shift-in sequence flag */
unsigned long inso;           /* JIS shift-out sequence flag
                         * including "ESC(I" */
unsigned long innj;           /* JIS 1990 sequence flag */
unsigned long ingj;           /* JIS 1990 aux flag */

/*---------------------------------------------------------------------
    NAME
      filter - filtering routine
 ---------------------------------------------------------------------*/
/*
 * Convert one input stream to the output code.
 *
 * Reads fp a line at a time.  While the input code is still unknown or
 * ambiguous between EUC/DEC and shift-JIS, each line is passed to
 * guess(); ambiguous lines are saved in the hold buffer until a later
 * line settles the question or the buffer fills up, at which point
 * EUC/DEC is assumed.  Every line is finally written through out().
 * With -v the detected code is reported on stderr.
 */
void filter(fp)
    FILE *fp;
{
    register bool hold;
    register unsigned code, c;
    register int len;
    char str[LENLINE];
    int getstr();
    unsigned guess();
    bool append();
    void flush();
    unsigned out();
    void showcode();

    code = incode ? incode : extend ? BIT8 : BIT8 & ~DEC;
    gsmode = inmode = outmode = M_ASCII;
    insi = inso = innj = ingj = 0;
    hold = 0;
    /*
     * getstr() has no prototype and takes an int length, so the size_t
     * result of sizeof must be converted explicitly.
     */
    while ((len = getstr(str, (int) sizeof str, fp)) != 0) {
        if ((!(code & NONASCII) && (code & BIT8)) ||
                ((code & (EUC | DEC)) && (code & SJIS) &&
                 !(code & ASSUME))) {
            /*
             * So far, no kanji has been seen, or ambiguous.
             */
            c = guess(str, len);
            code |= c & (JIS | NONASCII);
            code &= c | ~BIT8;      /* drop candidates guess() ruled out */
            if ((code & NONASCII) && (code & (EUC | DEC)) &&
                    (code & SJIS)) {
                /*
                 * If ambiguous, store the line in hold buffer.
                 */
                if (append(str, len)) {
                    hold = 1;
                    continue;
                }
                /*
                 * When buffer is full, assume EUC/DEC.
                 */
                code |= ASSUME;
            }
        }
        if (hold) {
            /*
             * Flush hold buffer.
             */
            flush(code);
            hold = 0;
        }
        c = out(str, len, code);
        code |= c & JIS;
        code &= c | ~BIT8;
    }
    if (hold) {
        /*
         * Assume EUC.
         */
        code |= ASSUME;
        flush(code);
    }
    if (verbose)
        showcode(code, stderr);
}

/*---------------------------------------------------------------------
    NAME
      check
 ---------------------------------------------------------------------*/
/*
 * Check mode (-c): guess the code system of fp and print the verdict on
 * stdout without converting anything.
 *
 * Every line is fed to guess() and the candidate set is narrowed.
 * Reading stops early once non-ASCII data has been seen and no 8-bit
 * candidate is left.
 */
void check(fp)
    FILE *fp;
{
    register unsigned code, c;
    register int len;
    char str[LENLINE];
    int getstr();
    void showcode();
    unsigned guess();

    code = extend ? BIT8 : BIT8 & ~DEC;
    gsmode = M_ASCII;
    insi = inso = innj = ingj = 0;
    /*
     * getstr() has no prototype and takes an int length, so the size_t
     * result of sizeof must be converted explicitly.
     */
    while ((len = getstr(str, (int) sizeof str, fp)) != 0) {
        c = guess(str, len);
        code |= c & (JIS | NONASCII);
        code &= c | ~BIT8;
        if ((code & NONASCII) && !(code & BIT8))
            break;
    }
    showcode(code, stdout);
}

/*---------------------------------------------------------------------
    NAME
      showcode
 ---------------------------------------------------------------------*/
/*
 * Print a one-line description of the code system encoded in `code' on
 * fp.  When a file name is known it is printed first, followed by a tab
 * (two tabs when the name part is shorter than 9 characters).
 */
void showcode(code, fp)
    register unsigned code;
    register FILE *fp;
{
    char *name;
    void showjis();

    if (filename && fprintf(fp, "%s:\t", filename) < 9)
        putc('\t', fp);

    /*
     * 7-bit JIS / ASCII.
     */
    if (!(code & NONASCII)) {
        if (!(code & JIS)) {
            fputs("ASCII\n", fp);
            return;
        }
        showjis('7', fp);
        putc('\n', fp);
        return;
    }

    if (code & (EUC | DEC)) {
        if (!(code & EUC))
            name = "DEC";
        else if (code & DEC)
            name = "EUC/DEC";
        else
            name = "EUC";

        if (!(code & SJIS)) {
            /*
             * EUC/DEC.
             */
            fputs(name, fp);
        } else {
            /*
             * Ambiguous.
             */
            fprintf(fp, "ambiguous (%s", name);
            if (code & JIS8) {
                if (code & JIS)
                    fputs(" with 7-bit JIS, or ", fp);
                else
                    fputs(", shift-JIS or ", fp);
                showjis('8', fp);
                if (code & ASSUME)
                    fprintf(fp, "; assumed %s",
                            (code & JIS) ? "the former" : name);
                fputs(")\n", fp);
                return;
            }
            fputs(" or shift-JIS", fp);
            if (code & ASSUME)
                fprintf(fp, "; assumed %s", name);
            fputs(")", fp);
        }
    } else if (code & JIS8) {
        /*
         * 8-bit JIS / shift-JIS or 8-bit JIS.
         */
        if (!(code & JIS))
            fputs("shift-JIS or ", fp);
        showjis('8', fp);
        putc('\n', fp);
        return;
    } else if (code & SJIS) {
        /*
         * Shift-JIS.
         */
        fputs("shift-JIS", fp);
    } else {
        /*
         * Non-ASCII detected but neither EUC/DEC nor SJIS.
         */
        fputs("data\n", fp);
        return;
    }

    /*
     * Common tail for EUC/DEC and shift-JIS: mention any 7-bit JIS
     * escape sequences that were also seen.
     */
    if (code & JIS) {
        fputs(" with ", fp);
        showjis('7', fp);
    }
    putc('\n', fp);
}

/*---------------------------------------------------------------------
    NAME
      showjis
 ---------------------------------------------------------------------*/
/*
 * Print "<bit>-bit JIS [...]" on fp, where the brackets list every
 * escape sequence recorded in insi, innj, inso and ingj (in that
 * order), separated by ", ".
 */
void showjis(bit, fp)
    int bit;                  /* 8-bit or 7-bit */
    FILE *fp;
{
    char *prefix[4];
    unsigned long seen[4];
    bool sep;
    int k;
    bool showesc();

    prefix[0] = "ESC$";
    seen[0] = insi;
    prefix[1] = "ESC&@ESC$";
    seen[1] = innj;
    prefix[2] = "ESC(";
    seen[2] = inso;
    prefix[3] = "ESC$(";
    seen[3] = ingj;

    fprintf(fp, "%c-bit JIS [", bit);
    sep = 0;
    for (k = 0; k < 4; k++)
        sep = showesc(prefix[k], seen[k], sep, fp);
    putc(']', fp);
}

/*---------------------------------------------------------------------
    NAME
      showesc
 ---------------------------------------------------------------------*/
/*
 * For every bit set in mask, print str followed by the character that
 * bit stands for (bit 0 is '@', bit 1 is 'A', and so on).  Items are
 * separated by ", "; `comma' tells whether something was already
 * printed before this call.  Returns the updated `comma' state.
 */
bool showesc(str, mask, comma, fp)
    char *str;
    register unsigned long mask;
    bool comma;
    FILE *fp;
{
    register unsigned long bit = 1;
    register int final = '@';

    while (bit != 0) {
        if ((mask & bit) != 0) {
            if (comma)
                fputs(", ", fp);
            else
                comma = 1;
            fputs(str, fp);
            putc(final, fp);
        }
        bit <<= 1;
        final++;
    }
    return (comma);
}

/*---------------------------------------------------------------------
    NAME
      getstr
 ---------------------------------------------------------------------*/
/*
 * Read up to n - 1 bytes from fp into str, stopping after a newline
 * (which is stored) or at end of file.  The result is NOT
 * NUL-terminated.  Returns the number of bytes stored; 0 means nothing
 * could be read.
 */
int getstr(str, n, fp)
    char *str;
    register int n;
    FILE *fp;
{
    register int ch;
    register int count = 0;

    while (--n > 0) {
        ch = getc(fp);
        if (ch == EOF)
            break;
        str[count++] = ch;
        if (ch == '\n')
            break;
    }
    return (count);
}

/**********************************************************************
 *                                                                    *
 *  Hold Buffer Operations                                            *
 *                                                                    *
 **********************************************************************/
/*
 * Hold buffer: holdbuf is the start of the allocation made by
 * buffalloc(), bufend is one past its last byte, and bufp is the next
 * free position (advanced by append(), reset by flush()).
 */
char *holdbuf, *bufend;
char *bufp;

/*---------------------------------------------------------------------
    NAME
      buffalloc
 ---------------------------------------------------------------------*/
/*
 * Allocate the hold buffer of len bytes and make it empty.  Exits via
 * error() when memory cannot be obtained.
 *
 * malloc() is taken from <stdlib.h>; its result is deliberately not
 * cast, so that a missing declaration is diagnosed instead of silently
 * treating the return value as int.
 */
void buffalloc(len)
    unsigned len;
{
    holdbuf = malloc(len);
    if (holdbuf == NULL)
        error("out of memory");
    bufp = holdbuf;
    bufend = holdbuf + len;
}

/*---------------------------------------------------------------------
    NAME
      append
 ---------------------------------------------------------------------*/
/*
 * Append len bytes starting at s to the hold buffer.
 *
 * Returns 1 on success, or 0 (storing nothing) when the bytes do not
 * fit.  The free space is compared as a byte count, so no pointer
 * beyond the end of the buffer is ever formed; a negative len is
 * rejected as well.
 */
bool append(s, len)
    register char *s;
    register int len;
{
    if (len < 0 || len > bufend - bufp)
        return (0);
    memcpy(bufp, s, (size_t) len);
    bufp += len;
    return (1);
}

/*---------------------------------------------------------------------
    NAME
      flush
 ---------------------------------------------------------------------*/
/*
 * Write everything held in the hold buffer through out(), interpreting
 * it as `code', then mark the buffer empty.
 */
void flush(code)
    unsigned code;
{
    unsigned out();

    /*
     * out() has no prototype and expects an int length; the pointer
     * difference (ptrdiff_t) must be converted explicitly.
     */
    out(holdbuf, (int) (bufp - holdbuf), code);
    bufp = holdbuf;
}

/**********************************************************************
 *                                                                    *
 *  General                                                           *
 *                                                                    *
 **********************************************************************/
/*---------------------------------------------------------------------
    NAME
      compare
 ---------------------------------------------------------------------*/
/*
 * Prefix test: returns 1 when str begins with the NUL-terminated
 * string s, 0 otherwise.  An empty s matches anything.
 */
bool compare(s, str)
    register char *s, *str;
{
    for (; *s != '\0'; s++, str++)
        if (*s != *str)
            return (0);
    return (1);
}

/**********************************************************************
 *                                                                    *
 *  Guessing                                                          *
 *                                                                    *
 **********************************************************************/
/*---------------------------------------------------------------------
    NAME
      guess - distinguish code system
 ---------------------------------------------------------------------*/
/*
 * Examine len bytes at str and report which code systems they are
 * consistent with.
 *
 * Returns a bit set built from JIS (a JIS escape sequence or SO was
 * seen), NONASCII (a byte with the high bit set was seen outside a JIS
 * shift state) and EUC / DEC / SJIS / JIS8 for each 8-bit code system
 * that is still possible after this buffer.
 *
 * Side effects: updates gsmode and records recognized escape sequences
 * in insi, inso, innj and ingj.
 *
 * euc, dec and sjis are small state machines: 1 = at a character
 * boundary, 2 = inside a multi-byte character, 0 = ruled out.
 *
 * NOTE(review): compare() and the s[2] accesses may look at bytes
 * beyond str + len when an escape sequence is cut off at the end of the
 * buffer (getstr() does not NUL-terminate) -- confirm whether that can
 * matter.
 */
unsigned guess(str, len)
    char *str;
    int len;
{
    register char *s;
    register int euc, sjis, dec;
    bool jis8;
    register unsigned code;
    register int i;
    enum mode old;

    euc = sjis = 1;
    dec = extend ? 1 : 0;
    jis8 = 1;
    code = 0;
    /* i is the number of bytes consumed by the current iteration. */
    for (s = str; s < str + len; s += i) {
      i = 1;
      /*
       * JIS shift handling.  In this switch, `continue' skips the 8-bit
       * classification below for the current byte(s); `break' falls
       * into it.
       */
      switch (*(u_char *) s) {
      case ESC:
          if (gsmode == M_SO)
            continue;
          old = gsmode;
          if (compare("$B", s + 1) || compare("$@", s + 1)) {
            gsmode = M_KANJI; /* kanji */
            insi |= bitflag(((u_char *) s)[2]);
            i = 3;
          } else if (compare("&@\033$B", s + 1)) {
            gsmode = M_KANJI; /* kanji 1990 */
            innj |= bitflag('B');
            i = 6;
          } else if (compare("(B", s + 1) ||
                compare("(J", s + 1) || compare("(H", s + 1)) {
            gsmode = M_ASCII; /* kanji end */
            inso |= bitflag(((u_char *) s)[2]);
            i = 3;
          } else if (compare("(I", s + 1)) {
            gsmode = M_KANJI; /* "ESC(I" */
            inso |= bitflag('I');
            i = 3;
          } else if (compare("$(D", s + 1)) {
            gsmode = M_KANJI; /* gaiji */
            ingj |= bitflag('D');
            i = 4;
          } else
            break;      /* unrecognized sequence: classify the ESC byte */
          code |= JIS;
          /* Sequence seen while already shifted: nothing to classify. */
          if (old != M_ASCII)
            continue;
          break;
      case SO:
          if (gsmode == M_ASCII) {
            code |= JIS;
            gsmode = M_SO;
            break;
          }
          continue;
      case SI:
          if (gsmode == M_SO) {
            gsmode = M_ASCII;
            continue;
          }
          /* fall thru */
      default:
          /* Bytes inside a JIS shift state are not classified. */
          if (gsmode != M_ASCII)
            continue;
          break;
      }
      if (*(u_char *) s & 0x80)
          code |= NONASCII;
      switch (euc) {
      case 1:
          /*
           * EUC first byte.
           */
          if (*(u_char *) s & 0x80) {
            if (0xa0 < *(u_char *) s && *(u_char *) s < 0xff ||
                  !zenkaku && *(u_char *) s == SS2) {
                euc = 2;
                break;
            }
            if (extend)
                if (*(u_char *) s == SS3) {
                  euc = 2;
                  break;
                } else if (*(u_char *) s < 0xa0)
                  break;
            euc = 0;    /* not EUC */
          }
          break;
      case 2:
          /*
           * EUC second byte or third byte of CS3.
           */
          if (((u_char *) s)[-1] == SS2) {
            if (0xa0 < *(u_char *) s &&
                  *(u_char *) s < (extend ? 0xff : 0xe0)) {
                euc = 1;      /* hankaku kana */
                break;
            }
          } else
            if (0xa0 < *(u_char *) s && *(u_char *) s < 0xff) {
                /* After SS3 one more byte is expected, so stay in state 2. */
                if (((u_char *) s)[-1] != SS3)
                  euc = 1;/* zenkaku */
                break;
            }
          euc = 0;            /* not EUC */
          break;
      }
      /* DEC is only considered in extended mode (-x). */
      if (extend)
          switch (dec) {
          case 1:
            /*
             * DEC first byte.
             */
            if (*(u_char *) s & 0x80) {
                if (0xa0 < *(u_char *) s && *(u_char *) s < 0xff) {
                  dec = 2;
                  break;
                } else if (*(u_char *) s < 0xa0)
                  break;
                dec = 0;      /* not DEC */
            }
            break;
          case 2:
            /*
             * DEC second byte.
             */
            if (0x20 < (*(u_char *) s & 0x7f) &&
                  (*(u_char *) s & 0x7f) < 0x7f) {
                dec = 1;
            } else
                dec = 0;      /* not DEC */
            break;
          }
      switch (sjis) {
      case 1:
          /*
           * shift-JIS first byte.
           */
          if (*(u_char *) s & 0x80) {
            if (0xa0 < *(u_char *) s && *(u_char *) s < 0xe0) {
                if (!zenkaku)
                  break;      /* hankaku */
            } else if (*(u_char *) s != 0x80 &&
                  *(u_char *) s != 0xa0 &&
                  *(u_char *) s <= (extend ? 0xfc : 0xef)) {
                sjis = 2;     /* zenkaku */
                jis8 = 0;     /* a two-byte lead rules out 8-bit JIS */
                break;
            }
            sjis = 0;   /* not SJIS */
          }
          break;
      case 2:
          /*
           * shift-JIS second byte.
           */
          if (0x40 <= *(u_char *) s && *(u_char *) s != 0x7f &&
                *(u_char *) s <= 0xfc)
            sjis = 1;
          else
            sjis = 0;   /* not SJIS */
          break;
      }
    }
    /*
     * A code system stays a candidate only if the buffer ended at one of
     * its character boundaries (state 1).
     */
    if (euc == 1)
      code |= EUC;
    if (dec == 1)
      code |= DEC;
    if (sjis == 1)
      code |= zenkaku || !jis8 ? SJIS : SJIS | JIS8;
    return (code);
}

/**********************************************************************
 *                                                                    *
 *  Output Routines                                                   *
 *                                                                    *
 **********************************************************************/
/*
 * Output handlers for the selected output code, installed by setfunc()
 * and called from out() and outsjis().
 */
void (*outascii)(), (*outkanji)(), (*outgaiji)(), (*outkana)();

/*---------------------------------------------------------------------
    NAME
      out
 ---------------------------------------------------------------------*/
/*
 * Convert len bytes at str, interpreted according to `code', and emit
 * them through the outascii / outkanji / outgaiji / outkana handlers.
 *
 * JIS escape sequences and SO/SI switch inmode and are consumed; they
 * are also recorded in insi, inso, innj and ingj.  Returns `code',
 * possibly with JIS added and with the EUC, DEC or JIS8 bit cleared
 * when the data settles an ambiguity.
 *
 * NOTE(review): the s[1] / s[2] accesses and the compare() calls do not
 * check against str + len, so a multi-byte character or escape sequence
 * cut off at the end of the buffer reads beyond it -- confirm whether
 * callers guarantee this cannot happen.
 */
unsigned out(str, len, code)
    char *str;
    int len;
    register unsigned code;
{
    register char *s;
    register int i;
    void outsjis();

    /* i is the number of input bytes consumed by the current iteration. */
    for (s = str; s < str + len; s += i) {
      i = 1;
      /*
       * Shift handling: `continue' means the sequence was consumed,
       * `break' lets the byte be treated as data below.
       */
      switch (*(u_char *) s) {
      case ESC:
          if (inmode == M_SO)
            break;
          if (compare("$B", s + 1) || compare("$@", s + 1)) {
            inmode = M_KANJI; /* kanji */
            insi |= bitflag(((u_char *) s)[2]);
            i = 3;
          } else if (compare("&@\033$B", s + 1)) {
            inmode = M_KANJI; /* kanji 1990 */
            innj |= bitflag('B');
            i = 6;
          } else if (compare("(B", s + 1) || compare("(J", s + 1) ||
                compare("(H", s + 1)) {
            inmode = M_ASCII; /* kanji end */
            inso |= bitflag(((u_char *) s)[2]);
            i = 3;
          } else if (compare("(I", s + 1)) {
            inmode = M_ESCI;  /* "ESC(I" */
            inso |= bitflag('I');
            i = 3;
          } else if (compare("$(D", s + 1)) {
            inmode = M_GAIJI; /* gaiji */
            ingj |= bitflag('D');
            i = 4;
          } else
            break;
          code |= JIS;
          continue;
      case SO:
          if (inmode == M_ASCII) {
            code |= JIS;
            inmode = M_SO;
            continue;
          }
          break;
      case SI:
          if (inmode == M_SO) {
            inmode = M_ASCII;
            continue;
          }
          break;
      }
      if (inmode != M_ASCII) {
          /*
           * Inside a JIS shift state: printable 7-bit bytes are kanji,
           * gaiji or kana data.  Anything else falls through to the
           * outascii call at the bottom of the loop.
           */
          if (0x20 < ((u_char *) s)[0] && ((u_char *) s)[0] < 0x7f)
            switch (inmode) {
            case M_KANJI:
                (*outkanji)(((u_char *) s)[0],
                      ((u_char *) s)[1] & 0x7f);
                i = 2;
                continue;
            case M_GAIJI:
                (*outgaiji)(((u_char *) s)[0],
                      ((u_char *) s)[1] & 0x7f);
                i = 2;
                continue;
            case M_SO:
            case M_ESCI:
                (*outkana)(((u_char *) s)[0]);
                continue;
            }
      } else if (((u_char *) s)[0] & 0x80)
          if (code & (EUC | DEC)) {
            /*
             * EUC or DEC:
             */
            if (0xa0 < ((u_char *) s)[0] &&
                  ((u_char *) s)[0] < 0xff) {
                if (!(((u_char *) s)[1] & 0x80) && code & DEC) {
                  /*
                   * DEC gaiji:
                   */
                  code &= ~EUC;     /* definitely DEC  */
                  (*outgaiji)(((u_char *) s)[0] & 0x7f,
                        ((u_char *) s)[1]);
                } else
                  /*
                   * EUC code set 1 (kanji), DEC kanji:
                   */
                  (*outkanji)(((u_char *) s)[0] & 0x7f,
                        ((u_char *) s)[1] & 0x7f);
            } else if (((u_char *) s)[0] == SS2 && code & EUC &&
                  0xa0 < ((u_char *) s)[1] &&
                  ((u_char *) s)[1] < 0xff) {
                /*
                 * EUC code set 2 (hankaku kana):
                 */
                code &= ~DEC; /* probably EUC */
                (*outkana)(((u_char *) s)[1] & 0x7f);
            } else if (((u_char *) s)[0] == SS3 && code & EUC &&
                  0xa0 < ((u_char *) s)[1] &&
                  ((u_char *) s)[1] < 0xff &&
                  0xa0 < ((u_char *) s)[2] &&
                  ((u_char *) s)[2] < 0xff) {
                /*
                 * EUC code set 3 (gaiji):
                 */
                code &= ~DEC; /* probably EUC */
                (*outgaiji)(((u_char *) s)[1] & 0x7f,
                      ((u_char *) s)[2] & 0x7f);
                i = 3;
                continue;
            } else {
                /*
                 * Control character (C1):
                 * copied through unchanged, except that nothing is
                 * written for shift-JIS output, nor for SS2/SS3 when
                 * the output is EUC.
                 */
                if (outcode != SJIS && (outcode != EUC || 
                      ((u_char *) s)[0] != SS2 &&
                      ((u_char *) s)[0] != SS3))
                  putchar(((u_char *) s)[0]);
                continue;
            }
            /* Two-byte cases above that did not `continue' end up here. */
            i = 2;
            continue;
          } else if (code & (SJIS | JIS8)) {
            /*
             * Shift-JIS or JIS8:
             */
            if (!(code & SJIS) || 0xa0 < ((u_char *) s)[0] &&
                  ((u_char *) s)[0] < 0xe0)
                /*
                 * Hankaku kana:
                 */
                (*outkana)(((u_char *) s)[0] & 0x7f);
            else {
                /*
                 * Shift-JIS kanji:
                 */
                code &= ~JIS8;      /* definitely shift-JIS */
                outsjis(((u_char *) s)[0], ((u_char *) s)[1]);
                i = 2;
            }
            continue;
          }
      /* Everything not handled above is passed on as a single byte. */
      (*outascii)(((u_char *) s)[0]);
    }
    return (code);
}

/*---------------------------------------------------------------------
    NAME
      outsjis
 ---------------------------------------------------------------------*/
/*
 * Convert one shift-JIS two-byte character (lead byte c1, trail byte
 * c2) to a pair of 7-bit codes and emit it: through outkanji when
 * c1 <= 0xef, through outgaiji otherwise.
 */
void outsjis(c1, c2)
    register int c1, c2;
{
    register int hi, lo;

    /* First output byte: depends on which lead-byte range c1 is in. */
    if (c1 <= 0x9f)
        hi = c1 * 2 - 0x00e1;
    else if (c1 < 0xf0)
        hi = c1 * 2 - 0x0161;
    else
        hi = c1 * 2 - 0x01bf;

    /* Trail bytes from 0x9f upwards select the following first byte. */
    if (c2 >= 0x9f) {
        lo = c2 - 0x7e;
        hi++;
    } else if (c2 > 0x7f)
        lo = c2 - 0x20;
    else
        lo = c2 - 0x1f;

    if (c1 <= 0xef)
        (*outkanji)(hi, lo);
    else
        (*outgaiji)(hi, lo);
}

/**********************************************************************
 *                                                                    *
 *  Conversion Routines                                               *
 *                                                                    *
 **********************************************************************/
/*
 * Conversion routines, one family per output code system; setfunc()
 * installs the family matching outcode into outascii, outkanji,
 * outgaiji and outkana.
 */
void outchar();
void jisascii(), jiskanji(), jisgaiji();
void jiskana(), jiskanak(), jiskana8();
void euckanji(), eucgaiji(), euckana();
void sjiskanji(), sjisgaiji(), sjiskana();
void decascii(), deckanji(), decgaiji(), deckana();

int lastkana = 0;       /* last hankaku kana for DEC */

/*---------------------------------------------------------------------
    NAME
      setfunc
    DESCRIPTION
      Installs the output routines (outascii, outkanji, outgaiji and
      outkana) that correspond to the current value of outcode.  Any
      value other than EUC, DEC or SJIS gets the JIS routines; for
      those, outkana is assigned only for JIS, JIS | ESCI and
      JIS | JIS8 and is left untouched otherwise.
 ---------------------------------------------------------------------*/
void setfunc()
{
    if (outcode == EUC) {
        outascii = outchar;
        outkanji = euckanji;
        outgaiji = eucgaiji;
        outkana = euckana;
        return;
    }
    if (outcode == DEC) {
        outascii = decascii;
        outkanji = deckanji;
        outgaiji = decgaiji;
        outkana = deckana;
        return;
    }
    if (outcode == SJIS) {
        outascii = outchar;
        outkanji = sjiskanji;
        outgaiji = sjisgaiji;
        outkana = sjiskana;
        return;
    }

    /*
     * JIS family: the three variants share the ASCII, kanji and gaiji
     * routines and differ only in how hankaku kana is written.
     */
    outascii = jisascii;
    outkanji = jiskanji;
    outgaiji = jisgaiji;
    if (outcode == JIS) {
        /* mode:  M_ASCII M_KANJI M_GAIJI M_SO */
        outkana = jiskana;
    } else if (outcode == (JIS | ESCI)) {
        /* mode:  M_ASCII M_KANJI M_GAIJI M_ESCI */
        outkana = jiskanak;
    } else if (outcode == (JIS | JIS8)) {
        /* mode:  M_ASCII M_KANJI M_GAIJI */
        outkana = jiskana8;
    }
}

/*---------------------------------------------------------------------
    NAME
      outchar
    DESCRIPTION
      Writes the character c to standard output unchanged.
 ---------------------------------------------------------------------*/
void outchar(c)
    register int c;
{
    putc(c, stdout);
}

/*---------------------------------------------------------------------
    NAME
      jisascii
    DESCRIPTION
      Writes the character c in JIS output.  When the output is not
      already in M_ASCII mode it first leaves the current mode: SI is
      written when in M_SO mode, the shiftout string in every other
      mode.  outmode is M_ASCII afterwards.
 ---------------------------------------------------------------------*/
void jisascii(c)
    register int c;
{
    if (outmode != M_ASCII) {
        if (outmode == M_SO)
            putchar(SI);
        else
            fputs(shiftout, stdout);
        outmode = M_ASCII;
    }
    putchar(c);
}

/*---------------------------------------------------------------------
    NAME
      jiskanji
    DESCRIPTION
      Writes the two bytes c1, c2 in JIS output.  If the output is not
      in M_KANJI mode the shiftin string is written first (preceded by
      SI when leaving M_SO mode) and outmode becomes M_KANJI.
 ---------------------------------------------------------------------*/
void jiskanji(c1, c2)
    register int c1, c2;
{
    int enter = (outmode != M_KANJI);

    if (enter && outmode == M_SO)
        putchar(SI);
    if (enter) {
        fputs(shiftin, stdout);
        outmode = M_KANJI;
    }
    putchar(c1);
    putchar(c2);
}

/*---------------------------------------------------------------------
    NAME
      jisgaiji
    DESCRIPTION
      Writes the gaiji bytes c1, c2 in JIS output.  When nogaiji is set
      the character is replaced by ZENPAD, written through jiskanji.
      Otherwise the output is switched to M_GAIJI mode with the
      sequence ESC $ ( D (preceded by SI when leaving M_SO mode) and
      the two bytes are written as given.
 ---------------------------------------------------------------------*/
void jisgaiji(c1, c2)
    register int c1, c2;
{
    if (nogaiji) {
        /* Gaiji output is disabled: substitute the padding character. */
        jiskanji(ZENPAD >> 8, ZENPAD & 0xff);
        return;
    }

    if (outmode != M_GAIJI) {
        if (outmode == M_SO)
            putchar(SI);
        fputs("\033$(D", stdout);
        outmode = M_GAIJI;
    }
    putchar(c1);
    putchar(c2);
}

/*---------------------------------------------------------------------
    NAME
      jiskana
    DESCRIPTION
      Writes the hankaku kana c in JIS output using M_SO mode.  When
      the output is not yet in M_SO mode, the shiftout string is
      written unless the mode is M_ASCII, then SO follows.  With
      nogaiji set, a code outside 0x21..0x5f is replaced by HANPAD.
 ---------------------------------------------------------------------*/
void jiskana(c)
    register int c;
{
    int kana = c;

    if (outmode != M_SO) {
        if (outmode != M_ASCII)
            fputs(shiftout, stdout);
        putchar(SO);
        outmode = M_SO;
    }

    if (nogaiji && !(0x20 < c && c < 0x60))
        kana = HANPAD;
    putchar(kana);
}

/*---------------------------------------------------------------------
    NAME
      jiskanak
    DESCRIPTION
      Writes the hankaku kana c in JIS output using M_ESCI mode, which
      is entered with the sequence ESC ( I.  With nogaiji set, a code
      outside 0x21..0x5f is replaced by HANPAD.
 ---------------------------------------------------------------------*/
void jiskanak(c)
    register int c;
{
    int kana = c;

    if (outmode != M_ESCI) {
        fputs("\033(I", stdout);
        outmode = M_ESCI;
    }

    if (nogaiji && !(0x20 < c && c < 0x60))
        kana = HANPAD;
    putchar(kana);
}

/*---------------------------------------------------------------------
    NAME
      jiskana8
    DESCRIPTION
      Writes the hankaku kana c in JIS output as a single byte with
      bit 7 set.  The output is returned to M_ASCII mode first (the
      shiftout string is written if it was in any other mode).  With
      nogaiji set, a code outside 0x21..0x5f is replaced by HANPAD.
 ---------------------------------------------------------------------*/
void jiskana8(c)
    register int c;
{
    int kana = c;

    if (outmode != M_ASCII) {
        fputs(shiftout, stdout);
        outmode = M_ASCII;
    }

    if (nogaiji && !(0x20 < c && c < 0x60))
        kana = HANPAD;
    putchar(kana | 0x80);
}

/*---------------------------------------------------------------------
    NAME
      euckanji
    DESCRIPTION
      Writes the bytes c1 and c2, each with bit 7 set, to standard
      output.
 ---------------------------------------------------------------------*/
void euckanji(c1, c2)
    register int c1, c2;
{
    int first = c1 | 0x80;
    int second = c2 | 0x80;

    putchar(first);
    putchar(second);
}

/*---------------------------------------------------------------------
    NAME
      eucgaiji
    DESCRIPTION
      Writes a gaiji in EUC output: SS3 followed by c1 and c2 with
      bit 7 set.  When nogaiji is set, the two bytes of ZENPAD (with
      bit 7 set, no SS3) are written instead.
 ---------------------------------------------------------------------*/
void eucgaiji(c1, c2)
    register int c1, c2;
{
    if (!nogaiji) {
        putchar(SS3);
        putchar(c1 | 0x80);
        putchar(c2 | 0x80);
        return;
    }

    /* Gaiji output is disabled: substitute the padding character. */
    putchar((ZENPAD >> 8) | 0x80);
    putchar((ZENPAD & 0xff) | 0x80);
}

/*---------------------------------------------------------------------
    NAME
      euckana
    DESCRIPTION
      Writes the hankaku kana c in EUC output: SS2 followed by the
      code with bit 7 set.  With nogaiji set, a code outside
      0x21..0x5f is replaced by HANPAD.
 ---------------------------------------------------------------------*/
void euckana(c)
    register int c;
{
    int kana = c;

    putchar(SS2);
    if (nogaiji && !(0x20 < c && c < 0x60))
        kana = HANPAD;
    putchar(kana | 0x80);
}

/*---------------------------------------------------------------------
    NAME
      sjiskanji
    DESCRIPTION
      Converts the two-byte code (c1, c2) to a shift-JIS byte pair and
      writes it.  Two consecutive values of c1 share one lead byte;
      whether c1 is odd or even decides which offset is added to c2.
 ---------------------------------------------------------------------*/
void sjiskanji(c1, c2)
    register int c1, c2;
{
    int lead, trail;

    lead = ((c1 - 1) >> 1) + (c1 <= 0x5e ? 0x71 : 0xb1);

    if (c1 & 1)
        trail = c2 + (c2 < 0x60 ? 0x1f : 0x20);
    else
        trail = c2 + 0x7e;

    putchar(lead);
    putchar(trail);
}

/*---------------------------------------------------------------------
    NAME
      sjisgaiji
    DESCRIPTION
      Writes a gaiji in shift-JIS output.  Characters are mapped as
      follows:
          0x2121 to 0x3a7e --> 0xf040 to 0xfcfc
          0x3b21 to 0x7e7e --> 0xfcfc
      When nogaiji is set, ZENPAD is written through sjiskanji
      instead.
 ---------------------------------------------------------------------*/
void sjisgaiji(c1, c2)
    register int c1, c2;
{
    if (nogaiji) {
        sjiskanji(ZENPAD >> 8, ZENPAD & 0xff);
        return;
    }

    if (c1 >= 0x3b) {
        /* No room left in the target area: everything becomes 0xfcfc. */
        putchar(0xfc);
        putchar(0xfc);
        return;
    }

    putchar(((c1 - 1) >> 1) + 0xe0);
    if (c1 & 1)
        putchar(c2 + (c2 < 0x60 ? 0x1f : 0x20));
    else
        putchar(c2 + 0x7e);
}

/*---------------------------------------------------------------------
    NAME
      sjiskana
    DESCRIPTION
      Writes the hankaku kana c in shift-JIS output as one byte with
      bit 7 set.  A code outside 0x21..0x5f is always replaced by
      HANPAD; nogaiji is not consulted here.
 ---------------------------------------------------------------------*/
void sjiskana(c)
    register int c;
{
    if (0x20 < c && c < 0x60)
        putchar(c | 0x80);
    else
        putchar(HANPAD | 0x80);
}

/*---------------------------------------------------------------------
    NAME
      decascii
    DESCRIPTION
      Writes the character c in DEC output.  A kana still held in
      lastkana is written first (two bytes taken from kanatbl, high
      byte first) and lastkana is cleared.
 ---------------------------------------------------------------------*/
void decascii(c)
    register int c;
{
    if (lastkana != 0) {
        int pending = kanatbl[lastkana];

        putchar(pending >> 8);
        putchar(pending & 0xff);
        lastkana = 0;
    }
    putchar(c);
}

/*---------------------------------------------------------------------
    NAME
      deckanji
    DESCRIPTION
      Writes the bytes c1 and c2, each with bit 7 set, in DEC output.
      A kana still held in lastkana is written first (two bytes taken
      from kanatbl, high byte first) and lastkana is cleared.
 ---------------------------------------------------------------------*/
void deckanji(c1, c2)
    register int c1, c2;
{
    if (lastkana != 0) {
        int pending = kanatbl[lastkana];

        putchar(pending >> 8);
        putchar(pending & 0xff);
        lastkana = 0;
    }
    putchar(c1 | 0x80);
    putchar(c2 | 0x80);
}

/*---------------------------------------------------------------------
    NAME
      decgaiji
    DESCRIPTION
      Writes a gaiji in DEC output: c1 with bit 7 set followed by c2
      as given.  When nogaiji is set, the two bytes of ZENPAD (each
      with bit 7 set) are written instead.  A kana still held in
      lastkana is written first and lastkana is cleared.
 ---------------------------------------------------------------------*/
void decgaiji(c1, c2)
    register int c1, c2;
{
    if (lastkana != 0) {
        int pending = kanatbl[lastkana];

        putchar(pending >> 8);
        putchar(pending & 0xff);
        lastkana = 0;
    }

    if (!nogaiji) {
        /* Only the first byte gets bit 7; the second is passed through. */
        putchar(c1 | 0x80);
        putchar(c2);
        return;
    }

    putchar((ZENPAD >> 8) | 0x80);
    putchar((ZENPAD & 0xff) | 0x80);
}

/*---------------------------------------------------------------------
    NAME
      deckana
    DESCRIPTION
      Handles the hankaku kana c in DEC output, where kana is written
      as a two-byte character taken from kanatbl.  Output is delayed
      by one character: the kana is remembered in lastkana so that a
      following voicing mark (0x5e or 0x5f) can be merged into it.
      A code outside 0x21..0x5f is written as the padding character
      ZENPAD.
 ---------------------------------------------------------------------*/
void deckana(c)
    register int c;
{
    register int cc;
    int i;
    extern unsigned char dakuon[];

    if (lastkana) {
        cc = kanatbl[lastkana];
        /*
         * dakuon[] flags which marks the pending kana accepts: bit
         * value 1 for 0x5e, 2 for 0x5f.  The flag value is also the
         * offset from the plain character to the marked one.
         */
        if ((c == 0x5e || c == 0x5f) &&
              (i = dakuon[lastkana] & (c == 0x5e ? 1 : 2))) {
            cc += i;
            c = -1;     /* mark consumed: nothing more to write or hold */
        }
        putchar(cc >> 8);
        putchar(cc & 0xff);
    }
    if (c < 0x21 || 0x5f < c) {
        if (c > 0) {
            /*
             * ZENPAD is a 7-bit code (decgaiji and eucgaiji set bit 7
             * on each of its bytes), so bit 7 has to be set here as
             * well for the padding to come out as a two-byte character.
             */
            putchar((ZENPAD >> 8) | 0x80);
            putchar((ZENPAD & 0xff) | 0x80);
        }
        lastkana = 0;
    } else
        lastkana = c - 0x20;    /* hold back; written by the next call */
}

/*---------------------------------------------------------------------
    TYPE
      table
    NAME
      katakana, hiragana, dakuon - JIS X0201 kana to JIS kanji in DEC
 ---------------------------------------------------------------------*/
/*
 * Two-byte katakana for each hankaku kana code, indexed by
 * (kana code - 0x20); one row per eight codes.  Entry 0 is unused.
 * Presumably reached through kanatbl by the DEC output routines, which
 * write the high byte first -- confirm where kanatbl is assigned.
 */
unsigned short katakana[] = {
    /* 0x20 - 0x27 */
    0,      0xa1a3, 0xa1d6, 0xa1d7, 0xa1a2, 0xa1a6, 0xa5f2, 0xa5a1,
    /* 0x28 - 0x2f */
    0xa5a3, 0xa5a5, 0xa5a7, 0xa5a9, 0xa5e3, 0xa5e5, 0xa5e7, 0xa5c3,
    /* 0x30 - 0x37 */
    0xa1bc, 0xa5a2, 0xa5a4, 0xa5a6, 0xa5a8, 0xa5aa, 0xa5ab, 0xa5ad,
    /* 0x38 - 0x3f */
    0xa5af, 0xa5b1, 0xa5b3, 0xa5b5, 0xa5b7, 0xa5b9, 0xa5bb, 0xa5bd,
    /* 0x40 - 0x47 */
    0xa5bf, 0xa5c1, 0xa5c4, 0xa5c6, 0xa5c8, 0xa5ca, 0xa5cb, 0xa5cc,
    /* 0x48 - 0x4f */
    0xa5cd, 0xa5ce, 0xa5cf, 0xa5d2, 0xa5d5, 0xa5d8, 0xa5db, 0xa5de,
    /* 0x50 - 0x57 */
    0xa5df, 0xa5e0, 0xa5e1, 0xa5e2, 0xa5e4, 0xa5e6, 0xa5e8, 0xa5e9,
    /* 0x58 - 0x5f */
    0xa5ea, 0xa5eb, 0xa5ec, 0xa5ed, 0xa5ef, 0xa5f3, 0xa1ab, 0xa1ac,
};

/*
 * Two-byte hiragana for each hankaku kana code, indexed by
 * (kana code - 0x20); one row per eight codes.  Entry 0 is unused.
 * Same layout as katakana[]; the entries with high byte 0xa1 are
 * identical in both tables.
 */
unsigned short hiragana[] = {
    /* 0x20 - 0x27 */
    0,      0xa1a3, 0xa1d6, 0xa1d7, 0xa1a2, 0xa1a6, 0xa4f2, 0xa4a1,
    /* 0x28 - 0x2f */
    0xa4a3, 0xa4a5, 0xa4a7, 0xa4a9, 0xa4e3, 0xa4e5, 0xa4e7, 0xa4c3,
    /* 0x30 - 0x37 */
    0xa1bc, 0xa4a2, 0xa4a4, 0xa4a6, 0xa4a8, 0xa4aa, 0xa4ab, 0xa4ad,
    /* 0x38 - 0x3f */
    0xa4af, 0xa4b1, 0xa4b3, 0xa4b5, 0xa4b7, 0xa4b9, 0xa4bb, 0xa4bd,
    /* 0x40 - 0x47 */
    0xa4bf, 0xa4c1, 0xa4c4, 0xa4c6, 0xa4c8, 0xa4ca, 0xa4cb, 0xa4cc,
    /* 0x48 - 0x4f */
    0xa4cd, 0xa4ce, 0xa4cf, 0xa4d2, 0xa4d5, 0xa4d8, 0xa4db, 0xa4de,
    /* 0x50 - 0x57 */
    0xa4df, 0xa4e0, 0xa4e1, 0xa4e2, 0xa4e4, 0xa4e6, 0xa4e8, 0xa4e9,
    /* 0x58 - 0x5f */
    0xa4ea, 0xa4eb, 0xa4ec, 0xa4ed, 0xa4ef, 0xa4f3, 0xa1ab, 0xa1ac,
};

/*
 * Voicing-mark flags for each hankaku kana code, indexed by
 * (kana code - 0x20) like the kana tables.  deckana() tests bit value 1
 * when the next code is 0x5e and bit value 2 when it is 0x5f; the
 * matching flag value is added to the kana's two-byte code to get the
 * marked character.  0 means no mark is merged.
 */
unsigned char dakuon[] = {
    /* 0x20 - 0x27 */
    0,      0,      0,      0,      0,      0,      0,      0,
    /* 0x28 - 0x2f */
    0,      0,      0,      0,      0,      0,      0,      0,
    /* 0x30 - 0x37 */
    0,      0,      0,      0,      0,      0,      1,      1,
    /* 0x38 - 0x3f */
    1,      1,      1,      1,      1,      1,      1,      1,
    /* 0x40 - 0x47 */
    1,      1,      1,      1,      1,      0,      0,      0,
    /* 0x48 - 0x4f */
    0,      0,      3,      3,      3,      3,      3,      0,
    /* 0x50 - 0x57 */
    0,      0,      0,      0,      0,      0,      0,      0,
    /* 0x58 - 0x5f */
    0,      0,      0,      0,      0,      0,      0,      0,
};

/* Generated by Doxygen 1.6.0 -- Back to index (web page footer, not part of kcc.c) */