Logo Search packages:      
Sourcecode: ksh version File versions  Download package

magic.c

/***********************************************************************
*                                                                      *
*               This software is part of the ast package               *
*          Copyright (c) 1985-2007 AT&T Intellectual Property          *
*                      and is licensed under the                       *
*                  Common Public License, Version 1.0                  *
*                    by AT&T Intellectual Property                     *
*                                                                      *
*                A copy of the License is available at                 *
*            http://www.opensource.org/licenses/cpl1.0.txt             *
*         (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9)         *
*                                                                      *
*              Information and Software Systems Research               *
*                            AT&T Research                             *
*                           Florham Park NJ                            *
*                                                                      *
*                 Glenn Fowler <gsf@research.att.com>                  *
*                  David Korn <dgk@research.att.com>                   *
*                   Phong Vo <kpv@research.att.com>                    *
*                                                                      *
***********************************************************************/
#pragma prototyped
/*
 * Glenn Fowler
 * AT&T Research
 *
 * library interface to file
 *
 * the sum of the hacks {s5,v10,planix} is _____ than the parts
 */

/* library identification string (name, origin and release date) */
static const char id[] = "\n@(#)$Id: magic library (AT&T Research) 2007-01-08 $\0\n";

/* library/catalog id handed to ERROR_translate() by the T() macro below */
static const char lib[] = "libast:magic";

#include <ast.h>
#include <ctype.h>
#include <ccode.h>
#include <dt.h>
#include <modex.h>
#include <error.h>
#include <regex.h>
#include <swap.h>

/*
 * translate message m through ERROR_translate() using the lib id;
 * an empty string is returned as is without a translation lookup
 * NOTE: m is evaluated more than once and is not parenthesized
 */
#define T(m)            (*m?ERROR_translate(NiL,NiL,lib,m):m)

/*
 * match string s against pattern p via strgrpmatch() with the flags
 * STR_LEFT|STR_RIGHT|STR_ICASE (presumably: anchored at both ends and
 * case insensitive -- confirm against the strgrpmatch() documentation)
 */
#define match(s,p)      strgrpmatch(s,p,NiL,0,STR_LEFT|STR_RIGHT|STR_ICASE)

/*
 * MAXNEST also dimensions the keep/cap/msg/ret stacks in _MAGIC_PRIVATE_
 * and bounds the call depth (call < MAXNEST - 1) in ckmagic()
 */
#define MAXNEST         10          /* { ... } nesting limit      */
#define MINITEM         4           /* magic buffer rounding      */

/*
 * keyword => small integer mapping used by the dict[] and info[] tables
 *
 * both tables initialize entries positionally as { name, value }, so the
 * member order here must not change; link is left zero initialized by
 * those initializers
 */
typedef struct                      /* identifier dictionary entry      */
{
      const char  name[16];   /* identifier name            */
      int         value;            /* identifier value           */
      Dtlink_t    link;       /* dictionary link            */
} Info_t;

/*
 * singly linked list node holding one compiled regular expression
 * NOTE(review): no use of Edit_t appears in the visible part of this file
 */
typedef struct Edit                 /* edit substitution          */
{
      struct Edit*      next;       /* next in list               */
      regex_t*    from;       /* from pattern               */
} Edit_t;

struct Entry;

/*
 * state for the 'l' (loop) operation in ckmagic()
 *
 * on first entry (count == 0) ckmagic() stores the fetched number in
 * count, saves the current base offset in offset and restarts at start;
 * on each later pass count is decremented and the base offset advances
 * by size; when count reaches 0 the saved offset is restored
 * each pass continues at the entry lab
 */
typedef struct                      /* loop info                  */
{
      struct Entry*     lab;        /* call this function         */
      int         start;            /* start here                 */
      int         size;       /* increment by this amount   */
      int         count;            /* dynamic loop count         */
      int         offset;           /* dynamic offset       */
} Loop_t;

/*
 * one parsed magic table entry; entries are chained through next and
 * walked in order by ckmagic()
 *
 * how ckmagic() uses the members:
 *    expr      if non-null then offset selects the data source: 0 means
 *              evaluate expr with strexpr(), otherwise it is an INFO_*
 *              code naming a stat/file attribute
 *    value     interpretation depends on the entry: num for the numeric
 *              comparisons, str for the 's' 'm' 'M' 'S' types, sub for the
 *              'e' 'E' regex types, lab for the '$' call continuation and
 *              loop for the 'l' operation
 *    mask      and-ed into the fetched number when non-zero; for the 's'
 *              type it is the data size and the compare length
 *    cont      values tested: '#' '$' ':' '|' '&'
 *    op        values tested: '=' '@' '!' '^' '>' '<' 'l' 'm' 'r' 'v'
 *    nest      '{' pushes and '}' pops one nesting level
 *    swap      if non-zero the fetch uses (~swap ^ mp->swap) as the swap
 *              operation instead of mp->swap
 */
typedef struct Entry                /* magic file entry           */
{
      struct Entry*     next;       /* next in list               */
      char*       expr;       /* offset expression          */
      union
      {
      unsigned long     num;
      char*       str;
      struct Entry*     lab;
      regex_t*    sub;
      Loop_t*           loop;
      }           value;            /* comparison value           */
      char*       desc;       /* file description           */
      char*       mime;       /* file mime type       */
      unsigned long     offset;           /* offset in bytes            */
      unsigned long     mask;       /* mask before compare        */
      char        cont;       /* continuation operation     */
      char        type;       /* datum type                 */
      char        op;         /* comparison operation       */
      char        nest;       /* { or } nesting operation   */
      char        swap;       /* forced swap order          */
} Entry_t;

/*
 * character code classification
 *
 * CC_BIT is the number of bits budgeted per code set map (CC_MAPS is
 * defined outside this file); Cctype_t is the narrowest of unsigned
 * short / unsigned long judged able to hold CC_MAPS*CC_BIT bits
 */
#define CC_BIT          5

#if (CC_MAPS*CC_BIT) <= (CHAR_BIT*2)
typedef unsigned short Cctype_t;
#else
typedef unsigned long Cctype_t;
#endif

/* classification flag bits */
#define CC_text         0x01
#define CC_control      0x02
#define CC_latin  0x04
#define CC_binary 0x08
#define CC_utf_8  0x10

#define CC_notext CC_text           /* CC_text is flipped before checking */

/* all classification bits except CC_utf_8 */
#define CC_MASK         (CC_binary|CC_latin|CC_control|CC_text)

/*
 * classify character code c (c is evaluated several times):
 *    c > 0240                                   CC_binary
 *    0200 <= c <= 0240                          CC_latin
 *    c < 040 except 007 011 012 013 015         CC_control
 *    anything else                              CC_text
 */
#define CCTYPE(c) (((c)>0240)?CC_binary:((c)>=0200)?CC_latin:((c)<040&&(c)!=007&&(c)!=011&&(c)!=012&&(c)!=013&&(c)!=015)?CC_control:CC_text)

/*
 * identifier classes: the value member of the dict[] keyword table
 * and the index range of the identifier[ID_MAX + 1] array in
 * _MAGIC_PRIVATE_
 */
#define ID_NONE         0
#define ID_ASM          1
#define ID_C            2
#define ID_COBOL  3
#define ID_COPYBOOK     4
#define ID_CPLUSPLUS    5
#define ID_FORTRAN      6
#define ID_HTML         7
#define ID_INCL1  8
#define ID_INCL2  9
#define ID_INCL3  10
#define ID_MAM1         11
#define ID_MAM2         12
#define ID_MAM3         13
#define ID_NOTEXT 14
#define ID_PL1          15
#define ID_YACC         16

/* largest ID_* value; keep in sync when adding classes */
#define ID_MAX          ID_YACC

/*
 * file attribute selectors: the value member of the info[] table;
 * ckmagic() switches on these (stored in Entry_t.offset when
 * Entry_t.expr is set); 0 is not used here because offset 0 means
 * "evaluate the expression"
 */
#define INFO_atime      1
#define INFO_blocks     2
#define INFO_ctime      3
#define INFO_fstype     4
#define INFO_gid  5
#define INFO_mode 6
#define INFO_mtime      7
#define INFO_name 8
#define INFO_nlink      9
#define INFO_size 10
#define INFO_uid  11

/*
 * private handle members, defined before <magic.h> is included
 * (presumably <magic.h> expands _MAGIC_PRIVATE_ inside Magic_t -- the
 * functions below reach these members through a Magic_t* -- confirm
 * against the header)
 *
 * fbuf holds the leading fbsz bytes of the file; xbuf is a second
 * window of xbsz bytes starting at file offset xoff that getdata()
 * refills on demand; both have one spare byte beyond SF_BUFSIZE
 */
#define _MAGIC_PRIVATE_ \
      Magicdisc_t*      disc;             /* discipline           */ \
      Vmalloc_t*  vm;               /* vmalloc region */ \
      Entry_t*    magic;                  /* parsed magic table   */ \
      Entry_t*    magiclast;        /* last entry in magic  */ \
      char*       mime;             /* MIME type            */ \
      unsigned char*    x2n;              /* CC_ALIEN=>CC_NATIVE  */ \
      char        fbuf[SF_BUFSIZE + 1];   /* file data            */ \
      char        xbuf[SF_BUFSIZE + 1];   /* indirect file data   */ \
      char        nbuf[256];        /* !CC_NATIVE data      */ \
      char        mbuf[64];         /* mime string          */ \
      char        sbuf[64];         /* type suffix string   */ \
      char        tbuf[2 * PATH_MAX];     /* type string          */ \
      Cctype_t    cctype[UCHAR_MAX + 1];  /* char code types      */ \
      unsigned int      count[UCHAR_MAX + 1];   /* char frequency count */ \
      unsigned int      multi[UCHAR_MAX + 1];   /* multi char count     */ \
      int         keep[MAXNEST];          /* ckmagic nest stack   */ \
      char*       cap[MAXNEST];           /* ckmagic mime stack   */ \
      char*       msg[MAXNEST];           /* ckmagic text stack   */ \
      Entry_t*    ret[MAXNEST];           /* ckmagic return stack */ \
      int         fbsz;             /* fbuf size            */ \
      int         fbmx;             /* fbuf max size  */ \
      int         xbsz;             /* xbuf size            */ \
      int         swap;             /* swap() operation     */ \
      unsigned long     flags;                  /* disc+open flags      */ \
      long        xoff;             /* xbuf offset          */ \
      int         identifier[ID_MAX + 1]; /* Info_t identifier    */ \
      Sfio_t*           fp;               /* fbuf fp        */ \
      Sfio_t*           tmp;              /* tmp string           */ \
      regdisc_t   redisc;                 /* regex discipline     */ \
      Dtdisc_t    dtdisc;                 /* dict discipline      */ \
      Dt_t*       idtab;                  /* identifier dict      */ \
      Dt_t*       infotab;          /* info keyword dict    */

#include <magic.h>

/*
 * identifier keyword => ID_* class table
 *
 * names are stored exactly as written, so upper and lower case
 * spellings of the same word (e.g. "COMMON" and "common") are separate
 * entries; several keywords map to the same class
 * entries are { name, value } with the link member left zero
 * (presumably loaded into the idtab dictionary when a handle is
 * opened -- that code is outside this part of the file)
 */
static Info_t           dict[] =          /* keyword dictionary   */
{
      {     "COMMON",   ID_FORTRAN  },
      {     "COMPUTE",  ID_COBOL    },
      {     "COMP",           ID_COPYBOOK },
      {     "COMPUTATIONAL",ID_COPYBOOK   },
      {     "DCL",            ID_PL1            },
      {     "DEFINED",  ID_PL1            },
      {     "DIMENSION",      ID_FORTRAN  },
      {     "DIVISION", ID_COBOL    },
      {     "FILLER",   ID_COPYBOOK },
      {     "FIXED",    ID_PL1            },
      {     "FUNCTION", ID_FORTRAN  },
      {     "HTML",           ID_HTML           },
      {     "INTEGER",  ID_FORTRAN  },
      {     "MAIN",           ID_PL1            },
      {     "OPTIONS",  ID_PL1            },
      {     "PERFORM",  ID_COBOL    },
      {     "PIC",            ID_COPYBOOK },
      {     "REAL",           ID_FORTRAN  },
      {     "REDEFINES",      ID_COPYBOOK },
      {     "S9",       ID_COPYBOOK },
      {     "SECTION",  ID_COBOL    },
      {     "SELECT",   ID_COBOL    },
      {     "SUBROUTINE",     ID_FORTRAN  },
      {     "TEXT",           ID_ASM            },
      {     "VALUE",    ID_COPYBOOK },
      {     "attr",           ID_MAM3           },
      {     "binary",   ID_YACC           },
      {     "block",    ID_FORTRAN  },
      {     "bss",            ID_ASM            },
      {     "byte",           ID_ASM            },
      {     "char",           ID_C        },
      {     "class",    ID_CPLUSPLUS      },
      {     "clr",            ID_NOTEXT   },
      {     "comm",           ID_ASM            },
      {     "common",   ID_FORTRAN  },
      {     "data",           ID_ASM            },
      {     "dimension",      ID_FORTRAN  },
      {     "done",           ID_MAM2           },
      {     "double",   ID_C        },
      {     "even",           ID_ASM            },
      {     "exec",           ID_MAM3           },
      {     "extern",   ID_C        },
      {     "float",    ID_C        },
      {     "function", ID_FORTRAN  },
      {     "globl",    ID_ASM            },
      {     "h",        ID_INCL3    },
      {     "html",           ID_HTML           },
      {     "include",  ID_INCL1    },
      {     "int",            ID_C        },
      {     "integer",  ID_FORTRAN  },
      {     "jmp",            ID_NOTEXT   },
      {     "left",           ID_YACC           },
      {     "libc",           ID_INCL2    },
      {     "long",           ID_C        },
      {     "make",           ID_MAM1           },
      {     "mov",            ID_NOTEXT   },
      {     "private",  ID_CPLUSPLUS      },
      {     "public",   ID_CPLUSPLUS      },
      {     "real",           ID_FORTRAN  },
      {     "register", ID_C        },
      {     "right",    ID_YACC           },
      {     "sfio",           ID_INCL2    },
      {     "static",   ID_C        },
      {     "stdio",    ID_INCL2    },
      {     "struct",   ID_C        },
      {     "subroutine",     ID_FORTRAN  },
      {     "sys",            ID_NOTEXT   },
      {     "term",           ID_YACC           },
      {     "text",           ID_ASM            },
      {     "tst",            ID_NOTEXT   },
      {     "type",           ID_YACC           },
      {     "typedef",  ID_C        },
      {     "u",        ID_INCL2    },
      {     "union",    ID_YACC           },
      {     "void",           ID_C        },
};

/*
 * file attribute keyword => INFO_* selector table
 *
 * one entry per INFO_* code; ckmagic() dispatches on these codes to
 * pick the matching struct stat member or the file base name
 * (presumably loaded into the infotab dictionary when a handle is
 * opened -- that code is outside this part of the file)
 */
static Info_t           info[] =
{
      {     "atime",    INFO_atime        },
      {     "blocks",   INFO_blocks       },
      {     "ctime",    INFO_ctime        },
      {     "fstype",   INFO_fstype       },
      {     "gid",            INFO_gid          },
      {     "mode",           INFO_mode         },
      {     "mtime",    INFO_mtime        },
      {     "name",           INFO_name         },
      {     "nlink",    INFO_nlink        },
      {     "size",           INFO_size         },
      {     "uid",            INFO_uid          },
};

/*
 * return pointer to data at offset off and size siz
 *
 * data that lies entirely within the first mp->fbsz bytes is served
 * from mp->fbuf; anything else is served from mp->xbuf, a window of
 * mp->xbsz bytes at file offset mp->xoff that is re-read from mp->fp
 * (at a multiple of SF_BUFSIZE/2) whenever the request falls outside it
 *
 * 0 is returned if off or siz is negative, if the request extends past
 * mp->fbmx, if the seek or read fails, or if the file is too short
 *
 * off and siz are derived from file content by the callers, so every
 * range test is written as a subtraction of non-negative values: the
 * sum off+siz could overflow and a negative siz would otherwise slip
 * through the range tests and yield a pointer past the valid data
 */

static char*
getdata(register Magic_t* mp, register long off, register int siz)
{
      register long     n;

      if (off < 0 || siz < 0)
            return 0;

      /* off + siz <= fbsz */
      if (off <= (long)mp->fbsz - siz)
            return mp->fbuf + off;

      /* not (xoff <= off && off + siz <= xoff + xbsz) => reload xbuf */
      if (off < mp->xoff || off - mp->xoff > (long)mp->xbsz - siz)
      {
            /* off + siz > fbmx */
            if (off > (long)mp->fbmx - siz)
                  return 0;

            /* half buffer alignment leaves at least SF_BUFSIZE/2 bytes after off */
            n = (off / (SF_BUFSIZE / 2)) * (SF_BUFSIZE / 2);
            if (sfseek(mp->fp, n, SEEK_SET) != n)
                  return 0;
            if ((mp->xbsz = sfread(mp->fp, mp->xbuf, sizeof(mp->xbuf) - 1)) < 0)
            {
                  /* invalidate the window so that a later call re-reads */
                  mp->xoff = 0;
                  mp->xbsz = 0;
                  return 0;
            }

            /* keep the window 0 terminated for the string consumers */
            mp->xbuf[mp->xbsz] = 0;
            mp->xoff = n;

            /* short read: the request is still not covered */
            if (off - mp->xoff > (long)mp->xbsz - siz)
                  return 0;
      }
      return mp->xbuf + off - mp->xoff;
}

/*
 * @... evaluator for strexpr()
 *
 * cs != 0: if cs starts with '@' then the operand that follows (a
 * parenthesized sub-expression evaluated by strexpr() or a number
 * parsed by strtol()) is taken as a file offset and the value stored
 * at that offset is returned; an optional suffix selects the width:
 *
 *    b B   1 byte, unsigned
 *    h H   2 bytes
 *    q Q   8 bytes
 *    else  4 bytes (any other single alphanumeric suffix is skipped)
 *
 * multi-byte values are fetched with swapget() using mp->swap
 * *e is set to the first unconsumed character, or back to cs if the
 * data is not available; if cs does not start with '@' then 0 is
 * returned and *e is set to cs
 *
 * cs == 0: *e is reported as an error message text through the
 * discipline errorf (only with MAGIC_VERBOSE) and 0 is returned
 * (presumably this is how strexpr() reports syntax errors -- confirm)
 *
 * handle is the Magic_t* passed to strexpr()
 */

static long
indirect(const char* cs, char** e, void* handle)
{
      register char*          s = (char*)cs;
      register Magic_t* mp = (Magic_t*)handle;
      register long           n = 0;
      register char*          p;
      int               width;

      if (s)
      {
            if (*s == '@')
            {
                  n = *++s == '(' ? strexpr(s, e, indirect, mp) : strtol(s, e, 0);
                  switch (*(s = *e))
                  {
                  case 'b':
                  case 'B':
                        s++;
                        width = 1;
                        break;
                  case 'h':
                  case 'H':
                        s++;
                        width = 2;
                        break;
                  case 'q':
                  case 'Q':
                        s++;
                        width = 8;
                        break;
                  default:
                        /* the cast keeps isalnum() defined for chars >= 0200 */
                        if (isalnum((unsigned char)*s))
                              s++;
                        width = 4;
                        break;
                  }
                  if (!(p = getdata(mp, n, width)))
                        s = (char*)cs;    /* no data: consume nothing */
                  else if (width == 1)
                        n = *(unsigned char*)p;
                  else
                        n = swapget(mp->swap, p, width);
            }
            *e = s;
      }
      else if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
            (*mp->disc->errorf)(mp, mp->disc, 2, "%s in indirect expression", *e);
      return n;
}

/*
 * report the regex failure code for re through the discipline
 * error function; silent unless MAGIC_VERBOSE is set and the
 * discipline supplies errorf
 */

static void
regmessage(Magic_t* mp, regex_t* re, int code)
{
      char  text[128];

      if (!(mp->flags & MAGIC_VERBOSE) || !mp->disc->errorf)
            return;
      regerror(code, re, text, sizeof(text));
      (*mp->disc->errorf)(mp, mp->disc, 3, "regex: %s", text);
}

/*
 * decompose vcodex(3) method composition
 *
 * the input [m,x) is read as a sequence of records, each a 0 terminated
 * string followed by a length (7 bits per byte, most significant group
 * first, 0x80 set on all but the last byte) and that many bytes, which
 * are skipped
 *
 * the strings are copied to [b,e), mapped from ASCII to the native
 * code set when CCMAP() supplies a map, and separated by '^'
 * decomposition stops when the output is full or when the skip length
 * is not smaller than the remaining input
 *
 * the updated output pointer is returned; the output is not 0 terminated
 *
 * the skip length comes from file data, so it is accumulated unsigned:
 * a signed accumulator could overflow to a negative value and step m
 * backwards out of the input
 */

static char*
vcdecomp(char* b, char* e, unsigned char* m, unsigned char* x)
{
      unsigned char*    map;
      int         c;
      unsigned long     n;
      int         i;

      map = CCMAP(CC_ASCII, CC_NATIVE);
      i = 1;
      for (;;)
      {
            /* separator before every string but the first; b < e holds here */
            if (i)
                  i = 0;
            else
                  *b++ = '^';
            while (b < e && m < x && (c = *m++))
            {
                  if (map)
                        c = map[c];
                  *b++ = c;
            }
            if (b >= e)
                  break;
            n = 0;
            while (m < x)
            {
                  n = (n<<7) | (*m & 0x7f);
                  if (!(*m++ & 0x80))
                        break;
            }

            /* input exhausted or the skip would not leave any data */
            if (m >= x || n >= (unsigned long)(x - m))
                  break;
            m += n;
      }
      return b;
}

/*
 * check for magic table match in buf
 */

static char*
ckmagic(register Magic_t* mp, const char* file, char* buf, struct stat* st, unsigned long off)
{
      register Entry_t* ep;
      register char*          p;
      register char*          b;
      register int            level = 0;
      int               call = -1;
      int               c;
      char*             q;
      char*             t;
      char*             base = 0;
      unsigned long           num;
      unsigned long           mask;
      regmatch_t        matches[10];

      mp->swap = 0;
      b = mp->msg[0] = buf;
      mp->mime = mp->cap[0] = 0;
      mp->keep[0] = 0;
      for (ep = mp->magic; ep; ep = ep->next)
      {
      fun:
            if (ep->nest == '{')
            {
                  if (++level >= MAXNEST)
                  {
                        call = -1;
                        level = 0;
                        mp->keep[0] = 0;
                        b = mp->msg[0];
                        mp->mime = mp->cap[0];
                        continue;
                  }
                  mp->keep[level] = mp->keep[level - 1] != 0;
                  mp->msg[level] = b;
                  mp->cap[level] = mp->mime;
            }
            switch (ep->cont)
            {
            case '#':
                  if (mp->keep[level] && b > buf)
                  {
                        *b = 0;
                        return buf;
                  }
                  mp->swap = 0;
                  b = mp->msg[0] = buf;
                  mp->mime = mp->cap[0] = 0;
                  if (ep->type == ' ')
                        continue;
                  break;
            case '$':
                  if (mp->keep[level] && call < (MAXNEST - 1))
                  {
                        mp->ret[++call] = ep;
                        ep = ep->value.lab;
                        goto fun;
                  }
                  continue;
            case ':':
                  ep = mp->ret[call--];
                  if (ep->op == 'l')
                        goto fun;
                  continue;
            case '|':
                  if (mp->keep[level] > 1)
                        goto checknest;
                  /*FALLTHROUGH*/
            default:
                  if (!mp->keep[level])
                  {
                        b = mp->msg[level];
                        mp->mime = mp->cap[level];
                        goto checknest;
                  }
                  break;
            }
            if (!ep->expr)
                  num = ep->offset + off;
            else
                  switch (ep->offset)
                  {
                  case 0:
                        num = strexpr(ep->expr, NiL, indirect, mp) + off;
                        break;
                  case INFO_atime:
                        num = st->st_atime;
                        ep->type = 'D';
                        break;
                  case INFO_blocks:
                        num = iblocks(st);
                        ep->type = 'N';
                        break;
                  case INFO_ctime:
                        num = st->st_ctime;
                        ep->type = 'D';
                        break;
                  case INFO_fstype:
                        p = fmtfs(st);
                        ep->type = toupper(ep->type);
                        break;
                  case INFO_gid:
                        if (ep->type == 'e' || ep->type == 'm' || ep->type == 's')
                        {
                              p = fmtgid(st->st_gid);
                              ep->type = toupper(ep->type);
                        }
                        else
                        {
                              num = st->st_gid;
                              ep->type = 'N';
                        }
                        break;
                  case INFO_mode:
                        if (ep->type == 'e' || ep->type == 'm' || ep->type == 's')
                        {
                              p = fmtmode(st->st_mode, 0);
                              ep->type = toupper(ep->type);
                        }
                        else
                        {
                              num = modex(st->st_mode);
                              ep->type = 'N';
                        }
                        break;
                  case INFO_mtime:
                        num = st->st_ctime;
                        ep->type = 'D';
                        break;
                  case INFO_name:
                        if (!base)
                        {
                              if (base = strrchr(file, '/'))
                                    base++;
                              else
                                    base = (char*)file;
                        }
                        p = base;
                        ep->type = toupper(ep->type);
                        break;
                  case INFO_nlink:
                        num = st->st_nlink;
                        ep->type = 'N';
                        break;
                  case INFO_size:
                        num = st->st_size;
                        ep->type = 'N';
                        break;
                  case INFO_uid:
                        if (ep->type == 'e' || ep->type == 'm' || ep->type == 's')
                        {
                              p = fmtuid(st->st_uid);
                              ep->type = toupper(ep->type);
                        }
                        else
                        {
                              num = st->st_uid;
                              ep->type = 'N';
                        }
                        break;
                  }
            switch (ep->type)
            {

            case 'b':
                  if (!(p = getdata(mp, num, 1)))
                        goto next;
                  num = *(unsigned char*)p;
                  break;

            case 'h':
                  if (!(p = getdata(mp, num, 2)))
                        goto next;
                  num = swapget(ep->swap ? (~ep->swap ^ mp->swap) : mp->swap, p, 2);
                  break;

            case 'd':
            case 'l':
            case 'v':
                  if (!(p = getdata(mp, num, 4)))
                        goto next;
                  num = swapget(ep->swap ? (~ep->swap ^ mp->swap) : mp->swap, p, 4);
                  break;

            case 'q':
                  if (!(p = getdata(mp, num, 8)))
                        goto next;
                  num = swapget(ep->swap ? (~ep->swap ^ mp->swap) : mp->swap, p, 8);
                  break;

            case 'e':
                  if (!(p = getdata(mp, num, 0)))
                        goto next;
                  /*FALLTHROUGH*/
            case 'E':
                  if (!ep->value.sub)
                        goto next;
                  if ((c = regexec(ep->value.sub, p, elementsof(matches), matches, 0)) || (c = regsubexec(ep->value.sub, p, elementsof(matches), matches)))
                  {
                        c = mp->fbsz;
                        if (c >= sizeof(mp->nbuf))
                              c = sizeof(mp->nbuf) - 1;
                        p = (char*)memcpy(mp->nbuf, p, c);
                        p[c] = 0;
                        ccmapstr(mp->x2n, p, c);
                        if ((c = regexec(ep->value.sub, p, elementsof(matches), matches, 0)) || (c = regsubexec(ep->value.sub, p, elementsof(matches), matches)))
                        {
                              if (c != REG_NOMATCH)
                                    regmessage(mp, ep->value.sub, c);
                              goto next;
                        }
                  }
                  p = ep->value.sub->re_sub->re_buf;
                  q = T(ep->desc);
                  t = *q ? q : p;
                  if (mp->keep[level]++ && b > buf && *(b - 1) != ' ' && *t && *t != ',' && *t != '.' && *t != '\b')
                        *b++ = ' ';
                  b += sfsprintf(b, PATH_MAX - (b - buf), *q ? q : "%s", p + (*p == '\b'));
                  if (ep->mime)
                        mp->mime = ep->mime;
                  goto checknest;

            case 's':
                  if (!(p = getdata(mp, num, ep->mask)))
                        goto next;
                  goto checkstr;
            case 'm':
                  if (!(p = getdata(mp, num, 0)))
                        goto next;
                  /*FALLTHROUGH*/
            case 'M':
            case 'S':
            checkstr:
                  for (;;)
                  {
                        if (*ep->value.str == '*' && !*(ep->value.str + 1) && isprint(*p))
                              break;
                        if ((ep->type == 'm' || ep->type == 'M') ? strmatch(p, ep->value.str) : !memcmp(p, ep->value.str, ep->mask))
                              break;
                        if (p == mp->nbuf || ep->mask >= sizeof(mp->nbuf))
                              goto next;
                        p = (char*)memcpy(mp->nbuf, p, ep->mask);
                        p[ep->mask] = 0;
                        ccmapstr(mp->x2n, p, ep->mask);
                  }
                  q = T(ep->desc);
                  if (mp->keep[level]++ && b > buf && *(b - 1) != ' ' && *q && *q != ',' && *q != '.' && *q != '\b')
                        *b++ = ' ';
                  for (t = p; (c = *t) >= 0 && c <= 0177 && isprint(c) && c != '\n'; t++);
                  *t = 0;
                  b += sfsprintf(b, PATH_MAX - (b - buf), q + (*q == '\b'), p);
                  *t = c;
                  if (ep->mime)
                        mp->mime = ep->mime;
                  goto checknest;

            }
            if (mask = ep->mask)
                  num &= mask;
            switch (ep->op)
            {

            case '=':
            case '@':
                  if (num == ep->value.num)
                        break;
                  if (ep->cont != '#')
                        goto next;
                  if (!mask)
                        mask = ~mask;
                  if (ep->type == 'h')
                  {
                        if ((num = swapget(mp->swap = 1, p, 2) & mask) == ep->value.num)
                        {
                              if (!(mp->swap & (mp->swap + 1)))
                                    mp->swap = 7;
                              goto swapped;
                        }
                  }
                  else if (ep->type == 'l')
                  {
                        for (c = 1; c < 4; c++)
                              if ((num = swapget(mp->swap = c, p, 4) & mask) == ep->value.num)
                              {
                                    if (!(mp->swap & (mp->swap + 1)))
                                          mp->swap = 7;
                                    goto swapped;
                              }
                  }
                  else if (ep->type == 'q')
                  {
                        for (c = 1; c < 8; c++)
                              if ((num = swapget(mp->swap = c, p, 8) & mask) == ep->value.num)
                                    goto swapped;
                  }
                  goto next;

            case '!':
                  if (num != ep->value.num)
                        break;
                  goto next;

            case '^':
                  if (num ^ ep->value.num)
                        break;
                  goto next;

            case '>':
                  if (num > ep->value.num)
                        break;
                  goto next;

            case '<':
                  if (num < ep->value.num)
                        break;
                  goto next;

            case 'l':
                  if (num > 0 && mp->keep[level] && call < (MAXNEST - 1))
                  {
                        if (!ep->value.loop->count)
                        {
                              ep->value.loop->count = num;
                              ep->value.loop->offset = off;
                              off = ep->value.loop->start;
                        }
                        else if (!--ep->value.loop->count)
                        {
                              off = ep->value.loop->offset;
                              goto next;
                        }
                        else
                              off += ep->value.loop->size;
                        mp->ret[++call] = ep;
                        ep = ep->value.loop->lab;
                        goto fun;
                  }
                  goto next;

            case 'm':
                  c = mp->swap;
                  t = ckmagic(mp, file, b + (b > buf), st, num);
                  mp->swap = c;
                  if (!t)
                        goto next;
                  if (b > buf)
                        *b = ' ';
                  b += strlen(b);
                  break;

            case 'r':
#if _UWIN
            {
                  char*             e;
                  Sfio_t*                 rp;
                  Sfio_t*                 gp;

                  if (!(t = strrchr(file, '.')))
                        goto next;
                  sfprintf(mp->tmp, "/reg/classes_root/%s", t);
                  if (!(t = sfstruse(mp->tmp)) || !(rp = sfopen(NiL, t, "r")))
                        goto next;
                  *ep->desc = 0;
                  *ep->mime = 0;
                  gp = 0;
                  while (t = sfgetr(rp, '\n', 1))
                  {
                        if (strneq(t, "Content Type=", 13))
                        {
                              ep->mime = vmnewof(mp->vm, ep->mime, char, sfvalue(rp), 0);
                              strcpy(ep->mime, t + 13);
                              if (gp)
                                    break;
                        }
                        else
                        {
                              sfprintf(mp->tmp, "/reg/classes_root/%s", t);
                              if ((e = sfstruse(mp->tmp)) && (gp = sfopen(NiL, e, "r")))
                              {
                                    ep->desc = vmnewof(mp->vm, ep->desc, char, strlen(t), 1);
                                    strcpy(ep->desc, t);
                                    if (*ep->mime)
                                          break;
                              }
                        }
                  }
                  sfclose(rp);
                  if (!gp)
                        goto next;
                  if (!*ep->mime)
                  {
                        t = T(ep->desc);
                        if (!strncasecmp(t, "microsoft", 9))
                              t += 9;
                        while (isspace(*t))
                              t++;
                        e = "application/x-ms-";
                        ep->mime = vmnewof(mp->vm, ep->mime, char, strlen(t), strlen(e));
                        e = strcopy(ep->mime, e);
                        while ((c = *t++) && c != '.' && c != ' ')
                              *e++ = isupper(c) ? tolower(c) : c;
                        *e = 0;
                  }
                  while (t = sfgetr(gp, '\n', 1))
                        if (*t && !streq(t, "\"\""))
                        {
                              ep->desc = vmnewof(mp->vm, ep->desc, char, sfvalue(gp), 0);
                              strcpy(ep->desc, t);
                              break;
                        }
                  sfclose(gp);
                  if (!*ep->desc)
                        goto next;
                  if (!t)
                        for (t = T(ep->desc); *t; t++)
                              if (*t == '.')
                                    *t = ' ';
                  if (!mp->keep[level])
                        mp->keep[level] = 2;
                  mp->mime = ep->mime;
                  break;
            }
#else
                  if (ep->cont == '#' && !mp->keep[level])
                        mp->keep[level] = 1;
                  goto next;
#endif

            case 'v':
                  if (!(p = getdata(mp, num, 4)))
                        goto next;
                  c = 0;
                  do
                  {
                        num++;
                        c = (c<<7) | (*p & 0x7f);
                  } while (*p++ & 0x80);
                  if (!(p = getdata(mp, num, c)))
                        goto next;
                  if (mp->keep[level]++ && b > buf && *(b - 1) != ' ')
                  {
                        *b++ = ',';
                        *b++ = ' ';
                  }
                  b = vcdecomp(b, buf + PATH_MAX, (unsigned char*)p, (unsigned char*)p + c);
                  goto checknest;

            }
      swapped:
            q = T(ep->desc);
            if (mp->keep[level]++ && b > buf && *(b - 1) != ' ' && *q && *q != ',' && *q != '.' && *q != '\b')
                  *b++ = ' ';
            if (ep->type == 'd' || ep->type == 'D')
                  b += sfsprintf(b, PATH_MAX - (b - buf), q + (*q == '\b'), fmttime("%?%l", (time_t)num));
            else if (ep->type == 'v')
                  b += sfsprintf(b, PATH_MAX - (b - buf), q + (*q == '\b'), fmtversion(num));
            else
                  b += sfsprintf(b, PATH_MAX - (b - buf), q + (*q == '\b'), num);
            if (ep->mime && *ep->mime)
                  mp->mime = ep->mime;
      checknest:
            if (ep->nest == '}')
            {
                  if (!mp->keep[level])
                  {
                        b = mp->msg[level];
                        mp->mime = mp->cap[level];
                  }
                  else if (level > 0)
                        mp->keep[level - 1] = mp->keep[level];
                  if (--level < 0)
                  {
                        level = 0;
                        mp->keep[0] = 0;
                  }
            }
            continue;
      next:
            if (ep->cont == '&')
                  mp->keep[level] = 0;
            goto checknest;
      }
      if (mp->keep[level] && b > buf)
      {
            *b = 0;
            return buf;
      }
      return 0;
}

/*
 * check english language stats
 *
 * mp->count[] holds the per-character tallies and mp->fbsz the number
 * of bytes they were taken from; pun is the number of punctuation
 * characters seen and badpun the number of those that were not
 * followed by a blank or newline
 *
 * returns 1 if the tallies look like english prose, 0 otherwise
 */

static int
ckenglish(register Magic_t* mp, int pun, int badpun)
{
      /* letter classes: [0] vowels, [1] frequent letters, [2] rare letters */
      static const char* const      letters[] = { "aeiou", "etaion", "vjkqxz" };

      const char*       p;
      int               tally[3];
      int               i;

      /* too much punctuation that is not followed by white space */
      if (pun < 5 * badpun)
            return 0;

      /* semicolons or markup/path characters outweigh the letter e */
      if (mp->count['E'] + mp->count['e'] < 2 * mp->count[';'])
            return 0;
      if (mp->count['E'] + mp->count['e'] < (mp->count['>'] + mp->count['<'] + mp->count['/']))
            return 0;

      /* sum upper and lower case occurrences for each letter class */
      for (i = 0; i < 3; i++)
      {
            tally[i] = 0;
            p = letters[i];
            while (*p)
            {
                  tally[i] += mp->count[toupper(*p)] + mp->count[*p];
                  p++;
            }
      }

      /* vowels must make up at least a fifth of the non-blank bytes ... */
      if (5 * tally[0] < mp->fbsz - mp->count[' '])
            return 0;

      /* ... and frequent letters must outnumber rare ones ten to one */
      return tally[1] >= 10 * tally[2];
}

/*
 * check programming language stats
 */

static char*
cklang(register Magic_t* mp, const char* file, char* buf, struct stat* st)
{
      register int            c;
      register unsigned char* b;
      register unsigned char* e;
      register int            q;
      register char*          s;
      char*             t;
      char*             base;
      char*             suff;
      char*             t1;
      char*             t2;
      char*             t3;
      int               n;
      int               badpun;
      int               code;
      int               pun;
      Cctype_t          flags;
      Info_t*                 ip;

      b = (unsigned char*)mp->fbuf;
      e = b + mp->fbsz;
      memzero(mp->count, sizeof(mp->count));
      memzero(mp->multi, sizeof(mp->multi));
      memzero(mp->identifier, sizeof(mp->identifier));

      /*
       * check character coding
       */

      flags = 0;
      while (b < e)
            flags |= mp->cctype[*b++];
      b = (unsigned char*)mp->fbuf;
      code = 0;
      q = CC_ASCII;
      n = CC_MASK;
      for (c = 0; c < CC_MAPS; c++)
      {
            flags ^= CC_text;
            if ((flags & CC_MASK) < n)
            {
                  n = flags & CC_MASK;
                  q = c;
            }
            flags >>= CC_BIT;
      }
      flags = n;
      if (!(flags & (CC_binary|CC_notext)))
      {
            if (q != CC_NATIVE)
            {
                  code = q;
                  ccmaps(mp->fbuf, mp->fbsz, q, CC_NATIVE);
            }
            if (b[0] == '#' && b[1] == '!')
            {
                  for (b += 2; b < e && isspace(*b); b++);
                  for (s = (char*)b; b < e && isprint(*b); b++);
                  c = *b;
                  *b = 0;
                  if ((st->st_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) || match(s, "/*bin*/*") || !access(s, F_OK))
                  {
                        if (t = strrchr(s, '/'))
                              s = t + 1;
                        for (t = s; *t; t++)
                              if (isspace(*t))
                              {
                                    *t = 0;
                                    break;
                              }
                        sfsprintf(mp->mbuf, sizeof(mp->mbuf), "application/x-%s", *s ? s : "sh");
                        mp->mime = mp->mbuf;
                        if (match(s, "*sh"))
                        {
                              t1 = T("command");
                              if (streq(s, "sh"))
                                    *s = 0;
                              else
                              {
                                    *b++ = ' ';
                                    *b = 0;
                              }
                        }
                        else
                        {
                              t1 = T("interpreter");
                              *b++ = ' ';
                              *b = 0;
                        }
                        sfsprintf(mp->sbuf, sizeof(mp->sbuf), T("%s%s script"), s, t1);
                        s = mp->sbuf;
                        goto qualify;
                  }
                  *b = c;
                  b = (unsigned char*)mp->fbuf;
            }
            badpun = 0;
            pun = 0;
            q = 0;
            s = 0;
            t = 0;
            while (b < e)
            {
                  c = *b++;
                  mp->count[c]++;
                  if (c == q && (q != '*' || *b == '/' && b++))
                  {
                        mp->multi[q]++;
                        q = 0;
                  }
                  else if (c == '\\')
                  {
                        s = 0;
                        b++;
                  }
                  else if (!q)
                  {
                        if (isalpha(c) || c == '_')
                        {
                              if (!s)
                                    s = (char*)b - 1;
                        }
                        else if (!isdigit(c))
                        {
                              if (s)
                              {
                                    if (s > mp->fbuf)
                                          switch (*(s - 1))
                                          {
                                          case ':':
                                                if (*b == ':')
                                                      mp->multi[':']++;
                                                break;
                                          case '.':
                                                if (((char*)b - s) == 3 && (s == (mp->fbuf + 1) || *(s - 2) == '\n'))
                                                      mp->multi['.']++;
                                                break;
                                          case '\n':
                                          case '\\':
                                                if (*b == '{')
                                                      t = (char*)b + 1;
                                                break;
                                          case '{':
                                                if (s == t && *b == '}')
                                                      mp->multi['X']++;
                                                break;
                                          }
                                          if (!mp->idtab)
                                          {
                                                if (mp->idtab = dtnew(mp->vm, &mp->dtdisc, Dthash))
                                                      for (q = 0; q < elementsof(dict); q++)
                                                            dtinsert(mp->idtab, &dict[q]);
                                                else if (mp->disc->errorf)
                                                      (*mp->disc->errorf)(mp, mp->disc, 3, "out of space");
                                                q = 0;
                                          }
                                          if (mp->idtab)
                                          {
                                                *(b - 1) = 0;
                                                if (ip = (Info_t*)dtmatch(mp->idtab, s))
                                                      mp->identifier[ip->value]++;
                                                *(b - 1) = c;
                                          }
                                          s = 0;
                                    }
                              switch (c)
                              {
                              case '\t':
                                    if (b == (unsigned char*)(mp->fbuf + 1) || *(b - 2) == '\n')
                                          mp->multi['\t']++;
                                    break;
                              case '"':
                              case '\'':
                                    q = c;
                                    break;
                              case '/':
                                    if (*b == '*')
                                          q = *b++;
                                    else if (*b == '/')
                                          q = '\n';
                                    break;
                              case '$':
                                    if (*b == '(' && *(b + 1) != ' ')
                                          mp->multi['$']++;
                                    break;
                              case '{':
                              case '}':
                              case '[':
                              case ']':
                              case '(':
                                    mp->multi[c]++;
                                    break;
                              case ')':
                                    mp->multi[c]++;
                                    goto punctuation;
                              case ':':
                                    if (*b == ':' && isspace(*(b + 1)) && b > (unsigned char*)(mp->fbuf + 1) && isspace(*(b - 2)))
                                          mp->multi[':']++;
                                    goto punctuation;
                              case '.':
                              case ',':
                              case '%':
                              case ';':
                              case '?':
                              punctuation:
                                    pun++;
                                    if (*b != ' ' && *b != '\n')
                                          badpun++;
                                    break;
                              }
                        }
                  }
            }
      }
      else
            while (b < e)
                  mp->count[*b++]++;
      base = (t1 = strrchr(file, '/')) ? t1 + 1 : (char*)file;
      suff = (t1 = strrchr(base, '.')) ? t1 + 1 : "";
      if (!flags)
      {
            if (match(suff, "*sh|bat|cmd"))
                  goto id_sh;
            if (match(base, "*@(mkfile)"))
                  goto id_mk;
            if (match(base, "*@(makefile|.mk)"))
                  goto id_make;
            if (match(base, "*@(mamfile|.mam)"))
                  goto id_mam;
            if (match(suff, "[cly]?(pp|xx|++)|cc|ll|yy"))
                  goto id_c;
            if (match(suff, "f"))
                  goto id_fortran;
            if (match(suff, "htm+(l)"))
                  goto id_html;
            if (match(suff, "cpy"))
                  goto id_copybook;
            if (match(suff, "cob|cbl|cb2"))
                  goto id_cobol;
            if (match(suff, "pl[1i]"))
                  goto id_pl1;
            if (match(suff, "tex"))
                  goto id_tex;
            if (match(suff, "asm|s"))
                  goto id_asm;
            if ((st->st_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) && (!suff || suff != strchr(suff, '.')))
            {
            id_sh:
                  s = T("command script");
                  mp->mime = "application/sh";
                  goto qualify;
            }
            if (strmatch(mp->fbuf, "From * [0-9][0-9]:[0-9][0-9]:[0-9][0-9] *"))
            {
                  s = T("mail message");
                  mp->mime = "message/rfc822";
                  goto qualify;
            }
            if (match(base, "*@(mkfile)"))
            {
            id_mk:
                  s = "mkfile";
                  mp->mime = "application/mk";
                  goto qualify;
            }
            if (match(base, "*@(makefile|.mk)") || mp->multi['\t'] >= mp->count[':'] && (mp->multi['$'] > 0 || mp->multi[':'] > 0))
            {
            id_make:
                  s = "makefile";
                  mp->mime = "application/make";
                  goto qualify;
            }
            if (mp->multi['.'] >= 3)
            {
                  s = T("nroff input");
                  mp->mime = "application/x-troff";
                  goto qualify;
            }
            if (mp->multi['X'] >= 3)
            {
                  s = T("TeX input");
                  mp->mime = "application/x-tex";
                  goto qualify;
            }
            if (mp->fbsz < SF_BUFSIZE &&
                (mp->multi['('] == mp->multi[')'] &&
                 mp->multi['{'] == mp->multi['}'] &&
                 mp->multi['['] == mp->multi[']']) ||
                mp->fbsz >= SF_BUFSIZE &&
                (mp->multi['('] >= mp->multi[')'] &&
                 mp->multi['{'] >= mp->multi['}'] &&
                 mp->multi['['] >= mp->multi[']']))
            {
                  c = mp->identifier[ID_INCL1];
                  if (c >= 2 && mp->identifier[ID_INCL2] >= c && mp->identifier[ID_INCL3] >= c && mp->count['.'] >= c ||
                      mp->identifier[ID_C] >= 5 && mp->count[';'] >= 5 ||
                      mp->count['='] >= 20 && mp->count[';'] >= 20)
                  {
                  id_c:
                        t1 = "";
                        t2 = "c ";
                        t3 = T("program");
                        switch (*suff)
                        {
                        case 'c':
                        case 'C':
                              mp->mime = "application/x-cc";
                              break;
                        case 'l':
                        case 'L':
                              t1 = "lex ";
                              mp->mime = "application/x-lex";
                              break;
                        default:
                              t3 = T("header");
                              if (mp->identifier[ID_YACC] < 5 || mp->count['%'] < 5)
                              {
                                    mp->mime = "application/x-cc";
                                    break;
                              }
                              /*FALLTHROUGH*/
                        case 'y':
                        case 'Y':
                              t1 = "yacc ";
                              mp->mime = "application/x-yacc";
                              break;
                        }
                        if (mp->identifier[ID_CPLUSPLUS] >= 3)
                        {
                              t2 = "c++ ";
                              mp->mime = "application/x-c++";
                        }
                        sfsprintf(mp->sbuf, sizeof(mp->sbuf), "%s%s%s", t1, t2, t3);
                        s = mp->sbuf;
                        goto qualify;
                  }
            }
            if (mp->identifier[ID_MAM1] >= 2 && mp->identifier[ID_MAM3] >= 2 &&
                (mp->fbsz < SF_BUFSIZE && mp->identifier[ID_MAM1] == mp->identifier[ID_MAM2] ||
                 mp->fbsz >= SF_BUFSIZE && mp->identifier[ID_MAM1] >= mp->identifier[ID_MAM2]))
            {
            id_mam:
                  s = T("mam program");
                  mp->mime = "application/x-mam";
                  goto qualify;
            }
            if (mp->identifier[ID_FORTRAN] >= 8)
            {
            id_fortran:
                  s = T("fortran program");
                  mp->mime = "application/x-fortran";
                  goto qualify;
            }
            if (mp->identifier[ID_HTML] > 0 && mp->count['<'] >= 8 && (c = mp->count['<'] - mp->count['>']) >= -2 && c <= 2)
            {
            id_html:
                  s = T("html input");
                  mp->mime = "text/html";
                  goto qualify;
            }
            if (mp->identifier[ID_COPYBOOK] > 0 && mp->identifier[ID_COBOL] == 0 && (c = mp->count['('] - mp->count[')']) >= -2 && c <= 2)
            {
            id_copybook:
                  s = T("cobol copybook");
                  mp->mime = "application/x-cobol";
                  goto qualify;
            }
            if (mp->identifier[ID_COBOL] > 0 && mp->identifier[ID_COPYBOOK] > 0 && (c = mp->count['('] - mp->count[')']) >= -2 && c <= 2)
            {
            id_cobol:
                  s = T("cobol program");
                  mp->mime = "application/x-cobol";
                  goto qualify;
            }
            if (mp->identifier[ID_PL1] > 0 && (c = mp->count['('] - mp->count[')']) >= -2 && c <= 2)
            {
            id_pl1:
                  s = T("pl1 program");
                  mp->mime = "application/x-pl1";
                  goto qualify;
            }
            if (mp->count['{'] >= 6 && (c = mp->count['{'] - mp->count['}']) >= -2 && c <= 2 && mp->count['\\'] >= mp->count['{'])
            {
            id_tex:
                  s = T("TeX input");
                  mp->mime = "text/tex";
                  goto qualify;
            }
            if (mp->identifier[ID_ASM] >= 4)
            {
            id_asm:
                  s = T("as program");
                  mp->mime = "application/x-as";
                  goto qualify;
            }
            if (ckenglish(mp, pun, badpun))
            {
                  s = T("english text");
                  mp->mime = "text/plain";
                  goto qualify;
            }
      }
      else if (streq(base, "core"))
      {
            mp->mime = "x-system/core";
            return T("core dump");
      }
      if (flags & (CC_binary|CC_notext))
      {
            b = (unsigned char*)mp->fbuf;
            e = b + mp->fbsz;
            n = 0;
            for (;;)
            {
                  c = *b++;
                  q = 0;
                  while (c & 0x80)
                  {
                        c <<= 1;
                        q++;
                  }
                  switch (q)
                  {
                  case 4:
                        if (b < e && (*b++ & 0xc0) != 0x80)
                              break;
                  case 3:
                        if (b < e && (*b++ & 0xc0) != 0x80)
                              break;
                  case 2:
                        if (b < e && (*b++ & 0xc0) != 0x80)
                              break;
                        n = 1;
                  case 0:
                        if (b >= e)
                        {
                              if (n)
                              {
                                    flags &= ~(CC_binary|CC_notext);
                                    flags |= CC_utf_8;
                              }
                              break;
                        }
                        continue;
                  }
                  break;
            }
      }
      if (flags & (CC_binary|CC_notext))
      {
            unsigned long     d = 0;

            if ((q = mp->fbsz / UCHAR_MAX) >= 2)
            {
                  /*
                   * compression/encryption via standard deviation
                   */


                  for (c = 0; c < UCHAR_MAX; c++)
                  {
                        pun = mp->count[c] - q;
                        d += pun * pun;
                  }
                  d /= mp->fbsz;
            }
            if (d <= 0)
                  s = T("binary");
            else if (d < 4)
                  s = T("encrypted");
            else if (d < 16)
                  s = T("packed");
            else if (d < 64)
                  s = T("compressed");
            else if (d < 256)
                  s = T("delta");
            else
                  s = T("data");
            mp->mime = "application/octet-stream";
            return s;
      }
      mp->mime = "text/plain";
      if (flags & CC_utf_8)
            s = (flags & CC_control) ? T("utf-8 text with control characters") : T("utf-8 text");
      else if (flags & CC_latin)
            s = (flags & CC_control) ? T("latin text with control characters") : T("latin text");
      else
            s = (flags & CC_control) ? T("text with control characters") : T("text");
 qualify:
      if (!flags && mp->count['\n'] >= mp->count['\r'] && mp->count['\n'] <= (mp->count['\r'] + 1) && mp->count['\r'])
      {
            t = "dos ";
            mp->mime = "text/dos";
      }
      else
            t = "";
      if (code)
      {
            if (code == CC_ASCII)
                  sfsprintf(buf, PATH_MAX, "ascii %s%s", t, s);
            else
            {
                  sfsprintf(buf, PATH_MAX, "ebcdic%d %s%s", code - 1, t, s);
                  mp->mime = "text/ebcdic";
            }
            s = buf;
      }
      else if (*t)
      {
            sfsprintf(buf, PATH_MAX, "%s%s", t, s);
            s = buf;
      }
      return s;
}

/*
 * return the basic magic string for file,st in buf,size
 *
 * non-regular files are described from st alone; otherwise up to
 * sizeof(mp->fbuf)-1 bytes are read from mp->fp into mp->fbuf and
 * passed to ckmagic() and then, if that fails, cklang()
 *
 * mp->mime is set to the mime type; a mime type ending in %s is
 * expanded into mp->fbuf using words from the description
 *
 * the return value is a (translated) literal, an error message or
 * a string built by the checks (possibly in buf)
 */

static char*
type(register Magic_t* mp, const char* file, struct stat* st, char* buf, int size)
{
      register char*    s;
      register char*    t;

      mp->mime = 0;
      if (!S_ISREG(st->st_mode))
      {
            if (S_ISDIR(st->st_mode))
            {
                  mp->mime = "x-system/dir";
                  return T("directory");
            }
            if (S_ISLNK(st->st_mode))
            {
                  mp->mime = "x-system/lnk";
                  s = buf;

                  /* bounded by size, like the link text that follows */
                  s += sfsprintf(s, size, T("symbolic link to "));
                  if (pathgetlink(file, s, size - (s - buf)) < 0)
                        return T("cannot read symbolic link text");
                  return buf;
            }
            if (S_ISBLK(st->st_mode))
            {
                  mp->mime = "x-system/blk";
                  sfsprintf(buf, size, T("block special (%s)"), fmtdev(st));
                  return buf;
            }
            if (S_ISCHR(st->st_mode))
            {
                  mp->mime = "x-system/chr";
                  sfsprintf(buf, size, T("character special (%s)"), fmtdev(st));
                  return buf;
            }
            if (S_ISFIFO(st->st_mode))
            {
                  mp->mime = "x-system/fifo";
                  return "fifo";
            }
#ifdef S_ISSOCK
            if (S_ISSOCK(st->st_mode))
            {
                  mp->mime = "x-system/sock";
                  return "socket";
            }
#endif
      }

      /* any other file type falls through and is treated by content */
      if (!(mp->fbmx = st->st_size))
            s = T("empty");
      else if (!mp->fp)
            s = T("cannot read");
      else
      {
            /* one byte is held back for the terminating 0 */
            mp->fbsz = sfread(mp->fp, mp->fbuf, sizeof(mp->fbuf) - 1);
            if (mp->fbsz < 0)
                  s = fmterror(errno);
            else if (mp->fbsz == 0)
                  s = T("empty");
            else
            {
                  mp->fbuf[mp->fbsz] = 0;
                  mp->xoff = 0;
                  mp->xbsz = 0;
                  if (!(s = ckmagic(mp, file, buf, st, 0)))
                        s = cklang(mp, file, buf, st);
            }
      }
      if (!mp->mime)
            mp->mime = "application/unknown";
      else if ((t = strchr(mp->mime, '%')) && *(t + 1) == 's' && !*(t + 2))
      {
            register char*    b;
            register char*    be;
            register char*    m;
            register char*    me;

            /*
             * the mime type ends in %s: rebuild it in mp->fbuf,
             * whose file data is no longer needed, starting with
             * the text that precedes the %s
             */

            b = mp->mime;
            me = (m = mp->mime = mp->fbuf) + sizeof(mp->fbuf) - 1;
            while (m < me && b < t)
                  *m++ = *b++;

            /*
             * pick the part of the description to substitute
             *
             * t is the start of the current word, b the start of
             * the previous one and be the blank ending the current
             * one; stop at a word ending in ',', at a word followed
             * by "data" or "file", or at the last word
             */

            b = t = s;
            for (;;)
            {
                  if (!(be = strchr(t, ' ')))
                  {
                        be = b + strlen(b);
                        break;
                  }

                  /* be > s: never look in front of the description */
                  if ((be > s && *(be - 1) == ',') || strneq(be + 1, "data", 4) || strneq(be + 1, "file", 4))
                        break;
                  b = t;
                  t = be + 1;
            }

            /* append b..be with blanks changed to '-' */
            while (m < me && b < be)
                  if ((*m++ = *b++) == ' ')
                        *(m - 1) = '-';
            *m = 0;
      }
      return s;
}

/*
 * low level for magicload()
 *
 * read the magic table text on fp one line at a time and build a chain
 * of Entry_t from it; a data line supplies, in order, a continuation
 * operator, an offset (number or expression), a type, a comparison
 * operand, a description and an optional mime type
 *
 * file only labels diagnostics through error_info
 * entries and their strings are allocated from mp->vm
 * if at least one entry was completed the new chain is appended to
 * mp->magic / mp->magiclast
 * problems are reported through mp->disc->errorf when it is set
 * the return value is always 0
 */

static int
load(register Magic_t* mp, char* file, register Sfio_t* fp)
{
      register Entry_t* ep;         /* entry being filled in */
      register char*          p;
      register char*          p2;
      char*             p3;
      char*             next;
      int               n;
      int               lge;        /* operand adjustment: -1 for >=, 1 for <= */
      int               lev;        /* current { } nesting depth */
      int               ent;        /* value of lev inside the open function definition, 0 if none */
      int               old;        /* old style > and >> nesting state */
      int               cont;       /* cont value for function/call entries: '#' after a blank or comment line, else '$' */
      Info_t*                 ip;
      Entry_t*          ret;        /* function entry still waiting for its closing } */
      Entry_t*          first;      /* head of the chain built by this call */
      Entry_t*          last = 0;   /* most recently completed entry */
      Entry_t*          fun['z' - 'a' + 1];     /* first body entry of each function a..z */

      memzero(fun, sizeof(fun));
      cont = '$';
      ent = 0;
      lev = 0;
      old = 0;
      ret = 0;
      error_info.file = file;
      error_info.line = 0;
      /* NOTE(review): the vmnewof() results in this function are used without a null check */
      first = ep = vmnewof(mp->vm, 0, Entry_t, 1, 0);
      while (p = sfgetr(fp, '\n', 1))
      {
            error_info.line++;
            for (; isspace(*p); p++);

            /*
             * nesting
             */

            switch (*p)
            {
            case 0:
            case '#':
                  /* blank or comment line */
                  cont = '#';
                  continue;
            case '{':
                  /* open a nesting level; the next entry is marked with '{' */
                  if (++lev < MAXNEST)
                        ep->nest = *p;
                  else if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
                        (*mp->disc->errorf)(mp, mp->disc, 1, "{ ... } operator nesting too deep -- %d max", MAXNEST);
                  continue;
            case '}':
                  if (!last || lev <= 0)
                  {
                        if (mp->disc->errorf)
                              (*mp->disc->errorf)(mp, mp->disc, 2, "`%c': invalid nesting", *p);
                  }
                  else if (lev-- == ent)
                  {
                        /*
                         * this } closes the function definition: the
                         * current entry becomes its [RETURN] and the
                         * function entry's next link is redirected to a
                         * fresh entry so the main chain skips the body
                         *
                         * NOTE(review): if no further entry follows in
                         * this file that fresh entry is the one passed
                         * to vmfree() at the end of load(), which looks
                         * like it leaves the function entry's next link
                         * dangling -- confirm
                         */

                        ent = 0;
                        ep->cont = ':';
                        ep->offset = ret->offset;
                        ep->nest = ' ';
                        ep->type = ' ';
                        ep->op = ' ';
                        ep->desc = "[RETURN]";
                        last = ep;
                        ep = ret->next = vmnewof(mp->vm, 0, Entry_t, 1, 0);
                        ret = 0;
                  }
                  else
                        last->nest = *p;
                  continue;
            default:
                  /*
                   * x{ starts the definition of function x
                   * x( is a call of function x unless x is one of the
                   * continuation operators + > & |
                   */

                  if (*(p + 1) == '{' || *(p + 1) == '(' && *p != '+' && *p != '>' && *p != '&' && *p != '|')
                  {
                        n = *p++;
                        if (n >= 'a' && n <= 'z')
                              n -= 'a';
                        else
                        {
                              if (mp->disc->errorf)
                                    (*mp->disc->errorf)(mp, mp->disc, 2, "%c: invalid function name", n);
                              /* fall back to slot 0 so fun[] is never indexed out of range */
                              n = 0;
                        }
                        if (ret && mp->disc->errorf)
                              (*mp->disc->errorf)(mp, mp->disc, 2, "%c: function has no return", ret->offset + 'a');
                        if (*p == '{')
                        {
                              ent = ++lev;
                              ret = ep;
                              ep->desc = "[FUNCTION]";
                        }
                        else
                        {
                              if (*(p + 1) != ')' && mp->disc->errorf)
                                    (*mp->disc->errorf)(mp, mp->disc, 2, "%c: invalid function call argument list", n + 'a');
                              ep->desc = "[CALL]";
                        }
                        ep->cont = cont;
                        ep->offset = n;
                        ep->nest = ' ';
                        ep->type = ' ';
                        ep->op = ' ';
                        last = ep;
                        ep = ep->next = vmnewof(mp->vm, 0, Entry_t, 1, 0);
                        /*
                         * a definition records its first body entry in fun[]
                         * a call picks the body up from fun[], so the
                         * function must already be defined; otherwise
                         * value.lab is left 0
                         */
                        if (ret)
                              fun[n] = last->value.lab = ep;
                        else if (!(last->value.lab = fun[n]) && mp->disc->errorf)
                              (*mp->disc->errorf)(mp, mp->disc, 2, "%c: function not defined", n + 'a');
                        continue;
                  }
                  /* unless { already marked it, record the depth digit relative to any enclosing function */
                  if (!ep->nest)
                        ep->nest = (lev > 0 && lev != ent) ? ('0' + lev - !!ent) : ' ';
                  break;
            }

            /*
             * continuation
             */

            cont = '$';
            switch (*p)
            {
            case '>':
                  old = 1;
                  if (*(p + 1) == *p)
                  {
                        /*
                         * old style nesting push
                         */

                        p++;
                        old = 2;
                        if (!lev && last)
                        {
                              lev = 1;
                              last->nest = '{';
                              if (last->cont == '>')
                                    last->cont = '&';
                              ep->nest = '1';
                        }
                  }
                  /*FALLTHROUGH*/
            case '+':
            case '&':
            case '|':
                  ep->cont = *p++;
                  break;
            default:
                  if ((mp->flags & MAGIC_VERBOSE) && !isalpha(*p) && mp->disc->errorf)
                        (*mp->disc->errorf)(mp, mp->disc, 1, "`%c': invalid line continuation operator", *p);
                  /*FALLTHROUGH*/
            case '*':
            case '0': case '1': case '2': case '3': case '4':
            case '5': case '6': case '7': case '8': case '9':
                  /* no explicit operator: '&' inside a nesting level, '#' at top level */
                  ep->cont = (lev > 0) ? '&' : '#';
                  break;
            }
            switch (old)
            {
            case 1:
                  old = 0;
                  if (lev)
                  {
                        /*
                         * old style nesting pop
                         */

                        lev = 0;
                        if (last)
                              last->nest = '}';
                        ep->nest = ' ';
                        if (ep->cont == '&')
                              ep->cont = '#';
                  }
                  break;
            case 2:
                  /* this line was >> : keep the level open for now */
                  old = 1;
                  break;
            }
            if (isdigit(*p))
            {
                  /*
                   * absolute offset
                   */

                  ep->offset = strton(p, &next, NiL, 0);
                  p2 = next;
            }
            else
            {
                  for (p2 = p; *p2 && !isspace(*p2); p2++);
                  if (!*p2)
                  {
                        if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
                              (*mp->disc->errorf)(mp, mp->disc, 1, "not enough fields: `%s'", p);
                        continue;
                  }

                  /*
                   * offset expression
                   */

                  *p2++ = 0;
                  ep->expr = vmstrdup(mp->vm, p);
                  /* a name is looked up in mp->infotab; unknown names yield offset 0 */
                  if (isalpha(*p))
                        ep->offset = (ip = (Info_t*)dtmatch(mp->infotab, p)) ? ip->value : 0;
                  else if (*p == '(' && ep->cont == '>')
                  {
                        /*
                         * convert old style indirection to @
                         */

                        /*
                         * scan the saved copy for the first of NUL @ ( )
                         * only a plain (expr) whose first such character
                         * is ) is rewritten, to @expr
                         */

                        p = ep->expr + 1;
                        for (;;)
                        {
                              switch (*p++)
                              {
                              case 0:
                              case '@':
                              case '(':
                                    break;
                              case ')':
                                    break;
                              default:
                                    continue;
                              }
                              break;
                        }
                        if (*--p == ')')
                        {
                              *p = 0;
                              *ep->expr = '@';
                        }
                  }
            }
            /* isolate the type field in p */
            for (; isspace(*p2); p2++);
            for (p = p2; *p2 && !isspace(*p2); p2++);
            if (!*p2)
            {
                  if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
                        (*mp->disc->errorf)(mp, mp->disc, 1, "not enough fields: `%s'", p);
                  continue;
            }
            *p2++ = 0;

            /*
             * type
             */

            /* optional be / le prefix selects the swap value */
            if ((*p == 'b' || *p == 'l') && *(p + 1) == 'e')
            {
                  ep->swap = ~(*p == 'l' ? 7 : 0);
                  p += 2;
            }
            /* sh... maps to 'h', other s... and a... map to 's', anything else keeps its first character */
            if (*p == 's')
            {
                  if (*(p + 1) == 'h')
                        ep->type = 'h';
                  else
                        ep->type = 's';
            }
            else if (*p == 'a')
                  ep->type = 's';
            else
                  ep->type = *p;
            if (p = strchr(p, '&'))
            {
                  /*
                   * old style mask
                   */

                  ep->mask = strton(++p, NiL, NiL, 0);
            }
            for (; isspace(*p2); p2++);
            /* with an old style mask the operand field is given a leading = by overwriting the byte before it */
            if (ep->mask)
                  *--p2 = '=';

            /*
             * comparison operation
             */

            /* the operand field ends at the first tab if the rest of the line has one */
            p = p2;
            if (p2 = strchr(p, '\t'))
                  *p2++ = 0;
            else
            {
                  int   qe = 0;     /* closing character being waited for, 0 if none */
                  int   qn = 0;     /* open bracket count for qe */

                  /*
                   * assume balanced {}[]()\\""'' field
                   */

                  /*
                   * NOTE(review): qe is only reset to 0 by a matching
                   * quote; after a bracket closes qe stays set, so white
                   * space no longer ends the field -- confirm intended
                   */

                  for (p2 = p;;)
                  {
                        switch (n = *p2++)
                        {
                        case 0:
                              break;
                        case '{':
                              if (!qe)
                                    qe = '}';
                              if (qe == '}')
                                    qn++;
                              continue;
                        case '(':
                              if (!qe)
                                    qe = ')';
                              if (qe == ')')
                                    qn++;
                              continue;
                        case '[':
                              if (!qe)
                                    qe = ']';
                              if (qe == ']')
                                    qn++;
                              continue;
                        case '}':
                        case ')':
                        case ']':
                              if (qe == n && qn > 0)
                                    qn--;
                              continue;
                        case '"':
                        case '\'':
                              if (!qe)
                                    qe = n;
                              else if (qe == n)
                                    qe = 0;
                              continue;
                        case '\\':
                              if (*p2)
                                    p2++;
                              continue;
                        default:
                              if (!qe && isspace(n))
                                    break;
                              continue;
                        }
                        /* terminate the field; at end of line leave p2 on the NUL */
                        if (n)
                              *(p2 - 1) = 0;
                        else
                              p2--;
                        break;
                  }
            }
            lge = 0;
            /* e, m and s types always compare with = and take the whole field as the operand */
            if (ep->type == 'e' || ep->type == 'm' || ep->type == 's')
                  ep->op = '=';
            else
            {
                  if (*p == '&')
                  {
                        ep->mask = strton(++p, &next, NiL, 0);
                        p = next;
                  }
                  switch (*p)
                  {
                  case '=':
                  case '>':
                  case '<':
                  case '*':
                        ep->op = *p++;
                        if (*p == '=')
                        {
                              /* >= and <= are stored as > and < with the operand adjusted by lge */
                              p++;
                              switch (ep->op)
                              {
                              case '>':
                                    lge = -1;
                                    break;
                              case '<':
                                    lge = 1;
                                    break;
                              }
                        }
                        break;
                  case '!':
                  case '@':
                        ep->op = *p++;
                        if (*p == '=')
                              p++;
                        break;
                  case 'x':
                        /* x is treated like * */
                        p++;
                        ep->op = '*';
                        break;
                  default:
                        ep->op = '=';
                        if (ep->mask)
                              ep->value.num = ep->mask;
                        break;
                  }
            }
            /* parse the operand unless the operator is * or a numeric value was already set above */
            if (ep->op != '*' && !ep->value.num)
            {
                  if (ep->type == 'e')
                  {
                        /* delimited regular expression followed by a substitution */
                        if (ep->value.sub = vmnewof(mp->vm, 0, regex_t, 1, 0))
                        {
                              ep->value.sub->re_disc = &mp->redisc;
                              if (!(n = regcomp(ep->value.sub, p, REG_DELIMITED|REG_LENIENT|REG_NULL|REG_DISCIPLINE)))
                              {
                                    p += ep->value.sub->re_npat;
                                    if (!(n = regsubcomp(ep->value.sub, p, NiL, 0, 0)))
                                          p += ep->value.sub->re_npat;
                              }
                              if (n)
                              {
                                    regmessage(mp, ep->value.sub, n);
                                    ep->value.sub = 0;
                              }
                              else if (*p && mp->disc->errorf)
                                    (*mp->disc->errorf)(mp, mp->disc, 1, "invalid characters after substitution: %s", p);
                        }
                  }
                  else if (ep->type == 'm')
                  {
                        /*
                         * match pattern; presumably stresc() converts
                         * escapes in place and returns the new length
                         * -- TODO confirm
                         * mask counts the terminating NUL and one more
                         * byte is allocated so a trailing * can replace
                         * that NUL below
                         */

                        ep->mask = stresc(p) + 1;
                        ep->value.str = vmnewof(mp->vm, 0, char, ep->mask + 1, 0);
                        memcpy(ep->value.str, p, ep->mask);
                        if ((!ep->expr || !ep->offset) && !strmatch(ep->value.str, "\\!\\(*\\)"))
                              ep->value.str[ep->mask - 1] = '*';
                  }
                  else if (ep->type == 's')
                  {
                        /* string operand: exactly mask bytes are allocated and copied, with no room for a terminating NUL */
                        ep->mask = stresc(p);
                        ep->value.str = vmnewof(mp->vm, 0, char, ep->mask, 0);
                        memcpy(ep->value.str, p, ep->mask);
                  }
                  else if (*p == '\'')
                  {
                        /* character constant: the byte after the opening quote, after escape processing */
                        stresc(p);
                        ep->value.num = *(unsigned char*)(p + 1) + lge;
                  }
                  else if (strmatch(p, "+([a-z])\\(*\\)"))
                  {
                        char* t;

                        /* name(...) operand: type becomes 'V' and op is the first letter of the name */
                        t = p;
                        ep->type = 'V';
                        ep->op = *p;
                        while (*p && *p++ != '(');
                        switch (ep->op)
                        {
                        case 'l':
                              /*
                               * first argument is a function letter, then
                               * comma separated start and size values
                               * on error value.loop is left unset
                               */

                              n = *p++;
                              if (n < 'a' || n > 'z')
                              {
                                    if (mp->disc->errorf)
                                          (*mp->disc->errorf)(mp, mp->disc, 2, "%c: invalid function name", n);
                              }
                              else if (!fun[n -= 'a'])
                              {
                                    if (mp->disc->errorf)
                                          (*mp->disc->errorf)(mp, mp->disc, 2, "%c: function not defined", n + 'a');
                              }
                              else
                              {
                                    ep->value.loop = vmnewof(mp->vm, 0, Loop_t, 1, 0);
                                    ep->value.loop->lab = fun[n];
                                    while (*p && *p++ != ',');
                                    ep->value.loop->start = strton(p, &t, NiL, 0);
                                    while (*t && *t++ != ',');
                                    ep->value.loop->size = strton(t, &t, NiL, 0);
                              }
                              break;
                        case 'm':
                        case 'r':
                              /* NOTE(review): these 32 byte buffers are replaced below whenever the line has a description / mime field -- confirm intent */
                              ep->desc = vmnewof(mp->vm, 0, char, 32, 0);
                              ep->mime = vmnewof(mp->vm, 0, char, 32, 0);
                              break;
                        case 'v':
                              break;
                        default:
                              if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
                                    (*mp->disc->errorf)(mp, mp->disc, 1, "%-.*s: unknown function", p - t, t);
                              break;
                        }
                  }
                  else
                  {
                        /* numeric operand */
                        ep->value.num = strton(p, NiL, NiL, 0) + lge;
                        if (ep->op == '@')
                              ep->value.num = swapget(0, (char*)&ep->value.num, sizeof(ep->value.num));
                  }
            }

            /*
             * file description
             */

            if (p2)
            {
                  for (; isspace(*p2); p2++);
                  if (p = strchr(p2, '\t'))
                  {
                        /*
                         * check for message catalog index
                         */

                        /*
                         * a field of the form name:digits is dropped and
                         * the following field becomes the description
                         */

                        *p++ = 0;
                        if (isalpha(*p2))
                        {
                              for (p3 = p2; isalnum(*p3); p3++);
                              if (*p3++ == ':')
                              {
                                    for (; isdigit(*p3); p3++);
                                    if (!*p3)
                                    {
                                          for (p2 = p; isspace(*p2); p2++);
                                          if (p = strchr(p2, '\t'))
                                                *p++ = 0;
                                    }
                              }
                        }
                  }
                  stresc(p2);
                  ep->desc = vmstrdup(mp->vm, p2);
                  /* whatever follows the description's tab is the mime type */
                  if (p)
                  {
                        for (; isspace(*p); p++);
                        if (*p)
                              ep->mime = vmstrdup(mp->vm, p);
                  }
            }
            else
                  ep->desc = "";

            /*
             * get next entry
             */

            last = ep;
            ep = ep->next = vmnewof(mp->vm, 0, Entry_t, 1, 0);
      }
      /* terminate the new chain and append it to whatever mp already holds */
      if (last)
      {
            last->next = 0;
            if (mp->magiclast)
                  mp->magiclast->next = first;
            else
                  mp->magic = first;
            mp->magiclast = last;
      }
      /* ep is the spare entry that was allocated ahead but never completed */
      vmfree(mp->vm, ep);
      if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
      {
            if (lev < 0)
                  (*mp->disc->errorf)(mp, mp->disc, 1, "too many } operators");
            else if (lev > 0)
                  (*mp->disc->errorf)(mp, mp->disc, 1, "not enough } operators");
            if (ret)
                  (*mp->disc->errorf)(mp, mp->disc, 2, "%c: function has no return", ret->offset + 'a');
      }
      error_info.file = 0;
      error_info.line = 0;
      return 0;
}

/*
 * load a magic file into mp
 */

int
magicload(register Magic_t* mp, const char* file, unsigned long flags)
{
      register char*          s;
      register char*          e;
      register char*          t;
      int               n;
      int               found;
      int               list;
      Sfio_t*                 fp;

      mp->flags = mp->disc->flags | flags;
      found = 0;
      if (list = !(s = (char*)file) || !*s || (*s == '-' || *s == '.') && !*(s + 1))
      {
            if (!(s = getenv(MAGIC_FILE_ENV)) || !*s)
                  s = MAGIC_FILE;
      }
      for (;;)
      {
            if (!list)
                  e = 0;
            else if (e = strchr(s, ':'))
            {
                  /*
                   * ok, so ~ won't work for the last list element
                   * we do it for MAGIC_FILES_ENV anyway
                   */

                  if ((strneq(s, "~/", n = 2) || strneq(s, "$HOME/", n = 6) || strneq(s, "${HOME}/", n = 8)) && (t = getenv("HOME")))
                  {
                        sfputr(mp->tmp, t, -1);
                        s += n - 1;
                  }
                  sfwrite(mp->tmp, s, e - s);
                  if (!(s = sfstruse(mp->tmp)))
                        goto nospace;
            }
            if (!*s || streq(s, "-"))
                  s = MAGIC_FILE;
            if (!(fp = sfopen(NiL, s, "r")))
            {
                  if (list)
                  {
                        if (!(t = pathpath(mp->fbuf, s, "", PATH_REGULAR|PATH_READ)) && !strchr(s, '/'))
                        {
                              strcpy(mp->fbuf, s);
                              sfprintf(mp->tmp, "%s/%s", MAGIC_DIR, mp->fbuf);
                              if (!(s = sfstruse(mp->tmp)))
                                    goto nospace;
                              if (!(t = pathpath(mp->fbuf, s, "", PATH_REGULAR|PATH_READ)))
                                    goto next;
                        }
                        if (!(fp = sfopen(NiL, t, "r")))
                              goto next;
                  }
                  else
                  {
                        if (mp->disc->errorf)
                              (*mp->disc->errorf)(mp, mp->disc, 3, "%s: cannot open magic file", s);
                        return -1;
                  }
            }
            found = 1;
            n = load(mp, s, fp);
            sfclose(fp);
            if (n && !list)
                  return -1;
      next:
            if (!e)
                  break;
            s = e + 1;
      }
      if (!found)
      {
            if (mp->flags & MAGIC_VERBOSE)
            {
                  if (mp->disc->errorf)
                        (*mp->disc->errorf)(mp, mp->disc, 2, "cannot find magic file");
            }
            return -1;
      }
      return 0;
 nospace:
      if (mp->disc->errorf)
            (*mp->disc->errorf)(mp, mp->disc, 3, "out of space");
      return -1;
}

/*
 * open a magic session
 *
 * the session and everything it allocates live in a private vmalloc
 * region; disc supplies the flags and the error function
 *
 * returns the new session, or 0 if it could not be set up
 */

Magic_t*
magicopen(Magicdisc_t* disc)
{
      Vmalloc_t*        region;
      Magic_t*          mp;
      unsigned char*    tab[CC_MAPS + 1];
      int               code;
      int               k;
      int               bits;
      int               ch;

      region = vmopen(Vmdcheap, Vmbest, 0);
      if (!region)
            return 0;
      mp = vmnewof(region, 0, Magic_t, 1, 0);
      if (!mp)
      {
            vmclose(region);
            return 0;
      }
      mp->id = lib;
      mp->disc = disc;
      mp->vm = region;
      mp->flags = disc->flags;

      /*
       * regex discipline: errors go to the caller's error function and
       * memory comes from the session region
       */

      mp->redisc.re_version = REG_VERSION;
      mp->redisc.re_flags = REG_NOFREE;
      mp->redisc.re_errorf = (regerror_t)disc->errorf;
      mp->redisc.re_resizef = (regresize_t)vmgetmem;
      mp->redisc.re_resizehandle = (void*)mp->vm;

      /*
       * info name table, preloaded from info[]
       */

      mp->dtdisc.key = offsetof(Info_t, name);
      mp->dtdisc.link = offsetof(Info_t, link);
      mp->tmp = sfstropen();
      if (!mp->tmp)
            goto bad;
      mp->infotab = dtnew(mp->vm, &mp->dtdisc, Dthash);
      if (!mp->infotab)
            goto bad;
      for (k = 0; k < elementsof(info); k++)
            dtinsert(mp->infotab, &info[k]);

      /*
       * combine the CCTYPE() of every byte value under each code set
       * map into one word per byte, CC_BIT bits per map with map 0 in
       * the low bits
       */

      for (k = 0; k < CC_MAPS; k++)
            tab[k] = ccmap(k, CC_ASCII);
      mp->x2n = ccmap(CC_ALIEN, CC_NATIVE);
      for (code = 0; code <= UCHAR_MAX; code++)
      {
            bits = 0;
            for (k = CC_MAPS - 1; k >= 0; k--)
            {
                  ch = ccmapchr(tab[k], code);
                  bits = (bits << CC_BIT) | CCTYPE(ch);
            }
            mp->cctype[code] = bits;
      }
      return mp;
 bad:
      magicclose(mp);
      return 0;
}

/*
 * close a magicopen() session
 *
 * closes the scratch string stream and the session region, when set
 * returns 0 on success, -1 if mp is 0
 */

int
magicclose(register Magic_t* mp)
{
      if (mp)
      {
            if (mp->tmp)
                  sfstrclose(mp->tmp);
            if (mp->vm)
                  vmclose(mp->vm);
            return 0;
      }
      return -1;
}

/*
 * return the magic string for file with optional stat info st
 *
 * fp, if not 0, is the open stream for file; its position is saved
 * before type() is called and restored afterwards
 * with MAGIC_MIME set the mime type is returned, otherwise the
 * description is assembled in mp->tmp with size and mode annotations
 * never returns 0
 */

char*
magictype(register Magic_t* mp, Sfio_t* fp, const char* file, register struct stat* st)
{
      off_t pos;
      char* result;

      mp->flags = mp->disc->flags;
      mp->mime = 0;
      if (st)
      {
            mp->fp = fp;
            if (mp->fp)
                  pos = sfseek(mp->fp, (off_t)0, SEEK_CUR);
            result = type(mp, file, st, mp->tbuf, sizeof(mp->tbuf));
            if (mp->fp)
                  sfseek(mp->fp, pos, SEEK_SET);
            if (!(mp->flags & MAGIC_MIME))
            {
                  /*
                   * prefix for small regular files, then the type,
                   * then the mode annotations
                   */

                  if (S_ISREG(st->st_mode) && (st->st_size > 0) && (st->st_size < 128))
                        sfprintf(mp->tmp, "%s ", T("short"));
                  sfprintf(mp->tmp, "%s", result);
                  if (!mp->fp && (st->st_mode & (S_IXUSR|S_IXGRP|S_IXOTH)))
                        sfprintf(mp->tmp, ", %s", S_ISDIR(st->st_mode) ? T("searchable") : T("executable"));
                  if (st->st_mode & S_ISUID)
                        sfprintf(mp->tmp, ", setuid=%s", fmtuid(st->st_uid));
                  if (st->st_mode & S_ISGID)
                        sfprintf(mp->tmp, ", setgid=%s", fmtgid(st->st_gid));
                  if (st->st_mode & S_ISVTX)
                        sfprintf(mp->tmp, ", sticky");
                  result = sfstruse(mp->tmp);
                  if (!result)
                        result = T("out of space");
            }
      }
      else
            result = T("cannot stat");
      if (mp->flags & MAGIC_MIME)
            result = mp->mime;
      return result ? result : T("error");
}

/*
 * list the magic table in mp on sp
 */

int
magiclist(register Magic_t* mp, register Sfio_t* sp)
{
      register Entry_t* ep = mp->magic;
      register Entry_t* rp = 0;

      mp->flags = mp->disc->flags;
      sfprintf(sp, "cont\toffset\ttype\top\tmask\tvalue\tmime\tdesc\n");
      while (ep)
      {
            sfprintf(sp, "%c %c\t", ep->cont, ep->nest);
            if (ep->expr)
                  sfprintf(sp, "%s", ep->expr);
            else
                  sfprintf(sp, "%ld", ep->offset);
            sfprintf(sp, "\t%s%c\t%c\t%lo\t", ep->swap == (char)~3 ? "L" : ep->swap == (char)~0 ? "B" : "", ep->type, ep->op, ep->mask);
            switch (ep->type)
            {
            case 'm':
            case 's':
                  sfputr(sp, fmtesc(ep->value.str), -1);
                  break;
            case 'V':
                  switch (ep->op)
                  {
                  case 'l':
                        sfprintf(sp, "loop(%d,%d,%d,%d)", ep->value.loop->start, ep->value.loop->size, ep->value.loop->count, ep->value.loop->offset);
                        break;
                  case 'v':
                        sfprintf(sp, "vcodex()");
                        break;
                  default:
                        sfprintf(sp, "%p", ep->value.str);
                        break;
                  }
                  break;
            default:
                  sfprintf(sp, "%lo", ep->value.num);
                  break;
            }
            sfprintf(sp, "\t%s\t%s\n", ep->mime ? ep->mime : "", fmtesc(ep->desc));
            if (ep->cont == '$' && !ep->value.lab->mask)
            {
                  rp = ep;
                  ep = ep->value.lab;
            }
            else
            {
                  if (ep->cont == ':')
                  {
                        ep = rp;
                        ep->value.lab->mask = 1;
                  }
                  ep = ep->next;
            }
      }
      return 0;
}

Generated by  Doxygen 1.6.0   Back to index