Logo Search packages:      
Sourcecode: ksh version File versions  Download package

regcoll.c

/***********************************************************************
*                                                                      *
*               This software is part of the ast package               *
*          Copyright (c) 1985-2008 AT&T Intellectual Property          *
*                      and is licensed under the                       *
*                  Common Public License, Version 1.0                  *
*                    by AT&T Intellectual Property                     *
*                                                                      *
*                A copy of the License is available at                 *
*            http://www.opensource.org/licenses/cpl1.0.txt             *
*         (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9)         *
*                                                                      *
*              Information and Software Systems Research               *
*                            AT&T Research                             *
*                           Florham Park NJ                            *
*                                                                      *
*                 Glenn Fowler <gsf@research.att.com>                  *
*                  David Korn <dgk@research.att.com>                   *
*                   Phong Vo <kpv@research.att.com>                    *
*                                                                      *
***********************************************************************/
#pragma prototyped
/*
 * regex collation symbol support
 */

#include "reglib.h"

#include <ccode.h>

#ifndef UCS_BYTE
#define UCS_BYTE  1
#endif

#include "ucs_names.h"

typedef struct Ucs_map_s
{
      Ucs_attr_t        attr[3];
      Ucs_code_t        code;
      const char*       name;
      Dtlink_t          link;
      struct Ucs_map_s* next;
} Ucs_map_t;

#define setattr(a,i)    ((a)[(i)>>5]|=(1<<((i)&((1<<5)-1))))
#define tstattr(a,i)    ((a)[(i)>>5]&(1<<((i)&((1<<5)-1))))
#define clrattr(a,i)    ((a)[(i)>>5]&=~(1<<((i)&((1<<5)-1))))

static struct Local_s
{
      int         fatal;
      Dt_t*       attrs;
      Dt_t*       names;
      Dtdisc_t    dtdisc;
#if CC_NATIVE != CC_ASCII
      unsigned char*    a2n;
#endif
} local;

/*
 * initialize the writeable tables from the readonly data
 * the tables are big enough to be concerned about text vs. data vs. bss
 *    UCS_BYTE==0 100K
 *    UCS_BYTE==1  20K
 */

static int
initialize(void)
{
      register int            i;
      register Ucs_map_t*     a;
      register Ucs_map_t*     w;

      if (local.fatal)
            return -1;
      local.dtdisc.link = offsetof(Ucs_map_t, link);
      local.dtdisc.key = offsetof(Ucs_map_t, name);
      local.dtdisc.size = -1;
      if (!(w = (Ucs_map_t*)malloc(sizeof(Ucs_map_t) * (elementsof(ucs_attrs) + elementsof(ucs_names)))))
      {
            local.fatal = 1;
            return -1;
      }
      if (!(local.attrs = dtopen(&local.dtdisc, Dttree)))
      {
            free(w);
            local.fatal = 1;
            return -1;
      }
      if (!(local.names = dtopen(&local.dtdisc, Dttree)))
      {
            free(w);
            dtclose(local.attrs);
            local.fatal = 1;
            return -1;
      }
      for (i = 0; i < elementsof(ucs_attrs); i++, w++)
      {
            memcpy(w, &ucs_attrs[i], offsetof(Ucs_dat_t, table));
            w->name = ucs_strings[ucs_attrs[i].table] + ucs_attrs[i].index;
            w->next = 0;
            dtinsert(local.attrs, w);
      }
      for (i = 0; i < elementsof(ucs_names); i++, w++)
      {
            memcpy(w, &ucs_names[i], offsetof(Ucs_dat_t, table));
            w->name = ucs_strings[ucs_names[i].table] + ucs_names[i].index;
            w->next = 0;
            if (a = (Ucs_map_t*)dtsearch(local.names, w))
            {
                  while (a->next)
                        a = a->next;
                  a->next = w;
            }
            else
                  dtinsert(local.names, w);
      }
#if CC_NATIVE != CC_ASCII
      local.a2n = ccmap(CC_ASCII, CC_NATIVE);
#endif
      return 0;
}

/*
 * return the collating symbol delimited by [c c], where c is either '=' or '.'
 * s points to the first char after the initial [
 * if e!=0 it is set to point to the next char in s on return
 *
 * the collating symbol is converted to multibyte in <buf,size>
 * the return value is:
 *    -1    syntax error or buf not large enough
 *    >=0   size with 0-terminated mb collation element
 *          or ligature value in buf
 */

int
regcollate(register const char* s, char** e, char* buf, int size)
{
      register int            c;
      register char*          u;
      register char*          b;
      register char*          x;
      register Ucs_map_t*     a;
      Ucs_map_t*        z;
      const char*       t;
      const char*       v;
      int               n;
      int               r;
      int               ul;
      int               term;
      wchar_t                 w[2];
      Ucs_attr_t        attr[3];

      if (size < 2)
            r = -1;
      else if ((term = *s++) != '.' && term != '=')
      {
            s--;
            r = -1;
      }
      else if (*s == term && *(s + 1) == ']')
            r = -1;
      else
      {
            t = s;
            mbchar(s);
            if ((n = (s - t)) == 1)
            {
                  if (*s == term && *(s + 1) == ']')
                  {
                        s += 2;
                        r = -1;
                  }
                  else
                  {
                        if (!local.attrs && initialize())
                              return -1;
                        attr[0] = attr[1] = attr[2] = 0;
                        ul = 0;
                        b = buf;
                        x = buf + size - 2;
                        r = 1;
                        s = t;
                        do
                        {
                              v = s;
                              u = b;
                              for (;;)
                              {
                                    if (!(c = *s++))
                                          return -1;
                                    if (c == term)
                                    {
                                          if (!(c = *s++))
                                                return -1;
                                          if (c != term)
                                          {
                                                if (c != ']')
                                                      return -1;
                                                r = -1;
                                                break;
                                          }
                                    }
                                    if (c == ' ' || c == '-' && u > b && *s != ' ' && *s != '-')
                                          break;
                                    if (isupper(c))
                                          c = tolower(c);
                                    if (u > x)
                                          break;
                                    *u++ = c;
                              }
                              *u = 0;
                              if (a = (Ucs_map_t*)dtmatch(local.attrs, b))
                                    setattr(attr, a->code);
                              else
                              {
                                    if (u < x)
                                          *u++ = ' ';
                                    if (b == buf)
                                    {
                                          if (isupper(*v))
                                                ul = UCS_UC;
                                          else if (islower(*v))
                                                ul = UCS_LC;
                                    }
                                    b = u;
                              }
                        } while (r > 0);
                        if (b > buf && *(b - 1) == ' ')
                              b--;
                        *b = 0;
                        attr[0] &= ~((Ucs_attr_t)1);
                        if (ul)
                        {
                              if (tstattr(attr, UCS_UC) || tstattr(attr, UCS_LC))
                                    ul = 0;
                              else
                                    setattr(attr, ul);
                        }
                        if (z = (Ucs_map_t*)dtmatch(local.names, buf))
                              for(;;)
                              {
                                    for (a = z; a; a = a->next)
                                          if ((attr[0] & a->attr[0]) == attr[0] && (attr[1] & a->attr[1]) == attr[1] && (attr[2] & a->attr[2]) == attr[2])
                                          {
                                                if (a->code <= 0xff)
                                                {
#if CC_NATIVE != CC_ASCII
                                                      buf[0] = local.a2n[a->code];
#else
                                                      buf[0] = a->code;
#endif
                                                      buf[r = 1] = 0;
                                                      ul = 0;
                                                      break;
                                                }
                                                w[0] = a->code;
                                                w[1] = 0;
                                                if ((r = wcstombs(buf, w, size)) > 0)
                                                {
                                                      r--;
                                                      ul = 0;
                                                }
                                                break;
                                          }
                                    if (!ul)
                                          break;
                                    clrattr(attr, ul);
                                    ul = 0;
                              }
                  }
                  if (r < 0)
                  {
                        if ((r = s - t - 2) > (size - 1))
                              return -1;
                        memcpy(buf, t, r);
                        buf[r] = 0;
                  }
            }
            else if (*s++ != term || *s++ != ']')
            {
                  s--;
                  r = -1;
            }
            else if (n > (size - 1))
                  r = -1;
            else
            {
                  memcpy(buf, t, n);
                  buf[r = n] = 0;
            }
      }
      if (e)
            *e = (char*)s;
      return r;
}

Generated by  Doxygen 1.6.0   Back to index