Logo Search packages:      
Sourcecode: ksh version File versions

regcmp.c

/***********************************************************************
*                                                                      *
*               This software is part of the ast package               *
*                  Copyright (c) 1985-2005 AT&T Corp.                  *
*                      and is licensed under the                       *
*                  Common Public License, Version 1.0                  *
*                            by AT&T Corp.                             *
*                                                                      *
*                A copy of the License is available at                 *
*            http://www.opensource.org/licenses/cpl1.0.txt             *
*         (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9)         *
*                                                                      *
*              Information and Software Systems Research               *
*                            AT&T Research                             *
*                           Florham Park NJ                            *
*                                                                      *
*                 Glenn Fowler <gsf@research.att.com>                  *
*                  David Korn <dgk@research.att.com>                   *
*                   Phong Vo <kpv@research.att.com>                    *
*                                                                      *
***********************************************************************/
#pragma prototyped
/*
 * regcmp implementation
 */

#include <ast.h>
#include <libgen.h>
#include <regex.h>
#include <align.h>

/*
 * INC: regcmp() grows its allocation request by this many bytes on
 *      each compile attempt
 * MAX: regcmp() gives up once the request would exceed this many bytes
 * SUB: number of slots in the `$n' substring tables (one per digit 0-9)
 */

#define INC       (2*1024)
#define MAX       (16*1024)
#define SUB       10

/*
 * compiled pattern handle returned by regcmp() and consumed by regex()
 *
 *	cur	next unused byte of the arena; advanced by block()
 *	re	the compiled expression filled in by regcomp()
 *	sub	sub[n] is 1 + the group number whose text is copied to
 *		the n-th output argument of regex(), or 0 for none
 *	nsub	number of leading sub[] entries regex() examines
 *	size	number of arena bytes available starting at buf
 *	buf	start of the arena; regcmp() allocates the structure
 *		with extra trailing space so the arena extends past
 *		the declared array
 */

typedef struct
{
      char*       cur;
      regex_t           re;
      unsigned char     sub[SUB];
      int         nsub;
      size_t            size;
      char        buf[ALIGN_BOUND2];
} Regex_t;

/*
 * __loc1 is assigned by regex() on a successful match and then points
 * at the first matched character within the subject string
 * NOTE(review): __DEFINE__ is declared elsewhere; presumably it expands
 * to the definition of the global with initial value 0 -- confirm
 */

__DEFINE__(char*, __loc1, 0);

/*
 * allocation callback handed to regalloc() by regcmp()
 *
 * handle is the Regex_t whose arena is being carved up
 * only fresh allocations are served: a request that passes an
 * existing block (data != 0) is refused
 * size is rounded up to a multiple of ALIGN_BOUND2 and taken from
 * the front of the unused part of the arena
 * returns the new chunk, or 0 if the request was refused or the
 * arena does not have enough room left
 */

static void*
block(void* handle, void* data, size_t size)
{
      Regex_t*    arena = (Regex_t*)handle;
      char*       chunk;

      if (data)
            return 0;
      size = roundof(size, ALIGN_BOUND2);
      if (size > (arena->buf + arena->size - arena->cur))
            return 0;
      chunk = arena->cur;
      arena->cur += size;
      return (void*)chunk;
}

char*
regcmp(const char* pattern, ...)
{
      register char*          s;
      register Regex_t* re;
      register size_t         n;
      register int            c;
      register int            p;
      int               b;
      int               i;
      int               j;
      int               nsub;
      register Sfio_t*  sp;
      unsigned char           paren[128];
      unsigned char           sub[SUB];
      va_list                 ap;

      va_start(ap, pattern);
      if (!pattern || !*pattern || !(sp = sfstropen()))
            return 0;
      memset(paren, 0, sizeof(paren));
      n = 0;
      p = -1;
      b = 0;
      nsub = 0;
      s = (char*)pattern;
      do
      {
            while (c = *s++)
            {
                  if (c == '\\')
                  {
                        sfputc(sp, c);
                        if (!(c = *s++))
                              break;
                  }
                  else if (b)
                  {
                        if (c == ']')
                              b = 0;
                  }
                  else if (c == '[')
                  {
                        b = 1;
                        if (*s == '^')
                        {
                              sfputc(sp, c);
                              c = *s++;
                        }
                        if (*s == ']')
                        {
                              sfputc(sp, c);
                              c = *s++;
                        }
                  }
                  else if (c == '(')
                  {
                        /*
                         * someone explain in one sentence why
                         * a cast is needed to make this work
                         */

                        if (p < (int)(elementsof(paren) - 1))
                              p++;
                        paren[p] = ++n;
                  }
                  else if (c == ')' && p >= 0)
                  {
                        for (i = p; i > 0; i--)
                              if (paren[i])
                                    break;
                        if (*s == '$' && (j = *(s + 1)) >= '0' && j <= '9')
                        {
                              s += 2;
                              j -= '0';
                              if (nsub <= j)
                              {
                                    if (!nsub)
                                          memset(sub, 0, sizeof(sub));
                                    nsub = j + 1;
                              }
                              sub[j] = paren[i] + 1;
                        }
                        paren[i] = 0;
                  }
                  sfputc(sp, c);
            }
      } while (s = va_arg(ap, char*));
      va_end(ap);
      s = sfstruse(sp);
      re = 0;
      n = 0;
      do
      {
            if ((n += INC) > MAX || !(re = newof(re, Regex_t, 0, n)))
            {
                  if (re)
                        free(re);
                  sfstrclose(sp);
                  return 0;
            }
            re->cur = re->buf;
            re->size = n + ALIGN_BOUND2 - sizeof(Regex_t);
            regalloc(re, block, REG_NOFREE);
            c = regcomp(&re->re, s, REG_EXTENDED|REG_LENIENT|REG_NULL);
            regalloc(NiL, NiL, 0);
      } while (c == REG_ESPACE);
      sfstrclose(sp);
      if (c)
      {
            free(re);
            return 0;
      }
      if (re->nsub = nsub)
            memcpy(re->sub, sub, (nsub + 1) * sizeof(sub[0]));
      return (char*)re;
}

/*
 * match subject against a handle returned by regcmp()
 *
 * the variable arguments are char* output buffers, one for each of
 * the first re->nsub `$n' slots (slot 0 first); all of them are
 * fetched, but only slots that regcmp() bound to a group are written
 * each written buffer receives the text matched by its group as a
 * null terminated string (empty if the group did not take part in
 * the match); the caller must supply buffers that are large enough
 * a null buffer pointer is skipped
 *
 * returns a pointer to the character after the matched text and sets
 * __loc1 to the start of the matched text; returns 0 if handle or
 * subject is null or if there is no match
 */

char*
regex(const char* handle, const char* subject, ...)
{
      register Regex_t* re;
      register int            n;
      register int            i;
      register int            k;
      char*             sub[SUB + 1];
      regmatch_t        match[SUB + 1];
      va_list                 ap;

      if (!(re = (Regex_t*)handle) || !subject)
            return 0;

      /*
       * va_start() follows the argument check so that it is always
       * paired with the va_end() below
       */

      va_start(ap, subject);
      for (n = 0; n < re->nsub; n++)
            sub[n] = va_arg(ap, char*);
      va_end(ap);
      if (regexec(&re->re, subject, SUB + 1, match, 0))
            return 0;
      for (n = 0; n < re->nsub; n++)
            if ((i = re->sub[n]) && sub[n])
            {
                  /* re->sub[] holds the group number biased by 1 */
                  i--;

                  /*
                   * only groups 0..SUB are captured in match[];
                   * a higher group number, or a group that did not
                   * take part in the match (rm_so < 0), yields an
                   * empty string instead of an out of bounds read
                   */

                  if (i > SUB || match[i].rm_so < 0)
                        k = 0;
                  else
                  {
                        k = match[i].rm_eo - match[i].rm_so;
                        memcpy(sub[n], subject + match[i].rm_so, k);
                  }
                  *(sub[n] + k) = 0;
            }
      __loc1 = (char*)subject + match[0].rm_so;
      return (char*)subject + match[0].rm_eo;
}

Generated by  Doxygen 1.6.0   Back to index