Logo Search packages:      
Sourcecode: ksh version File versions

wclib.c

/***********************************************************************
*                                                                      *
*               This software is part of the ast package               *
*           Copyright (c) 1992-2007 AT&T Knowledge Ventures            *
*                      and is licensed under the                       *
*                  Common Public License, Version 1.0                  *
*                      by AT&T Knowledge Ventures                      *
*                                                                      *
*                A copy of the License is available at                 *
*            http://www.opensource.org/licenses/cpl1.0.txt             *
*         (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9)         *
*                                                                      *
*              Information and Software Systems Research               *
*                            AT&T Research                             *
*                           Florham Park NJ                            *
*                                                                      *
*                 Glenn Fowler <gsf@research.att.com>                  *
*                  David Korn <dgk@research.att.com>                   *
*                                                                      *
***********************************************************************/
#pragma prototyped
/*
 * David Korn
 * AT&T Bell Laboratories
 *
 * library interface for word count
 */

#include <cmd.h>
#include <wc.h>
#include <ctype.h>

#if _hdr_wchar && _hdr_wctype

#include <wchar.h>
#include <wctype.h>

#else

#ifndef iswspace
#define iswspace(x)     isspace(x)
#endif

#endif

#define endline(c)      (((signed char)-1)<0?(c)<0:(c)==((char)-1))
#define mbok(p,n) (((n)<1)?0:mbwide()?((*ast.mb_towc)(NiL,(char*)(p),n)>=0):1)

Wc_t *wc_init(int mode)
{
      register int      n;
      register int      w;
      Wc_t*       wp;

      if(!(wp = (Wc_t*)stakalloc(sizeof(Wc_t))))
            return(0);
      wp->mode = mode;
      w = mode & WC_WORDS;
      for(n=(1<<CHAR_BIT);--n >=0;)
            wp->space[n] = w ? !!isspace(n) : 0;
      wp->space['\n'] = -1;
      return(wp);
}

/*
 * compute the line, word, and character count for file <fd>
 */
int wc_count(Wc_t *wp, Sfio_t *fd, const char* file)
{
      register signed char    *space = wp->space;
      register unsigned char  *cp;
      register Sfoff_t  nchars;
      register Sfoff_t  nwords;
      register Sfoff_t  nlines;
      register Sfoff_t  eline;
      register Sfoff_t  longest;
      register ssize_t  c;
      register unsigned char  *endbuff;
      register int            lasttype = 1;
      unsigned int            lastchar;
      unsigned char           *buff;
      wchar_t                 x;

      sfset(fd,SF_WRITE,1);
      nlines = nwords = nchars = 0;
      wp->longest = 0;
      if (wp->mode & (WC_LONGEST|WC_MBYTE))
      {
            longest = 0;
            eline = -1;
            cp = buff = endbuff = 0;
            for (;;)
            {
                  if (!mbok(cp, endbuff-cp))
                  {
                        if (buff)
                              sfread(fd, buff, cp-buff);
                        if (!(buff = (unsigned char*)sfreserve(fd, SF_UNBOUND, SF_LOCKR)))
                              break;
                        endbuff = (cp = buff) + sfvalue(fd);
                  }
                  nchars++;
                  x = mbchar(cp);
                  if (x == -1)
                  {
                        if (eline != nlines && !(wp->mode & WC_QUIET))
                        {
                              error_info.file = (char*)file;
                              error_info.line = eline = nlines;
                              error(ERROR_SYSTEM|1, "invalid multibyte character");
                              error_info.file = 0;
                              error_info.line = 0;
                        }
                  }
                  else if (x == '\n')
                  {
                        if ((nchars - longest) > wp->longest)
                              wp->longest = nchars - longest;
                        longest = nchars;
                        nlines++;
                        lasttype = 1;
                  }
                  else if (iswspace(x))
                        lasttype = 1;
                  else if (lasttype)
                  {
                        lasttype = 0;
                        nwords++;
                  }
            }
      }
      else
      {
            for (;;)
            {
                  /* fill next buffer and check for end-of-file */
                  if (!(buff = (unsigned char*)sfreserve(fd, 0, 0)) || (c = sfvalue(fd)) <= 0)
                        break;
                  sfread(fd,(char*)(cp=buff),c);
                  nchars += c;
                  /* check to see whether first character terminates word */
                  if(c==1)
                  {
                        if(endline(lasttype))
                              nlines++;
                        if((c = space[*cp]) && !lasttype)
                              nwords++;
                        lasttype = c;
                        continue;
                  }
                  if(!lasttype && space[*cp])
                        nwords++;
                  lastchar = cp[--c];
                  cp[c] = '\n';
                  endbuff = cp+c;
                  c = lasttype;
                  /* process each buffer */
                  for (;;)
                  {
                        /* process spaces and new-lines */
                        do if (endline(c))
                        {
                              for (;;)
                              {
                                    /* check for end of buffer */
                                    if (cp > endbuff)
                                          goto eob;
                                    nlines++;
                                    if (*cp != '\n')
                                          break;
                                    cp++;
                              }
                        } while (c = space[*cp++]);
                        /* skip over word characters */
                        while(!(c = space[*cp++]));
                        nwords++;
                  }
            eob:
                  if((cp -= 2) >= buff)
                        c = space[*cp];
                  else
                        c  = lasttype;
                  lasttype = space[lastchar];
                  /* see if was in word */
                  if(!c && !lasttype)
                        nwords--;
            }
            if(endline(lasttype))
                  nlines++;
            else if(!lasttype)
                  nwords++;
      }
      wp->chars = nchars;
      wp->words = nwords;
      wp->lines = nlines;
      return(0);
}

Generated by  Doxygen 1.6.0   Back to index