Logo Search packages:      
Sourcecode: ksh version File versions  Download package

fmt.c

/***********************************************************************
*                                                                      *
*               This software is part of the ast package               *
*          Copyright (c) 1992-2007 AT&T Intellectual Property          *
*                      and is licensed under the                       *
*                  Common Public License, Version 1.0                  *
*                    by AT&T Intellectual Property                     *
*                                                                      *
*                A copy of the License is available at                 *
*            http://www.opensource.org/licenses/cpl1.0.txt             *
*         (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9)         *
*                                                                      *
*              Information and Software Systems Research               *
*                            AT&T Research                             *
*                           Florham Park NJ                            *
*                                                                      *
*                 Glenn Fowler <gsf@research.att.com>                  *
*                  David Korn <dgk@research.att.com>                   *
*                                                                      *
***********************************************************************/
#pragma prototyped

static const char usage[] =
"[-?\n@(#)$Id: fmt (AT&T Research) 2007-01-02 $\n]"
USAGE_LICENSE
"[+NAME?fmt - simple text formatter]"
"[+DESCRIPTION?\bfmt\b reads the input files and left justifies space "
    "separated words into lines \awidth\a characters or less in length and "
    "writes the lines to the standard output. The standard input is read if "
    "\b-\b or no files are specified. Blank lines and interword spacing are "
    "preserved in the output. Indentation is preserved, and lines with "
    "identical indentation are joined and justified.]"
"[+?\bfmt\b is meant to format mail messages prior to sending, but may "
    "also be useful for other simple tasks. For example, in \bvi\b(1) the "
    "command \b:!}fmt\b will justify the lines in the current paragraph.]"
"[c:crown-margin?Preserve the indentation of the first two lines within "
    "a paragraph, and align the left margin of each subsequent line with "
    "that of the second line.]"
"[o:optget?Format concatenated \boptget\b(3) usage strings.]"
"[s:split-only?Split lines only; do not join short lines to form longer "
    "ones.]"
"[u:uniform-spacing?One space between words, two after sentences.]"
"[w:width?Set the output line width to \acolumns\a.]#[columns:=72]"
    "\n\n"
"[ file ... ]"
    "\n\n"
"[+SEE ALSO?\bmailx\b(1), \bnroff\b(1), \btroff\b(1), \bvi\b(1), "
    "\boptget\b(3)]"
;

#include <cmd.h>
#include <ctype.h>

typedef struct Fmt_s
{
      long  flags;
      char* outp;
      char* outbuf;
      char* endbuf;
      Sfio_t*     in;
      Sfio_t*     out;
      int   indent;
      int   nextdent;
      int   nwords;
      int   prefix;
      int   quote;
      int   retain;
      int   section;
} Fmt_t;

#define INDENT          4
#define TABSZ           8

#define isoption(fp,c)  ((fp)->flags&(1L<<((c)-'a')))
#define setoption(fp,c) ((fp)->flags|=(1L<<((c)-'a')))
#define clroption(fp,c) ((fp)->flags&=~(1L<<((c)-'a')))

static void
outline(Fmt_t* fp)
{
      register char*    cp = fp->outbuf;
      int         n = 0;
      int         c;
      int         d;

      if (!fp->outp)
            return;
      while (fp->outp[-1] == ' ')
            fp->outp--;
      *fp->outp = 0;
      while (*cp++ == ' ')
            n++;
      if (n >= TABSZ)
      {
            n /= TABSZ;
            cp = &fp->outbuf[TABSZ*n];
            while (n--)
                  *--cp = '\t';
      }
      else
            cp = fp->outbuf;
      fp->nwords = 0;
      if (!isoption(fp, 'o'))
            sfputr(fp->out, cp, '\n');
      else if (*cp)
      {
            n = fp->indent;
            if (*cp != '[')
            {
                  if (*cp == ' ')
                        cp++;
                  n += INDENT;
            }
            while (n--)
                  sfputc(fp->out, ' ');
            if (fp->quote)
            {
                  if ((d = (fp->outp - cp)) <= 0)
                        c = 0;
                  else if ((c = fp->outp[-1]) == 'n' && d > 1 && fp->outp[-2] == '\\')
                        c = '}';
                  sfprintf(fp->out, "\"%s%s\"\n", cp, c == ']' || c == '{' || c == '}' ? "" : " ");
            }
            else
                  sfputr(fp->out, cp, '\n');
            if (fp->nextdent)
            {
                  fp->indent += fp->nextdent;
                  fp->endbuf -= fp->nextdent;
                  fp->nextdent = 0;
            }
      }
      fp->outp = 0;
}

static void
split(Fmt_t* fp, char* buf, int splice)
{
      register char*    cp;
      register char*    ep;
      register char*    qp;
      register int      c = 1;
      register int      q = 0;
      register int      n;
      int         prefix;

      for (ep = buf; *ep == ' '; ep++);
      prefix = ep - buf;

      /*
       * preserve blank lines
       */

      if ((*ep == 0 || *buf == '.') && !isoption(fp, 'o'))
      {
            if (*ep)
                  prefix = strlen(buf);
            outline(fp);
            strcpy(fp->outbuf, buf);
            fp->outp = fp->outbuf+prefix;
            outline(fp);
            return;
      }
      if (fp->prefix < prefix && !isoption(fp, 'c'))
            outline(fp);
      if (!fp->outp || prefix < fp->prefix)
            fp->prefix = prefix;
      while (c)
      {
            cp = ep;
            while (*ep == ' ')
                  ep++;
            if (cp != ep && isoption(fp, 'u'))
                  cp = ep-1;
            while (c = *ep)
            {
                  if (c == ' ')
                        break;
                  ep++;

                  /*
                   * skip over \space
                   */

                  if (c == '\\' && *ep)
                        ep++;
            }
            n = (ep-cp);
            if (n && isoption(fp, 'o'))
            {
                  for (qp = cp; qp < ep; qp++)
                        if (*qp == '\\')
                              qp++;
                        else if (*qp == '"')
                              q = !q;
                  if (*(ep-1) == '"')
                        goto skip;
            }
            if (fp->nwords > 0 && &fp->outp[n] >= fp->endbuf && !fp->retain && !q)
                  outline(fp);
      skip:
            if (fp->nwords == 0)
            {
                  if (fp->prefix)
                        memset(fp->outbuf, ' ', fp->prefix);
                  fp->outp = &fp->outbuf[fp->prefix];
                  while (*cp == ' ')
                        cp++;
                  n = (ep-cp);
            }
            memcpy(fp->outp, cp, n);
            fp->outp += n;
            fp->nwords++;
      }
      if (isoption(fp, 's') || *buf == 0)
            outline(fp);
      else if (fp->outp)
      {
            /*
             * two spaces at ends of sentences
             */

            if (!isoption(fp, 'o') && strchr(".:!?", fp->outp[-1]))
                  *fp->outp++ = ' ';
            if (!splice && !fp->retain && (!fp->quote || (fp->outp - fp->outbuf) < 2 || fp->outp[-2] != '\\' || fp->outp[-1] != 'n' && fp->outp[-1] != 't' && fp->outp[-1] != ' '))
                  *fp->outp++ = ' ';
      }
}

static int
dofmt(Fmt_t* fp)
{
      register int      c;
      int         b;
      int         x;
      int         splice;
      char*       cp;
      char*       dp;
      char*       ep;
      char*       lp;
      char*       tp;
      char        buf[8192];

      cp = 0;
      while (cp || (cp = sfgetr(fp->in, '\n', 0)) && !(splice = 0) && (lp = cp + sfvalue(fp->in) - 1) || (cp = sfgetr(fp->in, '\n', SF_LASTR)) && (splice = 1) && (lp = cp + sfvalue(fp->in)))
      {
            if (isoption(fp, 'o'))
            {
                  if (!isoption(fp, 'i'))
                  {
                        setoption(fp, 'i');
                        b = 0;
                        while (cp < lp)
                        {
                              if (*cp == ' ')
                                    b += 1;
                              else if (*cp == '\t')
                                    b += INDENT;
                              else
                                    break;
                              cp++;
                        }
                        fp->indent = roundof(b, INDENT);
                  }
                  else
                        while (cp < lp && (*cp == ' ' || *cp == '\t'))
                              cp++;
                  if (!isoption(fp, 'q') && cp < lp)
                  {
                        setoption(fp, 'q');
                        if (*cp == '"')
                        {
                              ep = lp;
                              while (--ep > cp)
                                    if (*ep == '"')
                                    {
                                          fp->quote = 1;
                                          break;
                                    }
                                    else if (*ep != ' ' && *ep != '\t')
                                          break;
                        }
                  }
            }
      again:
            dp = buf;
            ep = 0;
            for (b = 1;; b = 0)
            {
                  if (cp >= lp)
                  {
                        cp = 0;
                        break;
                  }
                  c = *cp++;
                  if (isoption(fp, 'o'))
                  {
                        if (c == '\\')
                        {
                              x = 0;
                              c = ' ';
                              cp--;
                              while (cp < lp)
                              {
                                    if (*cp == '\\')
                                    {
                                          cp++;
                                          if ((lp - cp) < 1)
                                          {
                                                c = '\\';
                                                break;
                                          }
                                          if (*cp == 'n')
                                          {
                                                cp++;
                                                c = '\n';
                                                if ((lp - cp) > 2)
                                                {
                                                      if (*cp == ']' || *cp == '@' && *(cp + 1) == '(')
                                                      {
                                                            *dp++ = '\\';
                                                            *dp++ = 'n';
                                                            c = *cp++;
                                                            break;
                                                      }
                                                      if (*cp == '\\' && *(cp + 1) == 'n')
                                                      {
                                                            cp += 2;
                                                            *dp++ = '\n';
                                                            break;
                                                      }
                                                }
                                          }
                                          else if (*cp == 't' || *cp == ' ')
                                          {
                                                cp++;
                                                x = 1;
                                                c = ' ';
                                          }
                                          else
                                          {
                                                if (x && dp != buf && *(dp - 1) != ' ')
                                                      *dp++ = ' ';
                                                *dp++ = '\\';
                                                c = *cp++;
                                                break;
                                          }
                                    }
                                    else if (*cp == ' ' || *cp == '\t')
                                    {
                                          cp++;
                                          c = ' ';
                                          x = 1;
                                    }
                                    else
                                    {
                                          if (x && c != '\n' && dp != buf && *(dp - 1) != ' ')
                                                *dp++ = ' ';
                                          break;
                                    }
                              }
                              if (c == '\n')
                              {
                                    c = 0;
                                    goto flush;
                              }
                              if (c == ' ' && (dp == buf || *(dp - 1) == ' '))
                                    continue;
                        }
                        else if (c == '"')
                        {
                              if (b || cp >= lp)
                              {
                                    if (fp->quote)
                                          continue;
                                    fp->section = 0;
                              }
                        }
                        else if (c == '\a')
                        {
                              *dp++ = '\\';
                              c = 'a';
                        }
                        else if (c == '\b')
                        {
                              *dp++ = '\\';
                              c = 'b';
                        }
                        else if (c == '\f')
                        {
                              *dp++ = '\\';
                              c = 'f';
                        }
                        else if (c == '\v')
                        {
                              *dp++ = '\\';
                              c = 'v';
                        }
                        else if (c == ']' && (cp >= lp || *cp != ':' && *cp != '#' && *cp != '!'))
                        {
                              if (cp < lp && *cp == ']')
                              {
                                    cp++;
                                    *dp++ = c;
                              }
                              else
                              {
                                    fp->section = 1;
                                    fp->retain = 0;
                              flush:
                                    *dp++ = c;
                                    *dp = 0;
                                    split(fp, buf, 0);
                                    outline(fp);
                                    goto again;
                              }
                        }
                        else if (fp->section)
                        {
                              if (c == '[')
                              {
                                    if (b)
                                          fp->retain = 1;
                                    else
                                    {
                                          cp--;
                                          c = 0;
                                          goto flush;
                                    }
                                    fp->section = 0;
                              }
                              else if (c == '{')
                              {
                                    x = 1;
                                    for (tp = cp; tp < lp; tp++)
                                    {
                                          if (*tp == '[' || *tp == '\n')
                                                break;
                                          if (*tp == ' ' || *tp == '\t' || *tp == '"')
                                                continue;
                                          if (*tp == '\\' && (lp - tp) > 1)
                                          {
                                                if (*++tp == 'n')
                                                      break;
                                                if (*tp == 't' || *tp == '\n')
                                                      continue;
                                          }
                                          x = 0;
                                          break;
                                    }
                                    if (x)
                                    {
                                          if (fp->endbuf > (fp->outbuf + fp->indent + 2*INDENT))
                                                fp->nextdent = 2*INDENT;
                                          goto flush;
                                    }
                                    else
                                          fp->section = 0;
                              }
                              else if (c == '}')
                              {
                                    if (fp->indent && (b || *(cp - 2) != 'f'))
                                    {
                                          if (b)
                                          {
                                                fp->indent -= 2*INDENT;
                                                fp->endbuf += 2*INDENT;
                                          }
                                          else
                                          {
                                                cp--;
                                                c = 0;
                                          }
                                          goto flush;
                                    }
                                    else
                                          fp->section = 0;
                              }
                              else if (c == ' ' || c == '\t')
                                    continue;
                              else
                                    fp->section = 0;
                        }
                        else if (c == '?' && (cp >= lp || *cp != '?'))
                        {
                              if (fp->retain)
                              {
                                    cp--;
                                    while (cp < lp && *cp != ' ' && *cp != '\t' && *cp != ']' && dp < &buf[sizeof(buf)-3])
                                          *dp++ = *cp++;
                                    if (cp < lp && (*cp == ' ' || *cp == '\t'))
                                          *dp++ = *cp++;
                                    *dp = 0;
                                    split(fp, buf, 0);
                                    dp = buf;
                                    ep = 0;
                                    fp->retain = 0;
                                    if (fp->outp >= fp->endbuf)
                                          outline(fp);
                                    continue;
                              }
                        }
                        else if (c == ' ' || c == '\t')
                              for (c = ' '; *cp == ' ' || *cp == '\t'; cp++);
                  }
                  else if (c == '\b')
                  {
                        if (dp > buf)
                        {
                              dp--;
                              if (ep)
                                    ep--;
                        }
                        continue;
                  }
                  else if (c == '\t')
                  {
                        /*
                         * expand tabs
                         */

                        if (!ep)
                              ep = dp;
                        c = isoption(fp, 'o') ? 1 : TABSZ - (dp - buf) % TABSZ;
                        if (dp >= &buf[sizeof(buf) - c - 3])
                        {
                              cp--;
                              break;
                        }
                        while (c-- > 0)
                              *dp++ = ' ';
                        continue;
                  }
                  else if (!isprint(c))
                        continue;
                  if (dp >= &buf[sizeof(buf) - 3])
                  {
                        tp = dp;
                        while (--tp > buf)
                              if (isspace(*tp))
                              {
                                    cp -= dp - tp;
                                    dp = tp;
                                    break;
                              }
                        ep = 0;
                        break;
                  }
                  if (c != ' ')
                        ep = 0;
                  else if (!ep)
                        ep = dp;
                  *dp++ = c;
            }
            if (ep)
                  *ep = 0;
            else
                  *dp = 0;
            split(fp, buf, splice);
      }
      return 0;
}

int
b_fmt(int argc, char** argv, void *context)
{
      register int      n;
      char*       cp;
      Fmt_t       fmt;
      char        outbuf[8 * 1024];

      fmt.flags = 0;
      fmt.out = sfstdout;
      fmt.outbuf = outbuf;
      fmt.outp = 0;
      fmt.endbuf = &outbuf[72];
      fmt.indent = 0;
      fmt.nextdent = 0;
      fmt.nwords = 0;
      fmt.prefix = 0;
      fmt.quote = 0;
      fmt.retain = 0;
      fmt.section = 1;
      cmdinit(argc, argv, context, ERROR_CATALOG, 0);
      while (n = optget(argv, usage))
            switch (n)
            {
            case 'c':
            case 'o':
            case 's':
            case 'u':
                  setoption(&fmt, n);
                  break;
            case 'w':
                  if (opt_info.num < TABSZ || opt_info.num>= sizeof(outbuf))
                        error(2, "width out of range");
                  fmt.endbuf = &outbuf[opt_info.num];
                  break;
            case ':':
                  error(2, "%s", opt_info.arg);
                  break;
            case '?':
                  error(ERROR_usage(2), "%s", opt_info.arg);
                  break;
            }
      argv += opt_info.index;
      if (error_info.errors)
            error(ERROR_usage(2), "%s", optusage(NiL));
      if (isoption(&fmt, 'o'))
            setoption(&fmt, 'c');
      if (isoption(&fmt, 's'))
            clroption(&fmt, 'u');
      if (cp = *argv)
            argv++;
      do {
            if (!cp || streq(cp, "-"))
                  fmt.in = sfstdin;
            else if (!(fmt.in = sfopen(NiL, cp, "r")))
            {
                  error(ERROR_system(0), "%s: cannot open", cp);
                  error_info.errors = 1;
                  continue;
            }
            dofmt(&fmt);
            if (fmt.in != sfstdin)
                  sfclose(fmt.in);
      } while (cp = *argv++);
      outline(&fmt);
      if (sfsync(sfstdout))
            error(ERROR_system(0), "write error");
      return error_info.errors != 0;
}

Generated by  Doxygen 1.6.0   Back to index