Logo Search packages:      
Sourcecode: ksh version File versions

vddelta.c

/***********************************************************************
*                                                                      *
*               This software is part of the ast package               *
*           Copyright (c) 1995-2007 AT&T Knowledge Ventures            *
*                      and is licensed under the                       *
*                  Common Public License, Version 1.0                  *
*                      by AT&T Knowledge Ventures                      *
*                                                                      *
*                A copy of the License is available at                 *
*            http://www.opensource.org/licenses/cpl1.0.txt             *
*         (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9)         *
*                                                                      *
*              Information and Software Systems Research               *
*                            AT&T Research                             *
*                           Florham Park NJ                            *
*                                                                      *
*                   Phong Vo <kpv@research.att.com>                    *
*                                                                      *
***********************************************************************/
#include    "vdelhdr.h"

/*    Compute a transformation that takes source data to target data
**
**    Written by Kiem-Phong Vo, kpv@research.att.com, 5/20/94
*/

#ifdef DEBUG
long  S_copy, S_add;    /* amount of input covered by COPY and ADD      */
long  N_copy, N_add;    /* # of COPY and ADD instructions         */
long  M_copy, M_add;    /* max size of a COPY or ADD instruction  */
long  N_merge;    /* # of merged instructions               */
#endif

#define MERGABLE(a,c,k) ((a) > 0 && A_ISTINY(a) && \
                   (c) > 0 && C_ISTINY(c) && \
                   (k) >= K_SELF )

typedef struct _table_s Table_t;
struct _table_s
{     Vdio_t            io;         /* io structure         */
      uchar*            src;        /* source string  */
      int         n_src;
      uchar*            tar;        /* target string  */
      int         n_tar;
      K_DDECL(quick,recent,rhere);  /* address caches */
      int*        link;       /* links of elements    */
      int         size;       /* size of hash table   */
      int*        hash;       /* hash table           */
};

/* encode and output delta instructions */
#if __STD_C
static int vdputinst(Table_t* tab, uchar* begs, uchar* here, reg int copy, int n_copy)
#else
static int vdputinst(tab, begs, here, copy, n_copy)
Table_t*    tab;
uchar*            begs; /* ADD data if any      */
uchar*            here; /* current location     */
reg int           copy; /* best match if >= 0   */
int         n_copy;     /* length of match      */
#endif
{
      reg int     n_add, i_add, i_copy, k_type;
      reg int     n, c_addr, best, d;

      n_add = begs ? here-begs : 0;       /* add size       */
      c_addr = (here-tab->tar)+tab->n_src;      /* current address      */
      k_type = 0;

      if(n_copy > 0)    /* process the COPY instruction */
      {     /**/DBTOTAL(N_copy,1); DBTOTAL(S_copy,n_copy); DBMAX(M_copy,n_copy);

            best = copy;
            k_type = K_SELF;
            if((d = c_addr - copy) < best)
            {     best = d;
                  k_type = K_HERE;
            }
            for(n = 0; n < K_RTYPE; ++n)
            {     if((d = copy - tab->recent[n]) < 0 || d >= best)
                        continue;
                  best = d;
                  k_type = K_RECENT+n;
            }
            if(best >= I_MORE && tab->quick[n = copy%K_QSIZE] == copy)
            {     for(d = K_QTYPE-1; d > 0; --d)
                        if(n >= (d<<VD_BITS) )
                              break;
                  best = n - (d<<VD_BITS); /**/ASSERT(best < (1<<VD_BITS));
                  k_type = K_QUICK+d;
            }

            /**/ASSERT(best >= 0);
            /**/ASSERT((k_type+K_MERGE) < (1<<I_BITS) );

            /* update address caches */
            K_UPDATE(tab->quick,tab->recent,tab->rhere,copy);

            /* see if mergable to last ADD instruction */
            if(MERGABLE(n_add,n_copy,k_type) )
            {     /**/DBTOTAL(N_merge,1);
                  i_add = K_TPUT(k_type)|A_TPUT(n_add)|C_TPUT(n_copy);
            }
            else
            {     i_copy = K_PUT(k_type);
                  if(C_ISLOCAL(n_copy) )
                        i_copy |= C_LPUT(n_copy);
            }
      }

      if(n_add > 0)
      {     /**/DBTOTAL(N_add,1); DBTOTAL(S_add,n_add); DBMAX(M_add,n_add);

            if(!MERGABLE(n_add,n_copy,k_type) )
                  i_add = A_ISLOCAL(n_add) ? A_LPUT(n_add) : 0;

            if(VDPUTC((Vdio_t*)tab,i_add) < 0 )
                  return -1;
            if(!A_ISLOCAL(n_add) &&
               (*_Vdputu)((Vdio_t*)tab, (ulong)A_PUT(n_add)) < 0 )
                  return -1;
            if((*_Vdwrite)((Vdio_t*)tab, begs, n_add) < 0 )
                  return -1;
      }

      if(n_copy > 0)
      {     if(!MERGABLE(n_add,n_copy,k_type) && VDPUTC((Vdio_t*)tab,i_copy) < 0 )
                  return -1;

            if(!C_ISLOCAL(n_copy) &&
               (*_Vdputu)((Vdio_t*)tab, (ulong)C_PUT(n_copy)) < 0 )
                  return -1;

            if(k_type >= K_QUICK && k_type < (K_QUICK+K_QTYPE) )
            {     if(VDPUTC((Vdio_t*)tab,(uchar)best) < 0 )
                        return -1;
            }
            else
            {     if((*_Vdputu)((Vdio_t*)tab, (ulong)best) < 0 )
                        return -1;
            }
      }

      return 0;
}


/* Fold a string */
#if __STD_C
static int vdfold(Table_t* tab, reg uchar* fold, reg uchar* endfold, int target)
#else
static int vdfold(tab, fold, endfold, target)
Table_t*    tab;
reg uchar*  fold;       /* start of area to fold      */
reg uchar*  endfold;    /* end of area to fold        */
int         target;           /* 1 if doing the target stream     */
#endif
{
      reg ulong   key, n;
      reg uchar   *sm, *ends, *ss, *endh;
      reg int           m, list, curm, bestm;
      reg uchar   *add;
      reg int           head, len, n_src = tab->n_src;
      reg uchar   *src = tab->src, *tar = tab->tar;
      reg int           size = tab->size, *link = tab->link, *hash = tab->hash;

      if((endfold-fold) < M_MIN)    /* not much to do */
      {     add = fold;
            goto done;
      }

      if(target)
            curm = (fold - tab->tar) + n_src;
      else  curm = (fold - tab->src);
      bestm = -1;
      len = M_MIN-1;
      add = NIL(uchar*);
      HINIT(key,fold,n);
      for(;;)
      {     for(;;)     /* search for the longest match */
            {     if((m = hash[key&size]) < 0)
                        goto endsearch;
                  list = m = link[m];     /* head of list */

                  if(bestm >= 0) /* skip over past elements */
                        for(n = bestm+len; m < n;)
                              if((m = link[m]) == list)
                                    goto endsearch;

                  head = len - (M_MIN-1); /* header before the match */
                  endh = fold+head;
                  for(;;)
                  {     if((n = m) < n_src)
                        {     if(n < head)
                                    goto next;
                              sm = src + n;
                        }
                        else
                        {     if((n -= n_src) < head)
                                    goto next;
                              sm = tar + n;
                        }

                        /* make sure that the M_MIN bytes match */
                        if(!EQUAL(endh,sm))
                              goto next;

                        /* make sure this is a real match */
                        for(sm -= head, ss = fold; ss < endh; )
                              if(*sm++ != *ss++)
                                    goto next;

                        /* extend forward as much as possible */
                        ss += M_MIN;
                        sm += M_MIN;
                        ends = endfold;
                        if(m < n_src && (n = (src+n_src)-sm) < (ends-fold) )
                              ends = fold+n;
                        for(;; ++ss, ++sm)
                              if(ss >= ends || *sm != *ss)
                                    goto extend;

                  next: if((m = link[m]) == list )
                              goto endsearch;
                  }

            extend: bestm = m-head;
                  n = len;
                  len = ss-fold;
                  if(ss >= endfold) /* already match everything */
                        goto endsearch;

                  /* check for a longer match */
                  ss -= M_MIN-1;
                  if(len == n+1)
                        HNEXT(key,ss,n);
                  else  HINIT(key,ss,n);
            }

      endsearch:
            if(bestm >= 0)
            {     if(target && vdputinst(tab,add,fold,bestm,len) < 0)
                        return -1;

                  /* add a sufficient number of suffices */
                  ends = (fold += len);
                  ss = ends - (M_MIN-1);
                  if(target)
                        curm = n_src + (ss-tar);
                  else  curm = (ss-src);

                  len = M_MIN-1;
                  add = NIL(uchar*);
                  bestm = -1;
            }
            else
            {     if(!add)
                        add = fold;
                  ss = fold;
                  ends = (fold += 1);     /* add one prefix */
            }

            if(ends > (endfold - (M_MIN-1)) )
                  ends = endfold - (M_MIN-1);

            if(ss < ends) for(;;)   /* add prefices/suffices */
            {     n = key&size;
                  if((m = hash[n]) < 0 )
                        link[curm] = curm;
                  else
                  {     link[curm] = link[m];
                        link[m] = curm;
                  }
                  hash[n] = curm++;

                  if((ss += 1) >= ends)
                        break;
                  HNEXT(key,ss,n);
            }

            if((endfold-fold) < M_MIN)    /* too short to match */
            {     if(!add)
                        add = fold;
                  goto done;
            }

            HNEXT(key,fold,n);
      }

done:
      if(target)
      {     if((len = (tab->tar+tab->n_tar) - endfold) > 0 ) /* match at end */
                  bestm = n_src - len;
            else  bestm = -1;
            if(add || bestm >= 0)
                  return vdputinst(tab,add,endfold,bestm,len);
      }

      return 0;
}

#if __STD_C
static int vdprocess(Table_t* tab)
#else
static int vdprocess(tab)
Table_t*    tab;
#endif
{
      reg uchar   *tar, *src, *endsrc, *endtar, *endt;
      reg int           hn, tn, n_src, n_tar;

      /* check boundary conditions */
      src = tab->src;
      tar = tab->tar;
      if((n_tar = tab->n_tar) <= 0)
            return 0;
      if((n_src = tab->n_src) <= 0)
            return vdfold(tab,tar,tar+n_tar,1);

      /* see if there is a large enough match at the start */
#define LARGE_MATCH     (8*M_MIN)
      endtar = tar + (n_tar < n_src ? n_tar : n_src);
      for(; tar < endtar; ++tar, ++src)
            if(*tar != *src)
                  break;
      if((hn = tar - tab->tar) < LARGE_MATCH )
      {     tar = tab->tar;
            src = tab->src;
            hn  = 0;
      }
      else
      {     if(vdputinst(tab,NIL(uchar*),tab->tar,0,hn) < 0)
                  return -1;
            if(hn == n_tar)
                  return 0;
      }

      /* see if there is a large enough match at the end */
      endtar = tab->tar + n_tar - 1;
      endsrc = tab->src + n_src - 1;
      endt = endtar - (n_tar < n_src ? n_tar : n_src);
      if(endt < tar-1)
            endt = tar-1;
      for(; endtar > endt; --endtar, --endsrc)
            if(*endtar != *endsrc)
                  break;
      endtar += 1; endsrc += 1;

      if((tn = (tab->tar+n_tar) - endtar) < LARGE_MATCH)
      {     endtar = tab->tar+n_tar;
            endsrc = tab->src+n_src;
            tn = 0;
      }

      /* maintain big enough data for matching the remained */
      if((endtar-tar) >= (n_tar/4))
      {     src -= hn/4;
            endsrc += tn/4;
      }
      if(vdfold(tab,src,endsrc,0) < 0)
            return -1;

      return vdfold(tab,tar,endtar,1);
}


#if __STD_C
long vddelta(Vddisc_t* source, Vddisc_t* target, Vddisc_t* delta)
#else
long vddelta(source, target, delta)
Vddisc_t*   source;     /* source data                */
Vddisc_t*   target;     /* target data                */
Vddisc_t*   delta;      /* transform output data      */
#endif
{
      reg int           size, k, n;
      reg long    p, n_src, n_tar, window;
      Table_t           tab;

      if(!target || (n_tar = target->size) < 0)
            return -1;
      if(n_tar > 0 && !target->data && !target->readf)
            return -1;

      if((n_src = source ? source->size : 0) < 0)
            n_src = 0;
      if(n_src > 0 && !source->data && !source->readf)
            return -1;

      if(!delta || (!delta->data && !delta->writef) )
            return -1;

      tab.n_src = tab.n_tar = tab.size = 0;
      tab.tar = tab.src = NIL(uchar*);
      tab.link = NIL(int*);
      tab.hash = NIL(int*);
      INIT(&tab.io,delta);

      /* try to allocate working space */
      window = DFLTWINDOW;
      while(window > 0)
      {     /* space for the target string */
            if((long)(size = (int)window) > n_tar)
                  size = (int)n_tar;
            if(!target->data && !(tab.tar = (uchar*)malloc(size*sizeof(uchar))) )
                  goto reduce_window;
            k = size;

            /* space for sliding header or source string */
            if(n_src <= 0)    /* compression only */
            {     if((long)window >= n_tar)
                  {     size = 0;
                        k += n_tar;
                  }
                  else
                  {     size = (int)HEADER(window);
                        k += size;
                  }
            }
            else        /* differencing */
            {     if((long)(size = (int)window) > n_src)
                        size = (int)n_src;
                  k += size;
                  if(source->data)
                        size = 0;
            }
            if(size > 0 && !(tab.src = (uchar*)malloc(size*sizeof(uchar))) )
                  goto reduce_window;

            /* space for the hash table itself */
            n = k/2;
            do (size = n); while((n &= n-1) != 0);
            if(size < 64)
                  size = 64;
            k += size;

            if(!(tab.hash = (int*)malloc(k*sizeof(int))) )
                  goto reduce_window;

            /* successful */
            tab.link = tab.hash+size;
            tab.size = size-1;
            break;

      reduce_window:
            if(tab.tar)
            {     free((Void_t*)tab.tar);
                  tab.tar = NIL(uchar*);
            }
            if(tab.src)
            {     free((Void_t*)tab.src);
                  tab.src = NIL(uchar*);
            }
            if((window >>= 1) <= 0)
                  return -1;
      }

      /* amount processed */
      n = 0;

      /* output magic bytes and sizes */
      k = sizeof(VD_MAGIC) - 1;
      if((*_Vdwrite)(&tab.io,(uchar*)VD_MAGIC,k) != k ||
         (*_Vdputu)(&tab.io,(ulong)n_tar) <= 0 ||
         (*_Vdputu)(&tab.io,(ulong)n_src) <= 0 ||
         (*_Vdputu)(&tab.io,(ulong)window) <= 0 )
            goto done;

      /* do one window at a time */
      while(n < n_tar)
      {     /* prepare the source string */
            if(n_src <= 0)    /* data compression */
            {     if(n <= 0)
                        tab.n_src = 0;
                  else
                  {     size = (int)HEADER(window);
                        if(target->data)
                              tab.src = tab.tar + tab.n_tar - size;
                        else  memcpy((Void_t*)tab.src,
                                     (Void_t*)(tab.tar + tab.n_tar - size),
                                     size );
                        tab.n_src = size;
                  }
            }
            else  /* data differencing */
            {     if(n < n_src)
                  {     if(window >= n_src)
                              p = 0;
                        else if((n+window) > n_src)
                              p = n_src-window;
                        else  p = n;
                        if((size = n_src-p) > (int)window)
                              size = (int)window;
                        if(source->data)
                              tab.src = (uchar*)source->data + p;
                        else if((*source->readf)(tab.src,size,p,source) != size)
                              goto done;
                        tab.n_src = size;
                  }
                  /* else use last window */
            }

            /* prepare the target string */
            if((size = (int)(n_tar-n)) > (int)window)
                  size = (int)window;
            tab.n_tar = size;
            if(target->data)
                  tab.tar = (uchar*)target->data + n;
            else
            {     size = (*target->readf)(tab.tar, size, (long)n, target);
                  if((long)size != tab.n_tar)
                        goto done;
            }

            /* reinitialize table before processing */
            for(k = tab.size; k >= 0; --k)
                  tab.hash[k] = -1;
            K_INIT(tab.quick,tab.recent,tab.rhere);

            if(vdprocess(&tab) < 0)
                  goto done;

            n += size;
      }

done:
      (void)(*_Vdflsbuf)(&tab.io);

      if(!target->data && tab.tar)
            free((Void_t*)tab.tar);
      if(tab.src && ((n_src <= 0 && !target->data) || (n_src > 0 && !source->data) ) )
            free((Void_t*)tab.src);
      if(tab.hash)
            free((Void_t*)tab.hash);

      return tab.io.here + (tab.io.next - tab.io.data);
}

Generated by  Doxygen 1.6.0   Back to index