/*
_____       _    _    Corso   Italia,  178
(_|__   .  (_   |_|_  56125           Pisa
(_|_) |)|(()_)()| |   tel.  +39  050 46380
  |   |               picosoft@picosoft.it

 Copyright (C) Picosoft s.r.l. 1995-2002

 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation; either version 2, or (at your option)
 any later version.

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with this program; if not, write to the Free Software
 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

*/
# include "sort.h" 
static char rcsid[] = "$Id: sort.cpp 5.4 2001/04/18 15:30:01 picoSoft Exp Marco $";
static char rcsidh[] = sort_h;

# include "perror.h"
extern "C" {
# include <errno.h>
# include <stdlib.h>
# include <string.h>
# include "isam.h"
# include "picoend.h"
# ifdef MSDOS
# include <windows.h>
# ifdef WIN32
# define open(a,b,c) _lopen(a, OF_READ|OF_SHARE_COMPAT)
# else /* WIN32 */
# define open(a,b,c) _lopen(a, READ|OF_SHARE_COMPAT)
# endif /* WIN32 */
# define creat       _lcreat
# define read        _lread
# define lseek       _llseek
# define write       _lwrite
# define close       _lclose
# else /* MSDOS */
extern "C" {
# include <unistd.h>
# include <fcntl.h>
}
# endif /* MSDOS */
}

// BUFFERALLOC: byte budget used by Sort::Sort to size the in-memory record
// buffer (MaxElem = BUFFERALLOC / record length).  The MSDOS build without
// WIN32 uses a smaller budget.
# ifdef WIN32
static const size_t BUFFERALLOC =  256000;
# else
# ifdef MSDOS
static const size_t BUFFERALLOC =  64000;
# else
static const size_t BUFFERALLOC =  256000;
# endif //MSDOS
# endif //WIN32
// NMERGE: number of run files at which Sort::WriteBuffer merges the runs
// instead of creating a new one.
static const int NMERGE = 8;
// MAXELEM: upper bound applied to the number of records kept in memory,
// derived from how many record pointers fit in BUFFERALLOC bytes.
static const size_t MAXELEM = BUFFERALLOC / sizeof (char *); 
             
/*
 * Compare two byte strings of possibly different lengths.
 *
 * c1, len1  first string and its length in bytes
 * c2, len2  second string and its length in bytes
 * coll      optional 256-entry collation table; when null the raw byte
 *           values are compared
 * fill      byte the shorter string is considered to be padded with
 *
 * Returns a negative, zero or positive value when the first string sorts
 * before, equal to or after the second one.
 *
 * When a collation table is supplied the comparison is done on the collated
 * values: two different bytes that map to the same collation weight are
 * treated as equal and the scan continues, instead of reporting the strings
 * as equal at the first such pair.
 */
int
StringCompare (unsigned char * c1, unsigned int len1,
               unsigned char * c2, unsigned int len2, 
                        unsigned char *coll, unsigned char fill)
{
   unsigned int i;
   const unsigned int minLen = (len1 < len2) ? len1 : len2;
   
   if (coll == (unsigned char*) 0) {
      /* Plain byte-wise comparison over the common prefix. */
      for (i = 0; i < minLen; i++, c1++, c2++)
         if (*c1 != *c2)
            return *c1 - *c2;
      /* Compare the tail of the longer string against the fill byte. */
      if (len1 < len2) {
         for ( ; i < len2; i++, c2++)
            if (fill != *c2)
               return fill - *c2;
      } else {
         for ( ; i < len1; i++, c1++)
            if (*c1 != fill)
               return *c1 - fill;
      }
   } else {
      const unsigned char fillWeight = coll[fill];

      /* Same scan, but on collation weights rather than raw bytes. */
      for (i = 0; i < minLen; i++, c1++, c2++)
         if (coll[*c1] != coll[*c2])
            return coll[*c1] - coll[*c2];
      if (len1 < len2) {
         for ( ; i < len2; i++, c2++)
            if (fillWeight != coll[*c2])
               return fillWeight - coll[*c2];
      } else {
         for ( ; i < len1; i++, c1++)
            if (coll[*c1] != fillWeight)
               return coll[*c1] - fillWeight;
      }
   }
   return 0;
}

/*
 * Allocate the scratch storage used for date criteria.  Nothing is done
 * when no date format is set; otherwise a buffer as long as the format
 * string (plus terminator) and two zero-filled TIMESTAMP_STRUCTs are created.
 */
inline void
OrdCriteria::prepareDate()
{
   if (!dateFmt)
      return;

   /* Text buffer sized on the format string, already NUL terminated. */
   dateLen = strlen (dateFmt);
   dateBuffer = new char[dateLen + 1];
   dateBuffer[dateLen] = 0;

   /* Two zeroed timestamps, one per side of a comparison. */
   cfrt1 = new TIMESTAMP_STRUCT;
   memset (cfrt1, 0, sizeof(TIMESTAMP_STRUCT));

   cfrt2 = new TIMESTAMP_STRUCT;
   memset (cfrt2, 0, sizeof(TIMESTAMP_STRUCT));
}

// NOTE(review): PCLASSID is a project macro defined outside this file;
// presumably it emits the class-identification boilerplate for Sort - confirm.
PCLASSID(Sort)

/*
 * Build a run file object for records of `len` bytes.  The record buffer is
 * allocated immediately; no file is created or opened yet.
 */
SortFile::SortFile (int len)
{
   length = len;
   Buffer = new char[length];
   FileName[0] = 0;     /* no temporary file associated yet */
   fd = -1;             /* not open */
}

/*
 * Close the file if it is still open, release the record buffer and remove
 * the temporary file from disk when one was created.
 */
SortFile::~SortFile ()
{
   if (fd >= 0)
      Close();
   /* Buffer comes from new char[]: it must be released with delete[]
      (deleting a null pointer is a no-op, so no test is needed). */
   delete[] Buffer;
   if (FileName[0])
      unlink(FileName);
} 

/*
 * Pick a temporary file name and create the file.
 *
 * Returns 0 on success, otherwise the errno-style code that is also stored
 * in Status.  On failure to obtain a name, FileName is left empty so that
 * the destructor does not try to remove anything.
 */
int
SortFile::Create ()
{
# ifdef MSDOS
# ifdef WIN32
   GetTempPath (sizeof(FileName), FileName);
   GetTempFileName (FileName, "srt", 0, FileName);
# else
   GetTempFileName (GetTempDrive(0), "srt", 0, FileName);
# endif // WIN32
# else
   /* tempnam() returns malloc'ed storage (or null on failure): check it,
      copy it and give it back. */
   /* NOTE(review): tempnam() is race-prone; mkstemp() would be safer. */
   errno = 0;
   char *tmpName = tempnam (0, "srt");
   if (tmpName == 0) {
      FileName[0] = 0;
      Status = errno ? errno : ENOENT;
      return Status;
   }
   strncpy (FileName, tmpName, sizeof(FileName) - 1);
   FileName[sizeof(FileName) - 1] = 0;
   free (tmpName);
# endif // MSDOS
   /* Default error reported if creat() fails without setting errno. */
   errno = ENOENT;
   Status = 0;
# ifdef MSDOS
   fd = creat(FileName, 0);
# else
   fd = creat(FileName, 0777);
# endif
   if (fd < 0)
      Status = errno;
   return Status;
}
       
/*
 * Open the temporary file read-only and reset the read bookkeeping.
 * Returns 0 on success or the error code, which is also left in Status.
 */
int
SortFile::OpenRead()
{
   /* Start from the beginning: nothing read, step-back of two records. */
   offsetRead = 0;
   prevDisp = length << 1;

   /* Default error reported if open() fails without setting errno. */
   errno = ETOOMANY;
   fd = open (FileName, O_RDONLY, 0666);
   Status = 0;
   if (fd < 0)
      Status = errno;
   return Status;
} 

/*
 * Close the underlying descriptor, if any, and clear Status.
 */
void
SortFile::Close ()
{
   Status = 0;
   if (fd < 0)
      return;            /* nothing open */
   close (fd);
   fd = -1;
}

/*
 * Append one record of `length` bytes, taken from l, to the file.
 * Returns 0 on success, ENOTOPEN when the file is not open, otherwise the
 * errno value of the short/failed write (ENOSPC when errno was not set).
 */
int
SortFile::Write (char *l)
{
   errno = ENOSPC;
   if (fd < 0) {
      Status = ENOTOPEN;
      return Status;
   }
   Status = 0;
   if (write (fd, l, length) < length)
      Status = errno;
   return Status;
}

/*
 * Read the next record into Buffer.
 * Returns 0 on success, ENOTOPEN when the file is not open, otherwise the
 * errno value of the short/failed read (EENDFILE when errno was not set).
 */
int
SortFile::Read ()
{
   errno = EENDFILE;
   if (fd < 0) {
      Status = ENOTOPEN;
      return Status;
   }
   if (read (fd, Buffer, length) < length) {
      /* Nothing consumed: a later ReadPrev steps back one record only. */
      Status = errno; 
      prevDisp = length;
   } else {
      /* One record consumed: a later ReadPrev must step back two. */
      Status = 0;
      prevDisp = length << 1;
      offsetRead += length;
   }
   return Status;
}
 
/*
 * Read the record preceding the one last returned into Buffer.
 * Returns 0 on success, ENOTOPEN when the file is not open, EENDFILE when
 * the beginning of the file has been reached (the file is then rewound),
 * otherwise the errno value of the failed read.
 */
int
SortFile::ReadPrev ()
{
   errno = EENDFILE;
   Status = 0;
   if (fd < 0) {
      Status = ENOTOPEN;
      return Status;
   }
   if (offsetRead < prevDisp) {
      /* Not enough data before the current position: rewind and report
         the boundary. */
      SeekStart();
      Status = EENDFILE;
      return Status;
   }
   /* Step back by the pending displacement (whence 1: relative to the
      current position), then read forward one record. */
   offsetRead -= prevDisp;
   lseek (fd, -prevDisp, 1);
   prevDisp = length << 1;
   if (read (fd, Buffer, length) < length)
      Status = errno;
   else
      offsetRead += length;
   return Status;
}

/*
 * Position the file at its beginning.
 * Returns 0, or ENOTOPEN when the file is not open.
 */
int
SortFile::SeekStart ()
{
   errno = EBADFILE;
   if (fd < 0) {
      Status = ENOTOPEN;
      return Status;
   }
   Status = 0;
   prevDisp = length << 1;
   offsetRead = lseek (fd, 0L, 0);   /* whence 0: from the start of file */
   return Status;
}
 
/*
 * Position the file at its end, so that ReadPrev returns the last record.
 * Returns 0, or ENOTOPEN when the file is not open.
 */
int
SortFile::SeekEnd ()
{
   errno = EBADFILE;
   if (fd < 0) {
      Status = ENOTOPEN;
      return Status;
   }
   Status = 0;
   prevDisp = length;                /* step back a single record */
   offsetRead = lseek (fd, 0L, 2);   /* whence 2: from the end of file */
   return Status;
}                     

/*
 * Build a sorter for fixed-length records.
 *
 * len   record length in bytes; must be greater than zero
 * ord   ordering criteria; the list is copied into Order and prepareDate()
 *       is invoked on every criterion of `ord`
 *
 * The in-memory buffer holds at most BUFFERALLOC bytes of records and at
 * most MAXELEM records, but always at least one record so that records
 * longer than BUFFERALLOC can still be accepted.  Allocation failures are
 * reported in Status as EBADMEM.
 */
Sort::Sort (int len, OrdCriteriaCopyList &ord):
Order (ord)
{  
   OrdCriteria *oc;
   LineLen = len;
   Status = 0;
   
   for (oc = ord.GetFirst(); oc; oc = ord.GetNext())
      oc->prepareDate();

   if ((MaxElem = BUFFERALLOC / len) > MAXELEM)
      MaxElem = MAXELEM;
   /* A record larger than BUFFERALLOC would give room for zero records,
      and Write() could then never store anything. */
   if (MaxElem < 1)
      MaxElem = 1;
   CurOffset = IntBuffer = new char[MaxElem * len];
   if (IntBuffer == NULL) {
      PError::InternalError ("Cannot allocate buffer for sort");
      Status = EBADMEM;
   }
   Array = new char*[MaxElem];
   if (Array == NULL) {
      PError::InternalError ("Cannot allocate array for sort");
      Status = EBADMEM;
   }
   NumElem = 0;
   ReadElem = -1;
   TotElem = 0;
   FinalFile = NULL;
   Sorted = PFalse;
}

/*
 * Close and destroy every run file still in the list, then release the
 * in-memory record buffer and the pointer array.
 */
Sort::~Sort ()
{
  SortFile *sf;
  
  for (sf = Files.GetFirst(); sf; sf = Files.DeleteCurrent ()) {
     sf->Close ();
     delete sf;
  }
  /* Both were allocated with new[], so they must be released with
     delete[]. */
  delete[] IntBuffer;
  delete[] Array;
}

int
Sort::Write (char *line)
{
   int Return = 0;
   if (NumElem < MaxElem) {
      memcpy (CurOffset, line, LineLen);
      Array[NumElem++] = CurOffset;
      CurOffset += LineLen;
   } else {
      Sorting();
      Return = WriteBuffer();
      if (Return == 0) {
         CurOffset = IntBuffer;
         NumElem = 0;
         Return = Write (line);
      }
   }
   TotElem++;
   return Return;  
}

/*   
void
Sort::Sorting ()
{   
   int   s, i, j, num;
   char  *temp;

   num = NumElem;
   for (s = num / 2; s > 0; s /= 2)
      for (i = s; i < num; i++)
         for (j = i - s; j >= 0; j -= s)
            if (Compare(Array[j], Array[j+s]) > 0) {
               temp = Array[j];
               Array[j] = Array[j+s];
               Array[j+s] = temp;
            }
}
*/
/*
 * Tell whether c is one of '+', '0'-'9', '{' or 'A'-'I'.
 * Returns PTrue for those characters and PFalse for anything else.
 * NOTE(review): the set looks like the positive sign/overpunch characters
 * of a zoned decimal field - confirm against the callers.
 */
inline int IsPosChar(char c)
{
   if (c == '+' || c == '{')
      return PTrue;
   if (c >= '0' && c <= '9')
      return PTrue;
   if (c >= 'A' && c <= 'I')
      return PTrue;
   return PFalse;
}
                           
// NOTE(review): negSign1/negSign2 are not referenced anywhere in the visible
// source - candidates for removal once confirmed unused.
static int negSign1,negSign2; 
/*
 * Compare two records according to the criteria in Order.
 *
 * l1, l2  the records to compare; every criterion addresses its field at
 *         offset oc->offs inside both records
 *
 * The criteria are examined in list order.  The first criterion yielding a
 * non-zero result decides: its result is negated when the criterion is
 * descending (oc->isDesc) and returned.  Zero is returned when every
 * criterion compares equal.
 *
 * For the numeric types a field holding the type's NULL marker sorts before
 * any other value, and two NULL fields compare equal.
 *
 * NOTE(review): numeric fields are read through pointer casts directly from
 * the record buffer; this assumes oc->offs is suitably aligned for the
 * field type on the target platform - confirm.
 */
int
Sort::Compare (unsigned char *l1, unsigned char *l2)
{
   OrdCriteria *oc;
   int Return = 0; 
   
   for (oc = Order.GetFirst(); oc; oc = Order.GetNext()) {
      switch (oc->type) {
      case T_CSTRING:
         // Same length on both sides; '\0' is passed as the fill byte.
         Return = StringCompare (&l1[oc->offs], oc->len,
                                 &l2[oc->offs], oc->len, 
                                 oc->coll, '\0');
         break;
      case T_BLOB:
         Return = 0; // Blobs are not ordered!
         break;
      case T_CLOB:
         Return = 0; // Clobs are not ordered?
         break;
      case T_PACKED_ORDERED:
         // Plain byte-wise comparison of the field.
         Return = memcmp (&l1[oc->offs], &l2[oc->offs], oc->len);
         break;
      case T_SHORT:
         if (*((short *) &l1[oc->offs]) == NULLSHORT)
               if (*((short *) &l2[oc->offs]) == NULLSHORT)
                  Return = 0;
               else
                  Return = -1;
            else
               if (*((short *) &l2[oc->offs]) == NULLSHORT)
                  Return = 1;
               else
                  if (oc->isSigned)
                     // The difference of two shorts cannot overflow an int.
                     Return = *((short *) &l1[oc->offs]) - *((short *) &l2[oc->offs]);
                  else
                     if (*((unsigned short *) &l1[oc->offs]) >
                         *((unsigned short *) &l2[oc->offs]))
                        Return = 1;
                     else if (*((unsigned short *) &l1[oc->offs]) <
                              *((unsigned short *) &l2[oc->offs]))
                        Return = -1;
                     else 
                        Return = 0;
         break; 
      case T_LONG:
         if (*((long *) &l1[oc->offs]) == NULLLONG)
            if (*((long *) &l2[oc->offs]) == NULLLONG)
               Return = 0;
            else
               Return = -1;
         else               
            if (*((long *) &l2[oc->offs]) == NULLLONG)
               Return = 1;
            else 
               if (oc->isSigned)
                   // Explicit three-way comparison: no subtraction is used.
                   if (*((long *) &l1[oc->offs]) >
                       *((long *) &l2[oc->offs]))
                      Return = 1;
                   else if (*((long *) &l1[oc->offs]) <
                            *((long *) &l2[oc->offs]))
                      Return = -1;
                   else 
                      Return = 0;
               else
                   if (*((unsigned long *) &l1[oc->offs]) >
                       *((unsigned long *) &l2[oc->offs]))
                      Return = 1;
                   else if (*((unsigned long *) &l1[oc->offs]) <
                            *((unsigned long *) &l2[oc->offs]))
                      Return = -1;
                   else 
                      Return = 0;
         break;
      case T_FLOAT:
         if (*((float *) &l1[oc->offs]) == NULLFLOAT)
            if (*((float *) &l2[oc->offs]) == NULLFLOAT)
               Return = 0;
            else
               Return = -1;
         else               
            if (*((float *) &l2[oc->offs]) == NULLFLOAT)
               Return = 1;
            else
               if (*((float *) &l1[oc->offs]) >
                   *((float *) &l2[oc->offs]))
                  Return = 1;
               else if (*((float *) &l1[oc->offs]) <
                        *((float *) &l2[oc->offs]))
                  Return = -1;
               else 
                  Return = 0;
         break;
      case T_DOUBLE:
         if (*((double *) &l1[oc->offs]) == NULLDOUBLE)
            if (*((double *) &l2[oc->offs]) == NULLDOUBLE)
               Return = 0;
            else
               Return = -1;
         else               
            if (*((double *) &l2[oc->offs]) == NULLDOUBLE)
               Return = 1;
            else
               if (*((double *) &l1[oc->offs]) >
                   *((double *) &l2[oc->offs]))
                  Return = 1;
               else if (*((double *) &l1[oc->offs]) <
                        *((double *) &l2[oc->offs]))
                  Return = -1;
               else 
                  Return = 0;
         break;
      default:
         // Unknown type: reported, and the criterion is treated as equal.
         PError::InternalError ("Invalid type in Compare");
         break;
      }
      // The first criterion that differs decides the outcome.
      if (Return) {
         if (oc->isDesc)
            Return = -Return;
         break;
      }
   }
   return Return;
}

int
Sort::MakeSort ()
{ 
   int Return = 0;
   Status = 0;
   Sorting();
   if (Files.GetItemNum() > 0) {
      while (Status == 0 && !(Files.GetItemNum() == 1 && NumElem == 0))
         Status = Merge();
      FinalFile = Files.GetFirst ();
      Return = FinalFile->OpenRead ();
   }
   Sorted = PTrue;
   return Return;
}

/*
 * Copy the next sorted record (LineLen bytes) into line.
 * Records come from the final merged file when there is one, otherwise
 * from the in-memory array.  Returns 0 on success, EENDFILE past the last
 * in-memory record, or the error returned by the file read.
 */
int
Sort::Read (char *line)
{ 
   if (FinalFile != 0) {
      int fileStatus = FinalFile->Read ();
      if (fileStatus == 0)
         memcpy (line, FinalFile->Buffer, LineLen);
      return fileStatus;
   }

   if (ReadElem < NumElem - 1) {
      memcpy (line, Array[++ReadElem], LineLen);
      return 0;
   }

   /* Park the cursor just past the last element. */
   ReadElem = NumElem;
   return EENDFILE;
}

/*
 * Copy the previous sorted record (LineLen bytes) into line.
 * Records come from the final merged file when there is one, otherwise
 * from the in-memory array.  Returns 0 on success, EENDFILE before the
 * first in-memory record, or the error returned by the file read.
 */
int
Sort::ReadPrev (char *line)
{ 
   if (FinalFile != 0) {
      int fileStatus = FinalFile->ReadPrev ();
      if (fileStatus == 0)
         memcpy (line, FinalFile->Buffer, LineLen);
      return fileStatus;
   }

   if (ReadElem > 0) {
      memcpy (line, Array[--ReadElem], LineLen);
      return 0;
   }

   /* Park the cursor just before the first element. */
   ReadElem = -1;
   return EENDFILE;
}

/*
 * Position the reader before the first sorted record.
 * If the data has not been sorted yet, MakeSort() is run and its result is
 * returned to the caller; otherwise the in-memory cursor or the final file
 * is rewound.  Returns 0 on success or an error code.
 */
int
Sort::SeekStart ()
{ 
   int Return = 0;
   if (Sorted == PFalse)
      Return = MakeSort();      /* report sort/merge failures */
   else
      if (FinalFile == 0)
         ReadElem = -1;
      else
         Return = FinalFile->SeekStart ();
   return Return;
}

/*
 * Position the reader after the last sorted record.
 * If the data has not been sorted yet, MakeSort() is run first; a failure
 * there is returned to the caller without seeking.  Returns 0 on success or
 * an error code.
 */
int
Sort::SeekEnd ()
{ 
   int Return = 0;
   if (Sorted == PFalse) {
      Return = MakeSort();
      if (Return != 0)
         return Return;         /* report sort/merge failures */
   }
   if (FinalFile == 0)
      ReadElem = NumElem;
   else
      Return = FinalFile->SeekEnd ();
   return Return;
}
 
/*
 * Allocate a new run file for records of LineLen bytes and create it on
 * disk.  Returns the new object, or 0 when the creation failed (the error
 * code is left in Status).
 */
SortFile *
Sort::Create ()
{             
   SortFile *created = new SortFile (LineLen);

   Status = created->Create();
   if (Status == 0)
      return created;

   /* Creation failed: discard the object and signal it with a null. */
   delete created;
   return 0;
}

int
Sort::WriteBuffer ()
{
   register int i;
   SortFile *sf;
   Status = 0;
   
   if (Files.GetItemNum() == NMERGE)
      while (Status == 0 && !(Files.GetItemNum() == 1 && NumElem == 0))
         Status = Merge ();
   else {
      sf = Create();
      if (sf != 0) {
         Files.AddItem (sf);
         errno = 0;
         for (i = 0; i < NumElem && Status == 0; i++)
            Status = sf->Write (Array[i]);
         sf->Close ();
      } else
         Status = errno;
   }
   return Status;
}

/*
 * Merge the run files in Files, together with the sorted in-memory records
 * in Array, into one new run file.
 *
 * Every run file is opened and primed with its first record; then the
 * smallest record among the in-memory cursor and the file buffers is
 * repeatedly written to the new file.  Run files that report a non-zero
 * status (exhausted) are destroyed and removed from the list as they are
 * met.  At the end the new file is appended to Files and the in-memory
 * buffer is marked empty.
 *
 * Returns 0 on success or an error code.  A failed write to the output file
 * aborts the merge and is reported, instead of silently producing a
 * truncated result.
 */
int
Sort::Merge ()
{
   Status = 0;
   SortFile *sf, *minSf, *newFile;
   char * minLine;
   int nElem = 0;
   int nFile, i;
   
   newFile = Create(); 
   if (newFile == 0)
      return Status;
   /* Open the run files; nFile counts those opened successfully. */
   for (nFile = 0, sf = Files.GetFirst ();
        sf && Status == 0;
        nFile++  , sf = Files.GetNext())
      Status = sf->OpenRead();
   if (Status != 0)
      nFile--;
   /* Nothing worth merging: a lone file with no in-memory data, or no
      usable file at all. */
   if ((NumElem == 0 && nFile < 2) || nFile < 1) {
      delete newFile;
      return Status;
   } else
      Status = 0;
   /* Prime every file buffer with its first record. */
   for (i = 0, sf = Files.GetFirst ();
        i < nFile && sf && Status == 0;
        i++  , sf = Files.GetNext())
      Status = sf->Read();
   if (Status != 0) {
      delete newFile;
      return Status;
   }
   do {
      /* Candidate minimum: the next in-memory record, if any is left. */
      if (nElem < NumElem)
         minLine = Array[nElem];
      else
         minLine = 0;
      minSf = 0;
      for (i = 0, sf = Files.GetFirst (); i < nFile && sf; i++)
         if (sf->GetStatus() == 0) {
            if ( minLine != 0) {
               if (Compare ((unsigned char*)minLine, (unsigned char*)sf->Buffer) > 0) {
                  minLine = sf->Buffer;
                  minSf = sf;
               }
            } else  {
               minLine = sf->Buffer;
               minSf = sf;
            }
            sf = Files.GetNext();
         } else {
            /* Exhausted run: drop it from the list. */
            delete sf;
            sf = Files.DeleteCurrent ();
            nFile--;
            i--;
         }
      if (minLine) {
         Status = newFile->Write (minLine);
         if (Status != 0) {
            /* Output failure (e.g. disk full): give up on this merge. */
            delete newFile;
            return Status;
         }
         /* Advance whichever source supplied the record. */
         if (minSf)
            minSf->Read();
         else
            nElem++;
      }
   } while (minLine != 0);
   if (nFile == 1) {
      sf = Files.GetFirst ();
      Files.DeleteCurrent();
      delete sf;
      nFile--;
   }
   Files.AddItem (newFile);
   newFile->Close();
   NumElem = 0;
   return Status;
}

/*
 * MTHRESH is the smallest partition for which we compare for a median
 * value instead of using the middle value.
 */
static const int MTHRESH = 6;

/*
 * THRESH is the minimum number of entries in a partition for continued
 * partitioning.
 */
static const int THRESH = 4;

/*
 * Sort the in-memory pointer array according to Order.
 * Nothing is done when there are no ordering criteria or fewer than two
 * records; small arrays go straight to insertion sort.
 */
void
Sort::Sorting ()
{                   
   /* The address of a member can never be null, so only the emptiness of
      the criteria list needs testing. */
   if (Order.GetFirst() == 0)
      return;
   if (NumElem <= 1)
      return;

   if (NumElem >= THRESH)
      QuickSort((char *) Array, NumElem);
   else
      InsertionSort((char *) Array, NumElem);
}

/*
 * a and b are byte pointers into the array of record pointers: fetch the
 * record pointers they address and compare the records with Compare().
 */
#define _compare(a,b) Compare(*((unsigned char **)a), *((unsigned char **)b))

/*
 * Swap two areas of size number of bytes.  Although qsort(3) permits random
 * blocks of memory to be sorted, sorting pointers is almost certainly the
 * common case (and, were it not, could easily be made so).  Regardless, it
 * isn't worth optimizing; the SWAP's get sped up by the cache, and pointer
 * arithmetic gets lost in the time required for comparison function calls.
 */
#define  SWAP(a, b) { \
   cnt = size; \
   do { \
      ch = *a; \
      *a++ = *b; \
      *b++ = ch; \
   } while (--cnt); \
}

/*
 * Knuth, Vol. 3, page 116, Algorithm Q, step b, argues that a single pass
 * of straight insertion sort after partitioning is complete is better than
 * sorting each small partition as it is created.  This isn't correct in this
 * implementation because comparisons require at least one (and often two)
 * function calls and are likely to be the dominating expense of the sort.
 * Doing a final insertion sort does more comparisons than are necessary
 * because it compares the "edges" and medians of the partitions which are
 * known to be already sorted.
 *
 * This is also the reasoning behind selecting a small THRESH value (see
 * Knuth, page 122, equation 26), since the quicksort algorithm does less
 * comparisons than the insertion sort.
 */
#define  SORT(bot, n) { \
   if (n > 1) \
      if (n == 2) { \
         t1 = bot + size; \
         if (_compare(t1, bot) < 0) \
            SWAP(t1, bot); \
      } else \
         InsertionSort(bot, n); \
}

/*
 * Quicksort over an array of record pointers.
 *
 * bot    start of the array, viewed as bytes; elements are sizeof(char *)
 *        bytes wide
 * nmemb  number of elements
 *
 * Elements are compared through _compare (i.e. Sort::Compare on the records
 * they point to) and exchanged byte by byte with SWAP.  The smaller
 * partition is handled recursively and the larger one iteratively; small
 * partitions are finished with SORT / InsertionSort.
 */
void
Sort::QuickSort(char *bot, int nmemb)
{
   int cnt;
   unsigned char ch;
   char *top, *mid, *t1, *t2;
   int n1, n2;
   char *bsv;
   int size = sizeof (char *);

   /* bot and nmemb must already be set. */
partition:

   /* find mid and top elements */
   mid = bot + size * (nmemb >> 1);
   top = bot + (nmemb - 1) * size;

   /*
    * Find the median of the first, last and middle element (see Knuth,
    * Vol. 3, page 123, Eq. 28).  This test order gets the equalities
    * right.
    */
   if (nmemb >= MTHRESH) {
      n1 = _compare(bot, mid);
      n2 = _compare(mid, top);
      if (n1 < 0 && n2 > 0)
         t1 = _compare(bot, top) < 0 ? top : bot;
      else if (n1 > 0 && n2 < 0)
         t1 = _compare(bot, top) > 0 ? top : bot;
      else
         t1 = mid;

      /* if mid element not selected, swap selection there */
      if (t1 != mid) {
         SWAP(t1, mid);
         /* SWAP advanced mid by one element: step it back. */
         mid -= size;
      }
   }

   /* Standard quicksort, Knuth, Vol. 3, page 116, Algorithm Q. */
#define  didswap  n1
#define  newbot   t1
#define  replace  t2
   didswap = 0;
   for (bsv = bot;;) {
      for (; bot < mid && _compare(bot, mid) <= 0; bot += size);
      while (top > mid) {
         if (_compare(mid, top) <= 0) {
            top -= size;
            continue;
         }
         newbot = bot + size; /* value of bot after swap */
         if (bot == mid)      /* top <-> mid, mid == top */
            replace = mid = top;
         else {         /* bot <-> top */
            replace = top;
            top -= size;
         }
         goto swap;
      }
      if (bot == mid)
         break;

      /* bot <-> mid, mid == bot */
      replace = mid;
      newbot = mid = bot;     /* value of bot after swap */
      top -= size;

swap:    SWAP(bot, replace);
      bot = newbot;
      didswap = 1;
   }

   /*
    * Quicksort behaves badly in the presence of data which is already
    * sorted (see Knuth, Vol. 3, page 119) going from O N lg N to O N^2.
    * To avoid this worst case behavior, if a re-partitioning occurs
    * without swapping any elements, it is not further partitioned and
    * is insert sorted.  This wins big with almost sorted data sets and
    * only loses if the data set is very strangely partitioned.  A fix
    * for those data sets would be to return prematurely if the insertion
    * sort routine is forced to make an excessive number of swaps, and
    * continue the partitioning.
    */
   if (!didswap) {
      InsertionSort(bsv, nmemb);
      return;
   }

   /*
    * Re-partition or sort as necessary.  Note that the mid element
    * itself is correctly positioned and can be ignored.
    */
#define  nlower   n1
#define  nupper   n2
   bot = bsv;
   nlower = (mid - bot) / size;  /* size of lower partition */
   mid += size;
   nupper = nmemb - nlower - 1;  /* size of upper partition */

   /*
    * If must call recursively, do it on the smaller partition; this
    * bounds the stack to lg N entries.
    */
   if (nlower > nupper) {
      if (nupper >= THRESH)
         QuickSort(mid, nupper);
      else {
         SORT(mid, nupper);
         if (nlower < THRESH) {
            SORT(bot, nlower);
            return;
         }
      }
      nmemb = nlower;
   } else {
      if (nlower >= THRESH)
         QuickSort(bot, nlower);
      else {
         SORT(bot, nlower);
         if (nupper < THRESH) {
            SORT(mid, nupper);
            return;
         }
      }
      bot = mid;
      nmemb = nupper;
   }
   goto partition;
   /* NOTREACHED */
}

void
Sort::InsertionSort(char * bot, int nmemb)
{
   register int cnt;
   register unsigned char ch;
   register char *s1, *s2, *t1, *t2, *top;
   register int size = sizeof(char *);

   /*
    * A simple insertion sort (see Knuth, Vol. 3, page 81, Algorithm
    * S).  Insertion sort has the same worst case as most simple sorts
    * (O N^2).  It gets used here because it is (O N) in the case of
    * sorted data.
    */
   top = bot + nmemb * size;
   for (t1 = bot + size; t1 < top;) {
      for (t2 = t1; (t2 -= size) >= bot && _compare(t1, t2) < 0;);
      if (t1 != (t2 += size)) {
         /* Bubble bytes up through each element. */
         for (cnt = size; cnt--; ++t1) {
            ch = *t1;
            for (s1 = s2 = t1; (s2 -= size) >= t2; s1 = s2)
               *s1 = *s2;
            *s1 = ch;
         }
      } else
         t1 += size;
   }
}

