
// TODO: replace sasort with something more efficient that sorts in secondary memory.

#include "basics.h"
#include "sort.h"
#include "mtf.h"
#include "bits.h"
#include "huff.h"

/*
 * Load the whole file `fname` into a freshly malloc'ed buffer.
 *
 *   fname : path of the file to read
 *   text  : out; receives the buffer, which holds the file bytes followed
 *           by one extra 0 byte. The caller releases it with free().
 *   n     : out; receives the file size in bytes (the extra 0 byte is
 *           not counted)
 *
 * Any failure (stat, size check, allocation, open, read) prints a message
 * on stderr and terminates the process with exit(1).
 */
static void txtload (char *fname, uchar **text, uint *n)

   { int f;
     long got;
     uint done;
     struct stat sdata;

     if (stat(fname,&sdata) != 0)
        { fprintf (stderr,"Cannot stat file %s\n",fname);
          exit(1);
        }

        /* the size must survive the conversion to uint and still leave
           room for the terminator byte (*n+1 must not wrap to 0) */
     *n = sdata.st_size;
     if (sdata.st_size < 0 || *n != sdata.st_size || *n+1 == 0)
        { fprintf (stderr,"File %s is too large\n",fname);
          exit(1);
        }

     *text = (uchar*) malloc (*n+1);
     if (!*text)
        { fprintf (stderr,"Cannot allocate memory for file %s\n",fname);
          exit(1);
        }

     f = open (fname,O_RDONLY);
     if (f == -1)
        { fprintf (stderr,"Cannot open file %s\n",fname);
          exit(1);
        }

        /* read() may return fewer bytes than requested, so keep reading
           until the whole file is in memory; a return of 0 (premature end
           of file) or -1 (error) before that point is a failure */
     done = 0;
     while (done < *n)
        { got = read (f,*text+done,*n-done);
          if (got <= 0)
             { fprintf (stderr,"Cannot read file %s\n",fname);
               exit(1);
             }
          done += got;
        }

        /* terminator */
     (*text)[*n] = 0;

     close (f);
   }

/*
 * Create `fname` (owner read/write) and store, in this order:
 *   - `on`      as a raw int
 *   - `alphn`   as a raw int
 *   - `alphn` bytes taken from `sorted`
 *   - `n` bytes taken from `text`
 *
 * Returns the total number of bytes written. Any failure prints a message
 * on stderr and terminates the process with exit(1).
 */
static int txtsave (char *fname, uchar *text, uint n, 
		int on, uchar *sorted, int alphn)

   { int fd;
     int total = 0;

     fd = creat (fname,S_IRUSR|S_IWUSR);
     if (fd == -1)
        { fprintf (stderr,"Cannot create file %s\n",fname);
          exit(1);
        }

        /* header: the two integers */
     if (write(fd,&on,sizeof(int)) != sizeof(int)) goto werror;
     total += sizeof(int);

     if (write(fd,&alphn,sizeof(int)) != sizeof(int)) goto werror;
     total += sizeof(int);

        /* symbol table */
     if (write(fd,sorted,alphn) != alphn) goto werror;
     total += alphn;

        /* payload */
     if (write(fd,text,n) != n) goto werror;
     total += n;

        /* a failing close is reported like a failing write */
     if (close(fd) != 0) goto werror;

     return total;

   werror:
     fprintf (stderr,"Cannot write file %s\n",fname);
     exit(1);
   }

/*
 * Emit a two-byte record on descriptor `f`: a 0 byte followed by `d`.
 * Each byte is written with its own write() call.
 *
 *   f     : open file descriptor to write to
 *   fname : file name, used only in the error message
 *   d     : second byte of the record
 *
 * Returns the number of bytes written (2). A failed write prints a
 * message on stderr and terminates the process with exit(1).
 */
int writerun (int f, char *fname, uchar d)

   { uchar rec[2];
     int k;

     rec[0] = 0;   /* leading zero byte */
     rec[1] = d;

     for (k=0;k<2;k++)
        if (write(f,&rec[k],sizeof(uchar)) != sizeof(uchar))
           { fprintf (stderr,"Cannot write file %s\n",fname);
             exit(1);
           }

     return 2*sizeof(uchar);
   }

/*
 * Write to `fname` the occurrence counts of every symbol of L, counted
 * at two granularities: per block (position i belongs to block
 * i>>bitsblock) and per superblock (position i belongs to superblock
 * i>>bitssblock).
 *
 *   fname  : file to create
 *   L      : symbol sequence; positions 0..n inclusive are read
 *   n      : index of the last position of L
 *   sorted : the alphn distinct symbols; tables are built only for these,
 *            so every symbol occurring in L must be listed here
 *   alphn  : number of entries in sorted
 *
 * File layout, each section iterating symbols in `sorted` order and
 * superblocks in increasing order:
 *   1. sfreq: one uint per (symbol, superblock) = occurrences of the
 *      symbol inside that superblock.
 *   2. sptrs: one ushort per (symbol, superblock) = number of bytes that
 *      section 3 spends on that superblock for that symbol.
 *   3. per-block counts: a nonzero count is one byte holding its low
 *      8 bits; a count whose low byte is 0 is written as the record
 *      (0,0) (the original author notes this stands for 256); a run of
 *      blocks with count 0 is written as the record (0,run length).
 *      A complete superblock in which the symbol never occurs is written
 *      as the two records (0,255)(0,1).
 *
 * Returns the total number of bytes written. Any failure prints a message
 * on stderr and terminates the process with exit(1).
 */
int freqsave (char *fname, uchar *L, int n, uchar *sorted, int alphn)
    
   { int c,i,j,k,f,siz=0;
     int lastb = n>>bitsblock;     /* index of the last block */
     int lastsb = n>>bitssblock;   /* index of the last superblock */
     ushort *freq[256];            /* freq[c][b]: count of c in block b */
     uint *sfreq[256];             /* sfreq[c][s]: count of c in superblock s */
     ushort *sptrs[256];           /* sptrs[c][s]: encoded size, see above */
     uchar d,run;

     if ((f = creat(fname,S_IRUSR|S_IWUSR)) == -1)
        { fprintf (stderr,"Cannot create file %s\n",fname);
          exit(1);
        }

        /* allocate and clear the tables of every symbol of the alphabet */
     for (i=0;i<alphn;i++)
        { c = sorted[i];
          freq[c] = malloc ((1+lastb)*sizeof(ushort));
          sfreq[c] = malloc ((1+lastsb)*sizeof(uint));
          sptrs[c] = malloc ((1+lastsb)*sizeof(ushort));
          if (!freq[c] || !sfreq[c] || !sptrs[c])
             { fprintf (stderr,"Cannot allocate memory for file %s\n",fname);
               exit(1);
             }
          for (j=0;j<=lastb;j++) freq[c][j] = 0;
          for (j=0;j<=lastsb;j++) sfreq[c][j] = 0;
        }

        /* count occurrences at both granularities */
     for (i=0;i<=n;i++) 
        { freq[L[i]][i>>bitsblock]++;
          sfreq[L[i]][i>>bitssblock]++;
        }

        /* compute the encoded size of every superblock; this must mirror
           exactly what the third writing loop below emits */
     for (i=0;i<alphn;i++)
        { c = sorted[i];
          for (j=0;j<=lastsb;j++) 
            { if (!sfreq[c][j] && (((j+1)<<bbitssblock)-1 <= lastb))
                 sptrs[c][j] = 2+2; /* special case: complete empty
                                       superblock, two run records */
              else 
                 { run = 0; sptrs[c][j] = 0;
                   for (k=0;k<(1<<bbitssblock);k++)
                      { /* the last superblock may be incomplete */
                        if ((j<<bbitssblock)+k > lastb) break;
                        if (freq[c][(j<<bbitssblock)+k]) 
                           { if (run) { sptrs[c][j] += 2; run = 0; }
                             d = (uchar) freq[c][(j<<bbitssblock)+k];
                             if (d == 0) sptrs[c][j] += 2; /* low byte 0 */
                             else sptrs[c][j]++;
                           }
                        else run++;
                      }
                   if (run) sptrs[c][j] += 2; /* run cut short by the end */
                 }
            }
        }
     
        /* section 1: superblock counts */
     for (i=0;i<alphn;i++)
        { c = sorted[i];
          for (j=0;j<=lastsb;j++)
              { if (write(f,&sfreq[c][j],sizeof(uint)) != sizeof(uint))
                   { fprintf (stderr,"Cannot write file %s\n",fname);
                     exit(1);
                   }
                siz += sizeof(uint);
              }
        }

        /* section 2: encoded sizes */
     for (i=0;i<alphn;i++)
        { c = sorted[i];
          for (j=0;j<=lastsb;j++)
              { if (write(f,&sptrs[c][j],sizeof(ushort)) != sizeof(ushort))
                   { fprintf (stderr,"Cannot write file %s\n",fname);
                     exit(1);
                   }
                siz += sizeof(ushort);
              }
        }

        /* section 3: run-length encoded block counts */
     for (i=0;i<alphn;i++)
        { c = sorted[i];
          for (j=0;j<=lastsb;j++)
            { if (!sfreq[c][j] && (((j+1)<<bbitssblock)-1 <= lastb))
                 { /* special case: complete empty superblock */
                   siz += writerun (f,fname,255);
                   siz += writerun (f,fname,1);
                 }
              else 
                 { run = 0;
                   for (k=0;k<(1<<bbitssblock);k++)
                      { if ((j<<bbitssblock)+k > lastb) break;
                        if (freq[c][(j<<bbitssblock)+k]) 
                           { if (run) 
                                { siz += writerun (f,fname,run);
                                  run = 0;
                                }
                             d = (uchar) freq[c][(j<<bbitssblock)+k];
                             if (d == 0) siz += writerun (f,fname,0);
                             else 
                                { if (write(f,&d,sizeof(uchar)) != sizeof(uchar))
                                     { fprintf (stderr,"Cannot write file %s\n",fname);
                                       exit(1);
                                     }
                                  siz += sizeof(uchar);
                                }
                           }
                        else run++;
                      }
                   if (run) /* run cut short by the end */
                      siz += writerun (f,fname,run);
                 }
            }
        }

     if (close(f) != 0)
        { fprintf (stderr,"Cannot write file %s\n",fname);
          exit(1);
        }

        /* release the tables; sorted[] holds distinct symbols in the way
           this file builds it, so each table is freed exactly once */
     for (i=0;i<alphn;i++)
        { c = sorted[i];
          free (freq[c]);
          free (sfreq[c]);
          free (sptrs[c]);
        }

     return siz;
   }

/*
 * Create `fname` (owner read/write) and store `pblock` as a raw int,
 * followed by the entries array[0] .. array[(n+pblock-1)/pblock]
 * (both ends included), each written as sizeof(int) bytes.
 *
 * Returns the total number of bytes written. Any failure prints a message
 * on stderr and terminates the process with exit(1).
 */
int ptrsave (char *fname, uint *array, int n, int pblock)

   { int fd,k,last;
     int total = 0;

     fd = creat (fname,S_IRUSR|S_IWUSR);
     if (fd == -1)
        { fprintf (stderr,"Cannot create file %s\n",fname);
          exit(1);
        }

        /* header */
     if (write(fd,&pblock,sizeof(int)) != sizeof(int))
        { fprintf (stderr,"Cannot write file %s\n",fname);
          exit(1);
        }
     total += sizeof(int);

        /* index of the last entry to store: ceil(n/pblock) */
     last = (n+pblock-1)/pblock;

     k = 0;
     while (k <= last)
        { if (write(fd,array+k,sizeof(int)) != sizeof(int))
             { fprintf (stderr,"Cannot write file %s\n",fname);
               exit(1);
             }
          total += sizeof(int);
          k++;
        }

        /* a failing close is reported like a failing write */
     if (close(fd) != 0)
        { fprintf (stderr,"Cannot write file %s\n",fname);
          exit(1);
        }
     return total;
   }

/*
 * List the symbols that have a nonzero entry in freq[0..255], ordered by
 * decreasing count; on equal counts the smaller symbol code comes first.
 *
 *   freq   : 256 counts, left untouched (a private copy is consumed);
 *            presumably non-negative - confirm with the producer
 *   sorted : out; receives the ordered symbols, one per nonzero count
 *
 * Returns how many symbols were stored in `sorted`.
 */
static int sortFreqs (int *freq, uchar *sorted)
 
   { int left[256];   /* working copy; -1 marks a symbol already placed */
     int sym,rank,best;
     int used = 0;

     for (sym=0;sym<256;sym++)
        { left[sym] = freq[sym];
          if (left[sym] != 0) used++;
        }

        /* repeated selection of the largest remaining count */
     for (rank=0;rank<used;rank++)
        { best = 0;
          for (sym=1;sym<256;sym++)
             if (left[best] < left[sym]) best = sym;
          sorted[rank] = best;
          left[best] = -1;
        }

     return used;
   }

main (int argc, char **argv)

   { uchar *text,*ctext;
     char cfile[1024];
     int i,cn,n,run,pblock,alphn,siz;
     uchar c;
     sarray S;
     MTF M;
     bitstream B;
     uchar sorted[256];
     struct sHT treec[512],treer[512];
     int mtfreqsc[256],mtfreqsr[256];
     struct shcode mtcodec[256],mtcoder[256];
     
     if (argc != 3)
	{ fprintf(stderr,"Usage: bwtc <textfile> <extra %% for pointers>\n");
	  exit(1);
	}

     txtload (argv[1],&text,&n);
     if (atof(argv[2]) <= 0.0) pblock = n+1;
     else pblock = 100*sizeof(int)/(float)atof(argv[2]);
     if (pblock == 0) pblock = 1;

     S = sacreate (text,n,pblock);
     free (text);
     alphn = sortFreqs (S.freq,sorted);

	// 1era pasada para contar frecuencias
     M = initMTF (sorted,alphn);
     for (i=0;i<256;i++) mtfreqsc[i] = mtfreqsr[i] = 0;
 
     run = 0;
     for (i=0;i<=n;i++)
	{ if (!(i % block)) 
	     { reinitMTF(M);
	       if (run) 
		  { mtfreqsc[0]++;
		    mtfreqsr[run-1]++;
		    run = 0; 
		  }
	     }
	  c = findMTF(M,S.L[i]);
	  if ((c == 0) && (run<256)) run++;
	  else 
	     { if (run) 
		  { mtfreqsc[0]++;
		    mtfreqsr[run-1]++;
		    run = 0; 
		  }
	       if (c == 0) run = 1;
	       else mtfreqsc[c]++;
	     }
	}
     if (run) 
	{ mtfreqsc[0]++;
	  mtfreqsr[run-1]++;
	}

     createHuffCodes (mtfreqsc,treec,mtcodec);
     createHuffCodes (mtfreqsr,treer,mtcoder);

	// ahora codificar

     B = createbits (2*(512+256*8)+2*8*(n+1));
     writeHuffTree (treec,B);
     writeHuffTree (treer,B);

     run = 0;
     for (i=0;i<=n;i++)
	{ if (!(i % block)) 
	     { reinitMTF(M);
	       if (run) 
		  { codeHuff (&mtcodec[0],B);
		    codeHuff (&mtcoder[run-1],B);
		    run = 0; 
		  }
	     }
	  c = findMTF(M,S.L[i]);
	  if ((c == 0) && (run < 256)) run++;
	  else 
	     { if (run) 
		  { codeHuff (&mtcodec[0],B);
		    codeHuff (&mtcoder[run-1],B);
		    run = 0; 
		  }
	       if (c == 0) run = 1;
	       else codeHuff (&mtcodec[c],B); 
	     }
	}
     if (run) 
	{ codeHuff (&mtcodec[0],B);
	  codeHuff (&mtcoder[run-1],B);
	}

     ctext = closebits (B,&cn);

     sprintf (cfile,"%s.bwt",argv[1]);
     siz = txtsave (cfile,ctext,cn,n,sorted,alphn);

	// almacenar los conteos de frecuencias
     sprintf (cfile,"%s.bwf",argv[1]);
     siz += freqsave (cfile,S.L,n,sorted,alphn);

	// almacenar el inverse map del suffix array
     sprintf (cfile,"%s.bwp",argv[1]);
     siz += ptrsave (cfile,S.invmap,n,pblock);

     fprintf (stderr,"Compressed, ratio = %0.2f%%\n",100.0*siz/(float)n);

     safree (S);
     exit(0);
   }

