/* "cleave.c" copyright 1993-2006 by Franklin Webber
 * This source code may be freely copied if it is not modified.  It may
 * be modified if this copyright notice (the first 7 lines of the file)
 * is included unchanged at the beginning of every modified version.
 * The author requests that you send a copy of any modified version to him
 * at Franklin.Webber@computer.org .
 */
#define THE_VERSION "1C"
#define THE_DATE    "2006 May 1"

/* This program has two functions, inverses of each other:
 * (1) splitting a file into separate files each no larger than some
 * maximum size, and  (2) rejoining the separate files into a whole.
 * (The word "cleave" describes both splitting and joining.)
 *
 * To compile:
 *   under DOS: "tcc cleave.c"
 *   under Win: "bcc32 cleave.c"
 *   under Unix: "gcc -o cleave cleave.c"
 *
 * To execute:
 *   "cleave {file} [-] [-v] [-o] [-{size}]"
 * where
 *     [-] indicates joining instead of splitting
 *     [-v] enables verbose mode
 *     [-o] allows overwriting of existing files
 *     [-{size}] is the maximum size of each file part in bytes
 *
 * The name of the original whole file is given on the command line.
 * The names of its separate part files are generated by replacing 
 * its filename extension with a three digit sequence number.
 * So, "foo.ext" is split into parts "foo.000", "foo.001", etc.
 * "foo.000" is called the index file and contains control information;
 * the other part files contain the data.
 *
 * cleave overwrites the original extension in the part file names
 * because cleave is meant to work the same way on every OS,
 * including DOS, which won't allow arbitrary length extensions.
 */
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Define a boolean type, its two values, and word-style logical operators.
 * Bool is a plain int.  Or, And, and Not expand to the C operators
 * ||, &&, and ! respectively, so the usual precedence and
 * short-circuit rules of those operators apply.
 */
typedef int Bool;
#define False     0
#define True      1
#define Or       ||
#define And      &&
#define Not       !

/* Declare command-line switches.
 * Each switch starts at its default value and is changed by
 * InitializeSwitch when the matching option appears on the command line.
 */
Bool splitting = True;       /* True => splitting; False => joining */
Bool verbose = False;        /* True => verbose; False => quiet */
Bool overwrite = False;      /* True => overwrite; False => abort */

/* Declare file names.
 * cmdName is taken from argv [1] by Initialize.  main sets wholeName to
 * cmdName and obtains partName and indexName from Derive, then completes
 * them with a three digit extension using sprintf.
 */
char* cmdName = NULL;
char* wholeName = NULL;
char* partName = NULL;
char* indexName = NULL;

/* Declare file streams.
 * All three are opened by main through Open.
 */
FILE* wholeFile = NULL;
FILE* partFile = NULL;
FILE* indexFile = NULL;

/* Declare the maximum size of each part file.
 * PARTSIZE is the default; a -{size} switch on the command line
 * replaces it (see InitializeSwitch).
 */
#define PARTSIZE 100000
long partSize = PARTSIZE;

/* Declare the maximum size of the internal buffer.
 * block is the single buffer that Copy reads into and writes from,
 * at most BLOCKSIZE bytes at a time.
 */
#define BLOCKSIZE 16384
char block [BLOCKSIZE]; 

/* Define error return macros.
 * EXIT_ ends the current stderr line with a newline and exits with
 * status err.  EXIT0_ first prints str, and EXIT1_ first prints str
 * formatted with the single argument a1.  str is handed to fprintf as
 * the format string, so it should be a string literal.
 *
 * Each macro expands to a brace-enclosed block rather than to a single
 * statement, which is why the uses in this file are written without a
 * trailing semicolon: a semicolon placed before an "else" would
 * detach the else from its if.
 */
#define EXIT_(err)            { fprintf (stderr,"\n"); exit (err); }
#define EXIT0_(str,err)       { fprintf (stderr,str); EXIT_(err); }
#define EXIT1_(str,a1,err)    { fprintf (stderr,str,a1); EXIT_(err); }

/* Give help on how to invoke the program.
 * USAGE_EXIT_ prints the usage text through PutUsage, then the message
 * str, and exits with status 1.
 */
#define USAGE_EXIT_(str) { PutUsage (); EXIT0_(str,1) }

/* Write the copyright line, the version line, and the usage summary
 * to stderr.  Nothing is returned and the program keeps running;
 * callers that want to stop afterwards use USAGE_EXIT_.
 */
static void PutUsage (void)
{
    /* The fixed lines that follow the version line, in output order. */
    static const char* const help [] =
    {
        "usage: cleave {file} [-] [-v] [-o] [-{size}]\n",
        "where\n",
        "    [-] indicates joining instead of splitting\n",
        "    [-v] enables verbose mode\n",
        "    [-o] allows overwriting of existing files\n",
        "    [-{size}] specifies the maximum size "
        "of each file part in bytes\n"
    };
    unsigned int line;

    fputs ("copyright 1993-2006 by Franklin Webber\n", stderr);
    fprintf (stderr, "version %s of %s\n", THE_VERSION, THE_DATE);
    for (line = 0; line < sizeof help / sizeof help [0]; line++)
        fputs (help [line], stderr);
}

/* Return the number of bytes in a file name before the extension.
 * For example, if name is "foo.bar.ext" then return 7,
 * if name is "foo.ext" then return 3, and
 * if name is just "foo" then still return 3 (the extension is implicit).
 * The file name before the extension is the base from which part file names
 * are constructed by encoding a version number in the filename extension.
 *
 * Only the final path component is searched for the '.', so a dot in a
 * directory name is never mistaken for the start of an extension:
 * "./foo" yields 5 and "dir.d/file" yields 10.  '/' always ends the
 * search; under DOS and Windows '\\' and ':' end it as well.
 */
static int Measure (char* name)
{
    char* end = name + strlen (name);
    char* ptr = end;

    /* Scan backwards; decrement inside the loop so that ptr never
     * moves to before the start of the string.
     */
    while (ptr > name)
    {
        char ch = *--ptr;
        if (ch == '.')
            return (int) (ptr - name);
        if (ch == '/')
            break;
#if defined(_WIN32) || defined(__WIN32__) || defined(__MSDOS__) || defined(MSDOS)
        if (ch == '\\' || ch == ':')
            break;
#endif
    }

    /* No extension in the final component: the whole name is the base. */
    return (int) (end - name);
}

/* Allocate space on the heap for part file names derived from the
 * given whole file name, where length is the number of bytes in the base.
 * Copy the base to the new space and terminate it with a null byte,
 * so the result is a valid string even before an extension is appended.
 * Allow enough extra room for a '.', a 3 byte extension, and a null byte.
 * Return a pointer to the new space; the caller owns it and may free it.
 * If the allocation fails, print a message to stderr and exit with
 * status 9 (the program cannot proceed without its file names).
 */
static char* Derive (char* name, int length)
{
    char* space = (char*) malloc ((size_t) length + 5);
    if (space == NULL)
    {
        fprintf (stderr, "cleave: cannot allocate memory for file names\n");
        exit (9);
    }

    /* strncpy does not terminate the copy when name is at least
     * length bytes long, so terminate it explicitly.
     */
    strncpy (space, name, (size_t) length);
    space [length] = '\0';
    return space;
}

/* Open the named file in binary mode and return its stream.
 * The file is always probed by opening it for reading first.
 * When writing is False that read stream is the result.
 * When writing is True the probe only tells whether the file exists:
 * an existing file is replaced if overwrite is True, otherwise the
 * program exits with status 2.
 * If the final open fails, the program exits with status 3.
 */
static FILE* Open (char* name, Bool writing)
{
    FILE* stream = fopen (name, "rb");

    if (writing)
    {
        if (stream != NULL)
        {
            /* The file already exists. */
            if (Not overwrite)
                EXIT1_("cleave: cannot overwrite file %s", name, 2)
            fclose (stream);
        }
        stream = fopen (name, "wb");
    }

    if (stream == NULL)
        EXIT1_("cleave: cannot open file %s", name, 3)
    return stream;
}

/* If size is positive, copy at most size bytes from the in file to the 
 * out file.  If size is negative, copy until the end of file.
 * Return the number of bytes copied.
 */
static long Copy (FILE* in, FILE* out, long size)
{
    Bool eof = False;
    long ask, got, put, total = 0;
    while (Not eof And size != 0)
    {
        if (0 < size And size < BLOCKSIZE) ask = size;
        else                               ask = BLOCKSIZE;
        got = fread (block, 1, ask, in);
        if (got != ask)
        {
            eof = feof (in);
            if (Not eof)
                EXIT0_("cleave: cannot read from input file", 4)
        }
        put = fwrite (block, 1, got, out);
        if (put != got)
            EXIT0_("cleave: cannot write to output file", 5)
        total += got;
        if (size > 0)
            size -= got;
    }
    return total;
}

/* Interpret the command-line parameters and initialize globals.
 * InitializeSwitch interprets each parameter preceded by '-' and
 * Initialize interprets the whole command line.
 *
 * sizeSpecified records whether a size switch has already been seen,
 * so that a second one can be rejected.
 */
Bool sizeSpecified = False;

/* Interpret one switch; str is the text that follows the leading '-'.
 * An empty str selects joining, "v" selects verbose mode, and "o"
 * permits overwriting.  Anything else must be a complete decimal
 * integer, which becomes partSize.  Text with trailing characters
 * (such as "100k") or a value that does not fit in a long is rejected
 * instead of being silently truncated.  Errors print the usage text
 * and exit with status 1.
 */
static void InitializeSwitch (char* str)
{
    if (Not strcmp (str, ""))
        splitting = False;
    else if (Not strcmp (str, "v"))
        verbose = True;
    else if (Not strcmp (str, "o"))
        overwrite = True;
    else if (sizeSpecified)
        USAGE_EXIT_(">> you may specify at most one size")
    else
    {
        char* end = NULL;
        long value;

        /* strtol reports overflow only through errno, so clear it first. */
        errno = 0;
        value = strtol (str, &end, 10);
        if (end == str Or *end != '\0' Or errno == ERANGE)
            USAGE_EXIT_(">> you may specify only an integer size")

        partSize = value;
        sizeSpecified = True;
    }
}

static void Initialize (int argc, char* argv [])
{
    sizeSpecified = False;

    if (argc < 2)
        USAGE_EXIT_(">> you must specify a file")
    cmdName = argv [1];

    while (argc--> 2)
        if (argv [argc] [0] == '-')
            InitializeSwitch (argv [argc] + 1);
        else
            USAGE_EXIT_(">> you may specify only one file")

    if (partSize <= 0)
        USAGE_EXIT_(">> you may specify only a positive size")
}

/* The main function.
 * Initialize, open files, and copy while generating new file names.
 */
int main (int argc, char* argv [])
{
    int length, number, parts;
    long copied;

    Initialize (argc, argv);

    /* Derive the whole, part, and index names from the file name
     * in the command line args.
     * The whole file name is the same as the filename in the command line.
     */
    length = Measure (cmdName);
    wholeName = cmdName;
    partName = Derive (cmdName, length);
    indexName = Derive (cmdName, length);

    /* Open the whole file.
     */
    wholeFile = Open (wholeName, Not splitting);

    /* Open the index file.
     * Use ext .000
     */
    sprintf (indexName+length, ".%03d", 0);
    indexFile = Open (indexName, splitting);

    /* If joining, read the index file.
     * Find out how many parts exist, excluding the index part.
     */
    if (Not splitting)
    {
        if (fscanf (indexFile, "%d", &parts) != 1)
            EXIT1_("cleave: cannot read index in file %s", indexName, 6)
        if (verbose)
            printf ("%d parts => whole\n", parts+1);
    }

    /* Announce the index after announcing how many parts to join.
     */
    if (verbose)
        printf ("%s\n", indexName);

    /* Copy each part.
     */
    number = 0;
    for (;;)
    {
        if (++number > 999)
            EXIT0_("cleave: cannot generate more filenames", 8)

        if (splitting)
            parts = number;
        else if (number > parts)
            break;

        /* Open the part file.
         * Complete the filename of the current part file
         * using the current 3-digit sequence number.
         */
        sprintf (partName+length, ".%03d", number);
        partFile = Open (partName, splitting);
        if (verbose)
            printf ("%s\n", partName);

        /* Copy from whole to part if splitting,
         * or from part to whole if joining.
         * Change to a new part file each time the current one is either
         * at its maximum size (splitting)
         * or is completely copied (joining).
         * If no bytes were copied into the final part file,
         * delete the file and decrement the number of parts
         * (this step is not really necessary but avoids having an empty
         *  part file if the part size divides the whole file exactly).
         */
        if (splitting)
        {
            copied = Copy (wholeFile, partFile, partSize);
            if (copied < partSize)
            {
                fclose (partFile);
                if (copied == 0)
                {
                    (void) remove (partName);
                    parts--;
                }
                break;
            }
        }
        else
            (void) Copy (partFile, wholeFile, (long) -1);
        fclose (partFile);
    }

    /* If splitting, record how many parts exist, excluding the index part.
     * Also record the whole file name.
     */
    if (splitting)
    {
        if (fprintf (indexFile, "%d\n", parts) < 0 Or
            fprintf (indexFile, "%s\n", wholeName) < 0)
            EXIT1_("cleave: cannot write index in file %s", indexName, 7)
        if (verbose)
            printf ("whole => %d parts\n", parts+1);
    }

    fclose (wholeFile);
    fclose (indexFile);
    return 0;
}
