/* SCCS identification string for this file; left out when lint is defined */
#ifndef lint
static char SCCSid[] = "@(#) ./comm/io/piobuf.c 07/23/93";
#endif

/*
   This file contains routines to do buffered output (and later input)

   This is more scalable than the non-blocked version, but it has its
   drawbacks.
   
   Principally, it can spend a fair amount of its time waiting in receives
   for the combinations when it could be computing the next group of
   outputs.  This can cause a fair amount of sequentialization when
   a processor has no output in some buffer (it should begin writing
   the first buffer that it does use).

   Thus, the code should be restructured along the lines of

   fill my buffer (skipping over empty ones).
   Forward any buffers I'm skipping (no ops necessary)
   
   This requires frequent probing for pending messages; this shouldn't
   be too bad, since (at least in the formatted output version) there
   are lots of routine calls for each output already.

   To do this, we need to "invert" the combine operation so that we
   can put user code within it.
 */

#include <stdio.h>
#include "tools.h"
#include "comm/comm.h"
#include "comm/io/pio.h"
#ifndef SEEK_SET
#include <unistd.h>
#endif

/* Shorthand for the I/O routines attached to a file.  Each macro 
   dereferences a function pointer taken from "fp", so a variable with
   that name must be in scope wherever one of them is used. */
#define READ  (*fp->read)
#define WRITE (*fp->write)
#define LSEEK (*fp->lseek)

/* Forward declaration: PIiRWriterBlock calls this routine before its
   definition appears in the file */
void PIiRWriterFlush();
/* This is the structure of the buffer.  We need to know:
   The mapping from buffer to file
   where in the buffer we are
                    + Where we start when writing to the file (fp->fpos)
                    v 
   File   ----------+-------------------------------------------------
                         ^
			 + Location of buffer in file (wctx->fileloc)
			        + location of end of buffer in file 
				|                             (wctx->bufend)
			        v
   Buffers          -----+AAAAAA+-----+-----+-----+
                            ^
                            + current location in buffer (wctx->curloc) 
			      (currently ignored)

   (only one buffer == A, + work area for the combination)
 */
/* State of the single output buffer and of its work area; the diagram
   in the comment above shows how the locations relate to the file. */
typedef struct {
    int  bufsize;   /* number of bytes allocated for buf and for swork */
    char *buf;      /* the output buffer itself */
    char *swork;    /* work area handed to the combine operation */
    int  fileloc;   /* location in the file of the first byte of buf */
    int  curloc;    /* current location in the buffer (currently ignored) */
    int  bufend;    /* location in the file of the last byte of buf,
                       maintained as fileloc + bufsize - 1 */
    int  empty;     /* nonzero when there is nothing to flush */
    } PIFBlock;

/*
   PIiRWriterBlock - Copy one record into the block buffer, calling
   PIiRWriterFlush whenever the record reaches past the end of the buffer.

   Input parameters:
.  fp     - file being written; fp->disWctx holds the PIFBlock state
.  buf    - the record.  Its first sizeof(long) bytes hold the location 
            in the file of the data; the data bytes follow
.  n      - number of data bytes that follow the location field
.  offset - not used by this routine

   Notes:
   The buffer is marked non-empty even when n is zero.
 */
void PIiRWriterBlock( fp, buf, n, offset )
PIFILE *fp;
char   *buf;
int    n, offset;
{
int      bsize;
int      loc;
PIFBlock *wctx = (PIFBlock *)fp->disWctx;

if (fp->fpos > wctx->fileloc) {
    /* The file position is ahead of the start of the buffer: restart the 
       buffer at the file position and clear it.  (The value and length
       arguments of this MEMSET used to be interchanged, unlike every 
       other MEMSET in this file, so the buffer was not cleared.) */
    wctx->fileloc = fp->fpos;
    wctx->bufend  = wctx->fileloc + wctx->bufsize - 1;
    MEMSET( wctx->buf, 0, wctx->bufsize );
    }

/* NOTE(review): this read assumes buf is suitably aligned for a long */
loc = *((long *)buf);
buf += sizeof(long);   /* skip over offset field */
wctx->empty = 0;

/* NOTE(review): loc is assumed to be >= wctx->fileloc; a smaller value
   would make the copy below start before the beginning of wctx->buf */
while (n > 0) {
    /* Flush until the buffer covers loc.  The flush marks the buffer 
       empty, so it is marked in use again for the data that follows */
    while (loc > wctx->bufend) {
        PIiRWriterFlush( fp );
        wctx->empty = 0;
        }

    /* Copy as much of the record as fits; bufend is the location of the 
       last byte of the buffer, hence the + 1 */
    if (loc + n > wctx->bufend) {
        bsize = wctx->bufend - loc + 1;
        if (bsize <= 0) {
            fprintf( stderr, "[%d] error in bsize\n", MYPROCID ); 
            fflush( stderr );
            /* A non-positive length must not reach MEMCPY, and a zero
               length would never let this loop finish */
            return;
            }
        }
    else 
        bsize = n;

    MEMCPY( wctx->buf + (loc - wctx->fileloc), buf, bsize );
    buf += bsize;
    loc += bsize;
    n   -= bsize;
    }
}

/* Defined elsewhere; in this file it is only passed to gsetopHalfT, in 
   the USE_HALF_OP variant of PIiRWriterFlush */
extern void PIior();

/*
   PIiRWriterFlush - Combine the output buffer over the processor set of 
   the file and write the result to the file on the root processor.

   Input parameter:
.  fp - file whose buffer (fp->disWctx, a PIFBlock) is to be flushed

   Notes:
   Returns immediately when the buffer is marked empty.  Otherwise, on 
   return the buffer has been zeroed, moved forward in the file by the 
   number of bytes flushed, and marked empty.  A write that transfers a 
   different number of bytes than requested is reported with SETERRC.
 */
void PIiRWriterFlush( fp )
PIFILE *fp;
{
PIFBlock *wctx = (PIFBlock *)fp->disWctx;
int      root, nbytes, nwritten;

if (wctx->empty) return;

root = PSROOT( fp->procset );

/* eventually, do combine to a single node */

/* A whole buffer is flushed unless the file position is ahead of the 
   start of the buffer; then only the bytes up to fp->fpos are flushed, 
   and never more than one buffer's worth */
nbytes = wctx->bufsize;
if (fp->fpos > wctx->fileloc) {
    int ahead = fp->fpos - wctx->fileloc;
    nbytes = (ahead > wctx->bufsize) ? wctx->bufsize : ahead;
    }

/* The combine works on int-sized items, so the byte count is rounded up.
   (GIOR is presumably a global inclusive-or -- confirm in comm.h) */
#if !defined(USE_HALF_OP)
GIOR( wctx->buf, (nbytes + sizeof(int) - 1 ) / sizeof(int), 
      wctx->swork, fp->procset ); 
/* This doesn't quite work because multiple messages may be sent up the
   same tree from several different starts */
#else
gsetopHalfT( wctx->buf, (nbytes + sizeof(int) - 1 ) / sizeof(int), 
      wctx->swork, fp->procset, sizeof(int), MSG_INT, 0, PIior ); 
#endif

/* Only the root processor touches the file */
if (root == PImytid) {
    LSEEK( fp->fd, wctx->fileloc, SEEK_SET );
    nwritten = WRITE( fp->fd, wctx->buf, nbytes );
    if (nwritten != nbytes) {
        SETERRC(1,"Write failed" );
        }
    }

/* Get ready for the next block: mark the buffer empty, slide it forward 
   by the amount just flushed, and clear its contents */
wctx->empty    = 1;
wctx->fileloc += nbytes;
wctx->bufend   = wctx->fileloc + wctx->bufsize - 1;
MEMSET( wctx->buf, 0, wctx->bufsize );
}

/*
   PIiRWriterBlockWait - Flush buffers until the start of the buffer has
   reached a given location in the file.

   Input parameters:
.  fp       - file being written; fp->disWctx holds the PIFBlock state
.  datatype - not used by this routine
.  finalloc - location in the file that the start of the buffer must reach

   Notes:
   fp->fpos is set to finalloc while flushing and restored before 
   returning.  PIiRWriterFlush limits each flush to the bytes in front of 
   fp->fpos, so the last flush stops at finalloc.

   finalloc used to have no declaration in the parameter list and so was 
   an int only by the implicit-int rule, which C99 removed; it is now 
   declared explicitly with the same type.
 */
void PIiRWriterBlockWait( fp, datatype, finalloc )
PIFILE *fp;
int    datatype, finalloc;
{
int      savepos;
PIFBlock *wctx = (PIFBlock *)fp->disWctx;

savepos  = fp->fpos;
fp->fpos = finalloc;
while (wctx->fileloc < finalloc) {
    /* PIiRWriterFlush does nothing for a buffer marked empty, so force
       the flush even if this processor put nothing in the buffer */
    wctx->empty = 0;
    PIiRWriterFlush( fp );
    }
    
fp->fpos = savepos;
}

/*
   PIiRBlockCreate - Allocate and initialize the state for block-buffered 
   output.

   Input parameter:
.  size - size in bytes of the buffer (and of the work area).  A value
          that is zero or negative selects the default of 4096 bytes

   Returns:
   The new PIFBlock.  Its buffer is zeroed, it starts at location 0 of 
   the file, and it is marked empty.

   Notes:
   Formerly only a size of exactly zero selected the default, so a
   negative size was handed to MALLOC and MEMSET unchanged.

   NOTE(review): each allocation is checked with CHKPTRN.  If that macro 
   returns from this routine on failure, anything allocated earlier in 
   the routine is not released -- confirm against its definition.
 */
PIFBlock *PIiRBlockCreate( size )
int size;
{
PIFBlock *blk;

if (size <= 0) size = 4096;
blk = NEW(PIFBlock);   CHKPTRN(blk);

blk->bufsize = size;
blk->buf     = MALLOC( size ); CHKPTRN(blk->buf);
blk->swork   = MALLOC( size ); CHKPTRN(blk->swork);
/* Only buf is cleared here; swork is left as allocated */
MEMSET( blk->buf, 0, size );

blk->fileloc = 0;
blk->curloc  = 0;
blk->bufend  = size-1;   /* location of the last byte of the buffer */
blk->empty   = 1;

return blk;
}

/*
   PIFSetWritersBlock - Select the block-buffered output routines of this
   file by passing them to PIFSetWriters.

   Notes:
   The routines passed are, in order, PIiRWriterBlock, 
   PIiRWriterBlockWait, PIiRWriterFlush and PIiRBlockCreate.  
   NOTE(review): the role PIFSetWriters gives to each position is defined 
   elsewhere -- confirm there.
 */
void PIFSetWritersBlock()
{
PIFSetWriters( PIiRWriterBlock, PIiRWriterBlockWait, 
	       PIiRWriterFlush, PIiRBlockCreate );
}





