/*
 ************************************************************************
 *
 *			BIBLIOGRAPHY DATA BASE
 *		Parse the BEN SSSR internal tape format
 *
 * Tape format
 *	Every document (bibliographic description) is made of 3 to 5 records
 *	of variable length (not exceeding 1650 bytes) terminated with LF.
 *	All records have the 10-digit identification decimal number (in ASCII)
 *	in their first 10 bytes, all records pertaining to one document
 *	having the same identification number. Documents must have
 *	consecutive numbers.
 *
 *	The first record of the group contains bibliographic data and consists
 *	of 25 fields (each terminated with a '#' char). Blanks are permitted
 *	but ignored (with the exception of the first field, which may not
 *	have any blanks). The fields are of variable length not exceeding
 *	255 bytes. All fields must be present in fixed order (when there is
 *	no information for a particular field, it contains some blanks).
 *	The last field of the record may be terminated with LF rather
 *	than normal field delimiter ('#' char).
 *
 *	The second and the third records of the group contain no additional
 *	information as compared to the first record. Therefore, these records
 *	are ignored.
 *
 *	The fourth record is not obligatory and contains (after the
 *	identification number field) the text of the abstract. A very
 *	large abstract (exceeding the space allowed for one record)
 *	continues in the fifth record.
 *
 ************************************************************************
 */

#include "stdio.h"
#include "assert.h"
#include <errno.h>
#include <limits.h>
#include <stdlib.h>

				/* Field processing routines		*/
/* Implemented outside this file. parse_new_document() calls them in	*/
/* this order for every document: process_field_init() once,		*/
/* process_field() once per field of the first record and then once	*/
/* more for the abstract, and finally process_field_commit().		*/
extern void process_field_init();
/* 'field' is NUL-terminated text held in a static buffer of this file;	*/
/* the next field or abstract read overwrites that buffer.		*/
extern void process_field(char * field);
extern void process_field_commit();

				/* Internal format parameters		*/

/* Nominal upper bound on the length of one record. In this file it is	*/
/* only used to size the abstract buffer (see Abstract_field_len).	*/
#define Record_max_len	1660
/* Character that ends every record (LF).				*/
#define Record_terminator	'\n'

/* Largest number of characters get_field() stores for one field; its	*/
/* buffer is one byte larger to hold the terminating NUL.		*/
#define Field_max_len	256
/* Number of fields read from the first record of a document.		*/
#define Field_number	25
/* Character that ends a field inside the first record.			*/
#define Field_terminator	'#'

/* Number of characters of the identification number that starts every	*/
/* record.								*/
#define ID_field_len	10
/* Capacity, in characters, of the abstract buffer: room for the text	*/
/* of two records, because the abstract may continue from the fourth	*/
/* record into the fifth one.						*/
#define Abstract_field_len (2*Record_max_len+1)


				/* Local data				*/

/* Stream every reading routine of this file takes its input from; it	*/
/* is set by parse_init().						*/
static FILE * Fp;			/* Input file ptr		*/
/* ID of the document being parsed. parse_init() sets it to 0, which	*/
/* parse_new_document() treats as "no document read yet": it then	*/
/* adopts the first ID it reads and expects every following document	*/
/* to carry the previous ID plus one.					*/
static int ID_number;			/* ID number for the curr record*/

/*
 *-----------------------------------------------------------------------
 *			Low level service functions
 */

			/*
			 * Report an error in the input file together with
			 * the position at which it was noticed.
			 *
			 * title - text describing the error.
			 *
			 * The position is the current offset of Fp, shown as
			 * a 1-based 512-byte block number, the offset inside
			 * that block and the absolute offset (both in hex).
			 * The offset is kept as a long, the type ftell()
			 * returns, so it is not truncated on large inputs.
			 * When ftell() fails (it returns -1, for instance on
			 * a stream that cannot be positioned) no position is
			 * printed rather than a meaningless one.
			 *
			 * NOTE(review): the report goes through the external
			 * _error() routine, assumed to accept printf-style
			 * formats including the 'l' length modifier, and it
			 * is not visible here whether _error() returns -
			 * confirm against its definition.
			 */
static void parse_error(const char *title)
{
  const long VMS_block_size = 512;
  const long location = ftell(Fp);

  if( location < 0 )
  {
    _error("%s\noccured at an unknown location (ftell failed)\n",title);
    return;
  }

  _error("%s\noccured at the block %ld., offset %lxH, grand offset %lxH\n",title,
	 location/VMS_block_size+1,
	 (unsigned long)(location%VMS_block_size), (unsigned long)location);
}


                   	/* Scan blanks					*/
static void scan_blanks()
{
  register int c;
  while( (c=getc(Fp)) == ' ' )
    if( feof(Fp) || ferror(Fp) )
      parse_error("EOF or other input error while scanning blanks");
  ungetc(c,Fp);
}


/*
 *-----------------------------------------------------------------------
 * 			Field handling functions
 */

			/* Read the identification number		*/
static int ID_just_read = 0;

#define unget_ID_number(id_number) ID_just_read = id_number;

static int get_ID_number()
{
  char id_field[ID_field_len+1];
  int id_number;
  register int i;

  if( ID_just_read ) 
  {
    int id_number = ID_just_read;
    ID_just_read = 0;
    return id_number;
  }

  for(i=0; i<ID_field_len; i++)
     id_field[i] = getc(Fp);
  id_field[i] = '\0';

  if( feof(Fp) )
    return EOF;

  if( sscanf(id_field,"%d",&id_number) != 1 )
    parse_error("Illegal ID");

  return id_number;
}

                        /* Get the field terminated with Field_terminator*/
			/* Scan leading blanks				*/
static char * get_field()
{
  static char buffer [Field_max_len+1];
  register char *p = buffer;
  register int c;

  scan_blanks();			/* Scan leading blanks		*/

  while( (c=getc(Fp)) != Field_terminator )
  {
    if( feof(Fp) || ferror(Fp) )
      parse_error("EOF or other input error while reading the field");
    if( c == Record_terminator )
    {
      ungetc(c,Fp);
      break;
    }
    if( p-buffer >= Field_max_len )
      parse_error("Too long field");
    *p++ = c;
  }
  *p = '\0';
  return buffer;
}


			/* Skip one complete record: read its ID, which	*/
			/* must be the ID of the current document, and	*/
			/* then discard everything up to and including	*/
			/* the record terminator.			*/
static void flush_record()
{
  register int ch;

  if( get_ID_number() != ID_number )
    parse_error("ID numbers mismatch while flushing the record");

  for(;;)
  {
    ch = getc(Fp);
    if( ch == Record_terminator )
      return;
    if( feof(Fp) || ferror(Fp) )
      parse_error("EOF or other input error while flushing the record");
  }
}

                        /*
			 * Append the text of one abstract record to the
			 * abstract buffer.
			 *
			 * p     - where the next character is to be stored
			 * limit - one past the last position that may hold
			 *         text (the caller keeps a further byte for
			 *         the terminating NUL)
			 *
			 * Leading blanks are skipped, then characters are
			 * copied up to (not including) the record terminator.
			 * Returns the position following the last character
			 * stored.
			 *
			 * getc() is received in an int so that EOF can be
			 * told apart from data. End of input and text that
			 * does not fit are reported through parse_error();
			 * copying stops in both cases, so the buffer is never
			 * overrun even if parse_error() returns.
			 */
static char * append_abstract_record(char *p, const char *limit)
{
  register int c;

  scan_blanks();
  while( (c=getc(Fp)) != Record_terminator )
  {
    if( c == EOF )
    {
      parse_error("EOF or other input error while reading the abstract");
      break;
    }
    if( p >= limit )
    {
      parse_error("Too long abstract field");
      break;
    }
    *p++ = (char)c;
  }
  return p;
}

                        /*
			 * Get the abstract from the fourth (and fifth)
			 * records, if any.
			 *
			 * A record belongs to the abstract when its ID equals
			 * the ID of the current document. The first ID that
			 * does not match is pushed back for the next
			 * get_ID_number() call, and reading stops there.
			 *
			 * Returns a NUL-terminated string kept in a static
			 * buffer (overwritten by the next call); the string
			 * is empty when the document has no fourth record.
			 */
static char * get_abstract(void)
{
  static char buffer [Abstract_field_len+1];
  char * const limit = buffer + Abstract_field_len;
  register char *p = buffer;
  int records_left = 2;			/* The fourth and the fifth	*/
  int id_number;

  while( records_left-- > 0 )
  {
    if( (id_number = get_ID_number()) != ID_number )
    {
      unget_ID_number(id_number);	/* Not an abstract record	*/
      break;
    }
    p = append_abstract_record(p,limit);
  }

  *p = '\0';
  return buffer;
}


/*
 *-----------------------------------------------------------------------
 *			 Root modules
 */

void parse_init(file_ptr)
const FILE * file_ptr;			/* Input stream file 		*/
{
  register int i;

  Fp = file_ptr;
  ID_number = 0;		/* It means initialization		*/
}


				/* Prepare to parse a new document	*/
				/* Return EOF if no document to read	*/
				/* Return 1 on success			*/
int parse_new_document()
{
  int doc_number = get_ID_number();
  register int i;

  if( feof(Fp) )
  {
    return EOF;
  }

  if( ID_number == 0 ) 				/* Treat the initial	*/
    ID_number = doc_number;                 	/* case separately	*/
  else if( doc_number != ++ID_number )
    parse_error("Not a consecutive ID");

  process_field_init();
                               		/* Fill in Fields from the first*/
  for(i=0; i<Field_number; i++)     	/* record of the document	*/
     process_field(get_field());
  if( getc(Fp) != Record_terminator)	/* Assure all the record has been*/
    parse_error("No CR found were expected");	/* processed		*/

  flush_record();			/* Flush the second record	*/
  flush_record();			/* Flush the third record	*/
  process_field(get_abstract());

  process_field_commit();
  return 1;
}
