/******************************************************** * * * INDEX * * Text file index generator. * * * * T. Jennings 7/21/81 * * 221 W. Springfield St. * * Boston MA 02118 * * * * * ********************************************************* INDEX Generates an ASCII WordStar (or equiv.) compatible index from a text file. Words to be indexed are marked with a control character. Entire phrases can be indexed as well, by marking both ends with a different character. The index generated will be sorted alphabetically, with the first character of all entries in caps. Each entry will contain the page number as well. Two control characters, ^K and ^P are used. ^K marks single words, and ^P marks phrases. Phrases too long will be truncated to fit. Examples: The sixth word in this ^Ksentence will be put in the index. ^PThis entire phrase^P will be indexed. The index for these two items, once printed, would look like: Sentence .................................... 2 This entire phrase .......................... 3 See INDEX.DOC for details */ #include #define WORD_MARK 0x0b /* The single word marker */ #define PHRASE_MARK 0x10 /* Phrase marker */ #define CONTROLZ 0x1a #define CR 0x0d /* useful ASCII characters */ #define LF 0x0a #define WORD_LEN 39 /* maximum word or phrase size */ #define LAST_COL 40 /* column to start page # */ #define PAGE_LEN 66 /* default lines per page */ #define DEF_LMARGIN 8 /* default left margin, */ #define DEF_RMARGIN 72 /* ... and right margin */ char c; /* a useful variable for everyone */ int char_count; /* # characters looked at in file, */ int word_count; /* # words looked at */ int line_count; /* ditto lines */ int this_line; /* current line #/page */ int entries; /* # entries in index */ int this_page; /* current page */ int page_size; /* current max page length */ int lmargin,rmargin; /* current left and right margins */ int found_index; /* true if old index found */ char inbuf[BUFSIZ]; /* text input buffer */ char outbuf[BUFSIZ]; char inname[12]; /* where we save ASCII filenames */ char outname[12]; char tempname[12]; char pile[32768]; /* kludge */ /* Awaiting dynamic storage */ char *i; /* index pointer */ char *pointers[1024]; /* pile pointers for sorting */ change_page() /* dummy function to start a new */ {} /* page here. */ /* System interface for INDEX. Make sure theres at least one argument (the filename). Open it for reading, (error check) make a temporary file for output (filename.$I$) (error check). Then... 1/ Read the file sequentially, looking for marked words, [ index() ] 2/ Cleanup the pile of entries (remove leading blanks, convert each 1st character to uper case) [ cleanup() ] 3/ Sort the pile alphabeticlly [ sort() ] 4/ Dump the pile to the disk, expanding each to correct width, and removing duplicate entries [ dump() ] 5/ Return to CP/M */ main(argc,argv) int argc; char *argv[]; { printf ("\nINDEX-- Text file Index generator (c) T. Jennings 7/21/81"); page_size =PAGE_LEN; lmargin =DEF_LMARGIN; rmargin =DEF_RMARGIN; found_index =FALSE; if (--argc >0) { strcpy (inname,argv[1]); /* save the names, */ strcpy (outname,argv[1]); if (fopen(inname,inbuf) ==ERROR) /* try to open source file,*/ { printf ("\nCan't open %s",inname); exit(); } add_ext (outname,"$I$"); /* make the output file, */ if (fcreat(outname,outbuf) ==ERROR) /* reuse and destroy name*/ { printf ("\nCan't create temporary file %s",outname); exit(); } } else { printf ("\nSpecify a file to make an index from"); exit(); } printf ("\nAdding an index to %s (removing any old index first)",inname); index(); /* scan the file, */ cleanup(); /* convert each 1st char to upper case */ printf ("\nSorting, "); sort(); /* sort it, */ printf ("saving it, "); dump(); /* write it to the disk, */ printf ("cleaning up, "); strcpy (tempname,inname);/* delete any .BAK */ add_ext (tempname,"BAK"); unlink (tempname); rename (inname,tempname);/* rename original to .BAK, */ rename (outname,inname); /* rename new to original */ printf ("done.\n"); exit(); /* exit. */ } /* Read the input file, and make a list of words to index. Maintain the global variables indicating word count, etc. Leave a pile of strings, followed by the page #, terminated with a control-z. When done, PILE[] will have sequential null terminated strings, terminated by a single control-z. Each pointer in POINTERS[] will point to the start of each string, with the last pointing to the control-z. KLUDGE: No limit check is done on the size of the pile, nor the size of POINTERS. (currently 1024 entries, 32 char. each) */ index() { int inword; /* blank or character flag */ int gotword; /* true if saving this word */ int gotphrase; /* true if saving this phrase */ int entry_len; /* size of word or phrase */ char linebuf[132]; /* character line buffer */ int j; int last_char_blank; /* suppress mult. spaces 'tween lines */ int k; inword =FALSE; /* no word yet, */ gotword =FALSE; /* no marked word found, */ gotphrase =FALSE; /* no marked phrase found, */ last_char_blank =FALSE; /* too early... */ i =pile; /* set the pointers, ... */ k=0; char_count =0; word_count =0; /* and our booleans */ line_count =0; entries =0; this_page =1; this_line =1; while (fill_line(linebuf) !=CONTROLZ) /* while not EOF, */ { if (scan_line(linebuf) ==TRUE) /* look for dot cmds */ break; /* get next line */ j=0; ++line_count; /* count total lines,*/ if (this_line++ >= page_size) /* current line, */ { this_line =1; ++this_page; } while ((c =linebuf[j++]) !=0x00) /* while not end/line*/ { ++char_count; if (c== ' ') { inword =FALSE; /* end of a word */ if (gotword) /* if we were looking*/ { ++i; /* leave null to mark*/ sprintf(i,"%2d",this_page); while (*i++); /* point to next */ ++entries; /* count another */ gotword =FALSE; /* done with word */ } } else if (inword ==FALSE) /* non-white char */ { inword =TRUE; ++word_count; } if (c==WORD_MARK) /* new word to save */ { gotword =TRUE; /* start saving next char */ entry_len =0; pointers[k++] =i;/* set the pointer */ } else if (c== PHRASE_MARK) { if (gotphrase) /* if we had one before, */ { ++i; /* let the last null mark it */ sprintf(i,"%2d",this_page); while (*i++); ++entries; /* counter another */ gotphrase =FALSE; } else /* new phrase */ { gotphrase =TRUE; /* else start now. */ entry_len =0; /* just starting */ pointers[k++] =i;/* point to it */ } } /* see if we should store a character */ else if ( (gotphrase || gotword) && (entry_len++ =' ') && (c !='.') && !(last_char_blank && c==' ') ) { *i++ =c; /* were saving now */ *i =0x00; /* null terminate it always, */ last_char_blank= (c==' '? TRUE : FALSE); } } } *i =CONTROLZ; /* mark the top of the pile, */ pointers[k++] =i; /* set its pointer */ printf ("\n Put %d words in the index ",entries); printf ("out of a total of %d words.",word_count); return; } /* Sort routine. The array (pile) contains the index entries in no particular order. Array of pointers points to each entry. Put the entire pile in ascending alphabetic order. Very rude sort routine. (interchange) */ sort() { int first,last; char *temp; int changing; do { first =0; last =1; changing =FALSE; while (*pointers[last] !=CONTROLZ) { if (comp (pointers[first],pointers[last]) >0) { temp =pointers[first]; pointers[first] =pointers[last]; pointers[last] =temp; changing =TRUE; } ++first; ++last; } } while (changing); /* until we make a do-nothing pass */ return; } /* Compare two strings. Return 0 if equal, >0 if first is greater than last, <0 if first less than last. Ignores case. */ int comp(first,last) char *first; char *last; { int i; while (toupper(*first) ==toupper(*last) ) { if (*first == 0x00) /* stop at the null(if we got*/ return (0) /* this far, it matched */ ; ++first; ++last; } i =(toupper(*first) -toupper(*last)); /* mismatch */ return (i); } /* Compare two strings, of a given length. Check only for equality. Return 0 if equal, else 1. */ compl(length,first,last) int length; char *first,*last; { for (; length >0; length--) { if ( (toupper(*first++)) != (toupper(*last++)) ) return (1) ; } return (0); } /* Dump the pile to the disk. Convert each entry to a single line, making each an even number of columns wide. Put the page number at the end. Look for duplicate entries, and remove them. */ dump() { int column; int i,x; char *current_entry; char *j; char c; int local_i; column =0; i =0; if (found_index ==FALSE) /* dont duplicate this */ { sendstr ("\015\012.pa"); /* if its already there */ sendstr ("\015\012..index\015\012"); } while (*pointers[i] !=CONTROLZ) { j =pointers[i++]; /* send chars until null */ if (*j !=0xff) /* if its a deleted dup copy, */ { /* skip this */ current_entry =j; /* else save a copy, */ for (x =lmargin; x>0; x--) /* tab to left margin */ putc(' ',outbuf) ; while (c =*j++) { putc (c,outbuf); ++column; } while (column++