program refs; {REFS-- find and list references in manuscripts COPYRIGHT 1985 by Ross A. Alford All commercial rights reserved. This software is released for nonprofit distribution only. Any commercial distribution should be undertaken only with the express consent of the copyright holder: Ross A. Alford Department of Zoology Duke University Durham, NC 27706 ...[decvax, ihnp4, akgua]!mcnc!ecsvax!alford REFS finds references in scientific manuscripts. It will list references found and the number of times they are occur to a file, a printer, or the system console. It should work with references of the forms: Smith, 1980 |Smith (1980) Smith, 1980a |Smith (1980a) Smith, 1980a, b |Smith (1980a, b) Smith, 1980a, 1980b |Smith (1980a, 1980b) Smith and Smith, 1980 |Smith and Smith (1980) Smith et al., 1980 |Smith et al. (1980) Smith's 1980 |Smith's (1980) Smith, Smith, and Smith, 1980 |Smith, Smith, and Smith (1980) Smith-Smythe and Smith 1980 and with most any similar style. It also allows the last digit of the year to be replaced by a letter, as Smith, 198x, for cases where the exact date is uncertain. It may not work entirely properly on references in tabular formats, specifically if a reference of the form Smith 1980a,b is split between lines so that the 'b' is widely separated from the 'a'. Month, year dates, as July, 1980, also are treated as references. You never know when some person might have the same name as a month. Operating the program is simple, and is documented in the msgexit function. Just run the program with no parameters for a description. I apologize for the paucity of comments, but after all this is self-documenting Pascal :-) To convert the program for use with MS-DOS Turbo, just change the getarg function, as documented in the comments for that function. I haven't had any trouble with Turbo stepping on command line arguments, but that is the first thing to check if the program doesn't seem to work. Just change the commenting in getarg so that a command line is requested after the program begins execution. Please let me know of any bugs found, bug fixes made, or improvements made. Ross Alford} const charsect = 128; namelen = 60; version = '1.3'; type fnamestr = string[14]; msgstr = string[80]; tabletyp = array[0..127] of boolean; buftype = array[1..CHARSECT] of byte; nametyp = string[NAMELEN]; datetyp = string[5]; refptr = ^reference; reference = record next : refptr; name : string[NAMELEN]; number : integer end; sectptr = ^sectrec; sectrec = record next : sectptr; previous : sectptr; data : buftype end; var inf : file; i,j,ptrsave,sinceref : integer; c : byte; oldyear,year : datetyp; xtra,name,tempname,oldname : nametyp; closeparen,notfound : boolean; outfname,infile : fnamestr; reflist : refptr; {intentional global variables- to speed things up} outf : text; lowcase,isupcase,otherbad,letter,number : tabletyp; cursectnum,numinfile : integer; infopen,outfopen,hitnumber,comma,done : boolean; sector,savesect : sectptr; {current sector in use} ptr,saveptr : integer; {location within sector} {---------exit gracefully with info---------------------------------------} procedure msgexit(msg : msgstr); begin if infopen then close(inf); if outfopen then close(outf); writeln; if msg <> '' then begin writeln(chr(7),msg); writeln end; writeln('REFS finds references in the name, date form in manuscripts.'); writeln; writeln('To run REFS enter a command line like:'); writeln; writeln('A>refs infile {outfile}'); writeln; writeln('Where infile is a CP/M filename of the form drive:filename.ext'); writeln; writeln('and outfile can be either a disk file, given in the same format '); writeln('as infile, or can be given as CON: to send output to the CRT screen'); writeln('or LST: to send output to the CP/M list device.'); writeln; writeln('If outfile is not specified, a file of the same base name as infile'); writeln('but with the extension .REF, will be created on the same drive that'); writeln('infile is read from.'); writeln; halt end; {---------------------read a sector into a sector buffer-------------------} procedure readsector(var sector : sectptr); begin if cursectnum < numinfile then begin blockread(inf,sector^.data,1); cursectnum := succ(cursectnum) end else done := TRUE end; {------------------------get a new sector buffer node-------------------} procedure getsectnode(var sector : sectptr); var n : integer; begin n := memavail; if ((n and $7fff) < 512) then msgexit('Out of memory during sector read'); new(sector); sector^.next := NIL; sector^.previous := NIL end; {----------------return the character currently pointed to-------------} function curbyte : byte; {uses globals sector and ptr} begin curbyte := sector^.data[ptr] and $7f end; {---------get next character, read a new sector if needed--------------} function nextbyte : byte; {uses globals sector and ptr} var tempsec : sectptr; t : byte; begin ptr := succ(ptr); if ptr > 128 then if sector^.next = NIL then begin tempsec := sector^.previous; if tempsec = NIL then getsectnode(tempsec); readsector(tempsec); if not done then begin tempsec^.previous := sector; sector^.previous := NIL; sector^.next := tempsec; tempsec^.next := NIL; sector := tempsec; ptr := 1 end end else begin tempsec := sector^.next; tempsec^.next := NIL; tempsec^.previous := sector; sector^.previous := NIL; sector := tempsec; ptr := 1; end; t := sector^.data[ptr]; if t = 26 then done := TRUE; nextbyte := t and $7f end; {--------------------return previous character--------------------} function prevbyte : byte; {uses globals sector and ptr} var tempsec : sectptr; ch : byte; begin ptr := pred(ptr); if ptr < 1 then begin if sector^.previous <> NIL then begin tempsec := sector^.previous; tempsec^.next := sector; tempsec^.previous := NIL; sector^.next := NIL; sector := tempsec; ptr := 128 end end; if (ptr < 1) then prevbyte := 0 else prevbyte := sector^.data[ptr] and $7f end; {return previous alphabetic word. Set the global 'comma'=TRUE if a comma follows it. Set the global 'hitnumber' TRUE if a digit is encountered. Return no word if any of the characters for which corresponding entries in the tables 'otherbad' or 'number' have been set true is encountered.} function prevword : nametyp; var c,d : byte; i : integer; gotalet : boolean; name : nametyp; begin i := 0; comma := FALSE; gotalet := FALSE; hitnumber := FALSE; name := ''; repeat c := prevbyte; i := succ(i); if (c = ord(',')) then comma := TRUE; if letter[c] then begin if (not number[prevbyte]) then gotalet := TRUE; d := nextbyte {readjust pointer} end; if number[c] then hitnumber := TRUE; if otherbad[c] then i := 126 until gotalet or (i = 126); if gotalet then while letter[c] do begin name := chr(c) + name; c := prevbyte end; c := nextbyte; prevword := name end; {--------------save position in file before backwards scan-----------} procedure saveposition; begin savesect := sector; saveptr := ptr end; {------------restore position in file after backwards scan------------} procedure restoreposition; begin sector := savesect; ptr := saveptr end; {Set up truth tables for membership in sets of characters. Indexing into these tables is much faster than using the standard set notation.} procedure inittables; var i : integer; begin for i := 0 to 127 do begin letter[i] := (((i > $40) and (i < $5b)) or ((i > $60) and (i < $7b))); number[i] := ((i >= ord('0')) and (i <= ord('9'))); isupcase[i] := ((i > $40) and (i < $5b)); lowcase[i] := ((i > $60) and (i < $7b)); otherbad[i] := FALSE; if chr(i) in ['=','<','>',':'] then otherbad[i] := TRUE end; letter[39] := TRUE; {apostrophe is a letter} letter[ord('-')] := TRUE; {so is hyphen} end; {Write the accumulated linked list of references onto the defined output file} procedure writelist(var outf : text; list : refptr); var current : refptr; totcites,totrefs : integer; begin totcites := 0; totrefs := 0; writeln(outf,'References from file ',infile); writeln(outf); writeln(outf,'Author(s) and date; number of times cited'); writeln(outf); current := list^.next; while current <> NIL do begin writeln(outf,current^.name,'; ',current^.number); totrefs := succ(totrefs); totcites := totcites + current^.number; current := current^.next end; writeln(outf); writeln(outf,'Total count of citations in text = ',totcites); writeln(outf,'Total number of references cited = ',totrefs); close(outf) end; {-------------return an initialized storage node for a reference-----------} procedure getrefnode(var x : refptr); var i : integer; begin i := memavail; if ((i > 0) and (i < 512)) then msgexit('Out of memory--too many references--try splitting input file'); new(x); x^.next := NIL; x^.name := 'A'; x^.number := 1 end; {------------------------------------------------------------------- function getarg reads a series of characters from the CP/M command line buffer. It returns everything up to the next space it encounters and saves what's left of the buffer. If there's nothing left, it returns the empty string. This function is VERY Turbo-specific: it relies on static variable allocation to preserve the command string between calls} function getarg : fnamestr; const called : boolean = FALSE; {used while debugging only} i : integer = 1; j : integer = 1; var cmdbuf : string[127] absolute $80; {to run after compiling to memory, comment out the 'absolute $80'} (*var cmdbuf : string[127] absolute CSeg:$80;*) {for MS/PC DOS command-line} begin (* if not called then {used for debugging} begin {with memory compilation} write('Enter command line: '); readln(cmdbuf); {remove comments to use} called := TRUE {after compiling to memory} end; *) while cmdbuf[i] = ' ' do {skip leading blanks} i := succ(i); j := i; {point to start} while (not(cmdbuf[i] = ' ') and (i <= length(cmdbuf))) do begin cmdbuf[i] := upcase(cmdbuf[i]); {all commands upper case} i := succ(i) {find end} end; getarg := copy(cmdbuf,j,i - j); {assign return value} j := i {new starting location} end; {Add a new reference to the list of references, maintaining sorted order} procedure addtolist(list : refptr; name : nametyp); var current, newnode, last : refptr; begin current := list; last := list; while ((current^.next <> NIL) and (name > current^.name)) do begin last := current; current := current^.next end; if name <> current^.name then begin getrefnode(newnode); newnode^.name := name; if name > current^.name then current^.next := newnode else begin newnode^.next := current; last^.next := newnode; newnode^.name := name end end else current^.number := succ(current^.number) end; begin writeln; writeln('REFS version ',VERSION); writeln('Copyright 1985 by Ross A. Alford'); writeln('All commercial rights reserved'); writeln; inittables; ptr := 0; year := ''; name := ''; comma := FALSE; closeparen := FALSE; infopen := FALSE; outfopen := FALSE; getsectnode(sector); getrefnode(reflist); infile := getarg; if infile = '' then msgexit('Input filename not specified'); assign(inf,infile); {$I-} reset(inf); {$I+} if ioresult <> 0 then msgexit('Input file not found'); infopen := TRUE; numinfile := filesize(inf); outfname := getarg; if outfname = '' then begin i := pos('.',infile) - 1; if i = 0 then i := length(infile); outfname := copy(infile,1,i); outfname := outfname + '.REF'; end; assign(outf,outfname); if ((outfname <> 'CON:') and (outfname <> 'LST:')) then begin {$I-} reset(outf); {$I+} if ioresult = 0 then msgexit('Output file exists--cannot overwrite') end; rewrite(outf); outfopen := TRUE; blockread(inf,sector^.data,1); cursectnum := 1; done := FALSE; repeat if nextbyte = ord('1') then {CHECK FOR POTENTIAL DATE} begin {add check for '2' in 1999} year := chr(curbyte); { :-) } comma := false; if number[nextbyte] then begin year := year + chr(curbyte); if number[nextbyte] then begin year := year + chr(curbyte); if (number[nextbyte] or letter[curbyte]) then begin year := year + chr(curbyte); if (not number[nextbyte]) then if lowcase[curbyte] then year := year + chr(curbyte); end end end end; if (length(year) = 4) or (length(year) = 5) then begin {got a date} saveposition; closeparen := FALSE; {for later reference} comma := FALSE; {for later reference} c := prevbyte; {skip back four to avoid date} c := prevbyte; c := prevbyte; c := prevbyte; name := ''; xtra := ''; repeat notfound := TRUE; repeat tempname := prevword until ((tempname = '') or (length(tempname) > 1)); if ((hitnumber) and (name <> '')) then tempname := ''; if tempname[length(tempname) - 1] = chr(39) then {fix posessives} tempname := copy(tempname,1,length(tempname) - 2); if tempname[length(tempname)] = chr(39) then {fix other posessives} tempname := copy(tempname,1,length(tempname) - 1); if isupcase[ord(tempname[2])] then tempname := ''; {no abbrevs} if ((tempname[1] = '-') or (tempname[length(tempname)] = '-')) then tempname := ''; {no leading/trailing hyphens} if (tempname <> '') and (isupcase[ord(tempname[1])]) then begin if ((name = '') or (xtra <> '') or comma) then begin name := tempname + ' ' + xtra + name; xtra := ''; notfound := FALSE end end else if ((tempname = 'and') or (tempname = 'et') or (tempname = 'al')) then begin xtra := tempname + ' ' + xtra; notfound := FALSE end; until notfound; if name <> '' then begin oldname := name; oldyear := year; name := name + year; addtolist(reflist,name); sinceref := 0 end; restoreposition; year := ''; end else if sinceref < 4 then {check for the Smith 1980a, b form} if (lowcase[curbyte] and (length(oldyear) = 5)) then begin if ((not letter[nextbyte]) and comma and (not closeparen)) then begin addtolist(reflist,oldname + copy(oldyear,1,4) + chr(prevbyte)); sinceref := 0 end else begin c := prevbyte; sinceref := 10 end end; comma := (comma or (curbyte = ord(','))); closeparen := (closeparen or (curbyte = ord(')'))); sinceref := succ(sinceref); until done; writelist(outf,reflist); end.