/* This program, written in BDS-C, compares two text files and displays the differences. Finding the end of a difference between the files requires some arbitrary rules since we don't want to generate a series of small difference reports just because the old and new text have a few words in common. To avoid this problem, comparisons are on a "line" basis and the user can specify the line delimiter character and how many lines must match to end a difference. The end of file mark is also a line delimiter, but is not considered part of the line. The null character is used as an end of buffer mark and is converted to endfile if read from a file. Note that the last line in a buffer may be incomplete but will be treated as complete. Since linefeeds and the delimiter are converted by the output routine to CR-LF pairs, to avoid multiple linefeeds or carriage returns, the user may specify one character code to be deleted from all file input. The CP/M command line has the format: DF filea fileb options where filea and fileb may include drives as in b:myname.doc and where options consist of key letters followed immediately by a decimal number. Valid options are: D decimal value of delimiter character M number of lines which must match to end an area of difference I decimal value of character code to be deleted on iniut. Defaults are linefeed, 2, carriage return. Although normal, small areas of difference are processed quickly, large differences can take several minutes to analyze. Be patient. */ #define EOF 0x1a /* end of file mark */ #define MAXDIFF 8192 /* difference buffer size */ struct iobuf { char xxx[134]; }; main(argc, argv) int argc; /* Number of arguments passed from CP/M. The first is garbage. */ char *argv[]; /* array of pointers to argument strings */ { /* Storage declarations */ struct iobuf iob[2]; /* File buffers with */ char buff[2][MAXDIFF+1]; /* Text difference buffers with delimiter bytes at ends*/ int arg, file; char *p; int nm; /* Number of matches arg */ char delim; /* Line delimiter */ char ignore; /* Input character to be ignored */ int topline[2]; /* Top line numbers*/ int n; /* Miscellaneous */ /* Set default arguments */ nm = 2; /* Number of matches to resynchronize */ delim = '\n'; /* Line delimiter (linefeed)*/ ignore = 0x0D; /* Ignore carriage returns in file input.*/ puts("DIFFERENCE DISPLAY FOR PRINTABLE FILES - VERSION 1.1 2/24/80\n"); puts("BY:\tRICHARD GREENLAW\n\t251 COLONY CT.\n\tGAHANNA, OHIO 43230\n(OMIT ARGUMENTS FOR HELP)\n"); if(argc < 3) { puts("USEAGE:\nDF FILEA FILEB OPTIONS\n"); puts("WHERE FILES CAN SPECIFY DRIVES AND\nOPTIONS ARE LETTER CODES FOLLOWED BY DECIMAL NUMBERS:\n"); puts("\tD\tLINE DELIMITER CHARACTER (DEFAULT=LF)\n"); puts("\tI\tINPUT CHARACTER TO BE IGNORED (DEFAULT=CR)\n"); puts("\tM\tNUMBER OF MATCHING LINES TO END DIFFERENCE (2)\n"); error("EXAMPLE: DF A:XY.C B:XY.BAK M3 I10 D13"); } /* Open the files specified in the CP/M command line */ for(file = 0; file < 2; ++file) { /* Protect against wild card name which could rename a good file */ for(p = argv[file+1]; *p; ++p) if(*p == '?') error("AMBIGUOUS FILE NAME NOT ALLOWED\n"); /* Try to open it */ if(fopen(argv[file+1], iob[file]) < 0) { puts(argv[file+1]); error(" WON'T OPEN\n"); } } /* Process options, if any, from command line */ /* Format is Letter followed by decimal number */ for(arg = 3; arg < argc; ++arg) { n = atoi(argv[arg] + 1); if(n <= 0 || n > 128) error("BAD OPTION VALUE\n"); switch(*argv[arg]) { case 'M': /* Number of line matches to require */ nm = n; break; case 'D': /* Line delimiter character code */ delim = n; break; case 'I': /* Input character to be ignored */ ignore = n; break; default: error("BAD OPTION CODE\n"); } } /* Mark the buffers empty and delimit ends */ buff[0][0] = buff[1][0] = buff[0][MAXDIFF] = buff[1][MAXDIFF]= '\0'; /* Initialize line numbers corresponding to tops of buffers */ topline[0] = topline[1] = 1; /* Process until both end of files are at top of buffers*/ while(buff[0][0] != EOF || buff[1][0] != EOF) { /* Stream text through the buffer, skipping matching lines, until missmatch or endfiles at top This fills or loads endfile into each buffer */ n = skipml(buff, iob, delim, ignore); /* Update line numbers at tops of buffers */ topline[0] += n; topline[1] += n; /* Find where files agree after difference, report differences, and stream text through buffers until the agreeing line groups are at the top of the buffers or both endfiles are at the top of the buffers */ if(buff[0][0] != EOF || buff[1][0] != EOF) /* There is a real missmatch at the top */ processmm(buff, delim, nm, topline, argv); } puts("-------------------END OF COMPARISONS---------------------------\n"); } /* Stream text through the buffers until the top lines missmatch or contain endfiles. This fills or loads an endfile into each buffer. NULL (0) is used to delimit the end of buffer. When the buffer is full the EOB (NULL) is in an extra byte just after the buffer. Returns the number of lines skipped bacause they match. */ skipml(buff, iob, delim, ignore) char buff[2][MAXDIFF+1], delim, ignore; struct iobuf iob[2]; { int len, file, skipcnt; char *bottom, *pto, *pfrom, *pa, *pb; skipcnt = 0; /*initialize number of lines skipped */ /* Continue as long as top lines match and are not EOF or buffers are empty */ while(cmpl(buff[0], buff[1], delim) || buff[0][0] == '\0') { /* Find missmatch, EOF or EOB */ for(pa = buff[0], pb = buff[1]; len = cmpl(pa, pb, delim); pa += len, pb += len) ++skipcnt; /* Pa, pb indicate line with missmatch, EOF or EOB */ /* Top lines match but may be null */ /* For each file */ for(file = 0; file <= 1; ++file) { /* find out how much information remains */ /* Copy up through EOF or EOB, discarding matching text */ pto = buff[file] - 1; pfrom = (file ? pb: pa) - 1; do *++pto = *++pfrom; while (*pto && *pto != EOF); /* pto indicates new EOB or EOF. The EOB can be just after the buffer.*/ if(*pto != EOF) { /* Fill remainder of buffer from file unless/until EOF */ bottom = &buff[file][MAXDIFF-1]; while(pto <= bottom && (*pto++ = getc2(iob[file], ignore)) != EOF) ; } } } return skipcnt; } /* Compare lines. A line consists of text terminated by the delimiter specified by delim or terminated by NULL or EOF. Delim is considered part of the line it terminates, but NULL and EOF are not. Returns character count of line if the two lines match. Returns zero if any missmatch or if nothing but NULL or EOF. */ cmpl(pa, pb, delim) char *pa, *pb, delim; { int k; for(k = 1; *pa == *pb; ++k, ++pa, ++pb) { switch(*pa) { case EOF: case '\0': return k -1; default: if (*pa == delim) return k; } } return 0; } /* Get characters from file and handle errors */ getc2(io, ignore) struct iobuf *io; char ignore; { int ci; while((ci = getc(io)) == ignore) ; return ci <= 0 ? EOF : ci; } error(p) char *p; { puts(p); exit(); } /* Process missmatch. Top lines missmatch. Find the point in each file where at least nm lines match (EOFs can match several times). Report everything above these points as a file difference. Then stream text through the buffers until the matching line groups or endfiles are at the top. To ensure minimum meaningful differences by matching at the minimum depths (in lines) from the tops of the buffers the search algorithm is as follows: Reference lines are chosen at increasing depths, alternating between the two buffers. Each is compared to every line in the other buffer from the top to the same depth as the reference line. If no matches, the next reference line (in the other buffer or at the next depth) is tried. If a match is found it is checked for nm lines in each file. If all lines match, synchronization has been achieved. EOFs are seen as an infinite series of EOF lines to force cleanup at the ends of the files. If the reference depth exceeds the depth of either buffer the program will give up and abort cleanly. */ processmm(buff, delim, nm, topline, argv) char buff[2][MAXDIFF+1], delim, *argv[]; int nm, topline[2]; { char *refp[2], *refpf; /* Reference line pointers */ char *olp, *olp2; /* line pointers in other buffer */ char match, search; /* Flags */ int rdepth; /* depth of reference line */ int odepth; /* depth in other file */ char *pa, *pb, *p[2]; int len; int rfile, ofile; char *rbottom, *obottom; /* last bytes in buffers*/ char *pto, *pfrom; int mcnt; /* match count */ int skipct[2]; /* Lines in each area og missmatch*/ /* Begin search as described above */ match = 0; search = 1; refp[0] = buff[0]; /* Points to reference line */ refp[1] = buff[1]; /* For increasing reference depth while still searching */ for(rdepth = 0; search; ++rdepth) { /* For each buffer at each reference depth */ for(rfile = 0; rfile <= 1 && search; ++rfile) { ofile = rfile ? 0 : 1; /* other file */ refpf = refp[rfile]; rbottom = &buff[rfile][MAXDIFF-1]; obottom = &buff[ofile][MAXDIFF-1]; /* Compare reference line to each line in other buffer up to reference depth */ if(refpf >= rbottom) search = 0; /* buffer too small */ else { for(odepth = 0, olp = buff[ofile]; odepth <= rdepth && search; ++odepth, olp += lnlen(olp, delim)) { olp2 = olp; /* Try for match of depth nm starting at refpf and olp */ for(mcnt = 0, match = 1, pa =refpf, pb = olp; mcnt < nm && match; ++mcnt, pa += len, pb += len) { /* Note won't scan past EOF or NULL delimiters because line length will be zero. Alligned EOFs will be forced to match, but NULLs won't */ match = len = cmpl(pa, pb, delim); if(*pa == EOF && *pb == EOF) match = 1; /*EOFS match with length zero */ } if(match) { search = 0; /* we found nm matches ! */ } } } /* Advance this file's reference pointer */ refp[rfile] += lnlen(refpf, delim); } } /* Search is over. Results are: match flag indicates successful match of nm lines or to EOFs. if successful: rfile has reference file plus one, refpf points to the reference line, olp2 points to the matching line in the other file, rdepth has line number in buffer plus one, odepth has line number in buffer plus one. */ if (!match) error("DIFFERENCE TOO BIG FOR BUFFER! ABORTING.\n"); /* Files resynchronized, get pointers and line counts as functions of file. */ if(rfile == 2) { p[0] = olp2; p[1] = refpf; skipct[0] = odepth -1; skipct[1] = rdepth -1; } else { p[0] = refpf; p[1] = olp2; skipct[0] = rdepth -1; skipct[1] = odepth -1; } /* Print dividing line */ for(len = 0; len < 63; ++len) putchar('-'); putchar('\n'); /* For each file */ for(rfile = 0; rfile <= 1; ++rfile) { /* Display the differing text */ printf("-------- TEXT FROM %s, %d LINES FROM LINE %d ----------\n",argv[rfile+1], skipct[rfile], topline[rfile]); topline[rfile] += skipct[rfile]; printbuff(buff[rfile], p[rfile] - 1, delim); /* Move matching and following lines to top through the end of buffer delimiter */ for(pto = buff[rfile], pfrom = p[rfile]; *pfrom ; ++pto, ++pfrom) *pto = *pfrom; *pto = '\0'; /* Copy the null too */ } } /* Determine length of line including delim but not EOF or NULL */ lnlen(p, delim) char *p, delim; { int k; for(k = 0; ;++k, ++p) { switch(*p) { case EOF: case '\0': return k; default: if(*p == delim) return k + 1; } } } /* Display characters in range of addresses */ printbuff(ps, pe, delim) char *ps, *pe, delim; { while(ps <= pe) { putchar(*ps == delim ? '\n' : *ps); ++ps; } }