#define VERSION "dif v2.2 08/18/87\n" /* * Differential file comparision program * * Chuck Forsberg * Computer Development Inc * 6700 S. W. 105th * Beaverton OR 97005 * 503/646-1599 RCP/M 503/621-3193 300,450,1200 baud * cc1 dif.c -e 4000 -o * clink dif yam8 -f dio -s (ndio may be used instead or dio) * * REVISION HISTORY * * 11/04/81 CAF V1.10 * added crc information for use by ssed 1.10 in verifying * antecedent file is correct * 11/17/81 CAF V2.00 * 07/18/84 CEM V2.10 Carl Mascott * create version for SWTW C/80 * 01/05/85 CEM V2.11 Carl Mascott * add casts to ptr alignment arith (ALIGN); * look for stdio.h on a: * 04/28/86 CEM V2.12 Carl Mascott * change '+' to '+++' to match '---'; * add hooks for profiler * 08/18/87 CEM V2.20 Carl Mascott * speed up character input */ /* system dependent stuff */ /* SWTW C/80 compiler */ #define C80 #include #ifdef C80 #include #define fgetc getc #endif /* must struct line be aligned on int boundary? */ /* #define ALIGN */ #define CPMEOF 26 #define DNODTYP struct dnodtyp #define LINE struct line /* E-O-F indicator */ #define LAST_TOKE 32765 /* at least 1 less than max positive int */ #define MAXLEN 255 /* max line length */ #define HUNKSIZE 8192 /* Size of each circular buffer */ #ifdef C80 #define MODE "rb" #else #define MODE "r" #endif FILE *finp[2]; FILE *in0, *in1; int linno[2]; char *patha, *pathb; /* names of each file */ /* data in the circular buffers is stored in linked form */ struct line { unsigned hash; /* hashed value of text special case EOF=0 */ char f; /* 0 or 1 which file it came from */ int num; /* number of line or LAST_TOKE if EOF */ struct line *next; /* pointer to next line, or null if last in buffer, or to itself if EOF */ char text[1]; /* asciz line of text */ } *bottom[2], /* bottom of each file's buffer */ *top[2], /* top of each buffer */ *curlna, *curlnb; /* line just fetched from buf */ LINE *getnext(); /* forward declarations */ int fgetc(); int getcr(); int Different, Verbose, Edit, Display, Unsqueeze; unsigned crc; /* accumulated crc of common lines */ int fudge; int ineof[2]; /* EOF seen on file input */ int (*getit[2])(); /* getchar function for each file */ /* Definitions and externals for unsqueezer function */ #define RECOGNIZE 0xFF76 /* unlikely pattern */ /* External declarations for USQ feature */ #define NUMVALS 257 /* 256 data values plus SPEOF*/ /* Decoding tree */ struct dnodtyp { int children[2]; /* left, right */ } *dnode; int bpos; /* last bit position read */ /* Variables associated with repetition decoding */ int repct; /*Number of times to return value*/ /* * htbl stores pointers to a buncha lines which are sorted when looking * for a match */ #define HASHCHUNK 16 /* # lines of each file to search for initial match */ #define HSIZE 2048 /* total # of lines that the hash table can hold */ LINE **htbl; /* pointer to hash table */ char *bfx[2]; /* pointers to circular buffers */ main(argc, argv) int argc; char **argv; { static char *cp; static int i; static LINE cdq[2]; /* reader from each file */ char *makebuf(); FILE *fopen(); #ifdef PROFIL initprof(); #endif Different=Edit=Unsqueeze=Display=Verbose=FALSE; getit[0]=fgetc; getit[1]=fgetc; while (--argc > 0 && *(cp = *++argv)=='-') { while( *++cp) { switch(tolower(*cp)) { case 'd': Display++; break; case 'e': Edit++; break; case 'u': Unsqueeze++; break; case 'v': Verbose++; break; default: goto usage; } } } if(argc < 1 || argc > 2) { usage: fprintf(stderr,VERSION); fprintf(stderr, "\nUsage: dif [-deuv] filea {fileb,outfile]\n"); fprintf(stderr,"\n\t-d display lines that match\n"); fprintf(stderr,"\t-e generate Editor script\n"); fprintf(stderr,"\t-u Unsqueeze filea\n"); fprintf(stderr,"\t-v Verbose\n"); exit(1); } if ((finp[0] = fopen(patha = *argv, MODE)) == NULL) { fprintf(stderr, "Can't open %s", patha); exit(128); } else in0=finp[0]; if (argc==2) { if ((finp[1] = fopen(pathb = *++argv, "r")) == NULL) { fprintf(stderr, "Can't open %s", pathb); exit(128); } else in1=finp[1]; } else { in1=finp[1]=stdin; pathb= "Standard Input"; } /* allocate buffers */ dnode = (DNODTYP *) makebuf((NUMVALS - 1) * sizeof(DNODTYP)); htbl = (LINE **) makebuf(HSIZE * sizeof(LINE *)); bfx[0] = makebuf(HUNKSIZE); bfx[1] = makebuf(HUNKSIZE); if(Unsqueeze) { getit[0]=getcr; /* switch getchar function */ init_usq(); /* initialize unpacking */ } crc=linno[0]=linno[1]=0; ineof[0]=ineof[1]=FALSE; cdq[0].next=bottom[0] = bfx[0]; cdq[1].next=bottom[1]=top[0]= bfx[1]; top[1]= bfx[1] + HUNKSIZE; if(Verbose) for(i=0; i<2; ++i ) fprintf(stderr,"bottom[%d]=%x top=%x\n", i, bottom[i], top[i]); fudge=0; /* initialize magic editing offset */ /* initialize curln so it won't get in the way of filling buffer */ curlna=curlnb=0; /* get the first line from each and start chain */ getln2(0, &cdq[0]); curlna=bottom[0]; getln2(1, &cdq[1]); curlnb=bottom[1]; while( curlna->num != LAST_TOKE | curlnb->num != LAST_TOKE) { if(strcmp(curlna->text, curlnb->text)) dodiff(); else { crc += curlna->hash; if(Display) printf(" %s", curlna->text); curlna=getnext(curlna); curlnb=getnext(curlnb); } } if(Edit) printf("$a %u\n.\n", crc); if(Different) fprintf(stderr,"Files are different\n"); else fprintf(stderr,"No differences encountered\n"); #ifdef PROFIL endprof(); #endif } /* getnext returns next line unless: eof: return current (its chained to itself); no room in buffer: return NULL; */ LINE *getnext(fp) register LINE *fp; { if(fp->next) return fp->next; /* link to next line */ getln2(fp->f, fp); /* end of buffer -get more */ return fp->next; } getln2(fn, fp0) int fn; LINE *fp0; { static char *p, *q, *qq, *qqq; static int c, count, *number, len, n; static int (*getthis)(); /* getchar function for current file */ static LINE *fp, *gp; static FILE *infp; unsigned crck(); n = fn; fp = fp0; number= &linno[n]; getthis=getit[n]; infp = finp[n]; q=top[n]; q -= (MAXLEN + 3 + sizeof(*fp)); qq=n?curlnb:curlna; qqq = qq - (MAXLEN + 3 + sizeof(*fp)); p=fp->next; if(p==NULL) { p=fp; p += sizeof(*fp)+strlen(fp->text); #ifdef ALIGN p += (int)p % sizeof(int); #endif } if(Verbose) fprintf(stderr,"File %d line %d q=%x qq=%x qqq=%x p=%x\n", n, *number, q, qq, qqq, p ); if(ineof[n]) { fprintf(stderr, "Getline called after E-O-F\n"); fprintf(stderr, "%d %3d fp=%x hash=%04x next=%x p=%x ", n, *number, fp, fp->hash, fp->next, p); fprintf(stderr, "%s", fp->text); return; } for(;;) { /* check if we need to wrap at end of buffer */ if(p > q) { p=bottom[n]; if(Verbose) fprintf(stderr,"Wrapped: p=%x\n", p); if(qq==0) goto yumyum; /* special case first time */ } /* is the buffer filled up ? */ if(p <= qq && p >= qqq) { /* before curln ? */ yumyum: if(Verbose) fprintf(stderr,"Buffer Filled\n"); return; } #ifdef ALIGN p += (int)p % sizeof(int); #endif gp=p; gp->f = n; for(p=gp->text, count=MAXLEN; --count; ) { if ((c = *p++ = (*getthis)(infp) ) >= ' ') continue; switch (c) { case CPMEOF: case EOF: if(Verbose) fprintf(stderr, "EOF on file %d at line %d\n", n, *number); ineof[n]=TRUE; gp->hash = ~0; gp->next = gp; /* link to self */ gp->num = LAST_TOKE; strcpy(gp->text, "**EOF**\n"); fp->next=gp; return; case 012: ++*number; goto gotline; case 015: --p; } } fprintf(stderr,"Line %d is too long\n", *number); gotline: *p=0; if(len=p - gp->text) gp->hash=crck(gp->text, len, 0); else gp->hash = 0; gp->num = *number; fp->next=gp; if(Verbose>3) { fprintf(stderr, "%d %3d gp=%x hash=%04x len=%3d p=%x ", n, *number, gp, gp->hash, len, p); fprintf(stderr, "%s", gp->text); } fp=gp; /* advance along new chain */ fp->next=NULL; ++p; /* bump pointer past trailing null */ } } /* * Sort htbl first by hashvalue, then by file number */ comp(a,b) LINE **a, **b; { if( (*a)->hash > (*b)->hash) return 1; if( (*a)->hash < (*b)->hash) return -1; return (*a)->f - (*b)->f; } dodiff() { register LINE **lp; static LINE *missm[2]; /* last line fetched for htbl */ static int quantity, m, l; static int keepatit, wanta, wantb; Different=TRUE; if(Verbose) fprintf(stderr,"Difference at %d:%d\n", curlna->num, curlnb->num); wanta=wantb=TRUE; lp=htbl; *lp++ =missm[0]=curlna; *lp++ =missm[1]=curlnb; /* * To minimize time in locating the matching lines after small * sections of text, start out by looking at just a few lines. * If that doesn't work, enlarge the search. */ for(quantity=2,l=HASHCHUNK, keepatit=TRUE; keepatit && (wanta||wantb); l+=l) { if(wanta) for(m=l; --m>=0; ) { if((*lp = getnext(missm[0]))==NULL) { wanta=FALSE; break; } missm[0]= *lp++; /* quit n-o-w if table filled */ if(++quantity==HSIZE) { keepatit=FALSE; goto nowlook; } /* Don't fill up the table with eof's */ if(missm[0]->num==LAST_TOKE) { wanta=FALSE; break; } } if(wantb) for(m=l; --m>=0; ) { if((*lp = getnext(missm[1]))==NULL) { wantb=FALSE; break; } missm[1]= *lp++; if(++quantity==HSIZE) { keepatit=FALSE; goto nowlook; } if(missm[1]->num==LAST_TOKE) { wantb=FALSE; break; } } nowlook: if(Verbose) fprintf(stderr, "Dodiff Quantity=%d k't=%d w'a=%d w'b=%d\n", quantity, keepatit, wanta, wantb); if(Verbose>2) for(m=0; mf, htbl[m]->hash, htbl[m]->num, htbl[m]->next); qsort(htbl, quantity, sizeof(LINE *), comp); if(lookferit(quantity)) { return; } } fprintf(stderr,"Can't find match at a:line %d b:line %d\n", curlna->num, curlnb->num); if(Verbose) for(m=0; mf, htbl[m]->hash, htbl[m]->num, htbl[m]->text); exit(1); } /* search for lowest matching lines using the hash index for quick look */ lookferit(quantity) { LINE *loa, *lob, **pe; static LINE **pa, **pb, *fp; static int lowa, lowb, skipa, skipb; lowa=lowb=LAST_TOKE+1; pe= &htbl[quantity]; for(pa=htbl; pa < pe; ++pa) { if((*pa)->f) continue; for(pb=pa+1; pb < pe; ++pb) { if((*pb)->f == 0) continue; if((*pa)->hash != (*pb)->hash) continue; if((*pa)->num > lowa || (*pb)->num > lowb) continue; if((*pa)->next==NULL || (*pb)->next==NULL) continue; if((*pa)->next->hash != (*pb)->next->hash) continue; if(strcmp((*pa)->text, (*pb)->text)) continue; if(strcmp((*pa)->next->text, (*pb)->next->text)) continue; lowa=(*pa)->num; lowb=(*pb)->num; loa= *pa; lob= *pb; } } if(lowa > LAST_TOKE) { return FALSE; } if(Verbose){ fprintf(stderr, "Found match at %d:%d\n", lowa, lowb); fprintf(stderr, "%s%s", loa->text, lob->text); } if(Edit) { skipa= lowa - curlna->num; skipb= lowb - (fp=curlnb)->num; if(Verbose) fprintf(stderr,"Fudge=%d skipa=%d skipb=%d\n", fudge, skipa, skipb); if(skipa) { printf("%d", fudge+curlna->num); if(skipa != 1) { putchar(','); if(lowa==LAST_TOKE) putchar('$'); else printf("%d", fudge + curlna->num + skipa - 1); } fudge -= skipa; } if(skipb) { if(skipa==0 || curlna->num==LAST_TOKE) /* append if no lines to remove */ printf("%da %u\n", curlnb->num -1, crc); else /* change if old lines gotta go */ printf("c %u\n", crc); for(; fp->num < lowb; fp=getnext(fp)) printf("%s", fp->text); printf(".\n"); fudge += skipb; } else printf("d %u\n", crc); } else { printf("-------- Line %4d of '%s' ----\n",curlna->num, patha); for(fp=curlna; fp->num < lowa; fp=getnext(fp)) printf("---%s", fp->text); printf("++++++++ Line %4d of '%s' ++++\n",curlnb->num, pathb); for(fp=curlnb; fp->num < lowb; fp=getnext(fp)) printf("+++%s", fp->text); } /* advance pointers to matched lines */ curlna= loa; curlnb= lob; return TRUE; } /* *** Stuff for first translation module *** */ #define DLE 0x90 /* *** Stuff for second translation module *** */ #define SPEOF 256 /* special endfile token */ #define LARGE 30000 init_usq() { char *p; int i; int numnodes; /* size of decoding tree */ char origname[14]; /* Original file name without drive */ static DNODTYP *dntmp; /* temp dnode pointer */ unsigned getw(); /* Initialization */ init_cr(); init_huff(); if(getw(in0)!=RECOGNIZE) { fprintf(stderr,"%s Not Squeezed\n", patha); exit(1); } /* Process rest of header */ getw(in0); /* ignore checksum ... */ /* Get original file name */ p = origname; /* send it to array */ do { *p = getc(in0); } while(*p++ != '\0'); fprintf(stderr, "(%s -> %s)\n", patha, origname); numnodes = getw(in0); if(numnodes < 0 || numnodes >= NUMVALS) { fprintf(stderr, "%s has invalid decode tree size\n", patha); exit(1); } /* Initialize for possible empty tree (SPEOF only) */ dnode->children[0] = -(SPEOF + 1); dnode->children[1] = -(SPEOF + 1); /* Get decoding tree from file */ dntmp = dnode; for(i = 0; i < numnodes; ++i) { dntmp->children[0] = getw(in0); dntmp->children[1] = getw(in0); ++dntmp; } } /* initialize decoding functions */ init_cr() { repct = 0; } init_huff() { bpos = 99; /* force initial read */ } /* Get bytes with decoding - this decodes repetition, * calls getuhuff to decode file stream into byte * level code with only repetition encoding. * * The code is simple passing through of bytes except * that DLE is encoded as DLE-zero and other values * repeated more than twice are encoded as value-DLE-count. */ int getcr(dummy) FILE *dummy; { static int c; static int value; /*current byte value or EOF */ if(repct > 0) { /* Expanding a repeated char */ --repct; return value; } else { /* Nothing unusual */ if((c = getuhuff()) != DLE) { /* It's not the special delimiter */ value = c; if(value == EOF) repct = LARGE; return value; } else { /* Special token */ if((repct = getuhuff()) == 0) /* DLE, zero represents DLE */ return DLE; else { /* Begin expanding repetition */ repct -= 2; /* 2nd time */ return value; } } } } /* Decode file stream into a byte level code with only * repetition encoding remaining. */ int getuhuff() { static int curin; /* last byte value read */ static int inch; /* Follow bit stream in tree to a leaf*/ inch = 0; /* Start at root of tree */ do { if(++bpos > 7) { if((curin = getc(in0)) == ERROR) return ERROR; bpos = 0; /* move a level deeper in tree */ inch = (dnode + inch)->children[1 & curin]; } else inch = (dnode + inch)->children[1 & (curin >>= 1)]; } while(inch >= 0); /* Decode fake node index to original data value */ inch = -(inch + 1); /* Decode special endfile token to normal EOF */ return (inch == SPEOF) ? EOF : inch; } /* get a 16-bit word from a file */ unsigned getw(fd) FILE *fd; { int ch, i; unsigned word; i = 2; /* 2 bytes per word */ word = 0; while (--i >= 0) { if ((ch = getc(fd)) == EOF) { fprintf(stderr, "Unexpected EOF\n"); exit(128); } word = (word >> 8) + (ch << 8); } return(word); } char *makebuf(size) int size; { static char *bufp; char *sbrk(); if ((bufp = sbrk(size)) == ERROR) { fprintf(stderr, "Buffer allocation error\n"); exit(128); } return(bufp); }