/*
 * TR.C  -  H. Moran  - Written long ago, Cleaned up for public viewing 10/9/83
 *
 *  tr - transliterate characters
 *
 *  This is a translation (not a re-write) of the RATFOR program
 *  in "Software Tools". As a result of the fact that FORTRAN
 *  which underlies RATFOR uses a call by address and C uses a
 *  call by value, this program contains some peculiar looking C code.
 *
 *
 *  to build:
 *      cc tr -o -e 3000
 *      l2 tr dio
 *  to use:
 *      tr map_from [map_to] [outfile]
 *  or
 *      tr
 *
 *  will give instructions
 *
 *  'map_from' and 'map_to' are ordered character groupings. Any characters
 *  in the translated file which are in the set of 'map_from' will be replaced
 *  by the corresponding character in the set of 'map_to'.
 *  'map_to' may be shorter than 'map_from' in which case any characters
 *  from the set of 'map_from' which have no counterpart in 'map_to' will be
 *  replaced by the last member from 'map_to'.
 *  'map_to' may be empty, in which case any characters in the set 'map_from'
 *  will be deleted in the translation process.
 *
 *
 *      \t      represents TAB
 *      \n      represents NEWLINE
 *      \s      represents SPACE
 *      \\      represents \
 *      \ua     represents A
 *      a-d     represents abcd
 *      ~       represents "everything but" (if it is the first char)
 *
 *  examples:
 *
 *      tr ~a-z\ua-\uz0-9 \n <in >out
 *  Copies file "in" to file "out" translating everything but letters and digits
 *  to new lines. This means that each "identifier" and decimal constant
 *  ends up being on a line of its own, which is a large part of the job
 *  of creating a glossary keyword list. See programs SORTLEX.C and UNIQ.C
 *  for aid in performing the rest of such a job.
 */

#include "bdscio.h"
#include "dio.h"

#define QUOTE    '\\'
#define INVERT   '~'
#define ELIDE    '-'
#define END_TEXT 0x1a
#define END_FILE -1

#define FOREVER for(;;)     /* "infinite" loop */
#define PROC    int         /* PROCedure i.e. function returning no value */
#define BOOL    int         /* BOOLean i.e. YES or NO */
#define YES     1
#define NO      0

#define MAXSET  100

/* Functions that do not return int must be declared before their first use. */
char quote();
char *strlower();

/*
 * main - build the 'from' and 'to' sets from the command line, then copy
 *        standard input to standard output, translating, collapsing or
 *        deleting characters as the sets dictate.
 */
main(argc, argv)
int argc;
char *argv[];
{
    int c;
    char map_from[MAXSET], map_to[MAXSET];
    BOOL allbut, collap;
    int i, last_map_from, last_map_to;

    dioinit(&argc, argv);
    if ( argc < 2 ) {
        usage();
        exit(1);
    }

    allbut = (argv[1][0] == INVERT);
    if ( allbut )               /* skip the INVERT mark */
        ++argv[1];

    strlower(argv[1]);          /* make argv[1] lower case */
    if ( argc >= 3 )            /* argv[2] exists only when map_to was given */
        strlower(argv[2]);      /* make argv[2] lower case */

    if ( makset(argv[1], 0, map_from, MAXSET) == NO ) {
        printf("\nfrom: too large.\n");
        exit(1);
    }
    if ( argc < 3 )
        *map_to = '\0';
    else if ( makset(argv[2], 0, map_to, MAXSET) == NO ) {
        printf("\nto: too large.\n");
        exit(1);
    }

    /*
     * Store the lengths in ints before comparing, so that an empty set
     * yields -1 even where strlen() returns an unsigned type.
     */
    last_map_from = strlen(map_from) - 1;
    last_map_to = strlen(map_to) - 1;
    collap = (allbut || (last_map_from > last_map_to));

    FOREVER {
        c = getchar();
        if ( c == END_TEXT || c == END_FILE )
            break;
        i = xindex(map_from, c, allbut, last_map_to);
        if ( collap && (i >= last_map_to) && (last_map_to >= 0) ) {
            /* collapse: a run of such characters yields one output char */
            putchar(map_to[last_map_to]);
            do {
                i = xindex(map_from, (c = getchar()), allbut, last_map_to);
                if ( c == END_TEXT || c == END_FILE )
                    goto breakout;
            } while ( i >= last_map_to );
        }
        if ( i >= 0 && last_map_to >= 0 )   /* translate */
            putchar(map_to[i]);
        else if ( i < 0 )                   /* copy */
            putchar(c);
        else                                /* delete */
            ;
    }
breakout:
    dioflush();
}

/*
 * makset - make set from array[k] in set
 *
 * Expands the specification starting at array[k] into set and appends
 * the terminating '\0'.  Returns NO if the terminator did not fit in
 * 'size' characters, YES otherwise.
 */
BOOL makset(array, k, set, size)
char array[], set[];
int k, size;
{
    int junk1;

    junk1 = 0;
    filset('\0', array, &k, set, &junk1, size);
    return (addset('\0', set, &junk1, size));
}

/*
 * filset - expand set at array[i] into set[j], stop at delim
 *
 * *i and *j are advanced as characters are consumed and produced.
 * Results of addset() are not checked here; makset() detects overflow
 * when it appends the terminator.
 */
PROC filset(delim, array, i, set, j, maxset)
char delim, array[], set[];
int *i, *j, maxset;
{
    char *digits, *lowalf, *upalf;

    digits = "0123456789";
    lowalf = "abcdefghijklmnopqrstuvwxyz";
    upalf = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";

    for ( ; array[*i] != delim && array[*i] != '\0'; ++*i )
        switch ( array[*i] ) {
        case QUOTE:
            addset(quote(array, i), set, j, maxset);
            break;
        case ELIDE:
            /* ELIDE first or last in the specification is a literal */
            if ( *j <= 0 || array[*i+1] == '\0' )
                addset(ELIDE, set, j, maxset);
            else if ( isdigit(set[*j-1]) )
                do_elide(digits, array, i, set, j, maxset);
            else if ( islower(set[*j-1]) )
                do_elide(lowalf, array, i, set, j, maxset);
            else if ( isupper(set[*j-1]) )
                do_elide(upalf, array, i, set, j, maxset);
            else
                addset(ELIDE, set, j, maxset);
            break;
        default:
            addset(array[*i], set, j, maxset);
            break;
        }
}

/*
 * addset - put c in str[*j] if it fits, increment *j
 *
 * Valid positions are 0 .. maxsiz-1.  Returns YES if c was stored,
 * NO if the string is already full (nothing is written in that case).
 */
BOOL addset(c, str, j, maxsiz)
char c, str[];
int maxsiz;
int *j;
{
    if ( *j >= maxsiz )
        return (NO);
    str[*j] = c;
    *j += 1;
    return (YES);
}

/*
 * do_elide - expand array[i-1]-array[i+1] into set[j]... from valid
 *
 * On entry array[*i] is the ELIDE character and set[*j-1] holds the
 * first character of the range.  The range start is re-emitted as part
 * of the expansion, so *j is backed up by one first.  If the range end
 * is not found in 'valid' nothing is added.
 */
PROC do_elide(valid, array, i, set, j, maxset)
char valid[], array[], set[];
int *i, *j, maxset;
{
    int k, limit;

    *i += 1;
    *j -= 1;
    limit = index(valid, quote(array, i));
    for ( k = index(valid, set[*j]); k <= limit; ++k )
        addset(valid[k], set, j, maxset);
}

/*
 * xindex - invert condition returned by index
 *
 * Without 'allbut' this is simply index(array, c).  With 'allbut',
 * characters found in array yield -1 and all others yield
 * last_map_to + 1.  END_TEXT always yields -1.
 */
xindex(array, c, allbut, last_map_to)
char array[], c;
BOOL allbut;
int last_map_to;
{
    if ( c == END_TEXT )
        return (-1);
    else if ( ! allbut )
        return (index(array, c));
    else if ( index(array, c) >= 0 )
        return (-1);
    else
        return (last_map_to + 1);
}

/*
 * index -- determine index of char in array
 *       -- return -1 if not found
 */
index(array, c)
char array[], c;
{
    int i;

    for ( i = 0; array[i]; ++i )
        if ( c == array[i] )
            return (i);
    return (-1);
}

/*
 * quote - map array[i] into quoted character if appropriate
 *
 * If array[*i] is QUOTE and is not the last character, *i is advanced
 * past the escape letter (and, for the 'u' escape, past the letter to
 * be upper-cased) and the character it stands for is returned.
 */
char quote(array, i)
char array[];
int *i;
{
    if ( array[*i] != QUOTE )
        return (array[*i]);
    else if ( array[*i+1] == '\0' )     /* QUOTE not special at end */
        return (QUOTE);
    else {
        *i += 1;
        if ( tolower(array[*i]) == 'n' )
            return ('\n');
        else if ( tolower(array[*i]) == 't' )
            return ('\t');
        else if ( tolower(array[*i]) == 's' )
            return (' ');
        else if ( tolower(array[*i]) == 'u' ) {
            if ( array[*i+1] == '\0' )  /* nothing follows to upper-case */
                return (array[*i]);
            else {
                *i += 1;
                return (toupper(array[*i]));
            }
        }
        else
            return (array[*i]);
    }
}

/*
 * usage() - explain the usage of this program
 */
PROC usage()
{
    printf("\nusage:\n\ttr map_from map_to.\n\n");
    printf("'map_from' and 'map_to' are ordered character groupings.\n");
    printf("Any characters in the translated file which are in the\n");
    printf("set of 'map_from' will be replaced by the corresponding\n");
    printf("character in the set of 'map_to'. 'map_to' may be shorter\n");
    printf("than 'map_from' in which case any characters from the set\n");
    printf("of 'map_from' which have no counterpart in 'map_to' will\n");
    printf("be replaced by the last member from 'map_to'.\n");
    printf("'map_to' may be empty, in which case, any characters in");
    printf(" 'map_from' will\n");
    printf("be deleted in the translation process\n\n");
    printf("\t%ct\trepresents TAB\n", QUOTE);
    printf("\t%cn\trepresents NEWLINE\n", QUOTE);
    printf("\t%cs\trepresents SPACE\n", QUOTE);
    printf("\t%c%c\trepresents %c\n", QUOTE, QUOTE, QUOTE);
    printf("\t%cua\trepresents A\n", QUOTE);
    printf("\ta%cd\trepresents abcd\n", ELIDE);
    printf("\t%c\trepresents \"everything but\" (if the first char)\n", INVERT);
    printf("\t%c%c\trepresents %c\n\n", QUOTE, INVERT, INVERT);
    printf("\nEXAMPLES:\n\n");
    /* one conversion per argument: prints  tr ~a-z\ua-\uz0-9 \n <in >out */
    printf("\ttr %ca%cz%cua%c%cuz0%c9 %cn <in >out\n\n",
           INVERT, ELIDE, QUOTE, ELIDE, QUOTE, ELIDE, QUOTE);
    printf("copies file \"in\" to file \"out\" translating everything\n");
    printf("but letters and digits to new lines\n");
}

/*
 * strlower - convert a string to lower case and return a pointer to it
 *
 * The conversion is done in place.
 */
char *
strlower(s)
char *s;
{
    char *p;

    for ( p = s; *p; ++p )
        *p = tolower(*p);
    return (s);
}