# /* Copyright (C) 1976 by the Board of Trustees of the University of Illinois All rights reserved NAME: lexi FUNCTION: This is the token scanner for indent ALGORITHM: 1) Strip off intervening blanks and/or tabs. 2) If it is an alphanumeric token, move it to the token buffer "token". Check if it is a special reserved word that indent will want to know about. 3) Non-alphanumeric tokens are handled with a big switch statement. A flag is kept to remember if the last token was a "unary delimiter", which forces a following operator to be unary as opposed to binary. PARAMETERS: None RETURNS: An integer code indicating the type of token scanned. GLOBALS: buf_ptr = had_eof last_u_d = Set to true iff this token is a "unary delimiter" CALLS: fill_buffer printf (lib) CALLED BY: main NOTES: Start of comment is passed back so that the comment can be scanned by pr_comment. Strings and character literals are returned just like identifiers. HISTORY: initial coding November 1976 D A Willcox of CAC 1/7/77 D A Willcox of CAC Fix to provide proper handling of "int a -1;" */ /* Here we have the token scanner for indent. It scans off one token and puts it in the global variable "token". It returns a code, indicating the type of token scanned. */ #include "indntglo.h"; #include "indntcod.h"; #define alphanum 1 #define opchar 3 struct templ { char *rwd; int rwcode; }; struct templ specials[] = { "switch", 1, "case", 2, "struct", 3, "default", 2, "int", 4, "char", 4, "float", 4, "double", 4, "long", 4, "short", 4, "typdef", 4, "unsigned", 4, "register", 4, "static", 4, "global", 4, "extern", 4, "if", 5, "while", 5, "for", 5, "else", 6, "do", 6, "sizeof", 0, 0, 0 }; char chartype[128] = { /* this is used to facilitate the decision of what type (alphanumeric, operator) each character is */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 1, 3, 3, 0, 0, 0, 3, 3, 0, 3, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 3, 3, 3, 3, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 3, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 3, 0, 3, 0 }; int last_nl = true; /* this is true if the last thing scanned was a newline */ int lexi () { register char *tok; /* local pointer to next char in token */ register int i; /* local loop counter */ register char *j; /* used for searching thru list of reserved words */ int unary_delim; /* this is set to 1 if the current token forces a following operator to be unary */ static int last_code; /* the last token type returned */ static int l_struct; /* set to 1 if the last token was 'struct' */ int found_it; int code; /* internal code to be returned */ char qchar; /* the delimiter character for a string */ tok = token; /* point to start of place to save token */ unary_delim = false; col_1 = last_nl; /* tell world that this token started in column 1 iff the last thing scanned was nl */ last_nl = false; while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */ col_1 = false; /* leading blanks imply token is not in column 1 */ if (++buf_ptr >= buf_end) fill_buffer (); } /*----------------------------------------------------------*\ | Scan an alphanumeric token \*----------------------------------------------------------*/ if (chartype[*buf_ptr & 0177] == alphanum) { register char c; /* we have a character or number */ while (chartype[c = *buf_ptr & 0177] == alphanum || c=='-' && tok[-1]=='e' && ('0'<=token[0]||token[0]<='9')) { /* copy it over */ *tok++ = *buf_ptr++; if (buf_ptr >= buf_end) fill_buffer (); } *tok++ = '\0'; if (l_struct) { /* if last token was 'struct', then this token should be treated as a declaration */ l_struct = false; last_code = ident; last_u_d = true; return (decl); } last_u_d = false; /* operator after indentifier is binary */ for (i = 0; specials[i].rwd != 0; ++i) { /* this loop will check if the token is a keyword. if so, a following operator is unary */ last_code = ident; /* remember that this is the code we will return */ j = specials[i].rwd; /* point at ith reserved word */ tok = token; /* point at scanned toekn */ found_it = true; /* set to false if not found */ do { if (*tok++ != *j) { found_it = false; break; } } while (*j++); if (found_it) { /* we have a keyword */ last_u_d = true; switch (specials[i].rwcode) { case 1: /* it is a switch */ return (swstmt); case 2: /* a case or default */ return (casestmt); case 3: /* a "struct" */ l_struct = true; /* Next time around, we will want to know that we have had a 'struct' */ case 4: /* one of the declaration keywords */ if(p_l_follow) break; /* inside parens: cast */ last_code = decl; return (decl); case 5: /* if, while, for */ return (sp_paren); case 6: /* do, else */ return (sp_nparen); default: /* all others are treated like any other identifier */ return (ident); } /* end of switch */ } /* end of if (found_it) */ } if (last_code == decl) /* if this is a declared variable, then following sign is unary */ last_u_d = true; /* will make "int a -1" work */ last_code = ident; return (ident); /* the ident is not in the list */ } /* end of procesing for alpanum character */ /*----------------------------------------------------------*\ | Scan a non-alphanumeric token \*----------------------------------------------------------*/ *tok++ = *buf_ptr; /* if it is only a one-character token, it is moved here */ *tok = '\0'; if (++buf_ptr >= buf_end) fill_buffer (); switch (*token) { case '\n': unary_delim = last_u_d; last_nl = true; /* remember that we just had a newline */ code = (had_eof ? 0 : newline); /* if data has been exausted, the newline is a dummy, and we should return code to stop */ break; case '\'': /* start of quoted character */ qchar = '\''; /* remember final delimiter */ goto copy_lit; /* and go to common literal code */ case '"': /* start of string */ qchar = '"'; copy_lit: do { /* copy the string */ while (1) { /* move one character or [/] */ if (*buf_ptr == '\n') { /* check for unterminated literal */ printf ("%d: Unterminated literal\n", line_no); goto stop_lit; /* Don't copy any more */ } *tok = *buf_ptr++; if (buf_ptr >= buf_end) fill_buffer (); if (had_eof || ((tok - token) > (bufsize - 2))) { printf ("Unterminated literal\n"); ++tok; goto stop_lit; /* get outof literal copying loop */ } if (*tok == '\\') { /* if escape, copy extra char */ if (*buf_ptr == '\n') /* check for escaped newline */ ++line_no; *(++tok) = *buf_ptr++; ++tok; /* we must increment this again because we copied two chars */ if (buf_ptr >= buf_end) fill_buffer (); } else break; /* we copied one character */ } /* end of while (1) */ } while (*tok++ != qchar); stop_lit: code = ident; break; case ('('): case ('['): unary_delim = true; code = lparen; break; case (')'): case (']'): code = rparen; break; case '#': unary_delim = last_u_d; code = preesc; break; case '?': unary_delim = true; code = question; break; case (':'): code = colon; unary_delim = true; break; case (';'): unary_delim = true; code = semicolon; break; case ('{'): unary_delim = true; code = lbrace; break; case ('}'): unary_delim = true; code = rbrace; break; case 014: /* a form feed */ unary_delim = last_u_d; last_nl = true; /* remember this so we can set 'col_1' right */ code = form_feed; break; case (','): unary_delim = true; code = comma; break; case '.': unary_delim = false; code = period; break; case '-': case '+': /* check for -, +, --, ++ */ code = (last_u_d ? unary_op : binary_op); unary_delim = true; if (*buf_ptr == token[0]) { /* check for doubled character */ *tok++ = *buf_ptr++; /* buffer overflow will be checked at end of loop */ if (last_code == ident || last_code == rparen) { code = (last_u_d ? unary_op : postop); /* check for following ++ or -- */ unary_delim = false; } } else if (*buf_ptr == '>' || *buf_ptr == '=') /* check for operator -> or += */ *tok++ = *buf_ptr++; /* buffer overflow will be checked at end of switch */ break; case '=': if (chartype[*buf_ptr] == opchar) { /* we have two char assignment */ *tok++ = *buf_ptr; /* move second character */ if (++buf_ptr >= buf_end) fill_buffer (); } code = binary_op; unary_delim = true; if (token[1] != '<' && token[1] != '>') /* check for possible 3 char operator */ break; /* can drop thru!!! */ case '>': case '<': case '!': /* ops like <, <<, <=, !=, etc */ if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') { *tok++ = *buf_ptr; if (++buf_ptr >= buf_end) fill_buffer (); } if (*buf_ptr == '=') *tok++ = *buf_ptr++; code = (last_u_d ? unary_op : binary_op); unary_delim = true; break; default: if (token[0] == '/' && *buf_ptr == '*') { /* it is start of comment */ *tok++ = '*'; if (++buf_ptr >= buf_end) fill_buffer (); code = comment; unary_delim = last_u_d; break; } while (*(tok - 1) == *buf_ptr || *buf_ptr=='=') { /* handle ||, &&, etc, and also things as in int *****i */ *tok++ = *buf_ptr; if (++buf_ptr >= buf_end) fill_buffer (); } code = (last_u_d ? unary_op : binary_op); unary_delim = true; } /* end of switch */ if (code != newline) { l_struct = false; last_code = code; } if (buf_ptr >= buf_end) /* check for input buffer empty */ fill_buffer (); last_u_d = unary_delim; *tok = '\0'; /* null terminate the token */ return (code); };