pagesize 86

;String and pattern searching over the text buffer.
;
;Two forms of the current pattern are kept:
;  inpat  - the literal string (inpatlen bytes), used by the fast searches
;           "forward" and "backward".
;  outpat - the encoded pattern, a sequence of entries terminated by EOS, used
;           by "slowly" and "amatch".  Entry layouts, as built by makepat and
;           read by omatch/patsiz:
;               CHR, char               one literal character
;               BOL / EOL / ANY         one byte each
;               CCL or NCCL, n, n chars character class / negated class
;               CLOSURE                 prefix: zero or more of the next entry
;
;The text is addressed through es as two pieces, the "top" which ends at
;es:topbot and the "bottom" which starts at es:bottop (the comments below also
;call this place "the point").  topbot and bottop are not declared in this
;file; presumably they come from bufseg.a86.

        include bufseg.a86              ;buffer definitions

data    dseg    word public

NULL    equ     0
CR      equ     0dh
LF      equ     0ah
LINENEW equ     0a0dh                   ;CR followed by LF, read as one word.

;entry codes stored in outpat.
CHR     equ     'C'
BOL     equ     '^'
EOL     equ     '$'
ANY     equ     '?'
CCL     equ     '['
ECCL    equ     ']'
NCCL    equ     '~'
EOS     equ     '.'
CLOSURE equ     '*'
CLOSIZE equ     1                       ;bytes occupied by a CLOSURE entry.

        extrn   outpat: byte
        extrn   OUTPATSIZE: abs
        extrn   inpat: byte
        extrn   INPATSIZE: abs

inpatlen        rw      1               ;length of the literal string in inpat.
direction       rw      1               ;0 = search forward, 1 = backward.
slow_search     rb      1               ;=1 if we need to process magic chars.

        extrn   textseg: word

;data   ends

code    cseg    byte public
;***    assume  cs:code, ds:data

        public  slowly, forward, backward
        extrn   get_mark: near, set_mark_si: near


        public  search
search:
;enter with ch=start mark, cl=end mark, dh=first mark, dl=last mark.
;start searching at mark ch.  If the string is found, then return the
;  beginning in mark dh, and the end in mark dl, and cy=0.  If the string
;  wasn't found, return cy=1.
;the search runs backward when the start mark is not below the end mark.
        push    dx                      ;save the first, last marks.
        push    es
        mov     es,textseg
;***    assume  es:bufseg
        push    ds                      ;save ds
        push    es
        pop     ds
;***    assume  ds:bufseg               ;for get_mark
        mov     al,cl                   ;get the end mark.
        push    cx
        call    get_mark
        mov     di,si                   ;save a copy of the end.
        pop     cx
        mov     al,ch                   ;get the start mark.
        call    get_mark
        pop     ds                      ;restore ds
;***    assume  ds:data
        mov     direction,0
        cmp     si,di                   ;start>=end?
        jb      search_4                ;no. (doesn't matter if they're equal)
        mov     direction,1             ;yes, go in reverse direction.
search_4:
        cmp     slow_search,0           ;must we do a slow search?
        jne     search_2                ;yes.
        cmp     direction,0             ;search in proper direction.
        jne     search_5
        call    forward
        jmp     search_3
search_5:
        call    backward
        jmp     search_3
search_2:
        call    slowly
search_3:
;the pops below leave the carry from the search routine untouched.
        pop     es
;***    assume  es:data
        pop     dx
        jc      search_1                ;not found.
        push    ds
        mov     ds,textseg              ;for set_mark_si
;***    assume  ds:bufseg
        push    bx                      ;preserve pointer to end.
        mov     al,dh
        call    set_mark_si             ;set the first mark (si = start of match).
        pop     si                      ;pushed as bx.
        mov     al,dl
        call    set_mark_si             ;set the last mark.
        pop     ds
;***    assume  ds:data
        clc                             ;return a match.
        ret
search_1:
        stc                             ;return no match.
        ret


        public  regexp_pat
regexp_pat:
;enter with si, cx->pattern (cx = length in bytes).
;exit with cy=1 if error (pattern too long, or makepat rejected it).
;the pattern is copied to inpat, null terminated, and encoded into outpat.
        cmp     cx,INPATSIZE            ;too big?
        jae     regexp_5                ;yes - return cy.
        mov     di,offset inpat
        rep movsb
        mov     al,NULL                 ;store the terminating null.
        stosb
        call    makepat
        jc      regexp_4                ;if any error, quit now.
;If outpat has only CHRs, we can use the fast search.  Fix up inpat so it
;  has the real string.  We must do this because of the '\' escape char.
        mov     si,offset outpat
        mov     di,offset inpat
        mov     bp,di                   ;make a copy of di.
regexp_1:
        lodsb                           ;get the next entry code.
        cmp     al,EOS                  ;if we got to EOS, all is ok.
        je      regexp_2
        cmp     al,CHR                  ;CHR?
        jne     regexp_3                ;no - can't use fast search.
        movsb                           ;move the char to inpat.
        jmp     regexp_1
regexp_2:
        sub     di,bp                   ;subtract the base of the string.
        mov     inpatlen,di             ;save the real length.
        mov     slow_search,0
        clc
        jmps    regexp_4
regexp_3:
        mov     slow_search,1
        clc
regexp_4:
        ret
regexp_5:
        stc
        ret


        public  literal_pattern
literal_pattern:
;enter with si, cx->pattern (cx = length in bytes).
;exit with cy=1 if error (pattern too long for inpat or for outpat).
;no character is special: inpat gets the string as is, and outpat gets one
;  CHR entry per character followed by EOS.
        cmp     cx,INPATSIZE            ;too big?
        jae     literal_3               ;yes - return cy.
        push    si
        push    cx
        mov     inpatlen,cx
        mov     di,offset inpat
        rep movsb
        pop     cx                      ;go over the same string again.
        pop     si
        mov     di,offset outpat
        jcxz    literal_1               ;empty string - just the EOS.
literal_2:
        mov     ax,offset outpat-2
        add     ax,OUTPATSIZE
        cmp     di,ax                   ;do we have enough room?
        jae     literal_3               ;no - quit now.
        mov     al,CHR
        stosb
        movsb
        loop    literal_2
literal_1:
        mov     al,EOS                  ;store the end of string.
        stosb
        mov     slow_search,0           ;we always use a fast search.
        clc
        ret
literal_3:
        stc
        ret


;***    assume  ds:data, es:bufseg

slowly:
;try amatch on outpat at successive positions, stepping in "direction".
;es:si -> first char to look at.
;es:di -> after last char to look at.
;return cy if no match, else nc, si->start of match, bx->after end of match.
;the search stops when si becomes equal to di; position di is not tried.
slowly_0:
        cmp     si,es:topbot            ;at topbot already?
        jne     slowly_1
        mov     si,es:bottop
slowly_1:
        cmp     si,di                   ;at the end yet?
        stc                             ;assume not found (stc leaves zr alone).
        je      slowly_3                ;yes - not found.
        push    di
        mov     di,offset outpat        ;start at beginning of pattern.
        call    amatch
        pop     di                      ;restore current, end.
        jnc     slowly_3                ;we found a match
        cmp     direction,0             ;forwards or backwards?
        jne     slowly_2                ;backwards.
        inc     si
        cmp     si,es:topbot            ;at bottom of top?
        jne     slowly_5                ;no.
        mov     si,es:bottop            ;yes - load top of bottom.
        jmp     slowly_1
slowly_5:
        cmp     es:word ptr 0ffffh[si],LINENEW  ;did we step onto the LF of a newline?
        jne     slowly_1                ;no.
        inc     si                      ;yes - skip LF part of newline.
        jmp     slowly_0
slowly_2:
        cmp     si,es:bottop            ;at top of bottom?
        jne     slowly_4                ;no.
        mov     si,es:topbot            ;yes - load bottom of top.
slowly_4:
        dec     si                      ;back up to previous character.
        cmp     si,es:bottop            ;at top of bottom now?
        je      slowly_1                ;yes - can't possibly be split over newline.
        cmp     es:word ptr 0ffffh[si],LINENEW  ;did we step onto the LF of a newline?
        jne     slowly_1                ;no.
        dec     si                      ;yes - skip to beginning of newline.
        jmp     slowly_1
slowly_3:
        ret


forward:
;fast search for the literal string in inpat, going up through the text.
;es:si -> first char to look at.
;es:di -> after last char to look at.
;return cy if no match, else nc, si->start of match, bx->after end of match.
;when the range has to be split, "slowly" is used for the piece that crosses
;  from the top to the bottom; it reads "direction", which must be 0 here.
        cmp     si,es:topbot            ;does the range start at bottom of top?
        jne     forward_8               ;no.
        mov     si,es:bottop            ;yes - start at top of bottom instead.
forward_8:
        cmp     di,es:bottop            ;does the range end at top of bottom?
        jne     forward_9               ;no.
        mov     di,es:topbot            ;yes - end at bottom of top instead.
forward_9:
        cmp     si,es:bottop            ;is start in bottom?
        jae     forward_1               ;yes - no need to split.
        cmp     di,es:topbot            ;is finish in top?
        jbe     forward_1               ;yes - no need to split.
        push    di
        mov     di,es:topbot
        call    forward                 ;recursively search top
        mov     ax,di                   ;where the scan of the top stopped.
        pop     di
        jnc     forward_2               ;we found it - exit.
        mov     si,ax                   ;start where forward left off.
        push    di
        mov     di,es:bottop            ;and end where it will begin again.
        call    slowly
        pop     di
        jnc     forward_2               ;they found it - exit.
        mov     si,es:bottop            ;no need to save the old si.
        call    forward
        jmps    forward_2               ;in any case, exit.
forward_1:
;the whole range lies in one piece of the text.
        mov     cx,di                   ;count the number of chars to look at.
        sub     cx,si
        mov     di,si                   ;prepare for scasb.
        mov     bx,inpatlen
        dec     bx                      ;bx = string length - 1.
        sub     cx,bx                   ;this many fewer chars to look at.
        jb      forward_5               ;string is shorter than search.
forward_3:
;here cx = places left to try, bx = string length - 1, es:di -> next place.
        jcxz    forward_5               ;no chars to look at.
        mov     si,offset inpat
        lodsb                           ;get the first char.
forward_4:
        scasb                           ;look for the first char.
        loopnz  forward_4               ;keep looking until we find it.
        jnz     forward_5               ;we didn't
        xchg    cx,bx                   ;set the count to the string length.
        push    cx                      ;save the string length
        push    di                      ;save the source position
;if cx=0 cmpsb is not executed and zr is still set from the scasb above.
        repe cmpsb                      ;is this it?
        mov     cx,bx                   ;restore the search length
        pop     di                      ;restore the source position
        pop     bx                      ;restore the string length
        jne     forward_3               ;no match - try at next position.
        cmp     inpat,LF                ;are we searching for an LF first string?
        jne     forward_6               ;no - don't worry.
        cmp     es:byte ptr 0fffeh[di],CR       ;did we just match the LF part of a newline?
        je      forward_3               ;yes - no match.
forward_6:
        cmp     inpat[bx],CR            ;are we searching for a CR last string?
        jne     forward_7               ;no - don't worry.
        cmp     es:byte ptr [di+bx],LF  ;did we just match the CR part of a newline?
        je      forward_3               ;yes - no match.
forward_7:
        mov     si,di                   ;get the source position
        add     bx,si                   ;add it to the count to get the end.
        dec     si                      ;make it point to the first char again.
        clc
        jmps    forward_2
forward_5:
        stc
forward_2:
        ret


backward:
;fast search for the literal string in inpat, going down through the text.
;es:si -> first char to look at.
;es:di -> after last char to look at.
;  (si is the high end of the range and di the low end; only matches lying
;  wholly between di and si are reported.)
;return cy if no match, else nc, si->start of match, bx->after end of match.
        cmp     si,es:bottop            ;does the range start at top of bottom?
        jne     backward_8              ;no.
        mov     si,es:topbot            ;yes - start at bottom of top instead.
backward_8:
        cmp     si,es:bottop            ;is start in top?
        jae     backward_9              ;no - it is in the bottom.
        cmp     di,es:bottop            ;is the end given as top of bottom?
        jne     backward_1              ;no - the whole range is in the top.
        mov     di,es:topbot            ;yes - same place, seen from the top.
        jmps    backward_1
backward_9:
;the start is in the bottom.  An end at bottom of top is the same place as
;  top of bottom; use the latter so that such a range is not split.
        cmp     di,es:topbot
        jne     backward_10
        mov     di,es:bottop
backward_10:
        cmp     di,es:bottop            ;is finish in bottom?
        jae     backward_1              ;yes - no need to split.
        jmp     backward_split          ;no - do it in pieces.
backward_1:
;the whole range lies in one piece of the text.
        mov     cx,si                   ;count the number of chars to look at.
        sub     cx,di
        mov     di,si                   ;prepare for scasb.
        mov     bx,inpatlen
        dec     bx                      ;bx = string length - 1.
        sub     cx,bx                   ;this many fewer chars to look at.
        jb      backward_5              ;string is shorter than search.
        sub     di,bx                   ;back up that many chars.
        add     di,2-1                  ;pre-increment for loop and size.
backward_3:
;here cx = places left to try, bx = string length - 1.
        sub     di,2                    ;restore next char.
        jcxz    backward_5              ;no chars to look at.
        mov     si,offset inpat
        lodsb                           ;get the first char.
        std                             ;now scan backwards.
backward_4:
        scasb                           ;look for the first char.
        loopnz  backward_4              ;keep looking until we find it.
        cld                             ;now compare, etc. forwards.
        jnz     backward_5              ;we didn't find it.
        add     di,2                    ;go forwards to the next char.
        xor     al,al                   ;in case string length-1=0 (sets zr).
        xchg    cx,bx                   ;set the count to the string length.
        push    cx                      ;save the string length
        push    di                      ;save the source position
        repe cmpsb                      ;is this it?
        mov     cx,bx                   ;restore the search length
        pop     di                      ;restore the source position
        pop     bx                      ;restore the string length
        jne     backward_3              ;no match - try at next position.
        cmp     inpat,LF                ;are we searching for an LF first string?
        jne     backward_6              ;no - don't worry.
        cmp     es:byte ptr 0fffeh[di],CR       ;did we just match the LF part of a newline?
        je      backward_3              ;yes - no match.
backward_6:
        cmp     inpat[bx],CR            ;are we searching for a CR last string?
        jne     backward_7              ;no - don't worry.
        cmp     es:byte ptr [di+bx],LF  ;did we just match the CR part of a newline?
        je      backward_3              ;yes - no match.
backward_7:
        mov     si,di                   ;get the source position
        add     bx,si                   ;add it to the count to get the end.
        dec     si                      ;make it point to the first char again.
        clc
        jmps    backward_2
backward_5:
        stc
backward_2:
        ret

backward_split:
;the range starts in the bottom (si) and ends in the top (di).  Search it in
;  three steps, nearest to si first.  backward_1 is called directly so that
;  the adjustments at the top of "backward" are not applied to the pieces.
        push    si                      ;remember where the range starts.
        push    di                      ;and where it ends.
        mov     di,es:bottop
        call    backward_1              ;1) the part of the range in the bottom.
        pop     di
        jnc     backward_13             ;we found it - exit.
;2) matches that begin in the top and finish in the bottom.  Only the last
;  length-1 places of the top can begin one.  amatch steps over the point by
;  itself (see omatch_yes).
;NOTE(review): unlike backward_1, this step does not reject a match on the LF
;  or CR half of a newline - confirm whether that matters for callers.
        mov     cx,inpatlen
        dec     cx                      ;number of places worth trying.
        mov     si,es:topbot
backward_11:
        jcxz    backward_12             ;tried them all.
        cmp     si,di                   ;run out of range?
        je      backward_12             ;yes.
        dec     si                      ;back up to previous character.
        push    di
        mov     di,offset outpat
        call    amatch                  ;does the string begin here?
        pop     di
        jc      backward_14             ;no.
        pop     ax                      ;get the start of the range,
        push    ax                      ;  and keep it saved.
        cmp     bx,ax                   ;does the match end inside the range?
        jbe     backward_13             ;yes - exit (flags survive the pop).
backward_14:
        loop    backward_11
backward_12:
        pop     ax                      ;throw away the saved start.
        mov     si,es:topbot
        jmp     backward_1              ;3) the part of the range in the top.
backward_13:
        pop     ax                      ;throw away the saved start.
        clc
        ret


amatch:
;match the pattern at ds:di against the text at es:si (anchored at si).
;es:si -> source text
;ds:di -> pattern
;return cy if no match, else nc, bx->end of matching string
;si and di are returned unchanged.
        push    si                      ;preserve input pointers.
        push    di
amatch_1:
        mov     al,[di]
        cmp     al,EOS
        mov     bx,si                   ;prepare to exit (mov leaves the flags).
        je      amatch_success
        cmp     al,CLOSURE
        jne     amatch_3
        add     di,CLOSIZE              ;di -> the entry being repeated.
        mov     bx,si                   ;save the first closure pattern.
;match as many as we can
amatch_4:
        call    omatch
        jnc     amatch_4
;match only as many as fit the next pattern
        call    patsiz
        add     di,ax                   ;di -> rest of pattern.
amatch_5:
        push    bx
        call    amatch                  ;try to match rest of pattern.
        pop     ax                      ;conserve stack
        jnc     amatch_success          ;go if it matched (bx = end from the call).
        mov     bx,ax                   ;restore bx.
        cmp     si,es:bottop            ;backing up past the point?
        jne     amatch_8                ;no - just decrement.
        mov     si,es:topbot            ;yes - get the bottom of the top.
amatch_8:
        dec     si                      ;point to the previous character.
        cmp     si,bx                   ;zero or more matches still?
        jae     amatch_5                ;yes.
        stc
        jmps    amatch_exit             ;can't get rest of pattern to match.
amatch_3:
        call    omatch
        jc      amatch_exit             ;unsuccessful - exit.
amatch_7:
        call    patsiz
        add     di,ax                   ;step to the next entry.
        jmp     amatch_1
amatch_success:
        clc
amatch_exit:
        pop     di                      ;restore input pointers.
        pop     si
        ret


omatch:
;omatch matches at most one character, and only if it returns true at
;  omatch_yes.  When we get to omatch_yes, we see if we are at the point.
;es:si -> source text
;ds:di -> pattern
;return nc and si advanced past what was matched (BOL and EOL match without
;  advancing), or cy and si unchanged.
        mov     al,[di]
        cmp     al,CHR
        jne     omatch_1
        mov     al,es:[si]
        cmp     al,1[di]                ;the character follows the CHR code.
        jne     omatch_no
        inc     si
        jmp     omatch_yes
omatch_1:
        cmp     al,BOL
        jne     omatch_2
        cmp     si,es:bottop            ;are we at the point?
        jne     omatch_1_1              ;no.
        push    si                      ;yes - have to look at the top.
        mov     si,es:topbot
        cmp     es:word ptr 0fffeh[si],LINENEW  ;does the top end with a newline?
        pop     si
        jne     omatch_no
        jmp     omatch_yes
omatch_1_1:
        cmp     es:word ptr 0fffeh[si],LINENEW  ;is there a newline just before si?
        jne     omatch_no
        jmp     omatch_yes
omatch_2:
        cmp     al,EOL
        jne     omatch_3
        cmp     es:word ptr [si],LINENEW        ;is there a newline at si?
        jne     omatch_no
        jmp     omatch_yes
omatch_3:
        cmp     al,ANY
        jne     omatch_4
        cmp     es:word ptr [si],LINENEW        ;anything but a newline will do.
        je      omatch_no
        inc     si
        jmp     omatch_yes
omatch_4:
        cmp     al,CCL
        jne     omatch_5
        cmp     es:word ptr [si],LINENEW        ;a class never matches a newline.
        je      omatch_no
        call    locate
        jc      omatch_no               ;not in the class.
        inc     si
        jmp     omatch_yes
omatch_5:
        cmp     al,NCCL
        jne     omatch_6
        cmp     es:word ptr [si],LINENEW        ;nor does a negated class.
        je      omatch_no
        call    locate
        jnc     omatch_no               ;it is in the class.
        inc     si
        jmp     omatch_yes
omatch_6:
;error - unknown entry code.
        jmp     omatch_no
omatch_no:
        stc
        ret
omatch_yes:
        cmp     si,es:topbot            ;at bottom of top?
        jne     omatch_yes_1
        mov     si,es:bottop            ;yes, go to top of bottom.
omatch_yes_1:
        clc
        ret


locate:
;look for the character at es:si among the characters of a class entry.
;es:si -> search string
;ds:di -> CCL
;exit with cy=0 if found.
;cx, di and es are preserved; al is used.
        push    cx
        mov     cl,1[di]                ;get the count.
        mov     ch,0
        mov     al,es:[si]
        push    es                      ;save es, di
        push    di
        push    ds                      ;outpat is in ds
        pop     es
        add     di,2                    ;di now -> characters.
        repne scasb
        pop     di                      ;restore es,di (pops leave the flags).
        pop     es
        pop     cx
        jne     locate_1
        clc
        ret
locate_1:
        stc
        ret


patsiz:
;enter ds:di -> pat
;exit with ax = size in bytes of the entry at di.
        mov     al,[di]
        cmp     al,CHR
        jne     patsiz_1
        mov     ax,2                    ;code and character.
        ret
patsiz_1:
        cmp     al,CLOSURE
        jne     patsiz_2
        mov     ax,CLOSIZE
        ret
patsiz_2:
        cmp     al,BOL
        je      patsiz_3
        cmp     al,EOL
        je      patsiz_3
        cmp     al,ANY
        jne     patsiz_4
patsiz_3:
        mov     ax,1                    ;just the code.
        ret
patsiz_4:
        cmp     al,CCL
        je      patsiz_5
        cmp     al,NCCL
        jne     patsiz_6
patsiz_5:
        mov     al,1[di]                ;get the count.
        mov     ah,0
        add     ax,2                    ;plus the code and the count itself.
        ret
patsiz_6:
;error - unknown entry code.
;NOTE(review): ax is not set to a size on this path (al still holds the code).
        ret


;***    assume  ds:data, ds:data

        public  eol_only
eol_only:
;return zr if the search pattern matches eol only.
;  we need this routine because search and replace should advance past the
;  newline if we're matching eol only.
;the word compared is outpat[0]=EOL followed by outpat[1]=EOS.
        cmp     word ptr outpat,EOS*256 + EOL
        ret


makepat:
;encode the null terminated string in inpat into outpat.
;registers while encoding:
;  si = index into inpat of the current source char.
;  di = index into outpat of the next free byte.
;  dx = OUTPATSIZE, the index at which outpat is full.
;  bx = index in outpat of the start of the previous entry (-1 if none);
;       this is where a following closure gets inserted.
;return cy=1 if error (unterminated character class, or outpat filled up).
;each pass pushes di at makepat_1 and pops it into bx at makepat_2.
        mov     si,0
        mov     di,0
        mov     dx,OUTPATSIZE
        mov     bx,-1
makepat_1:
        cmp     inpat[si],NULL
        je      makepat_0
        push    di                      ;start of the entry we're about to add.
        mov     al,inpat[si]
        cmp     al,'\'                  ;are we escaping something?
        jne     makepat_a
        cmp     inpat+1[si],NULL        ;is the '\' at the end?
        je      makepat_9               ;yes - just use \.
        inc     si
        mov     al,inpat[si]            ;get the escaped char.
        jmp     makepat_9               ;go stick it in.
makepat_a:
        cmp     al,ANY
        jne     makepat_3
        call    addset
        jmp     makepat_2

;this really belongs at the end of makepat, but the short jump can't get there.
makepat_0:
        mov     al,EOS
        call    addset
        cmp     di,dx                   ;did we fill outpat?
        jne     makepat__0_1            ;no.
        stc                             ;yes - treat it as an overflow.
        ret
makepat__0_1:
        clc
        ret

makepat_3:
        cmp     al,BOL
        jne     makepat_7
        cmp     si,0                    ;only special at the beginning.
        jne     makepat_6
        call    addset
        jmp     makepat_2
makepat_6:
        call    addchar
        jmp     makepat_2
makepat_7:
        cmp     al,EOL
        jne     makepat_8
        cmp     inpat+1[si],NULL        ;only special at the end.
        jne     makepat_9
        call    addset
        jmp     makepat_2
makepat_9:
        call    addchar
        jmp     makepat_2
makepat_8:
        cmp     al,CCL
        jne     makepat_10
        call    getccl
        jnc     makepat_2
        pop     di                      ;balance the push at makepat_1.
        stc
        ret
makepat_10:
        cmp     al,CLOSURE
        jne     makepat_11
        cmp     bx,0                    ;is there a previous entry (bx>=0)?
        jnge    makepat_12              ;no - not closure.
        mov     al,outpat[bx]
        cmp     al,CLOSURE              ;trying to close a closure?
        je      makepat_12              ;yes - not closure.
        cmp     al,BOL                  ;trying to close a beginning of line?
        je      makepat_12              ;yes - not closure.
        call    stclos
        add     sp,2                    ;throw away the old previous.
        push    bx                      ;the closure stays the previous entry.
        jmp     makepat_2
makepat_11:
        cmp     al,NCCL
        jne     makepat_13
        cmp     inpat+1[si],NULL        ;not special at the end.
        je      makepat_13
        mov     al,NCCL
        call    addset
        mov     al,1                    ;one character follows.
        call    addbyte
        mov     al,inpat+1[si]
        call    addbyte
        inc     si                      ;skip the NCCL.
        jmp     makepat_2
makepat_13:
makepat_12:
        call    addchar
        jmp     makepat_2
makepat_2:
        pop     bx                      ;pushed as di at makepat_1.
        inc     si
        jmp     makepat_1


addchar:
;al = CHR to put.
;adds a CHR entry (the CHR code, then the character) to outpat.
        push    ax
        mov     al,CHR
        call    addset
        pop     ax
        call    addbyte
        ret


addset:
;only command chars call addset.
addbyte:
;al = char to put, di->dest, dx->end of dest.
;the byte is dropped if outpat is full; makepat then sees di=dx at the end.
        cmp     di,dx
        jae     addbyte_1               ;full (or beyond) - never store past the end.
        mov     outpat[di],al
        inc     di
addbyte_1:
        ret


stclos:
;insert a CLOSURE code in front of the previous entry.
;di -> next free byte of outpat, dx -> end of outpat.
;bx -> start of the previous entry.
;exit with di moved up by CLOSIZE; ax is used.
;if there is no room for the extra CLOSIZE bytes nothing is moved, and di is
;  left at dx so that makepat reports the overflow.
        mov     ax,dx
        sub     ax,di                   ;ax = bytes still free.
        cmp     ax,CLOSIZE              ;is there room to move everything up?
        jb      stclos_3                ;no.
        push    di
stclos_1:
        dec     di                      ;move the entry up, last byte first.
        mov     al,outpat[di]
        mov     outpat+CLOSIZE[di],al
        cmp     di,bx
        jne     stclos_1
stclos_2:
        mov     outpat[bx],CLOSURE
        pop     di
        add     di,CLOSIZE
        ret
stclos_3:
        mov     di,dx                   ;mark the pattern as full.
        ret


getccl:
;encode a character class.
;on entry si indexes the CCL char in inpat (null terminated source).
;di -> dest, dx -> end of dest
;on a good exit si indexes the closing ECCL.
;return cy=1 if error (the class was not closed by ECCL).
        inc     si
        mov     al,inpat[si]
        cmp     al,NCCL                 ;negated class?
        jne     getccl_1
        call    addset                  ;yes - al is the NCCL code.
        inc     si
        jmp     getccl_2
getccl_1:
        mov     al,CCL
        call    addset
getccl_2:
        push    bx
        mov     bx,di                   ;remember where the count goes.
        call    addbyte                 ;leave room for count
        call    dodash
        mov     ax,di
        sub     ax,bx
        dec     ax                      ;number of characters stored.
        cmp     bx,dx                   ;was there room for the count byte?
        jae     getccl_4                ;no - outpat is full, don't store past it.
        mov     outpat[bx],al
getccl_4:
        pop     bx
        cmp     inpat[si],ECCL
        je      getccl_3
        stc
        ret
getccl_3:
        clc
        ret


dodash:
;copy the characters of a class to outpat, expanding ranges like a-z.
;si -> source pattern (null terminated)
;di -> destination pattern
;dx -> end of destination pattern
;exit with si indexing the ECCL or the null that ended the class.
        push    bx
        mov     bx,si                   ;remember where the class begins.
dodash_1:
        mov     al,inpat[si]
        or      al,al
        je      dodash_2
        cmp     al,ECCL
        je      dodash_2
        cmp     al,'-'
        je      dodash_4
        call    addbyte
        jmp     dodash_8
dodash_4:
        cmp     si,bx                   ;at beginning?
        je      dodash_5
        cmp     inpat+1[si],NULL        ;or at end?
        jne     dodash_6
dodash_5:
        mov     al,'-'                  ;if at beginning or at end, just a '-'
        call    addbyte
        jmp     dodash_8
dodash_6:
        mov     al,inpat-1[si]
        cmp     al,inpat+1[si]          ;is the range backwards?
        ja      dodash_5                ;yes - just a '-'.
        call    alphanumeric
        jnc     dodash_5                ;both ends must be alphanumeric.
        mov     al,inpat+1[si]
        call    alphanumeric
        jnc     dodash_5
        mov     al,inpat-1[si]
dodash_7:
        inc     al                      ;pre-increment -- the first one's there.
        cmp     al,inpat+1[si]
        ja      dodash_9
        call    addbyte
        jmp     dodash_7
dodash_9:
        inc     si                      ;skip the char that ended the range.
dodash_8:
        inc     si
        jmp     dodash_1
dodash_2:
        pop     bx
        ret


alphanumeric:
;return cy=1 if al is alphanumeric ('0'-'9', 'A'-'Z' or 'a'-'z').
        cmp     al,'0'
        jb      alphanumeric_1
        cmp     al,'9'
        jbe     alphanumeric_2
        cmp     al,'A'
        jb      alphanumeric_1
        cmp     al,'Z'
        jbe     alphanumeric_2
        cmp     al,'a'
        jb      alphanumeric_1
        cmp     al,'z'
        jbe     alphanumeric_2
alphanumeric_1:
        clc
        ret
alphanumeric_2:
        stc
        ret

;code   ends

        end