#include <xfbuf.h>
#include <stdio.h>
#include <ctype.h>
#include <malloc.h>
#include <process.h>
#include <errno.h>
#include <ascii.h>

/* Forward declarations for functions that do not return int; pre-ANSI
style, so no parameter lists are given. */

char *cpyqarg();	/* copy a quoted argument */
char *next_arg();	/* used by main() to step to the next command line argument */
char *skip_delim();	/* used by main() to skip delimiters before an argument */
char *getmem();		/* NOTE(review): not referenced in the visible part of this file */
long sizmem();		/* NOTE(review): not referenced in the visible part of this file */

/* Dos 2.00 handles */

/* Mass editor

munge [switches] "s1" pathspec

munge [switches] "s1" "s2" pathspec


	Go through all files in pathspec, and replace all occurrences of
s1 with s2. A sliding string match is done; case is not significant
unless /C is present. (Case is preserved on s2 always.)

	If only s1 is specified, then all occurrences of s1 are listed
along with the line number; the files are read only.

	Filespec can be a full DOS path, and may contain a wildcard.

*/

#define WORKFILE "munge$$$.$$$"	/* scratch file: list of filenames to process */

int _stack = 4000;		/* NOTE(review): presumably the stack size request read by the runtime startup -- confirm */
long occurence;			/* times we found one */
char *membuf;			/* search buffer, allocated in main(), used by subs() and writeout() */
long memsize;			/* size of membuf in bytes */

/* Upper to lower case translation table, indexed by character code
(0 - 255). 'A' - 'Z' map to 'a' - 'z'; every other code, including
128 - 255, maps to itself (the 8th bit is NOT stripped). Used for the
case insensitive compare. */

int ult[] = {
	0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,
	17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
	' ','!','"','#','$','%','&','\'','(',')','*','+',',','-','.','/',
	'0','1','2','3','4','5','6','7','8','9',':',';','<','=','>','?',
	'@','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o',
	'p','q','r','s','t','u','v','w','x','y','z','[','\\',']','^','_',
	'`','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o',
	'p','q','r','s','t','u','v','w','x','y','z','{','|','}','~',127,

	128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
	144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
	160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
	176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
	192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
	208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
	224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
	240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255
};
/* MUNGE entry point.

Parses the raw command line 'cl' (handed over by the special tinymain(),
so that quoted arguments containing spaces survive), writes the names of
all files matching the pathspec to WORKFILE, then reads that list back
and runs subs() over each file.

The first quoted argument is the pattern; a second quoted argument is the
replacement and turns editing on. When editing, output goes to
FILENAME.$$$; if anything was replaced the original is renamed to
FILENAME.bak and the work file takes its place, else the work file is
deleted.

Never returns: exits with 1 on any error, otherwise with the number of
occurrences + 1 (the count is capped at 254, so 255 at most). */

int main(argc,argv,cl)	/* NOTE: Requires SPECIAL TINYMAIN() !!!!!! */
int argc;
char *argv[];
char cl[];		/* NOTE: Requires SPECIAL TINYMAIN() !!!!!! */
{
/* The argument buffers below are 128 bytes so that no single argument can
overrun them. NOTE(review): this assumes cl is the DOS command tail, which
holds at most 127 characters -- confirm against tinymain(). */

char buff[128];			/* G. P. buffer */
char pat[128];			/* pattern to find, */
char rep[128];			/* and one to replace with */
char switches[20];		/* switches from command line, */
char source_path[128];		/* files to search, */
char path_name[128];		/* path stripped from pathspec, */
char source_name[144];		/* full filename to open and search, (path + name + cr/lf) */
char work_name[144];		/* temp file name */
char dolineno;			/* 1 == print line numbers. NOTE(review): never set or passed on here, yet subs() refers to a 'dolineno' -- confirm */
char igcase;			/* 1 == ignore case, */
char vanilla;			/* 1 == force vanilla text (set by /V, not used otherwise) */
char quiet;			/* 1 == dont display progress */
char edit;			/* true to do the editing */
char errflg;			/* error while processing files */
int files;			/* files processed */
int outfile;			/* open file if any */
int f,i;
int input,output;		/* input and output file handles */
long before;			/* occurence count before the current file */
struct _xfbuf xfbuf;
char *cp;

	output= -1;
	igcase= 1;
	vanilla= 0;
	quiet= 0;
	occurence= 0L;
	edit= 0;
	*pat= '\0';
	*rep= '\0';
	*source_path= '\0';
	outfile= -1;

/* NOTE: This parses the command line directly, as passed by the special
tinymain() function, so that it can pick up quoted arguments with spaces, etc */

	cl= next_arg(cl);			/* skip program name */
	cp= "";					/* no error message */
	while (*cl && !*cp) {
		cl= skip_delim(cl);			/* skip leading crud */
		if (*cl == '"') {			/* if a quoted argument, */
			cl= cpyqarg(source_name,cl);	/* copy/process, */
			expand(work_name,source_name);	/* expand macros */

			if (! *pat) strcpy(pat,work_name);
			else {
				strcpy(rep,work_name);	/* (allows null replacements) */
				edit= 1;
			}
		} else {
			if (! *source_path) cpyarg(source_path,cl);
			cpyarg(buff,cl);		/* in case needed by options */
			strip_switch(switches,cl);	/* check for switches */
			cl= next_arg(cl);

			i= 0;
			while (switches[i]) {
				switch(switches[i++]) {
					case 'C': igcase= 0; break;
					case 'V': vanilla= 1; break;	/* (was "= 0", a no-op) */
					case 'Q': quiet= 1; break;
					case 'O':
						if (strcmp(buff,source_path) == 0) {
							cprintf("Please use different names for input and output files!\r\n");
							exit(1);
						}
						outfile= open(buff,2);	/* append if it exists, */
						if (outfile == -1) outfile= creat(buff,2);
						else lseek(outfile,0L,2);
						if (outfile == -1) {	/* dont silently drop the results */
							cprintf("Can't open output file %s!\r\n",buff);
							exit(1);
						}
						break;

					default: cp= "I have no idea what option switch that is!"; break;
				}
			}
		}
	}
	if (*cp || !*pat || !*source_path) {	/* bad or missing arguments */
		if (*cp) cprintf("\r\nERROR: %s\r\n",cp);
		cputs("MUNGE -- Copyright Tom Jennings Aug 91\r\n");
		cputs("Fido Software, Box 77731, San Francisco CA 94107\r\n");
		cputs("\r\n");
		cputs("MUNGE \"pattern\" filespec                Displays lines in matching files that\r\n");
		cputs("                                        contain \"pattern\", or ...\r\n");
		cputs("MUNGE \"pattern\" \"replacement\" filespec  Replace all occurences of \"pattern\"\r\n");
		cputs("                                        with \"replacement\" in matching files\r\n");
		cputs("\r\n");
		cputs("You can enter control codes in \"pattern\" or \"replacement\" by using \"^A\" for \r\n");
		cputs("Control-A (1 decimal), \"^X\" for Control-X (24 decimal), or \"\\194\" for character\r\n");
		cputs("code 194, etc.\r\n");
		cputs("\r\n");
		cputs("MUNGE \"pattern\" ... filespec/C            Case sensitive\r\n");
		cputs("MUNGE ... filename/O                      Output results to filename\r\n");
		cputs("MUNGE ... /Q                              Do not display on screen (faster)\r\n");
		cputs("\r\n");
		cputs("Option \"/C\" makes \"pattern\" case sensitive, ie. \"A\" is different from \"a\", etc.\r\n");
		cputs("\r\n");
		cputs("MUNGE \"abc\" *.DOC                       Displays lines containing \"abc\"\r\n");
		cputs("MUNGE \"abc\" \"DEF\" FILE.EXT              Changes all \"abc\"s to \"DEF\"s\r\n");
		cputs("MUNGE \"^M^J\" \"^M\" BIGFILE               Changes all CR/LFs to just CRs\r\n");
		cputs("\r\n");
		cputs("ERRORLEVEL returned is 1 for any error, else the number of occurences\r\n");
		cputs("of \"pattern\" + 1. (255 means 254 or more occurences.)\r\n");
		exit(1);
	}

	strip_path(path_name,source_path);	/* seperate the path, */

	fastfile(2);				/* two fast files */
	for (memsize= 65000; memsize > 1000; memsize -= 100) {
		membuf= (char *) malloc(memsize);/* get a big chunk */
		if (membuf != NULL) break;		/* big as possible */
	}
	if ((memsize < 5000) || (membuf == NULL)) { /* oops... */
		error("Not enough memory!");
	}

/* Compile the list of names to process; we scroll these out to disk,
because working on them inline screws up the _find. */

	f= -1;						/* no work file yet */
	i= 0;						/* find() counter */
	files= 0;					/* number of matches */
	xfbuf.s_attrib= 0;				/* normal files only */
	while (_find(source_path,i,&xfbuf)) {
		++i;
		/* NOTE(review): this compare is case sensitive and WORKFILE is
		lower case -- confirm _find() returns names in that case */
		if (strcmp(xfbuf.name,WORKFILE) == 0) continue;
		++files;				/* found another, */
		strcpy(source_name,path_name);		/* put in the path, */
		strcat(source_name,xfbuf.name);		/* the file name, */
		strcat(source_name,"\r\n");		/* and a cr/lf for rline() */
		if (f == -1) f= creat(WORKFILE,2);	/* make output file if nec. */
		if (f == -1) error("Can't create work file %s\r\n",WORKFILE);
		write(f,source_name,strlen(source_name));
	}
	if (f != -1) close(f);				/* (none made if no matches) */

	errflg= 0;					/* no error yet */
	f= -1;
	if (files) f= open(WORKFILE,2);			/* now read 'em back */
	while ((f != -1) && rline(f,source_name,sizeof(source_name))) {
		input= open(source_name,0);		/* open for reading, */
		if (input == -1) {			/* cant search what we cant read */
			printf("Can't open %s!\r\n",source_name);
			continue;
		}
		if (edit) {				/* if editing, */
			strcpy(work_name,source_name);	/* make work file name */
			chgext(work_name,".$$$");	/* FILENAME.$$$ */
			output= creat(work_name,2);
			if (output == -1) {
				printf("Error creating output file %s!\r\n",work_name);
				close(input);
				++errflg;
				break;
			}
		}
		before= occurence;			/* see if it changes */

		errflg= ! subs(source_name,
		    igcase,edit,quiet,
		    input,output,outfile,
		    pat,rep);
		close(input);				/* close input, */
		if (edit) {				/* if doing editing, */
			close(output);			/* close output, */
			if (errflg) {			/* stop if write error */
				delete(work_name);	/* dont leave a partial file */
				break;
			}

			if (before == occurence) {		/* if string not found */
				delete(work_name);		/* do not touch file */

			} else {				/* it changed, do it all */
				chgext(work_name,".bak");	/* backup file; */
				delete(work_name);		/* delete any old one, */
				rename(source_name,work_name);	/* rename orig to .BAK */
				chgext(work_name,".$$$");	/* make work name again */
				rename(work_name,source_name);	/* rename to orig name */

				printf("\"%s\" changed to \"%s\" ",pat,rep);
				times(occurence - before);
			}

		} else if (occurence != before) {
			printf("Found \"%s\" ",pat);
			times(occurence - before);
		}
	}
	if (f != -1) close(f);
	if (outfile != -1) close(outfile);
	delete(WORKFILE);
	if (files > 1) printf("%,ld times in %,d files\r\n",occurence,files);

/* Return a result code indicating what happened. */

	if (errflg) {
		printf("Disk full! Everything comes to a grinding halt.\r\n");
		exit(1);
	}
	if (occurence > 254) occurence= 254;	/* make ERRORLEVEL code */
	f= occurence + 1;			/* 1 plus number of occurences */
	exit(f);
}

/* Print how often something happened, followed by a cr/lf: "once" for 1,
"twice" for 2, and "N times" for any other count. */

times (n)
long n;
{
	if ((n != 1) && (n != 2)) printf("%,ld times\r\n",n);
	else printf((n == 1) ? "once\r\n" : "twice\r\n");
}
/* Copy one argument from the line at sp into dp, and return a pointer to
the text following it.

If the argument starts with a double quote, the quote is stripped and
everything up to the closing quote is copied; a quote preceded by a
backslash does not end the argument. Backslashes are copied through
untouched (expand() interprets them later). Without a leading quote the
argument ends at the first delimiter.

The returned pointer is just past the closing quote or delimiter, or at
the terminating NUL if the line ran out first. dp must be big enough for
the argument; no length check is made. */

char *cpyqarg(dp,sp)
char *dp,*sp;
{
char lastc,c,q;

	if (*sp == '"') q= *sp++; else q= NUL;		/* optional quote stripping */
	lastc= NUL;
	while (c= *sp) {
		++sp;				/* next ... */
		if ((c == q) && (lastc != '\\')) /* if the quote char & not quoted */
			break;			/* end of argument */
		if (!q && delim(c)) break;	/* else stop if a delimiter */
		*dp++= c;			/* else part of same arg */

/* A backslash that was itself escaped ("\\") is a finished escape, and
must not quote the character after it; otherwise an argument ending in
a literal backslash ("abc\\") would swallow its own closing quote. */

		if ((c == '\\') && (lastc == '\\')) lastc= NUL;
		else lastc= c;			/* remember last char */
	}
	*dp= NUL;

	return(sp);
}
/* Expand formatted control strings from sp into dp.

	^x	the next character, upper-cased, with bit 6 (0x40) cleared,
		so ^A becomes 1, ^X becomes 24, etc.
	\ddd	the character whose decimal code is ddd (all digits that
		follow are consumed)
	\r \n \b \e \c	CR, LF, BS, ESC, ETX
	\x	any other character following a backslash is taken as-is

Anything that works out to NUL is dropped, since NUL ends the string. The
output is never longer than the input, so dp may be the same size as sp. */

int expand(dp,sp)
char *dp,*sp;
{
char c;

	while (c= *sp++) {
		if (c == '^') {				/* control characters */
			c= *sp;				/* get the next char */
			if (c) ++sp;			/* (dont skip the NUL!) */
			c= toupper(c & 0xff) & 0xbf;	/* convert it */

		} else if (c == '\\') {			/* if literal, */
			c= *sp;				/* get the next char */
			if (c) ++sp;			/* (dont skip the NUL!) */

/* The "& 0xff" keeps the ctype calls from ever seeing a negative value
when char is signed and the character is 128 or above. */

			switch (tolower(c & 0xff)) {
				case '0':
				case '1':
				case '2':
				case '3':
				case '4':
				case '5':
				case '6':
				case '7':
				case '8':
				case '9':
					c= atoi(--sp);	/* back up to the 1st digit */
					while (isdigit(*sp & 0xff)) ++sp;
					break;

				case 'r': c= CR; break;
				case 'n': c= LF; break;
				case 'b': c= BS; break;
				case 'e': c= ESC; break;
				case 'c': c= ETX; break;

				default: break;		/* else leave literal as-is */
			}
		}
		if (c) *dp++= c;
	}
	*dp= NUL;
}

/* Report an error, then error exit (exit code 1). Takes a printf-style
format string plus its arguments; _spr() is handed the address of the
first argument and formats the message into a local buffer. The finished
message must fit in 80 bytes. */

int error(s)
char *s;
{
char buff[80];

	_spr(buff,&s);
	printf("%s",buff);	/* already formatted: a '%' in it must print as-is */
	exit(1);
}
/* Change the extension of a filename to the one passed (which includes
its leading '.'). Any existing extension is stripped off first; if there
is none the new one is just appended.

Only the last component of a path is examined: dots that come before
a '\', '/' or ':' belong to directory names ("..\file.txt", "c:\v1.2\x")
and are left alone. fname is changed in place and must have room for the
new extension. */

int chgext(fname,ext)
char *fname,*ext;
{
char *dot;			/* where the extension starts, if found */

	dot= NULL;
	while (*fname) {
		if ((*fname == '\\') || (*fname == '/') || (*fname == ':'))
			dot= NULL;	/* new component; forget earlier dots */
		else if ((*fname == '.') && (dot == NULL))
			dot= fname;	/* existing extension starts here */
		++fname;
	}
	if (dot == NULL) dot= fname;	/* none: add to end of the name */
	strcpy(dot,ext);		/* force new extension */
	return(0);
}

/* Flying string substitution; return 1 if OK, 0 if a write to the output
file failed. Hard to read code: this is coded for speed. Indexes are used to
chase partial strings around the ring buffer (the global membuf, memsize
bytes long). Indexes are advanced as pattern matching progresses, and
reset when a mismatch occurs.

The global 'occurence' is incremented once for every complete match of
pat. When editing, text that did not match is copied to 'output' through
writeout() and 'rep' is written in place of each match. When not editing,
the buffer contents from the start of the current line up to the next LF
are shown on the screen (unless quiet) and written to 'outfile' (unless
it is -1). Scanning stops at end of file, at a ^Z (SUB) character, or on
a write error. */

subs(source_name,igcase,edit,quiet,input,output,outfile,pat,rep)
char *source_name;	/* name of current file */
int igcase;		/* 1 == ignore case */
int edit;		/* 1 == do replacements */
int quiet;		/* 1 == dont display on screen */
int input,output;	/* in and out files */
int outfile;		/* file to output results to */
char pat[];		/* pattern string  */
char rep[];		/* replacement string */
{
unsigned s;		/* screen line buffer index */
unsigned b;		/* buffer index */
unsigned m;		/* buffer match index */
unsigned u;		/* unmatched */
unsigned p;		/* pattern index */
unsigned rlen;		/* length of replacement */
unsigned col;		/* character column (set to 0 below, not otherwise used) */
int lineno;		/* line number */
unsigned count;		/* bytes in the buffer */
unsigned full;		/* disk full error flag */
char cp,cb,c;
char disp_name;		/* 1 == already displayed filename */

	disp_name= 0;				/* need to display filename */
	lineno= 0;
	s= 0;					/* line for the screen index */
	b= 0;					/* current buffer index */
	m= 0;					/* match buffer index */
	u= 0;					/* no match */
	p= 0;					/* reset pattern index */
	rlen= strlen(rep);			/* replacement length */
	full= 0;				/* no disk write error yet */
	count= 0;				/* buffer is empty */

	while (1) {

/* Read some text from the file. We read from the beginning of the buffer
up to m, since it is either equal to b, or less than if we have a partial
match. */

		if (! count) {
			if (u) {		/* if deferred output, */
				if (edit) full= writeout(output,m,b);
				m= b;		/* flush it first */
				if (full) break;/* stop if write error */
			}

/* NOTE(review): the data always lands at the start of membuf, whatever b
currently is, and the result of read() goes into an unsigned; an error
return of -1 would not look like zero -- confirm both are intended. */

			count= m;		/* fill up to pointer, */
			if (count == 0) 
				count= memsize;	/* or whole buffer if wrapped */
			count= read(input,membuf,count);
			if (! count) break;
		}

		cp= pat[p];				/* sample the chars */
		cb= membuf[b];				/* to test */
		if (cb == SUB) {
			if (disp_name) cprintf("File terminated by ^Z\r\n");
			break;
		}

/* NOTE(review): cp and cb are plain char; if char is signed on this
compiler, codes 128 and up would index ult[] with a negative value --
confirm. */

		if (igcase) {				/* if ignoring case, */
			cp= ult[cp];
			cb= ult[cb];
		}

		if (! edit) {				/* do line stuff */
			if (cb == LF) {			/* line stuff */
				s= b + 1;		/* beg/end of lines */
				++lineno;		/* line numbers */
			}
		}

/* The m pointer is used to flag the last match or mismatch; its current use
depends on the last test, and is indicated by the index u. Outputting is
deferred until the last possible minute, since MSDOS and the 8086 are so
damn slow. */

		if (cp != cb) {				/* MISMATCH */
			if (! u) {			/* if prev. match, */
				if (edit) full= writeout(output,m,b);/* flush, */
				u= 1;			/* flag mismatch */
				p= 0;			/* reset pattern */
				m= b;			/* sync indexes */
			}
			if (++b >= memsize) b= 0;	/* advance buffer index, */

		} else {				/* MATCH */
			if (u) {			/* if prev. mismatch */
				if (edit) full= writeout(output,m,b);/* flush, */
				u= 0;			/* flag match */
				m= b;			/* sync indexes */
			}
			if (++b >= memsize) b= 0;	/* advance buffer index, */
			if (! pat[++p]) {		/* if end of pattern, */
				++occurence;		/* FULL MATCH */

/* If not editing, then output the found string to the screen. This makes
no attempt to correct for lines wrapping around the buffer boundaries; it
may display partial lines. If editing, then do the replacement and don't 
display anything. */

				if (!quiet && !disp_name) {
					printf("File: %s\r\n",source_name);
					disp_name= 1;
				}

/* NOTE(review): 'dolineno' is not declared in this function, and no file
scope definition is visible in this file (main() only has a local of that
name) -- confirm where it is meant to come from.

NOTE(review): the display loop below only ends when it finds an LF; it
wraps at the end of the buffer, so a buffer holding no LF at all would
keep it going -- confirm. The "c > 127" test also depends on char being
unsigned. */

				if (! edit) {
					if (!quiet && dolineno) printf("%4d: ",lineno);
					col= 0;		/* output to screen */
					while (1) {
						if (s >= memsize) s= 0;
						c= membuf[s++];
						if (! quiet) {
							if ((c == CR) || (c == LF)) lconout(c);
							else if ((c < ' ') || (c == 127)) printf("^%c",c + '@');
							else if (c > 127) printf("\\%d",c);
							else lconout(c);
						}
						if (outfile != -1) write(outfile,&c,1);
						if (c == LF) break;
					}
/*					if (! quiet) printf("\r\n");
*/
				} else full |= (write(output,rep,rlen) != rlen);
				m= b;			/* sync indexes */
				p= 0;			/* reset pattern index */
			}
		}
		if (full) break;			/* stop if error */
		--count;				/* count one taken */
	}

/* Terminated either because of end of input file or write error. Flush
the output buffer if appropriate, and return. */

	if (edit) full |= writeout(output,m,b);
	return(! full);
}
/* Output text in the ring buffer (global membuf, memsize bytes), between
m and b, if any. Return error: 1 if any write came up short, else 0.

m is the index of the first byte to write and b is one past the last. If
m is above b the text wraps around the end of the buffer and is written
in two pieces. Nothing is written when m equals b.

Byte counts are kept unsigned, since a piece can be bigger than a signed
16 bit int can hold, and each write is checked on its own so that the
second piece is not written after the first one failed. */

int writeout(output,m,b)
int output;
unsigned m,b;
{
unsigned want;		/* bytes we want written */
unsigned got;		/* bytes write() says it wrote */

	if (m == b) return(0);			/* nothing to do */

	if (m < b) {				/* one piece */
		want= b - m;
		got= write(output,&membuf[m],want);
		return(got != want);		/* true if error */
	}

	want= memsize - m; 			/* here to end of buffer */
	got= write(output,&membuf[m],want);
	if (got != want) return(1);		/* dont write out of order */

	if (! b) return(0);			/* nothing at the front */
	got= write(output,membuf,b);		/* beg. of buffer to here */
	return(got != b);			/* true if error */
}

