ftp.nice.ch/pub/next/unix/editor/ne-1.0.NI.s.tar.gz#/ne-1.0.NI.s/src/search.c

This is search.c in view mode; [Download] [Up]

/* Search/replace functions (with and without regular expressions).

   Copyright (C) 1993 Sebastiano Vigna

    This file is part of ne, the nice editor.

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2, or (at your option)
    any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

In other words, you are welcome to use, share and improve this program.
You are forbidden to forbid anyone else to use, share and improve
what you give them.   Help stamp out software-hoarding!  */


#include "ne.h"
#include "regex.h"


/* This is the initial allocation size for regex.library. */

#define START_BUFFER_SIZE 4096


/* This array is used both by the Boyer-Moore algorithm and by the regex
library. It is updated if b->find_string_changed is TRUE (it should always
be the first time the string is searched for). */

static unsigned char d[256];

/* This macro upper cases a character or not, depending on the boolean
sense_case. It is used in find(). Note that the char returned in unsigned,
but the argument needs not to be. */

#define CONV(c)	(unsigned char)(sense_case ? c : up_case[(unsigned char)c])


/* This function performs a search for the given pattern with a simplified
Boyer-Moore algorithm starting at the given position, in the given
direction, skipping a possible match at the current cursor position if
skip_first is TRUE. If dir is 0, it is fetched from b->search_back. If
pattern is NULL, it is fetched from b->find_string. In this case,
b->find_string_changed is checked, and, if it is FALSE, the string is not
recompiled. Please check to set b->find_string_changed whenever a new string
is set in b->find_string. The cursor is moved on the occurrence position if
a match is found. */

int find(buffer *b, const char *pattern, int dir, int skip_first) {

	int sense_case = (b->case_search != 0);
	unsigned char c, first_char;
	char *p;
	long i, m;
	int recompile_string;
	long y;
	line_desc *ld;

	if (!pattern) {
		pattern = b->find_string;
		recompile_string = b->find_string_changed;
	}
	else recompile_string = TRUE;

	if (!dir) dir = b->search_back ? -1 : 1;

	if (!pattern || !(m = strlen(pattern))) return(ERROR);

	if (recompile_string) memset(d, m, sizeof(d));

	ld = b->cur_line_desc;
	y = b->cur_line;

	if (dir>0) {

		if (recompile_string) {
			for(i=0; i<m-1; i++) d[CONV(pattern[i])] = m-i-1;
			b->find_string_changed = 0;
		}

		p = ld->line + b->cur_pos + m-1 + (skip_first ? 1 : 0);
		first_char = CONV(pattern[m-1]);

		while(y<b->line_num) {

			assert(ld->ld_node.next != NULL);

			if (ld->line_len >= m) {

				while((p - ld->line) < ld->line_len) {

					if ((c = CONV(*p)) != first_char) p+=d[c];
					else {
						for (i=1; i<m; i++)
							if (CONV(*(p-i)) != CONV(pattern[m-i-1])) {
								p+=d[c];
								break;
							}
						if (i == m) {
							goto_line(b, y);
							goto_pos(b, (p - ld->line)-m+1);
							return(OK);
						}
					}
				}
			}

			ld = (line_desc *)ld->ld_node.next;
			if (ld->ld_node.next) p = ld->line+m-1;
			y++;
		}
	}
	else {

		if (recompile_string) {
			for(i=m-1; i>0; i--) d[(unsigned char)CONV(pattern[i])] = i;
			b->find_string_changed = 0;
		}

		p = ld->line + (b->cur_pos > ld->line_len-m ? ld->line_len-m : b->cur_pos + (skip_first ? -1 : 0));
		first_char = CONV(pattern[0]);

		while(y>=0) {

			assert(ld->ld_node.prev != NULL);

			if (ld->line_len >= m) {

				while((p - ld->line) >= 0) {

					if ((c = CONV(*p)) != first_char) p-=d[c];
					else {
						for (i=1; i<m; i++)
							if (CONV(*(p+i)) != CONV(pattern[i])) {
								p-=d[c];
								break;
							}
						if (i == m) {
							goto_line(b, y);
							goto_pos(b, p - ld->line);
							return(OK);
						}
					}
				}
			}

			ld = (line_desc *)ld->ld_node.prev;
			if (ld->ld_node.prev) p = ld->line+ld->line_len-m;
			y--;
		}
	}

	return(NOT_FOUND);
}



/* This function replaces n characters with the given string at the current
cursor position, and then moves it to the end of the string. */

int replace(buffer *b, int n, const char *string) {

	int len;

	if (!string) string = b->replace_string;

	if (!string) return(ERROR);

	len = strlen(string);

	start_undo_chain(b);

	delete_stream(b, b->cur_line_desc, b->cur_line, b->cur_pos, n);

	if (len) insert_stream(b, b->cur_line_desc, b->cur_line, b->cur_pos, string, len+1);

	end_undo_chain(b);

	goto_pos(b, b->cur_pos+len);

	return(OK);
}



/* The following variables are used by regex. In particular, re_reg holds
the stard/end of the extended replacement registers. */

static struct re_pattern_buffer re_pb;
static struct re_registers re_reg;


/* This function works exactly like find(), but uses regex.library instead. */

int find_regexp(buffer *b, const char *regex, int dir, int skip_first) {

	const char *p;
	int y, recompile_string;
	long i, start_pos;
	line_desc *ld;

	if (!regex) {
		regex = b->find_string;
		recompile_string = b->find_string_changed;
	}
	else recompile_string = TRUE;

	if (!dir) dir = b->search_back ? -1 : 1;

	if (!regex || !strlen(regex)) return(ERROR);

	if (re_pb.buffer == NULL) {
		if (re_pb.buffer = malloc(START_BUFFER_SIZE)) re_pb.allocated = START_BUFFER_SIZE;
		else return(OUT_OF_MEMORY);
	}

	re_pb.fastmap = (void *)d;

	/* We have to be careful: even if the search string has not changed, it
	is possible that case sensitivity has. In this case, we force recompilation. */

	if (b->case_search) {
		if (re_pb.translate != 0) recompile_string = TRUE;
		re_pb.translate = 0;
	}
	else {
		if (re_pb.translate != (char *)up_case) recompile_string = TRUE;
		re_pb.translate = (char *)up_case;
	}

	/* Here we have a very dirty hack: since we cannot return the error of regex,
	we print it here. Which means that we access term.c's functions. Ack! */

	if (recompile_string && (p = re_compile_pattern(regex, strlen(regex), &re_pb))) {
		print_message(p);
		ring_bell();
		return(ERROR);
	}

	b->find_string_changed = 0;

	ld = b->cur_line_desc;
	y = b->cur_line;

	if (dir>0) {

		start_pos = b->cur_pos+(skip_first ? 1 : 0);

		while(y<b->line_num) {

			assert(ld->ld_node.next != NULL);

			if (start_pos <= ld->line_len &&
				(i = re_search(&re_pb, ld->line, ld->line_len, start_pos, ld->line_len-start_pos, &re_reg))>=0) {
				goto_line(b, y);
				goto_pos(b, i);
				return(0);
			}

			ld = (line_desc *)ld->ld_node.next;
			start_pos = 0;
			y++;
		}
	}
	else {

		start_pos = b->cur_pos+(skip_first ? -1 : 0);

		while(y>=0) {

			assert(ld->ld_node.prev != NULL);

			if (start_pos >= 0 &&
				(i =re_search(&re_pb, ld->line, ld->line_len, start_pos, -start_pos-1, &re_reg))>=0) {
				goto_line(b, y);
				goto_pos(b, i);
				return(OK);
			}

			ld = (line_desc *)ld->ld_node.prev;
			if (ld->ld_node.prev) start_pos = ld->line_len;
			y--;
		}
	}

	return(NOT_FOUND);
}



/* This function replaces a regular expression. The given string can contain
\0, \1 etc. for the pattern matched by the i-th pair of brackets (\0 is the
whole string). */

int replace_regexp(buffer *b, const char *string) {

	int i, pos, len, c, reg_used = FALSE;
	char *p, *q, *t = NULL;

	if (!string) string = b->replace_string;

	if (!string) return(ERROR);

	if (q = p = str_dup(string)) {

		len = strlen(p);

		while(TRUE) {
			while(*q && *q != '\\') q++;

			if (!*q) break;

			i = *(q+1)-'0';

			if (*(q+1) == '\\') {
				memmove(q, q+1, strlen(q+1)+1);
			}
			else if (i>=0 && i<RE_NREGS && re_reg.start[i]>=0) {
				*q++ = 0;
				*q++ = i;
				reg_used = TRUE;
			}
			else {
				free(p);
				return(WRONG_CHAR_AFTER_BACKSLASH);
			}
		}

		if (reg_used) {
			if (t = malloc(re_reg.end[0]-re_reg.start[0]+1)) {
				memcpy(t, b->cur_line_desc->line+re_reg.start[0], re_reg.end[0]-re_reg.start[0]);
				t[re_reg.end[0]-re_reg.start[0]] = 0;
			}
			else {
				free(p);
				return(OUT_OF_MEMORY);
			}
		}

		for(i=RE_NREGS-1; i>=0; i--) {
			re_reg.end[i] -= re_reg.start[0];
			re_reg.start[i] -= re_reg.start[0];
		}

		start_undo_chain(b);

		delete_stream(b, b->cur_line_desc, b->cur_line, b->cur_pos, re_reg.end[0]);

		q = p;
		pos = 0;

		while(TRUE) {
			if (strlen(q)) {
				insert_stream(b, b->cur_line_desc, b->cur_line, b->cur_pos+pos, q, strlen(q)+1);
				pos += strlen(q);
			}

			q += strlen(q)+1;

			if (q-p > len) break;

			if (re_reg.end[*q]-re_reg.start[*q]) {

				c = t[re_reg.end[*q]];
				t[re_reg.end[*q]] = 0;

				insert_stream(b, b->cur_line_desc, b->cur_line, b->cur_pos+pos, t+re_reg.start[*q], re_reg.end[*q]-re_reg.start[*q]+1);

				t[re_reg.end[*q]] = c;

				pos += re_reg.end[*q]-re_reg.start[*q];
			}

			q++;
		}

		end_undo_chain(b);

		goto_pos(b, b->cur_pos+pos);

		free(t);
		free(p);
	}
	else return(OUT_OF_MEMORY);

	return(OK);
}

These are the contents of the former NiCE NeXT User Group NeXTSTEP/OpenStep software archive, currently hosted by Netfuture.ch.