mirror of https://github.com/MariaDB/server
				
				
			
			You can not select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					
					
						
							1098 lines
						
					
					
						
							29 KiB
						
					
					
				
			
		
		
		
			
			
			
		
		
	
	
							1098 lines
						
					
					
						
							29 KiB
						
					
					
				| /* Copyright (C) 2000 MySQL AB | |
|  | |
|    This program is free software; you can redistribute it and/or modify | |
|    it under the terms of the GNU General Public License as published by | |
|    the Free Software Foundation; version 2 of the License. | |
|  | |
|    This program is distributed in the hope that it will be useful, | |
|    but WITHOUT ANY WARRANTY; without even the implied warranty of | |
|    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | |
|    GNU General Public License for more details. | |
|  | |
|    You should have received a copy of the GNU General Public License | |
|    along with this program; if not, write to the Free Software | |
|    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */ | |
| 
 | |
| /* | |
|   Replace strings in textfile | |
|  | |
|   This program replaces strings in files or from stdin to stdout. | |
|   It accepts a list of from-string/to-string pairs and replaces | |
|   each occurrence of a from-string with the corresponding to-string. | |
|   The first occurrence of a found string is matched. If there is more | |
|   than one possibility for the string to replace, longer matches | |
|   are preferred before shorter matches. | |
|  | |
|   Special characters in from string: | |
|   \^    Match start of line. | |
|   \$	Match end of line. | |
|   \b	Match space-character, start of line or end of line. | |
|         For end \b the next replace starts locking at the end space-character. | |
|         An \b alone or in a string matches only a space-character. | |
|   \r, \t, \v as in C. | |
|   The programs make a DFA-state-machine of the strings and the speed isn't | |
|   dependent on the count of replace-strings (only of the number of replaces). | |
|   A line is assumed ending with \n or \0. | |
|   There are no limit exept memory on length of strings. | |
|  | |
|   Written by Monty. | |
|   fill_buffer_retaining() is taken from gnu-grep and modified. | |
| */ | |
| 
 | |
| #define DONT_USE_RAID | |
| #include <my_global.h> | |
| #include <m_ctype.h> | |
| #include <my_sys.h> | |
| #include <m_string.h> | |
| #include <errno.h> | |
|  | |
| #define PC_MALLOC		256	/* Bytes for pointers */ | |
| #define PS_MALLOC		512	/* Bytes for data */ | |
|  | |
| typedef struct st_pointer_array {		/* when using array-strings */ | |
|   TYPELIB typelib;				/* Pointer to strings */ | |
|   uchar *str;					/* Strings is here */ | |
|   int7	*flag;					/* Flag about each var. */ | |
|   uint  array_allocs,max_count,length,max_length; | |
| } POINTER_ARRAY; | |
| 
 | |
| #define SPACE_CHAR	256 | |
| #define START_OF_LINE	257 | |
| #define END_OF_LINE	258 | |
| #define LAST_CHAR_CODE	259 | |
|  | |
| typedef struct st_replace { | |
|   my_bool   found; | |
|   struct st_replace *next[256]; | |
| } REPLACE; | |
| 
 | |
| typedef struct st_replace_found { | |
|   my_bool found; | |
|   char *replace_string; | |
|   uint to_offset; | |
|   int from_offset; | |
| } REPLACE_STRING; | |
| 
 | |
| #ifndef WORD_BIT | |
| #define WORD_BIT (8*sizeof(uint)) | |
| #endif | |
|  | |
| 	/* functions defined in this file */ | |
| 
 | |
| static int static_get_options(int *argc,char * * *argv); | |
| static int get_replace_strings(int *argc,char * * *argv, | |
| 				   POINTER_ARRAY *from_array, | |
| 				   POINTER_ARRAY *to_array); | |
| static int insert_pointer_name(POINTER_ARRAY *pa, char * name); | |
| static void free_pointer_array(POINTER_ARRAY *pa); | |
| static int convert_pipe(REPLACE *,FILE *,FILE *); | |
| static int convert_file(REPLACE *, char *); | |
| static REPLACE *init_replace(char * *from, char * *to,uint count, | |
|                              char * word_end_chars); | |
| static uint replace_strings(REPLACE *rep, char * *start,uint *max_length, | |
|                             char * from); | |
| static int initialize_buffer(void); | |
| static void reset_buffer(void); | |
| static void free_buffer(void); | |
| 
 | |
| static int silent=0,verbose=0,updated=0; | |
| 
 | |
| 	/* The main program */ | |
| 
 | |
| int main(int argc, char *argv[]) | |
| { | |
|   int i,error; | |
|   char word_end_chars[256],*pos; | |
|   POINTER_ARRAY from,to; | |
|   REPLACE *replace; | |
|   MY_INIT(argv[0]); | |
| 
 | |
|   if (static_get_options(&argc,&argv)) | |
|     exit(1); | |
|   if (get_replace_strings(&argc,&argv,&from,&to)) | |
|     exit(1); | |
| 
 | |
|   for (i=1,pos=word_end_chars ; i < 256 ; i++) | |
|     if (my_isspace(&my_charset_latin1,i)) | |
|       *pos++= (char) i; | |
|   *pos=0; | |
|   if (!(replace=init_replace((char**) from.typelib.type_names, | |
| 			     (char**) to.typelib.type_names, | |
| 			     (uint) from.typelib.count,word_end_chars))) | |
|     exit(1); | |
|   free_pointer_array(&from); | |
|   free_pointer_array(&to); | |
|   if (initialize_buffer()) | |
|     return 1; | |
| 
 | |
|   error=0; | |
|   if (argc == 0) | |
|     error=convert_pipe(replace,stdin,stdout); | |
|   else | |
|   { | |
|     while (argc--) | |
|     { | |
|       error=convert_file(replace,*(argv++)); | |
|     } | |
|   } | |
|   free_buffer(); | |
|   my_end(verbose ? MY_CHECK_ERROR | MY_GIVE_INFO : MY_CHECK_ERROR); | |
|   exit(error ? 2 : 0); | |
|   return 0;					/* No compiler warning */ | |
| } /* main */ | |
| 
 | |
| 
 | |
| 	/* reads options */ | |
| 	/* Initiates DEBUG - but no debugging here ! */ | |
| 
 | |
| static int static_get_options(argc,argv) | |
| register int *argc; | |
| register char **argv[]; | |
| { | |
|   int help,version; | |
|   char *pos; | |
| 
 | |
|   silent=verbose=help=0; | |
| 
 | |
|   while (--*argc > 0 && *(pos = *(++*argv)) == '-' && pos[1] != '-') { | |
|     while (*++pos) | |
|     { | |
|       version=0; | |
|       switch((*pos)) { | |
|       case 's': | |
| 	silent=1; | |
| 	break; | |
|       case 'v': | |
| 	verbose=1; | |
| 	break; | |
|       case '#': | |
| 	DBUG_PUSH (++pos); | |
| 	pos= (char*) " ";			/* Skip rest of arguments */ | |
| 	break; | |
|       case 'V': | |
| 	version=1; | |
|       case 'I': | |
|       case '?': | |
| 	help=1;					/* Help text written */ | |
| 	printf("%s  Ver 1.4 for %s at %s\n",my_progname,SYSTEM_TYPE, | |
| 	       MACHINE_TYPE); | |
| 	if (version) | |
| 	  break; | |
| 	puts("This software comes with ABSOLUTELY NO WARRANTY. This is free software,\nand you are welcome to modify and redistribute it under the GPL license\n"); | |
| 	puts("This program replaces strings in files or from stdin to stdout.\n" | |
| 	     "It accepts a list of from-string/to-string pairs and replaces\n" | |
| 	     "each occurrence of a from-string with the corresponding to-string.\n" | |
|          "The first occurrence of a found string is matched. If there is\n" | |
|          "more than one possibility for the string to replace, longer\n" | |
|          "matches are preferred before shorter matches.\n\n" | |
| 	     "A from-string can contain these special characters:\n" | |
| 	     "  \\^      Match start of line.\n" | |
| 	     "  \\$      Match end of line.\n" | |
| 	     "  \\b      Match space-character, start of line or end of line.\n" | |
| 	     "          For a end \\b the next replace starts locking at the end\n" | |
| 	     "          space-character. A \\b alone in a string matches only a\n" | |
| 	     "          space-character.\n"); | |
| 	  printf("Usage: %s [-?svIV] from to from to ... -- [files]\n", my_progname); | |
| 	puts("or"); | |
| 	  printf("Usage: %s [-?svIV] from to from to ... < fromfile > tofile\n", my_progname); | |
| 	puts(""); | |
| 	puts("Options: -? or -I \"Info\"  -s \"silent\"      -v \"verbose\""); | |
| 	break; | |
|       default: | |
| 	fprintf(stderr,"illegal option: -%c\n",*pos); | |
| 	break; | |
|       } | |
|     } | |
|   } | |
|   if (*argc == 0) | |
|   { | |
|     if (!help) | |
|       my_message(0,"No replace options given",MYF(ME_BELL)); | |
|     exit(0);					/* Don't use as pipe */ | |
|   } | |
|   return(0); | |
| } /* static_get_options */ | |
| 
 | |
| 
 | |
| static int get_replace_strings(argc,argv,from_array,to_array) | |
| register int *argc; | |
| register char **argv[]; | |
| POINTER_ARRAY *from_array,*to_array; | |
| { | |
|   char *pos; | |
| 
 | |
|   bzero((char*) from_array,sizeof(from_array[0])); | |
|   bzero((char*) to_array,sizeof(to_array[0])); | |
|   while (*argc > 0 && (*(pos = *(*argv)) != '-' || pos[1] != '-' || pos[2])) | |
|   { | |
|     insert_pointer_name(from_array,pos); | |
|     (*argc)--; | |
|     (*argv)++; | |
|     if (!*argc || !strcmp(**argv,"--")) | |
|     { | |
|       my_message(0,"No to-string for last from-string",MYF(ME_BELL)); | |
|       return 1; | |
|     } | |
|     insert_pointer_name(to_array,**argv); | |
|     (*argc)--; | |
|     (*argv)++; | |
|   } | |
|   if (*argc) | |
|   {					/* Skip "--" argument */ | |
|     (*argc)--; | |
|     (*argv)++; | |
|   } | |
|   return 0; | |
| } | |
| 
 | |
| static int insert_pointer_name(reg1 POINTER_ARRAY *pa,char * name) | |
| { | |
|   uint i,length,old_count; | |
|   uchar *new_pos; | |
|   const char **new_array; | |
|   DBUG_ENTER("insert_pointer_name"); | |
| 
 | |
|   if (! pa->typelib.count) | |
|   { | |
|     if (!(pa->typelib.type_names=(const char **) | |
| 	  my_malloc(((PC_MALLOC-MALLOC_OVERHEAD)/ | |
| 		     (sizeof(char *)+sizeof(*pa->flag))* | |
| 		     (sizeof(char *)+sizeof(*pa->flag))),MYF(MY_WME)))) | |
|       DBUG_RETURN(-1); | |
|     if (!(pa->str= (uchar*) my_malloc((uint) (PS_MALLOC-MALLOC_OVERHEAD), | |
| 				     MYF(MY_WME)))) | |
|     { | |
|       my_free((uchar*) pa->typelib.type_names,MYF(0)); | |
|       DBUG_RETURN (-1); | |
|     } | |
|     pa->max_count=(PC_MALLOC-MALLOC_OVERHEAD)/(sizeof(uchar*)+ | |
| 					       sizeof(*pa->flag)); | |
|     pa->flag= (int7*) (pa->typelib.type_names+pa->max_count); | |
|     pa->length=0; | |
|     pa->max_length=PS_MALLOC-MALLOC_OVERHEAD; | |
|     pa->array_allocs=1; | |
|   } | |
|   length=(uint) strlen(name)+1; | |
|   if (pa->length+length >= pa->max_length) | |
|   { | |
|     pa->max_length=(pa->length+length+MALLOC_OVERHEAD+PS_MALLOC-1)/PS_MALLOC; | |
|     pa->max_length=pa->max_length*PS_MALLOC-MALLOC_OVERHEAD; | |
|     if (!(new_pos= (uchar*) my_realloc((uchar*) pa->str, | |
| 				      (uint) pa->max_length, | |
| 				      MYF(MY_WME)))) | |
|       DBUG_RETURN(1); | |
|     if (new_pos != pa->str) | |
|     { | |
|       my_ptrdiff_t diff=PTR_BYTE_DIFF(new_pos,pa->str); | |
|       for (i=0 ; i < pa->typelib.count ; i++) | |
| 	pa->typelib.type_names[i]= ADD_TO_PTR(pa->typelib.type_names[i],diff, | |
| 					      char*); | |
|       pa->str=new_pos; | |
|     } | |
|   } | |
|   if (pa->typelib.count >= pa->max_count-1) | |
|   { | |
|     int len; | |
|     pa->array_allocs++; | |
|     len=(PC_MALLOC*pa->array_allocs - MALLOC_OVERHEAD); | |
|     if (!(new_array=(const char **) my_realloc((uchar*) pa->typelib.type_names, | |
| 					       (uint) len/ | |
| 					 (sizeof(uchar*)+sizeof(*pa->flag))* | |
| 					 (sizeof(uchar*)+sizeof(*pa->flag)), | |
| 					 MYF(MY_WME)))) | |
|       DBUG_RETURN(1); | |
|     pa->typelib.type_names=new_array; | |
|     old_count=pa->max_count; | |
|     pa->max_count=len/(sizeof(uchar*) + sizeof(*pa->flag)); | |
|     pa->flag= (int7*) (pa->typelib.type_names+pa->max_count); | |
|     memcpy((uchar*) pa->flag,(char *) (pa->typelib.type_names+old_count), | |
| 	   old_count*sizeof(*pa->flag)); | |
|   } | |
|   pa->flag[pa->typelib.count]=0;			/* Reset flag */ | |
|   pa->typelib.type_names[pa->typelib.count++]= (char*) (pa->str+pa->length); | |
|   pa->typelib.type_names[pa->typelib.count]= NullS;	/* Put end-mark */ | |
|   (void) strmov((char*) pa->str + pa->length, name); | |
|   pa->length+=length; | |
|   DBUG_RETURN(0); | |
| } /* insert_pointer_name */ | |
| 
 | |
| 
 | |
| 	/* free pointer array */ | |
| 
 | |
| static void free_pointer_array(reg1 POINTER_ARRAY *pa) | |
| { | |
|   if (pa->typelib.count) | |
|   { | |
|     pa->typelib.count=0; | |
|     my_free((uchar*) pa->typelib.type_names,MYF(0)); | |
|     pa->typelib.type_names=0; | |
|     my_free((uchar*) pa->str,MYF(0)); | |
|   } | |
|   return; | |
| } /* free_pointer_array */ | |
| 
 | |
| 
 | |
| 	/* Code for replace rutines */ | |
| 
 | |
| #define SET_MALLOC_HUNC 64 | |
|  | |
| typedef struct st_rep_set { | |
|   uint  *bits;				/* Pointer to used sets */ | |
|   short	next[LAST_CHAR_CODE];		/* Pointer to next sets */ | |
|   uint	found_len;			/* Best match to date */ | |
|   int	found_offset; | |
|   uint  table_offset; | |
|   uint  size_of_bits;			/* For convinience */ | |
| } REP_SET; | |
| 
 | |
| typedef struct st_rep_sets { | |
|   uint		count;			/* Number of sets */ | |
|   uint		extra;			/* Extra sets in buffer */ | |
|   uint		invisible;		/* Sets not chown */ | |
|   uint		size_of_bits; | |
|   REP_SET	*set,*set_buffer; | |
|   uint		*bit_buffer; | |
| } REP_SETS; | |
| 
 | |
| typedef struct st_found_set { | |
|   uint table_offset; | |
|   int found_offset; | |
| } FOUND_SET; | |
| 
 | |
| typedef struct st_follow { | |
|   int chr; | |
|   uint table_offset; | |
|   uint len; | |
| } FOLLOWS; | |
| 
 | |
| 
 | |
| static int init_sets(REP_SETS *sets,uint states); | |
| static REP_SET *make_new_set(REP_SETS *sets); | |
| static void make_sets_invisible(REP_SETS *sets); | |
| static void free_last_set(REP_SETS *sets); | |
| static void free_sets(REP_SETS *sets); | |
| static void internal_set_bit(REP_SET *set, uint bit); | |
| static void internal_clear_bit(REP_SET *set, uint bit); | |
| static void or_bits(REP_SET *to,REP_SET *from); | |
| static void copy_bits(REP_SET *to,REP_SET *from); | |
| static int cmp_bits(REP_SET *set1,REP_SET *set2); | |
| static int get_next_bit(REP_SET *set,uint lastpos); | |
| static short find_set(REP_SETS *sets,REP_SET *find); | |
| static short find_found(FOUND_SET *found_set,uint table_offset, | |
|                         int found_offset); | |
| static uint start_at_word(char * pos); | |
| static uint end_of_word(char * pos); | |
| static uint replace_len(char * pos); | |
| 
 | |
| static uint found_sets=0; | |
| 
 | |
| 
 | |
| 	/* Init a replace structure for further calls */ | |
| 
 | |
| static REPLACE *init_replace(char * *from, char * *to,uint count, | |
|                              char * word_end_chars) | |
| { | |
|   uint i,j,states,set_nr,len,result_len,max_length,found_end,bits_set,bit_nr; | |
|   int used_sets,chr; | |
|   short default_state; | |
|   char used_chars[LAST_CHAR_CODE],is_word_end[256]; | |
|   char * pos, *to_pos, **to_array; | |
|   REP_SETS sets; | |
|   REP_SET *set,*start_states,*word_states,*new_set; | |
|   FOLLOWS *follow,*follow_ptr; | |
|   REPLACE *replace; | |
|   FOUND_SET *found_set; | |
|   REPLACE_STRING *rep_str; | |
|   DBUG_ENTER("init_replace"); | |
| 
 | |
|   /* Count number of states */ | |
|   for (i=result_len=max_length=0 , states=2 ; i < count ; i++) | |
|   { | |
|     len=replace_len(from[i]); | |
|     if (!len) | |
|     { | |
|       errno=EINVAL; | |
|       my_message(0,"No to-string for last from-string",MYF(ME_BELL)); | |
|       DBUG_RETURN(0); | |
|     } | |
|     states+=len+1; | |
|     result_len+=(uint) strlen(to[i])+1; | |
|     if (len > max_length) | |
|       max_length=len; | |
|   } | |
|   bzero((char*) is_word_end,sizeof(is_word_end)); | |
|   for (i=0 ; word_end_chars[i] ; i++) | |
|     is_word_end[(uchar) word_end_chars[i]]=1; | |
| 
 | |
|   if (init_sets(&sets,states)) | |
|     DBUG_RETURN(0); | |
|   found_sets=0; | |
|   if (!(found_set= (FOUND_SET*) my_malloc(sizeof(FOUND_SET)*max_length*count, | |
| 					  MYF(MY_WME)))) | |
|   { | |
|     free_sets(&sets); | |
|     DBUG_RETURN(0); | |
|   } | |
|   (void) make_new_set(&sets);			/* Set starting set */ | |
|   make_sets_invisible(&sets);			/* Hide previus sets */ | |
|   used_sets=-1; | |
|   word_states=make_new_set(&sets);		/* Start of new word */ | |
|   start_states=make_new_set(&sets);		/* This is first state */ | |
|   if (!(follow=(FOLLOWS*) my_malloc((states+2)*sizeof(FOLLOWS),MYF(MY_WME)))) | |
|   { | |
|     free_sets(&sets); | |
|     my_free((uchar*) found_set,MYF(0)); | |
|     DBUG_RETURN(0); | |
|   } | |
| 
 | |
| 	/* Init follow_ptr[] */ | |
|   for (i=0, states=1, follow_ptr=follow+1 ; i < count ; i++) | |
|   { | |
|     if (from[i][0] == '\\' && from[i][1] == '^') | |
|     { | |
|       internal_set_bit(start_states,states+1); | |
|       if (!from[i][2]) | |
|       { | |
| 	start_states->table_offset=i; | |
| 	start_states->found_offset=1; | |
|       } | |
|     } | |
|     else if (from[i][0] == '\\' && from[i][1] == '$') | |
|     { | |
|       internal_set_bit(start_states,states); | |
|       internal_set_bit(word_states,states); | |
|       if (!from[i][2] && start_states->table_offset == (uint) ~0) | |
|       { | |
| 	start_states->table_offset=i; | |
| 	start_states->found_offset=0; | |
|       } | |
|     } | |
|     else | |
|     { | |
|       internal_set_bit(word_states,states); | |
|       if (from[i][0] == '\\' && (from[i][1] == 'b' && from[i][2])) | |
| 	internal_set_bit(start_states,states+1); | |
|       else | |
| 	internal_set_bit(start_states,states); | |
|     } | |
|     for (pos=from[i], len=0; *pos ; pos++) | |
|     { | |
|       if (*pos == '\\' && *(pos+1)) | |
|       { | |
| 	pos++; | |
| 	switch (*pos) { | |
| 	case 'b': | |
| 	  follow_ptr->chr = SPACE_CHAR; | |
| 	  break; | |
| 	case '^': | |
| 	  follow_ptr->chr = START_OF_LINE; | |
| 	  break; | |
| 	case '$': | |
| 	  follow_ptr->chr = END_OF_LINE; | |
| 	  break; | |
| 	case 'r': | |
| 	  follow_ptr->chr = '\r'; | |
| 	  break; | |
| 	case 't': | |
| 	  follow_ptr->chr = '\t'; | |
| 	  break; | |
| 	case 'v': | |
| 	  follow_ptr->chr = '\v'; | |
| 	  break; | |
| 	default: | |
| 	  follow_ptr->chr = (uchar) *pos; | |
| 	  break; | |
| 	} | |
|       } | |
|       else | |
| 	follow_ptr->chr= (uchar) *pos; | |
|       follow_ptr->table_offset=i; | |
|       follow_ptr->len= ++len; | |
|       follow_ptr++; | |
|     } | |
|     follow_ptr->chr=0; | |
|     follow_ptr->table_offset=i; | |
|     follow_ptr->len=len; | |
|     follow_ptr++; | |
|     states+=(uint) len+1; | |
|   } | |
| 
 | |
| 
 | |
|   for (set_nr=0,pos=0 ; set_nr < sets.count ; set_nr++) | |
|   { | |
|     set=sets.set+set_nr; | |
|     default_state= 0;				/* Start from beginning */ | |
| 
 | |
|     /* If end of found-string not found or start-set with current set */ | |
| 
 | |
|     for (i= (uint) ~0; (i=get_next_bit(set,i)) ;) | |
|     { | |
|       if (!follow[i].chr) | |
|       { | |
| 	if (! default_state) | |
| 	  default_state= find_found(found_set,set->table_offset, | |
| 				    set->found_offset+1); | |
|       } | |
|     } | |
|     copy_bits(sets.set+used_sets,set);		/* Save set for changes */ | |
|     if (!default_state) | |
|       or_bits(sets.set+used_sets,sets.set);	/* Can restart from start */ | |
| 
 | |
|     /* Find all chars that follows current sets */ | |
|     bzero((char*) used_chars,sizeof(used_chars)); | |
|     for (i= (uint) ~0; (i=get_next_bit(sets.set+used_sets,i)) ;) | |
|     { | |
|       used_chars[follow[i].chr]=1; | |
|       if ((follow[i].chr == SPACE_CHAR && !follow[i+1].chr && | |
| 	   follow[i].len > 1) || follow[i].chr == END_OF_LINE) | |
| 	used_chars[0]=1; | |
|     } | |
| 
 | |
|     /* Mark word_chars used if \b is in state */ | |
|     if (used_chars[SPACE_CHAR]) | |
|       for (pos= word_end_chars ; *pos ; pos++) | |
| 	used_chars[(int) (uchar) *pos] = 1; | |
| 
 | |
|     /* Handle other used characters */ | |
|     for (chr= 0 ; chr < 256 ; chr++) | |
|     { | |
|       if (! used_chars[chr]) | |
| 	set->next[chr]= (short) (chr ? default_state : -1); | |
|       else | |
|       { | |
| 	new_set=make_new_set(&sets); | |
| 	set=sets.set+set_nr;			/* if realloc */ | |
| 	new_set->table_offset=set->table_offset; | |
| 	new_set->found_len=set->found_len; | |
| 	new_set->found_offset=set->found_offset+1; | |
| 	found_end=0; | |
| 
 | |
| 	for (i= (uint) ~0 ; (i=get_next_bit(sets.set+used_sets,i)) ; ) | |
| 	{ | |
| 	  if (!follow[i].chr || follow[i].chr == chr || | |
| 	      (follow[i].chr == SPACE_CHAR && | |
| 	       (is_word_end[chr] || | |
| 		(!chr && follow[i].len > 1 && ! follow[i+1].chr))) || | |
| 	      (follow[i].chr == END_OF_LINE && ! chr)) | |
| 	  { | |
| 	    if ((! chr || (follow[i].chr && !follow[i+1].chr)) && | |
| 		follow[i].len > found_end) | |
| 	      found_end=follow[i].len; | |
| 	    if (chr && follow[i].chr) | |
| 	      internal_set_bit(new_set,i+1);		/* To next set */ | |
| 	    else | |
| 	      internal_set_bit(new_set,i); | |
| 	  } | |
| 	} | |
| 	if (found_end) | |
| 	{ | |
| 	  new_set->found_len=0;			/* Set for testing if first */ | |
| 	  bits_set=0; | |
| 	  for (i= (uint) ~0; (i=get_next_bit(new_set,i)) ;) | |
| 	  { | |
| 	    if ((follow[i].chr == SPACE_CHAR || | |
| 		 follow[i].chr == END_OF_LINE) && ! chr) | |
| 	      bit_nr=i+1; | |
| 	    else | |
| 	      bit_nr=i; | |
| 	    if (follow[bit_nr-1].len < found_end || | |
| 		(new_set->found_len && | |
| 		 (chr == 0 || !follow[bit_nr].chr))) | |
| 	      internal_clear_bit(new_set,i); | |
| 	    else | |
| 	    { | |
| 	      if (chr == 0 || !follow[bit_nr].chr) | |
| 	      {					/* best match  */ | |
| 		new_set->table_offset=follow[bit_nr].table_offset; | |
| 		if (chr || (follow[i].chr == SPACE_CHAR || | |
| 			    follow[i].chr == END_OF_LINE)) | |
| 		  new_set->found_offset=found_end;	/* New match */ | |
| 		new_set->found_len=found_end; | |
| 	      } | |
| 	      bits_set++; | |
| 	    } | |
| 	  } | |
| 	  if (bits_set == 1) | |
| 	  { | |
| 	    set->next[chr] = find_found(found_set, | |
| 					new_set->table_offset, | |
| 					new_set->found_offset); | |
| 	    free_last_set(&sets); | |
| 	  } | |
| 	  else | |
| 	    set->next[chr] = find_set(&sets,new_set); | |
| 	} | |
| 	else | |
| 	  set->next[chr] = find_set(&sets,new_set); | |
|       } | |
|     } | |
|   } | |
| 
 | |
| 	/* Alloc replace structure for the replace-state-machine */ | |
| 
 | |
|   if ((replace=(REPLACE*) my_malloc(sizeof(REPLACE)*(sets.count)+ | |
| 				    sizeof(REPLACE_STRING)*(found_sets+1)+ | |
| 				    sizeof(char *)*count+result_len, | |
| 				    MYF(MY_WME | MY_ZEROFILL)))) | |
|   { | |
|     rep_str=(REPLACE_STRING*) (replace+sets.count); | |
|     to_array=(char **) (rep_str+found_sets+1); | |
|     to_pos=(char *) (to_array+count); | |
|     for (i=0 ; i < count ; i++) | |
|     { | |
|       to_array[i]=to_pos; | |
|       to_pos=strmov(to_pos,to[i])+1; | |
|     } | |
|     rep_str[0].found=1; | |
|     rep_str[0].replace_string=0; | |
|     for (i=1 ; i <= found_sets ; i++) | |
|     { | |
|       pos=from[found_set[i-1].table_offset]; | |
|       rep_str[i].found= (my_bool) (!memcmp(pos,"\\^",3) ? 2 : 1); | |
|       rep_str[i].replace_string=to_array[found_set[i-1].table_offset]; | |
|       rep_str[i].to_offset=found_set[i-1].found_offset-start_at_word(pos); | |
|       rep_str[i].from_offset=found_set[i-1].found_offset-replace_len(pos)+ | |
| 	end_of_word(pos); | |
|     } | |
|     for (i=0 ; i < sets.count ; i++) | |
|     { | |
|       for (j=0 ; j < 256 ; j++) | |
| 	if (sets.set[i].next[j] >= 0) | |
| 	  replace[i].next[j]=replace+sets.set[i].next[j]; | |
| 	else | |
| 	  replace[i].next[j]=(REPLACE*) (rep_str+(-sets.set[i].next[j]-1)); | |
|     } | |
|   } | |
|   my_free((uchar*) follow,MYF(0)); | |
|   free_sets(&sets); | |
|   my_free((uchar*) found_set,MYF(0)); | |
|   DBUG_PRINT("exit",("Replace table has %d states",sets.count)); | |
|   DBUG_RETURN(replace); | |
| } | |
| 
 | |
| 
 | |
| static int init_sets(REP_SETS *sets,uint states) | |
| { | |
|   bzero((char*) sets,sizeof(*sets)); | |
|   sets->size_of_bits=((states+7)/8); | |
|   if (!(sets->set_buffer=(REP_SET*) my_malloc(sizeof(REP_SET)*SET_MALLOC_HUNC, | |
| 					      MYF(MY_WME)))) | |
|     return 1; | |
|   if (!(sets->bit_buffer=(uint*) my_malloc(sizeof(uint)*sets->size_of_bits* | |
| 					   SET_MALLOC_HUNC,MYF(MY_WME)))) | |
|   { | |
|     my_free((uchar*) sets->set,MYF(0)); | |
|     return 1; | |
|   } | |
|   return 0; | |
| } | |
| 
 | |
| 	/* Make help sets invisible for nicer codeing */ | |
| 
 | |
| static void make_sets_invisible(REP_SETS *sets) | |
| { | |
|   sets->invisible=sets->count; | |
|   sets->set+=sets->count; | |
|   sets->count=0; | |
| } | |
| 
 | |
| static REP_SET *make_new_set(REP_SETS *sets) | |
| { | |
|   uint i,count,*bit_buffer; | |
|   REP_SET *set; | |
|   if (sets->extra) | |
|   { | |
|     sets->extra--; | |
|     set=sets->set+ sets->count++; | |
|     bzero((char*) set->bits,sizeof(uint)*sets->size_of_bits); | |
|     bzero((char*) &set->next[0],sizeof(set->next[0])*LAST_CHAR_CODE); | |
|     set->found_offset=0; | |
|     set->found_len=0; | |
|     set->table_offset= (uint) ~0; | |
|     set->size_of_bits=sets->size_of_bits; | |
|     return set; | |
|   } | |
|   count=sets->count+sets->invisible+SET_MALLOC_HUNC; | |
|   if (!(set=(REP_SET*) my_realloc((uchar*) sets->set_buffer, | |
| 				   sizeof(REP_SET)*count, | |
| 				  MYF(MY_WME)))) | |
|     return 0; | |
|   sets->set_buffer=set; | |
|   sets->set=set+sets->invisible; | |
|   if (!(bit_buffer=(uint*) my_realloc((uchar*) sets->bit_buffer, | |
| 				      (sizeof(uint)*sets->size_of_bits)*count, | |
| 				      MYF(MY_WME)))) | |
|     return 0; | |
|   sets->bit_buffer=bit_buffer; | |
|   for (i=0 ; i < count ; i++) | |
|   { | |
|     sets->set_buffer[i].bits=bit_buffer; | |
|     bit_buffer+=sets->size_of_bits; | |
|   } | |
|   sets->extra=SET_MALLOC_HUNC; | |
|   return make_new_set(sets); | |
| } | |
| 
 | |
| static void free_last_set(REP_SETS *sets) | |
| { | |
|   sets->count--; | |
|   sets->extra++; | |
|   return; | |
| } | |
| 
 | |
| static void free_sets(REP_SETS *sets) | |
| { | |
|   my_free((uchar*)sets->set_buffer,MYF(0)); | |
|   my_free((uchar*)sets->bit_buffer,MYF(0)); | |
|   return; | |
| } | |
| 
 | |
| static void internal_set_bit(REP_SET *set, uint bit) | |
| { | |
|   set->bits[bit / WORD_BIT] |= 1 << (bit % WORD_BIT); | |
|   return; | |
| } | |
| 
 | |
| static void internal_clear_bit(REP_SET *set, uint bit) | |
| { | |
|   set->bits[bit / WORD_BIT] &= ~ (1 << (bit % WORD_BIT)); | |
|   return; | |
| } | |
| 
 | |
| 
 | |
| static void or_bits(REP_SET *to,REP_SET *from) | |
| { | |
|   reg1 uint i; | |
|   for (i=0 ; i < to->size_of_bits ; i++) | |
|     to->bits[i]|=from->bits[i]; | |
|   return; | |
| } | |
| 
 | |
| static void copy_bits(REP_SET *to,REP_SET *from) | |
| { | |
|   memcpy((uchar*) to->bits,(uchar*) from->bits, | |
| 	 (size_t) (sizeof(uint) * to->size_of_bits)); | |
| } | |
| 
 | |
| static int cmp_bits(REP_SET *set1,REP_SET *set2) | |
| { | |
|   return memcmp(set1->bits, set2->bits, | |
|                 sizeof(uint) * set1->size_of_bits); | |
| } | |
| 
 | |
| 
 | |
| 	/* Get next set bit from set. */ | |
| 
 | |
| static int get_next_bit(REP_SET *set,uint lastpos) | |
| { | |
|   uint pos,*start,*end,bits; | |
| 
 | |
|   start=set->bits+ ((lastpos+1) / WORD_BIT); | |
|   end=set->bits + set->size_of_bits; | |
|   bits=start[0] & ~((1 << ((lastpos+1) % WORD_BIT)) -1); | |
| 
 | |
|   while (! bits && ++start < end) | |
|     bits=start[0]; | |
|   if (!bits) | |
|     return 0; | |
|   pos=(uint) (start-set->bits)*WORD_BIT; | |
|   while (! (bits & 1)) | |
|   { | |
|     bits>>=1; | |
|     pos++; | |
|   } | |
|   return pos; | |
| } | |
| 
 | |
| 	/* find if there is a same set in sets. If there is, use it and | |
| 	   free given set, else put in given set in sets and return it's | |
| 	   position */ | |
| 
 | |
| static short find_set(REP_SETS *sets,REP_SET *find) | |
| { | |
|   uint i; | |
|   for (i=0 ; i < sets->count-1 ; i++) | |
|   { | |
|     if (!cmp_bits(sets->set+i,find)) | |
|     { | |
|       free_last_set(sets); | |
|       return (short) i; | |
|     } | |
|   } | |
|   return (short) i;			/* return new postion */ | |
| } | |
| 
 | |
| 
 | |
| /* | |
|   find if there is a found_set with same table_offset & found_offset | |
|   If there is return offset to it, else add new offset and return pos. | |
|   Pos returned is -offset-2 in found_set_structure because it's is | |
|   saved in set->next and set->next[] >= 0 points to next set and | |
|   set->next[] == -1 is reserved for end without replaces. | |
| */ | |
| 
 | |
| static short find_found(FOUND_SET *found_set,uint table_offset, | |
|                         int found_offset) | |
| { | |
|   int i; | |
|   for (i=0 ; (uint) i < found_sets ; i++) | |
|     if (found_set[i].table_offset == table_offset && | |
| 	found_set[i].found_offset == found_offset) | |
|       return (short) (-i-2); | |
|   found_set[i].table_offset=table_offset; | |
|   found_set[i].found_offset=found_offset; | |
|   found_sets++; | |
|   return (short) (-i-2);			/* return new postion */ | |
| } | |
| 
 | |
| 	/* Return 1 if regexp starts with \b or ends with \b*/ | |
| 
 | |
| static uint start_at_word(char * pos) | |
| { | |
|   return (((!memcmp(pos,"\\b",2) && pos[2]) || !memcmp(pos,"\\^",2)) ? 1 : 0); | |
| } | |
| 
 | |
| static uint end_of_word(char * pos) | |
| { | |
|   char * end=strend(pos); | |
|   return ((end > pos+2 && !memcmp(end-2,"\\b",2)) || | |
| 	  (end >= pos+2 && !memcmp(end-2,"\\$",2))) ? | |
| 	    1 : 0; | |
| } | |
| 
 | |
| 
 | |
| static uint replace_len(char * str) | |
| { | |
|   uint len=0; | |
|   while (*str) | |
|   { | |
|     if (str[0] == '\\' && str[1]) | |
|       str++; | |
|     str++; | |
|     len++; | |
|   } | |
|   return len; | |
| } | |
| 
 | |
| 
 | |
| 	/* The actual loop */ | |
| 
 | |
| static uint replace_strings(REPLACE *rep, char **start, uint *max_length, | |
|                             char *from) | |
| { | |
|   reg1 REPLACE *rep_pos; | |
|   reg2 REPLACE_STRING *rep_str; | |
|   char *to, *end, *pos, *new; | |
| 
 | |
|   end=(to= *start) + *max_length-1; | |
|   rep_pos=rep+1; | |
|   for(;;) | |
|   { | |
|     while (!rep_pos->found) | |
|     { | |
|       rep_pos= rep_pos->next[(uchar) *from]; | |
|       if (to == end) | |
|       { | |
| 	(*max_length)+=8192; | |
| 	if (!(new=my_realloc(*start,*max_length,MYF(MY_WME)))) | |
| 	  return (uint) -1; | |
| 	to=new+(to - *start); | |
| 	end=(*start=new)+ *max_length-1; | |
|       } | |
|       *to++= *from++; | |
|     } | |
|     if (!(rep_str = ((REPLACE_STRING*) rep_pos))->replace_string) | |
|       return (uint) (to - *start)-1; | |
|     updated=1;			/* Some char * is replaced */ | |
|     to-=rep_str->to_offset; | |
|     for (pos=rep_str->replace_string; *pos ; pos++) | |
|     { | |
|       if (to == end) | |
|       { | |
| 	(*max_length)*=2; | |
| 	if (!(new=my_realloc(*start,*max_length,MYF(MY_WME)))) | |
| 	  return (uint) -1; | |
| 	to=new+(to - *start); | |
| 	end=(*start=new)+ *max_length-1; | |
|       } | |
|       *to++= *pos; | |
|     } | |
|     if (!*(from-=rep_str->from_offset) && rep_pos->found != 2) | |
|       return (uint) (to - *start); | |
|     rep_pos=rep; | |
|   } | |
| } | |
| 
 | |
| static char *buffer;		/* The buffer itself, grown as needed. */ | |
| static int bufbytes;		/* Number of bytes in the buffer. */ | |
| static int bufread,my_eof;		/* Number of bytes to get with each read(). */ | |
| static uint bufalloc; | |
| static char *out_buff; | |
| static uint out_length; | |
| 
 | |
| static int initialize_buffer() | |
| { | |
|   bufread = 8192; | |
|   bufalloc = bufread + bufread / 2; | |
|   if (!(buffer = my_malloc(bufalloc+1,MYF(MY_WME)))) | |
|     return 1; | |
|   bufbytes=my_eof=0; | |
|   out_length=bufread; | |
|   if (!(out_buff=my_malloc(out_length,MYF(MY_WME)))) | |
|     return(1); | |
|   return 0; | |
| } | |
| 
 | |
| static void reset_buffer() | |
| { | |
|   bufbytes=my_eof=0; | |
| } | |
| 
 | |
| static void free_buffer() | |
| { | |
|   my_free(buffer,MYF(MY_WME)); | |
|   my_free(out_buff,MYF(MY_WME)); | |
| } | |
| 
 | |
| 
 | |
| /* | |
|   Fill the buffer retaining the last n bytes at the beginning of the | |
|   newly filled buffer (for backward context).  Returns the number of new | |
|   bytes read from disk. | |
| */ | |
| 
 | |
| static int fill_buffer_retaining(fd,n) | |
| File fd; | |
| int n; | |
| { | |
|   int i; | |
| 
 | |
|   /* See if we need to grow the buffer. */ | |
|   if ((int) bufalloc - n <= bufread) | |
|   { | |
|     while ((int) bufalloc - n <= bufread) | |
|     { | |
|       bufalloc *= 2; | |
|       bufread *= 2; | |
|     } | |
|     buffer = my_realloc(buffer, bufalloc+1, MYF(MY_WME)); | |
|     if (! buffer) | |
|       return(-1); | |
|   } | |
| 
 | |
|   /* Shift stuff down. */ | |
|   bmove(buffer,buffer+bufbytes-n,(uint) n); | |
|   bufbytes = n; | |
| 
 | |
|   if (my_eof) | |
|     return 0; | |
| 
 | |
|   /* Read in new stuff. */ | |
|   if ((i=(int) my_read(fd, (uchar*) buffer + bufbytes, | |
|                        (size_t) bufread, MYF(MY_WME))) < 0) | |
|     return -1; | |
| 
 | |
|   /* Kludge to pretend every nonempty file ends with a newline. */ | |
|   if (i == 0 && bufbytes > 0 && buffer[bufbytes - 1] != '\n') | |
|   { | |
|     my_eof = i = 1; | |
|     buffer[bufbytes] = '\n'; | |
|   } | |
| 
 | |
|   bufbytes += i; | |
|   return i; | |
| } | |
| 
 | |
| 	/* Return 0 if convert is ok */ | |
| 	/* Global variable update is set if something was changed */ | |
| 
 | |
| static int convert_pipe(rep,in,out) | |
| REPLACE *rep; | |
| FILE *in,*out; | |
| { | |
|   int retain,error; | |
|   uint length; | |
|   char save_char,*end_of_line,*start_of_line; | |
|   DBUG_ENTER("convert_pipe"); | |
| 
 | |
|   updated=retain=0; | |
|   reset_buffer(); | |
| 
 | |
|   while ((error=fill_buffer_retaining(fileno(in),retain)) > 0) | |
|   { | |
|     end_of_line=buffer ; | |
|     buffer[bufbytes]=0;			/* Sentinel  */ | |
|     for (;;) | |
|     { | |
|       start_of_line=end_of_line; | |
|       while (end_of_line[0] != '\n' && end_of_line[0]) | |
| 	end_of_line++; | |
|       if (end_of_line == buffer+bufbytes) | |
|       { | |
| 	retain= (int) (end_of_line - start_of_line); | |
| 	break;				/* No end of line, read more */ | |
|       } | |
|       save_char=end_of_line[0]; | |
|       end_of_line[0]=0; | |
|       end_of_line++; | |
|       if ((length=replace_strings(rep,&out_buff,&out_length,start_of_line)) == | |
| 	  (uint) -1) | |
| 	return 1; | |
|       if (!my_eof) | |
| 	out_buff[length++]=save_char;	/* Don't write added newline */ | |
|       if (my_fwrite(out, (uchar*) out_buff, length, MYF(MY_WME | MY_NABP))) | |
| 	DBUG_RETURN(1); | |
|     } | |
|   } | |
|   DBUG_RETURN(error); | |
| } | |
| 
 | |
| 
 | |
| static int convert_file(REPLACE *rep, char * name) | |
| { | |
|   int error; | |
|   FILE *in,*out; | |
|   char dir_buff[FN_REFLEN], tempname[FN_REFLEN], *org_name = name; | |
| #ifdef HAVE_READLINK | |
|   char link_name[FN_REFLEN]; | |
| #endif | |
|   File temp_file; | |
|   size_t dir_buff_length; | |
|   DBUG_ENTER("convert_file"); | |
| 
 | |
|   /* check if name is a symlink */ | |
| #ifdef HAVE_READLINK   | |
|   org_name= (!my_disable_symlinks &&  | |
|              !my_readlink(link_name, name, MYF(0))) ? link_name : name; | |
| #endif | |
|   if (!(in= my_fopen(org_name,O_RDONLY,MYF(MY_WME)))) | |
|     DBUG_RETURN(1); | |
|   dirname_part(dir_buff, org_name, &dir_buff_length); | |
|   if ((temp_file= create_temp_file(tempname, dir_buff, "PR", O_WRONLY, | |
|                                    MYF(MY_WME))) < 0) | |
|   { | |
|     my_fclose(in,MYF(0)); | |
|     DBUG_RETURN(1); | |
|   }     | |
|   if (!(out= my_fdopen(temp_file, tempname, O_WRONLY, MYF(MY_WME)))) | |
|   { | |
|     my_fclose(in,MYF(0)); | |
|     DBUG_RETURN(1); | |
|   } | |
| 
 | |
|   error=convert_pipe(rep,in,out); | |
|   my_fclose(in,MYF(0)); my_fclose(out,MYF(0)); | |
| 
 | |
|   if (updated && ! error) | |
|     my_redel(org_name,tempname,MYF(MY_WME | MY_LINK_WARNING)); | |
|   else | |
|     my_delete(tempname,MYF(MY_WME)); | |
|   if (!silent && ! error) | |
|   { | |
|     if (updated) | |
|       printf("%s converted\n",name); | |
|     else if (verbose) | |
|       printf("%s left unchanged\n",name); | |
|   } | |
|   DBUG_RETURN(error); | |
| }
 |