C: implementing str_replace to replace all occurrences of substring
Last time, I showed how to replace PHP's str_replace in C.
//z 2013-10-19 18:04:32 IS2120@BG57IV3 T2080877905.K.F2560461818[T2,L61,R2,V7]
The previous code was only replacing one occurrence of substr which might be sufficient in most cases... but will not do the job when the pattern appears more than once within the original string.
This new piece of code will replace ALL occurrences of substring by the replacement pattern.
The following bit of code is not fully optimized: for instance, we could first count how many times the pattern is found, do a single big allocation, and then enter another loop to replace all the patterns. For now, though, this is what I came up with.
//z 2013-10-19 18:04:32 IS2120@BG57IV3 T2080877905.K.F2560461818[T2,L61,R2,V7]
Maybe at a later stage I will come up with a slightly more optimized version.
- /**
- * vim: tabstop=2:shiftwidth=2:softtabstop=2:expandtab
- *
- * str_replace.c implements a str_replace PHP like function
-
* Copyright (C) 2010 chantra <chantra__A__debuntu__D__org>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
-
* of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-
* GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
-
*
- * gcc -o str_replace_all str_replace_all.c
- */
- #include <stdio.h>
-
#include <string.h>
- #include <stdlib.h>
/**
 * Print a one-line usage summary to stderr.
 *
 * @param p  program name to show in the message (typically argv[0]).
 */
void usage(char *p){
  /* Terminate the line so the shell prompt does not run into the message. */
  fprintf(stderr, "USAGE: %s string tok replacement\n", p );
}
/**
 * Replace every occurrence of substr in string with replacement.
 *
 * Matches are found left to right and do not overlap. Text produced by a
 * replacement is never rescanned, so a replacement that itself contains
 * substr (e.g. "bar" -> "bar2") terminates instead of looping forever.
 *
 * @param string       input text; if NULL, NULL is returned.
 * @param substr       pattern to look for; if NULL or empty, a plain copy
 *                     of string is returned.
 * @param replacement  text substituted for each match; if NULL, a plain
 *                     copy of string is returned.
 * @return a newly allocated string the caller must free(), or NULL when
 *         string is NULL or memory allocation fails.
 */
char *
str_replace ( const char *string, const char *substr, const char *replacement ){
  char *tok = NULL;
  char *newstr = NULL;
  char *oldstr = NULL;
  size_t cur_len;     /* length of the current working string */
  size_t sub_len;
  size_t rep_len;
  size_t prefix_len;  /* bytes that precede the current match */
  size_t scan = 0;    /* offset in newstr where the next search starts */

  if ( string == NULL ) return NULL;

  /* Duplicate by hand: strdup() is POSIX, not ISO C. */
  cur_len = strlen ( string );
  newstr = malloc ( cur_len + 1 );
  if ( newstr == NULL ) return NULL;
  memcpy ( newstr, string, cur_len + 1 );

  /* Nothing to replace; an empty pattern would match forever, so treat it
   * the same way and let the caller handle the unchanged copy. */
  if ( substr == NULL || replacement == NULL || *substr == '\0' ) return newstr;

  sub_len = strlen ( substr );
  rep_len = strlen ( replacement );

  while ( (tok = strstr ( newstr + scan, substr )) != NULL ){
    oldstr = newstr;
    prefix_len = (size_t)( tok - oldstr );

    newstr = malloc ( cur_len - sub_len + rep_len + 1 );
    /* failed to alloc mem, free old string and return NULL */
    if ( newstr == NULL ){
      free ( oldstr );
      return NULL;
    }

    memcpy ( newstr, oldstr, prefix_len );
    memcpy ( newstr + prefix_len, replacement, rep_len );
    /* Copy the tail together with its terminating NUL. */
    memcpy ( newstr + prefix_len + rep_len, tok + sub_len,
             cur_len - prefix_len - sub_len + 1 );
    free ( oldstr );

    cur_len = cur_len - sub_len + rep_len;
    /* Resume right after the inserted text so it is never matched again. */
    scan = prefix_len + rep_len;
  }
  return newstr;
}
/**
 * Command-line driver: str_replace_all STRING TOK REPLACEMENT
 *
 * Prints the three arguments and the result of replacing TOK with
 * REPLACEMENT in STRING, one item per line.
 *
 * @return 0 on success, 1 on wrong argument count or when str_replace()
 *         returns NULL.
 */
int main( int argc, char **argv ){
  char *ns = NULL;

  if( argc != 4 ) {
    usage(argv[0]);
    return 1;
  }

  ns = str_replace( argv[1], argv[2], argv[3] );
  /* Passing NULL to %s is undefined behavior, so bail out explicitly. */
  if( ns == NULL ) {
    fprintf( stderr, "%s: str_replace failed\n", argv[0] );
    return 1;
  }

  fprintf( stdout, "Old string: %s\nTok: %s\nReplacement: %s\nNew string: %s\n", argv[1], argv[2], argv[3], ns );
  free(ns);
  return 0;
}
Will output:
$ gcc -o str_replace_all str_replace_all.c
$ ./str_replace_all "(uid=%u/%u)" "%u" chantra
Old string: (uid=%u/%u)
Tok: %u
Replacement: chantra
New string: (uid=chantra/chantra)
yet another version
Tks Landouglas,
Yeah, I forgot this case :s which is really bad.
Your solution is also missing something and will only replace 1 occurrence within the string.
Here is a revised solution:
/**
 * Replace every occurrence of substr in string with replacement.
 *
 * Works in two passes: the non-overlapping matches are counted first so
 * that the result can be built in a single allocation, then the pieces are
 * copied across. Because the input is scanned rather than the output, a
 * replacement that contains substr is never matched again.
 *
 * @param string       input text; if NULL, NULL is returned.
 * @param substr       pattern to look for; if NULL or empty, a plain copy
 *                     of string is returned.
 * @param replacement  text substituted for each match; if NULL, a plain
 *                     copy of string is returned.
 * @return a newly allocated string the caller must free(), or NULL when
 *         string is NULL, the result size would overflow size_t, or memory
 *         allocation fails.
 */
char *
str_replace ( const char *string, const char *substr, const char *replacement ){
  const char *src = NULL;   /* read cursor in string */
  const char *tok = NULL;   /* current match */
  char *newstr = NULL;
  char *dst = NULL;         /* write cursor in newstr */
  size_t src_len;
  size_t sub_len = 0;
  size_t rep_len = 0;
  size_t hits = 0;
  size_t out_len;
  size_t i;

  if ( string == NULL ) return NULL;
  src_len = strlen ( string );

  /* An empty pattern matches at every position and would never advance,
   * so it is treated like "nothing to replace". */
  if ( substr != NULL && replacement != NULL && *substr != '\0' ){
    sub_len = strlen ( substr );
    rep_len = strlen ( replacement );
    for ( tok = strstr ( string, substr ); tok != NULL;
          tok = strstr ( tok + sub_len, substr ) ){
      hits++;
    }
  }

  /* out_len = src_len - hits*sub_len + hits*rep_len, guarded against
   * wrap-around of size_t in the additive part. */
  out_len = src_len - hits * sub_len;
  if ( hits != 0 && rep_len > ( (size_t)-1 - 1 - out_len ) / hits ) return NULL;
  out_len += hits * rep_len;

  newstr = malloc ( out_len + 1 );
  if ( newstr == NULL ) return NULL;

  src = string;
  dst = newstr;
  for ( i = 0; i < hits; i++ ){
    /* Same scan as the counting pass, so a match is always found. */
    tok = strstr ( src, substr );
    memcpy ( dst, src, (size_t)( tok - src ) );
    dst += tok - src;
    memcpy ( dst, replacement, rep_len );
    dst += rep_len;
    /* move the read cursor right after the match just replaced */
    src = tok + sub_len;
  }
  /* Remaining tail, including the terminating NUL. */
  memcpy ( dst, src, src_len - (size_t)( src - string ) + 1 );

  return newstr;
}
// Here is the code for unicode strings!
/*
 * Find the first occurrence of txt2 inside txt1.
 *
 * Returns the zero-based index (counted in wide characters) at which the
 * match starts, or -1 when wcsstr() reports no match.
 */
int mystrstr(wchar_t *txt1,wchar_t *txt2)
{
    wchar_t *match = wcsstr(txt1, txt2);

    if (match == NULL)
        return -1;

    return (match - txt1);
}
/*
 * Replace, in place, every occurrence of txt1 in buff with txt2.
 *
 * Matches are found left to right and do not overlap; inserted text is not
 * rescanned.
 *
 * buff  NUL-terminated wide string, modified in place. The caller must
 *       supply a buffer large enough to hold the generated text; no bounds
 *       checking is possible here.
 * txt1  pattern to search for. An empty pattern is ignored because it
 *       would match at every position without ever advancing.
 * txt2  replacement text; must not point into buff.
 *
 * buff is left untouched when any argument is NULL, when txt1 is empty or
 * does not occur, or when the temporary copy cannot be allocated.
 */
void StringReplace(wchar_t *buff,wchar_t *txt1,wchar_t *txt2)
{
    wchar_t *copy;          /* snapshot of the original contents of buff */
    const wchar_t *src;     /* read cursor in copy */
    const wchar_t *hit;     /* current match in copy */
    wchar_t *dst;           /* write cursor in buff */
    size_t src_len;
    size_t needle_len;
    size_t repl_len;
    size_t prefix;

    if (buff == NULL || txt1 == NULL || txt2 == NULL)
        return;

    needle_len = wcslen(txt1);
    if (needle_len == 0)
        return;

    /* No match: nothing to rewrite, so skip the temporary copy entirely. */
    if (wcsstr(buff, txt1) == NULL)
        return;

    /* The output is written over buff, so read from a private copy. */
    src_len = wcslen(buff);
    copy = malloc((src_len + 1) * sizeof *copy);
    if (copy == NULL)
        return;
    wmemcpy(copy, buff, src_len + 1);

    repl_len = wcslen(txt2);
    src = copy;
    dst = buff;
    while ((hit = wcsstr(src, txt1)) != NULL) {
        prefix = (size_t)(hit - src);
        wmemcpy(dst, src, prefix);
        dst += prefix;
        wmemcpy(dst, txt2, repl_len);
        dst += repl_len;
        src = hit + needle_len;
    }
    /* Remaining tail, including the terminating NUL. */
    wmemcpy(dst, src, wcslen(src) + 1);

    free(copy);
}
//z 2013-10-19 18:04:32 IS2120@BG57IV3 T2080877905.K.F2560461818[T2,L61,R2,V7]
update on your code
FYI, your code works well as long as your replacement string doesn't include the needle value; otherwise the code is trapped in an infinite loop.
ie:
./str_replace_all foobar bar bar2
*should* have returned foobar2 but instead it gets stuck in an infinite loop building foobar2222222222222222222222222...... because the strstr() check in the while() condition starts looking at the beginning of the haystack string (or new haystack) every time, so 'bar' would get replaced with 'bar2' over and over again.
A few simple changes fix the problem:
Line 37 adds a new pointer
Line 42 points strhead to the beginning of your new haystack string
Line 43 changes your strstr() call to use strhead
Line 44 points strhead to the token found from strstr()
Thanks for the code!