开发者

how to find duplicate string in an array of strings

开发者 https://www.devze.com 2023-04-12 18:44 出处:网络
I have an array of string from which i have to find duplicate string and then remove that duplicate string like i have string

I have an array of strings from which I have to find the duplicate strings and then remove them. For example, I have the string

 char aa[50]="Amit Hanish Mahesh Amit"

Here "Amit" is a duplicate, and I have to remove it from the string.

#include <stdio.h>
#include <string.h>

/*
 * Splits a space-separated sentence into words, keeps only the first
 * occurrence of every word, and prints the surviving words in their
 * original order, separated by single spaces.
 */
int main(void)
{
  /* A MAX_LEN-byte buffer holds at most MAX_LEN - 1 characters, which can
   * form at most MAX_LEN / 2 one-letter words, so y can never overflow. */
  enum { MAX_LEN = 100, MAX_WORDS = MAX_LEN / 2 };

  char x[MAX_LEN] = "Amit Hanish Mahesh Amit";
  char y[MAX_WORDS][MAX_LEN];
  int count = 0; /* number of complete words stored in y */
  int j = 0;     /* write position inside the word being built */

  /* Split x on spaces. The terminating '\0' is treated as one last
   * delimiter so that the final word is closed as well. */
  for (int i = 0; ; i++) {
    if (x[i] != ' ' && x[i] != '\0') {
      y[count][j++] = x[i];
      continue;
    }
    if (j > 0) { /* ignore empty words produced by repeated spaces */
      y[count][j] = '\0';
      count++;
      j = 0;
    }
    if (x[i] == '\0') break;
  }

  /* Compact y in place: y[0..kept) always holds the unique words so far. */
  int kept = 0;
  for (int w = 0; w < count; w++) {
    int seen = 0;
    for (int t = 0; t < kept; t++) {
      if (strcmp(y[w], y[t]) == 0) {
        seen = 1;
        break;
      }
    }
    if (!seen) {
      if (kept != w) strcpy(y[kept], y[w]); /* distinct rows, no overlap */
      kept++;
    }
  }

  for (int w = 0; w < kept; w++) {
    printf("%s%s", w ? " " : "", y[w]);
  }
  printf("\n");
  return 0;
}

I have written a small routine for it, but it does not seem to work. Does anyone have any suggestions about where I am going wrong?


The other answers you got work fine for a small number of strings (your example code only has 4). But if you're comparing a large number, this will be quite slow, since you're doing n^2 comparisons. I'd suggest first splitting the string into an array of strings, then sorting the array using qsort(). In a sorted array all duplicates are guaranteed to be adjacent. This reduces the time from n^2 to n log n -- the time required to sort.


I would split the string array using strtok (see the man page).

So I would have something like this

char x[100]="Amit Hanish Mahesh Amit";

/* Preparing the result string */
size_t sz_result = sizeof(char) * (strlen(x) + 1);
char* result = (char*) malloc( sz_result );
result[0] = '\0';

/* Parsing the string from one element to the other */
char* elm = strtok(x, " ");
while( (elm = strtok(NULL, " ")) != NULL )
{
  ...

This gives you each element of the string in turn, so you can check whether it is unique.

Then I would use something like a hashmap (you can use the one from the glib) or I would put the read string element in a new string only if it is not already in.

Here is an example for the second solution:

  ...
  /* Is the element already in the result string? */
  if ( strstr(result, elm) == NULL )
  {
    /* Then we should add it */
    snprintf( result, sz_result - 1, "%s %s", result, elm );
  }
}

In the end, if you want x to be modified, you simply copy result into x:

strncpy( x, result, 99 );

Here is a sample code (not optimised, not using the strn* primitives, etc.)

#include <unistd.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>


/*
 * Returns 1 when `word` appears in the space-separated `list` as a complete
 * word, 0 otherwise. A plain strstr() is not enough here: it would report
 * "Ami" as present in "Amit Hanish".
 */
static int contains_word(const char *list, const char *word)
{
  const size_t wlen = strlen(word);
  if (wlen == 0) return 0;

  for (const char *p = list; (p = strstr(p, word)) != NULL; p++) {
    const int starts_word = (p == list) || (p[-1] == ' ');
    const int ends_word = (p[wlen] == '\0') || (p[wlen] == ' ');
    if (starts_word && ends_word) return 1;
  }
  return 0;
}

/*
 * Removes repeated words from a space-separated string, keeping the first
 * occurrence of each word, and prints the result.
 */
int main()
{
  char x[100]="Amit Hanish Mahesh Amit";

  /* The result is never longer than the input, so strlen(x) + 1 bytes are
   * enough. The length must be taken before strtok() starts writing '\0'
   * into x. */
  char *result = malloc(strlen(x) + 1);
  if (result == NULL) {
    perror("malloc");
    return EXIT_FAILURE;
  }
  result[0] = '\0';

  /* Walk the words one by one and append only those not seen before. */
  for (char *elm = strtok(x, " "); elm != NULL; elm = strtok(NULL, " ")) {
    if (contains_word(result, elm)) continue;
    if (result[0] != '\0') strcat(result, " ");
    strcat(result, elm);
  }

  strcpy( x, result );
  free(result);

  fprintf( stdout, "Result: %s\n", x );
  return 0;
}


To remove duplicates from an array without preserving the order of elements:

  1. sort the array
  2. copy unique elements to the beginning of the array
  3. remove the tail with duplicate elements
// Sorts the array, moves one representative of every run of equal items to
// the front, then hands the leftover tail to StringArray_remove() and
// returns whatever that call returns. Returns 0 for a NULL array or an
// array without items.
int remove_duplicates(StringArray array) {
  if (! array or ! array->items) return 0;

  StringArray_sort(array);

  String out = array->items;                     // next slot to write
  String const end = array->items + array->size; // one past the last item
  String in = array->items;                      // next item to read

  while (in != end) {
    String_copy(out, in);

    // advance `in` past every item that compares equal to the one just kept
    String const kept = in;
    do {
      ++in;
    } while (in != end and String_cmp(kept, in) == 0);

    ++out;
  }

  // everything in [out, end) is now a leftover duplicate
  return StringArray_remove(array, out, end);
}

Example

// Demo driver: splits a sentence into words, dumps the array, removes the
// duplicates, and dumps it again.
int main() {
  char text[] = "Mahesh Amit  Hanish Amit";
  StringArray words = split(text, sizeof(text));

  // state before duplicates are removed
  StringArray_dump(words, "<");

  const int status = remove_duplicates(words);
  if (status < 0) {
    perror("error remove_duplicates(), OS error if any");
  }

  // state afterwards
  StringArray_dump(words, ">");

  StringArray_destroy(words);
  return 0;
}

Where split() is:

// Splits the buffer text[0..size) into whitespace-separated words (space,
// tab, newline) and returns them in a newly created StringArray.
//
// `size` counts the terminating '\0', i.e. text[size-1] must be '\0'.
// Returns NULL if text is NULL, size is 0, the buffer is not terminated,
// the array cannot be created, or an append fails.
StringArray split(const char* text, size_t size) {
  // size > 0 is checked before indexing: with size == 0 the expression
  // text[size-1] would wrap around and read far outside the buffer.
  if (! (text and size > 0 and text[size-1] == '\0')) return NULL;

  StringArray array = StringArray_create();
  if (! array) return NULL;

  const char* const end = text + size;
  const char* p = text;
  while (p != end) {
    // length of the run of non-whitespace characters starting at p;
    // strcspn() also stops at '\0', so the scan never leaves the buffer
    const size_t n = strcspn(p, " \t\n");

    if (n > 0) { // n == 0 means p sits on a delimiter: nothing to append
      // append characters in range [p, p+n) as a string to the array
      const String string = String_create(p, n);
      if (StringArray_append(array, string) < 0) {
        String_destroy(string);
        StringArray_destroy(array);
        return NULL;
      }
      // the temporary is released right after a successful append too
      // NOTE(review): presumably the array keeps its own copy - confirm
      String_destroy(string);
    }

    // step over the word and the single delimiter (or '\0') that ended it;
    // since text[size-1] == '\0', p lands exactly on `end` at the latest
    p += n + 1;
  }
  return array;
}

Output

Mahesh<Amit<Hanish<Amit<
Amit>Hanish>Mahesh>

Full source code


I'm pretty sure, that the following line is not intended (assignment, not comparison)

 if (end = t) {

See what happens if you write == instead, and come back if you still have problems.

Hint: Always code blanks around operators, so expressions are easier to read.


It's always fun to try to solve this kind of simple problems in C as exercise. Here's my take.

#include <stdio.h>
#include <string.h>
#include <stdlib.h>

/*
 * Looks for the first position in `haystack` whose leading `needle_len`
 * characters compare equal (per strncmp) to those of `needle`.
 * Returns a pointer to that position, or NULL when there is none.
 */
char* strstrn(const char *haystack, const char *needle, size_t needle_len)
{
    const char first = *needle;

    /* Visit each occurrence of the needle's first character in turn. */
    for (const char *hit = strchr(haystack, first);
         hit != NULL;
         hit = strchr(hit + 1, first))
    {
        if (strncmp(hit, needle, needle_len) != 0)
            continue;
        return (char *) hit;
    }
    return NULL;
}

/*
 * Finds a run of `dup_len` characters in `str` that already occurred
 * earlier in the string.
 *
 * str      NUL-terminated string to search
 * len      length of str, not counting the terminator
 * dup_len  length of the repeated run to look for
 *
 * Returns a pointer to the later occurrence, or NULL if no run repeats.
 * The search for the repeat starts one character after the first
 * occurrence, so the two occurrences may overlap.
 * Also returns NULL when dup_len is 0 or larger than len.
 */
char* find_duplicate(const char* str, size_t len, size_t dup_len)
{
    /* dup_len > len would make the unsigned `len - dup_len` wrap to a huge
     * value and walk off the string; dup_len == 0 would "match" anywhere. */
    if(dup_len == 0 || dup_len > len)
        return NULL;

    for(size_t i = 0; i + dup_len < len; i++)
    {
        char* r = strstrn(str + i + 1, str + i, dup_len);
        if(r) return r;
    }
    return NULL;
}

/*
 * Usage: prog haystack dup_size
 *
 * Repeatedly removes from `haystack` the later occurrence of any run of
 * `dup_size` characters that appeared earlier, then prints what is left.
 * Exits with status 1 when the arguments are missing or dup_size is not a
 * positive integer.
 */
int main(int argc, char** argv)
{
    if(argc < 3)
    {
        fprintf(stderr, "Usage: %s haystack dup_size\n", argv[0]);
        return 1;
    }
    char* haystack = argv[1];

    /* strtol() lets us reject junk, zero and negative sizes; atoi() would
     * silently turn them into 0 or a huge size_t. */
    char* end = NULL;
    const long parsed = strtol(argv[2], &end, 10);
    if(end == argv[2] || *end != '\0' || parsed <= 0)
    {
        fprintf(stderr, "Usage: %s haystack dup_size\n", argv[0]);
        return 1;
    }
    const size_t len = (size_t) parsed;

    char* r;
    while((r = find_duplicate(haystack, strlen(haystack), len)))
    {
        /* Source and destination overlap, so strcpy() is undefined here;
         * memmove() is the call that permits it. +1 moves the '\0' too. */
        memmove(r, r + len, strlen(r + len) + 1);
    }
    puts(haystack);
    return 0;
}


/* 
 * C Program to Find the Frequency of  Every Word in a 
 * given String
 */
#include <stdio.h>
#include <string.h>

/*
 * Reads one line from standard input, splits it into words on spaces,
 * commas and full stops, and prints how many times each distinct word
 * occurs, in order of first appearance.
 * Returns 1 if no line could be read, 0 otherwise.
 */
int main(void)
{
    /* A 100-byte line holds at most 99 characters, hence at most 50 words. */
    enum { MAX_LINE = 100, MAX_WORDS = 50 };
    static const char delims[] = " ,.";

    char str[MAX_LINE];
    char words[MAX_WORDS][MAX_LINE]; /* distinct words seen so far */
    int counts[MAX_WORDS] = {0};     /* counts[i] belongs to words[i] */
    int unique = 0;

    printf("Enter the string\n");
    /* The width keeps scanf from writing past the end of str. */
    if (scanf(" %99[^\n]", str) != 1)
        return 1;
    /* Printed before tokenising, because strtok() cuts str into pieces. */
    printf("string length is %zu\n", strlen(str));

    for (char *tok = strtok(str, delims); tok != NULL; tok = strtok(NULL, delims))
    {
        /* Look the word up among those already recorded. */
        int i = 0;
        while (i < unique && strcmp(words[i], tok) != 0)
            i++;

        if (i == unique)
        {
            if (unique == MAX_WORDS)
                break; /* table full; cannot happen with a 99-char line */
            strcpy(words[unique], tok);
            unique++;
        }
        counts[i]++;
    }

    for (int i = 0; i < unique; i++)
        printf("%s -> %d times\n", words[i], counts[i]);

    return 0;
}
0

精彩评论

暂无评论...
验证码 换一张
取 消

关注公众号