开发者

Reading strings in C

开发者 https://www.devze.com 2023-03-30 00:42 出处:网络
If I was using C gets(), and I was reading a string from the user, but I have no idea how big of a buffer I need, and the input could be very large.

If I was using C gets(), and I was reading a string from the user, but I have no idea how big of a buffer I need, and the input could be very large. Is there a way I can determine how large the string the user inputted was, then allocate memory and then put it in the variable? Or at least a way to accept input without knowing how large it is, with a possibility it won't fit in the buffer I've already allocated.


I think you should use an intermediate buffer which is suitably large, and read the string into it with fgets or another function that limits the string length to the buffer size. Once the string has been read, calculate its length, allocate a buffer of exactly that size (plus one byte for the terminating null), and copy the string into the newly allocated buffer. The large intermediate buffer can then be reused for further inputs.

You can do:

fgets (buffer, BUFSIZ, stdin);

or

scanf ("%128[^\n]%*c", buffer);

Here the field width in %128 limits the input to 128 characters (so the buffer must hold at least 129 bytes, including the terminating null), and the [^\n] scanset keeps all the whitespace within the string.

And then calculate the length and allocate new buffer with:

/* Copy the line held in the large intermediate buffer into a block that is
 * exactly large enough for it. */
len = strlen (buffer);
/* sizeof (char) * len + 1: the extra byte holds the terminating null. */
string = malloc (sizeof (char) * len + 1);
strcpy (string, buffer);
.
.
.
free (string);

EDIT

Here is one way i worked out:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Read one line from stdin in fixed-size chunks, join the chunks into a
 * single exactly-sized string, print it, and release all memory.
 *
 * At most MAX_CHUNKS chunks of CHUNK_SIZE - 1 characters are read; any
 * remaining input on the line is left unread in stdin.
 *
 * Returns 0 on success, EXIT_FAILURE if the final string cannot be allocated.
 */
int main (void)
{
  /* CHUNK_SIZE - 1 must match the field width in the scanf format below. */
  enum { CHUNK_SIZE = 16, MAX_CHUNKS = 10 };

  char *buffer[MAX_CHUNKS]; /* temporary chunk buffers */
  char *main_str;           /* the joined string to work with after input */
  size_t total = 0;         /* characters read so far, excluding terminators */
  int count = 0;            /* number of chunk buffers allocated */
  int k, n;

  while (count < MAX_CHUNKS)   /* never write past the end of buffer[] */
  {
    buffer[count] = malloc (CHUNK_SIZE);
    if (buffer[count] == NULL)
      break;                   /* keep whatever was read so far */

    /* scanf stores nothing and leaves n untouched when the scanset matches
     * no characters (empty line or EOF), so give both a defined value. */
    buffer[count][0] = '\0';
    n = 0;
    if (scanf ("%15[^\n]%n", buffer[count], &n) != 1)
      n = 0;

    total += (size_t) n;
    count++;

    /* A chunk shorter than the field width means the newline or EOF was
     * reached; a completely full chunk (15 characters) may have more input
     * following it. */
    if (n < CHUNK_SIZE - 1)
      break;
  }

  /* allocate a buffer of the exact size of the string, plus the terminator */
  main_str = malloc (total + 1);
  if (main_str == NULL)
  {
    for (k = 0; k < count; k++)
      free (buffer[k]);
    return EXIT_FAILURE;
  }

  /* copy the segmented string into the main string and free the chunks */
  main_str[0] = '\0';
  for (k = 0; k < count; k++)
  {
    strcat (main_str, buffer[k]);
    free (buffer[k]);
  }

  /* work with main string */
  printf ("\n%s", main_str);

  /* free main string */
  free (main_str);

  return 0;
}

You need to fix the code to stop crashing in some cases, but this should answer your question.


Not with gets(). Use fgets() instead.

You cannot safely get user input with gets().

You need to use fgets() (or fgetc()) in a loop.


Don't use gets(). Use fgets(), and over approximate how much buffer space you will need.

The advantage of fgets is that if you go over, it will only write that max number of characters, and it won't clobber the memory of another part of your program.

char buff[100];
fgets(buff,100,stdin);

will only read up to 99 characters or until it hits a '\n'. If there's room, it will read the newline into the array.


Allocate your buffer dynamically and use fgets. If you fill the buffer right up then it wasn't big enough so grow it using realloc and then fgets again (but write to the end of the string to maintain what you've already grabbed). Keep doing that until your buffer is larger than the input:

buffer = malloc(bufsize);
do{
    GotStuff = fgets(buffer, bufsize, stdin))
    buffer[bufsize-1] = 0;
    if (GotStuff && (strlen(buffer) >= bufsize-1))
    {
        oldsize = bufsize;
        buffer = realloc(bufsize *= 2);
        GotStuff = fgets( buffer + oldsize, bufsize - oldsize, stdin )
        buffer[bufsize-1] = 0;
    }
} while (GotStuff && (strlen(buffer) >= bufsize-1));


The problem you describe with gets() - having no way of knowing how big the target buffer needs to be to store the input - is exactly why that library call was deprecated in the 1999 standard, and is expected to be gone completely from the next revision; expect most compilers to follow suit relatively quickly. The mayhem caused by that one library function is scarier than the prospect of breaking 40 years' worth of legacy code.

One solution is to read the input piecemeal using fgets() and a fixed-length buffer, then appending that into a dynamically-resizable target buffer. For example:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Number of characters read from the stream per fgets() call. */
#define SIZE 512

/**
 * Read the next line from stream into a freshly allocated buffer.
 *
 * The line is read in chunks of up to SIZE characters and appended to a
 * buffer that grows as needed.  The trailing newline, if any, is stripped.
 *
 * @param stream  stream to read from
 * @param length  receives the size in bytes of the returned buffer,
 *                including the terminating null (strlen(result) + 1);
 *                set to 0 when NULL is returned
 * @return the line as a null-terminated string that the caller must free(),
 *         or NULL if memory could not be allocated.  If the stream is
 *         already at EOF (or a read error occurs before any data is read)
 *         an empty string is returned.
 */
char *getNextLine(FILE *stream, size_t *length)
{
  char input[SIZE + 1];
  char *output;
  size_t used = 0;        /* characters stored so far, excluding terminator */
  int foundNewline = 0;

  *length = 0;

  /* Start with an empty string so the result is always terminated. */
  output = malloc(1);
  if (output == NULL)
  {
    return NULL;
  }
  output[0] = '\0';

  /*
   * Test foundNewline BEFORE calling fgets: once the newline has been
   * seen, another fgets call would consume the start of the next line.
   */
  while (!foundNewline && fgets(input, sizeof input, stream) != NULL)
  {
    char *newline = strchr(input, '\n');
    char *tmp;
    size_t chunkLen;

    /* Strip the newline if present */
    if (newline != NULL)
    {
      *newline = '\0';
      foundNewline = 1;
    }
    chunkLen = strlen(input);

    /* Extend the output buffer; report failure instead of dropping data */
    tmp = realloc(output, used + chunkLen + 1);
    if (tmp == NULL)
    {
      free(output);
      return NULL;
    }
    output = tmp;

    /* Copy the chunk together with its terminating null */
    memcpy(output + used, input, chunkLen + 1);
    used += chunkLen;
  }

  *length = used + 1;
  return output;
}

The caller will be responsible for freeing the buffer when it's done with the input.


If you're on a Unix platform you should probably use getline() which is made exactly for this kind of thing.

And if your platform doesn't have getline(), here's some public domain code that should let you use it. This post is somewhat long, but that's because the code makes an attempt to actually handle real life errors and situations (and even not-so-real life ones like running out of memory).

It's probably not the most performant version nor the most elegant version possible. It picks characters off one-by-one using fgetc(), and it puts the null terminator at the end of the data every chance it gets as it's reading characters. But, I believe it to be correct even in the face of errors and large and small sets of data. It performs well enough for my purposes.

I'm not particularly fond of the getline() interface, but I use it because it's a standard of sorts.

The following will compile with GCC (MinGW) and under MSVC (as C++ - it uses declarations mixed with statements, which MSVC still doesn't support when compiling as C. Maybe I'll fix that one day).

#define _CRT_SECURE_NO_WARNINGS 1

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <limits.h>
#include <sys/types.h>


#if !__GNUC__
#if _WIN64
typedef long long ssize_t;
#else
typedef long ssize_t;
#endif
#endif


#if !defined(SSIZE_MAX)
#define SSIZE_MAX ((ssize_t)(SIZE_MAX/2))
#endif

#if !defined(EOVERFLOW)
#define EOVERFLOW (ERANGE)      /* is there something better to use? */
#endif



ssize_t nx_getdelim(char **lineptr, size_t *n, int delim, FILE *stream);
ssize_t nx_getline(char **lineptr, size_t *n, FILE *stream);




/*
    nx_getdelim_get_realloc_size()

    Growth policy used by the getdelim() helpers: given the current
    buffer size, pick the size the buffer should be reallocated to.

        - sizes up to 32 bytes grow by 32 bytes
        - sizes of 1024 bytes or more grow by 1024 bytes
        - everything in between doubles

    If current_size already exceeds SSIZE_MAX the buffer is not grown
    and current_size is returned unchanged (the caller should treat
    that as an error).
*/
static
size_t nx_getdelim_get_realloc_size( size_t current_size)
{
    static const size_t smallest_step = 32;
    static const size_t largest_step = 1024;
    size_t new_size;

    if (current_size > SSIZE_MAX) {
        // already past the supported maximum: refuse to grow
        return current_size;
    }

    if (current_size <= smallest_step) {
        new_size = current_size + smallest_step;
    }
    else if (current_size >= largest_step) {
        new_size = current_size + largest_step;
    }
    else {
        new_size = current_size * 2;
    }

    return new_size;
}



/*
    nx_getdelim_append()

    Helper for getdelim(): stores ch at index count of the buffer
    *lineptr and writes a null terminator right after it, growing the
    buffer first (one growth step, via realloc) when there is not room
    for both bytes.

    *lineptr may be NULL, in which case *bufsize is ignored and a new
    buffer is allocated.  On return *lineptr / *bufsize describe the
    (possibly reallocated) buffer; they are left untouched if realloc
    fails.

    returns 1 on success, -1 if count exceeds SSIZE_MAX, if realloc
    fails, or if the buffer is still too small after the growth step.
*/
static
int nx_getdelim_append( char** lineptr, size_t* bufsize, size_t count, char ch)
{
    char* buf = NULL;
    size_t capacity = 0;
    size_t needed = 0;

    // contract: the caller always supplies both out-parameters
    assert( lineptr != NULL);
    assert( bufsize != NULL);

    // indexes beyond SSIZE_MAX are not supported
    if (count > (size_t) SSIZE_MAX) {
        return -1;
    }

    buf = *lineptr;
    capacity = (buf != NULL) ? *bufsize : 0;

    // one byte for the character, one for the null terminator
    needed = count + 2;

    if (capacity < needed) {
        capacity = nx_getdelim_get_realloc_size( capacity);
        buf = (char*) realloc( buf, capacity);

        if (buf == NULL) {
            return -1;
        }
    }

    *lineptr = buf;
    *bufsize = capacity;

    // the growth policy may have declined to enlarge the block,
    // so the capacity has to be re-checked before writing
    if (capacity < needed) {
        return -1;
    }

    buf[count] = ch;
    buf[count + 1] = 0;
    return 1;
}


/*
    nx_getdelim()

    A getdelim() function modeled on the Linux/POSIX/GNU
    function of the same name.

    Read data into a dynamically resizable buffer until
    EOF or until a delimiter character is found.  The returned
    data will be null terminated (unless there's an error
    that prevents it).

    params:

        lineptr -   a pointer to a char* allocated by malloc()
                    (actually any pointer that can legitimately be
                    passed to free()).  *lineptr will be updated
                    by getdelim() if the memory block needs to be
                    reallocated to accommodate the input data.

                    *lineptr can be NULL (though lineptr itself cannot),
                    in which case the function will allocate any necessary
                    buffer.

        n -         a pointer to a size_t object that contains the size of
                    the buffer pointed to by *lineptr (if non-NULL).

                    The size of whatever buffer the resulting data is
                    returned in will be passed back in *n.

        delim -     the delimiter character.  The function will stop
                    reading once this character is read from the stream.

                    It will be included in the returned data, and a
                    null terminator character will follow it.

        stream -    A FILE* stream object to read data from.

    Returns:

        The number of characters placed in the returned buffer, including
        the delimiter character, but not including the terminating null.

        If no characters are read and EOF is set (or attempting to read
        from the stream on the first attempt caused the eof indication
        to be set), a null terminator will be written to the buffer and
        0 will be returned.  *lineptr and *n are updated to describe the
        buffer that holds that terminator.

        If an error occurs while reading the stream, a 0 will be returned.
        A null terminator will not necessarily be at the end of the data
        written.

        On the following error conditions, the negative value of the error
        code will be returned:

            EINVAL:     lineptr or n is NULL
            ENOMEM:     out of memory
            EOVERFLOW:  SSIZE_MAX characters written to the buffer before
                        reaching the delimiter
                        (on Windows, EOVERFLOW is mapped to ERANGE)

        The buffer will not necessarily be null terminated in these cases.

    Notes:

        The returned data might include embedded nulls (if they exist
        in the data stream) - in that case, the return value of the
        function is the only way to reliably determine how much data
        was placed in the buffer.

        If the function returns 0 use feof() and/or ferror() to determine
        which case caused the return.

        If EOF is reached after having written one or more characters
        to the buffer, a normal count will be returned (but there will
        be no delimiter character in the buffer).

        If 0 is returned and ferror() returns a non-zero value,
        the data buffer may not be null terminated.

        In other cases where a negative value is returned, the data
        buffer is not necessarily null terminated and there
        is no reliable means of determining what data in the buffer is
        valid.

        The pointer returned in *lineptr and the buffer size
        returned in *n will be valid on error returns unless
        NULL pointers are passed in for one or more of these
        parameters (in which case the return value will be -EINVAL).

*/
ssize_t nx_getdelim(char **lineptr, size_t *n, int delim, FILE *stream)
{
    if (!lineptr || !n) {
        return -EINVAL;
    }

    char* line = *lineptr;
    size_t size = *n;
    size_t count = 0;
    int err = 0;

    for (;;) {
        int ch = fgetc( stream);

        if (ch == EOF) {
            break;
        }

        // check for error adding to the buffer (ie., out of memory)
        if (nx_getdelim_append( &line, &size, count, (char) ch) < 0) {
            err = -ENOMEM;
            break;
        }

        ++count;

        // check if we're done because we've found the delimiter
        if ((unsigned char)ch == (unsigned char)delim) {
            break;
        }

        // check if we're passing the maximum supported buffer size
        if (count > (size_t) SSIZE_MAX) {
            err = -EOVERFLOW;
            break;
        }
    }

    // EOF before any data: hand back an empty, null terminated string.
    // This must happen BEFORE the caller's pointer and size are updated,
    // because the append may allocate or move the buffer.
    if ((err == 0) && (count == 0) && !ferror(stream) && feof(stream)) {
        if (nx_getdelim_append( &line, &size, count, 0) < 0) {
            err = -ENOMEM;
        }
    }

    // update the caller's data
    *lineptr = line;
    *n = size;

    // check for various error returns
    if (err != 0) {
        return err;
    }

    if (ferror(stream)) {
        return 0;
    }

    return (ssize_t) count;
}




/*
    nx_getline()

    Reads one line from stream: nx_getdelim() with '\n' as the
    delimiter.  Parameters and return value are passed straight
    through to/from nx_getdelim().
*/
ssize_t nx_getline(char **lineptr, size_t *n, FILE *stream)
{
    const int line_delim = '\n';

    return nx_getdelim( lineptr, n, line_delim, stream);
}



/*
    getdelim()

    Wrapper around nx_getdelim() that attempts to follow POSIX
    semantics for its return value:

        - a positive count from nx_getdelim() is returned as is
        - a negative result (a negated error code) is stored,
          sign-flipped, in errno and -1 is returned
        - a zero result (stream error or end-of-file with no data,
          ie. ferror() or feof() would return non-zero) is turned
          into -1; errno is not modified in that case

    Note that delim is declared as char here.
*/
ssize_t getdelim(char **lineptr, size_t *n, char delim, FILE *stream)
{
    ssize_t got = nx_getdelim( lineptr, n, delim, stream);

    if (got > 0) {
        return got;
    }

    if (got < 0) {
        // negative results carry the error code
        errno = -got;
    }

    return -1;
}

/*
    getline()

    Reads one line from stream by calling getdelim() with '\n' as
    the delimiter; the result of getdelim() is returned unchanged.
*/
ssize_t getline(char **lineptr, size_t *n, FILE *stream)
{
    const char line_delim = '\n';
    ssize_t length = getdelim( lineptr, n, line_delim, stream);

    return length;
}
0

精彩评论

暂无评论...
验证码 换一张
取 消

关注公众号