Strings

Strings — miscellaneous string-handling utilities

Functions

int string_is_blank ()
int has_suffix ()
int has_native_data_suffix ()
int numeric_string ()
int integer_string ()
int count_fields ()
int count_lines ()
double dot_atof ()
void set_atof_point ()
int gretl_dotpos ()
int gretl_slashpos ()
char * gretl_delchar ()
int gretl_charpos ()
int ends_with_backslash ()
int gretl_namechar_spn ()
int double_quote_position ()
char * gretl_trunc ()
char * gretl_delete ()
char * gretl_unquote ()
char * gretl_strdup ()
char * gretl_strndup ()
char * gretl_strdup_printf ()
char * gretl_word_strdup ()
char * gretl_quoted_string_strdup ()
char ** gretl_string_split ()
char ** gretl_string_split_quoted ()
char * gretl_str_expand ()
char * gretl_charsub ()
char * gretl_substring ()
char * comma_separate_numbers ()
char * shift_string_left ()
char * gretl_lower ()
char * gretl_strstrip ()
char * gretl_strstrip_copy ()
char * switch_ext ()
char * switch_ext_new ()
int equation_get_lhs_and_rhs ()
int top_n_tail ()
char * tailstrip ()
char * compress_spaces ()
char * space_to_score ()
char ** strings_array_new ()
char ** strings_array_realloc_with_length ()
int strings_array_add ()
char ** strings_array_new_with_length ()
char ** strings_array_dup ()
int strings_array_sort ()
int strings_array_cmp ()
void strings_array_free ()
char * get_obs_string ()
double obs_str_to_double ()
char * colonize_obs ()
void modify_date_for_csv ()
char * print_time ()
int gretl_xml_validate ()
char * gretl_xml_encode ()
int gretl_xml_encode_to_buf ()
void unescape_url ()
char * make_varname_unique ()
int fix_varname_duplicates ()
char * append_dir ()
char * build_path ()
const char * path_last_element ()
char * trim_slash ()
int gretl_string_ends_with ()
void get_column_widths ()
char * gretl_utf8_strncat ()
char * gretl_utf8_strncat_trim ()
int gretl_scan_varname ()
char * gretl_regexp_replace ()
char * gretl_literal_replace ()

Types and Values

#define SLASH
#define SLASHSTR
#define CTRLZ

Object Hierarchy


Includes

#include <libgretl.h>

Description

Various functions for creating, testing and manipulating strings and arrays of strings.

Functions

string_is_blank ()

int
string_is_blank (const char *s);

Parameters

s

the string to examine.

 

Returns

1 if the string is NULL, of length zero, or contains nothing but space characters, otherwise returns 0.


has_suffix ()

int
has_suffix (const char *str,
            const char *sfx);

Parameters

str

the string to check.

 

sfx

the suffix to check for, including the leading '.'

 

Returns

1 if str ends with sfx (on a case-insensitive comparison), 0 otherwise.


has_native_data_suffix ()

int
has_native_data_suffix (const char *fname);

Parameters

fname

the filename to check.

 

Returns

1 if fname ends with a suffix indicating it is a native gretl data file, 0 otherwise.


numeric_string ()

int
numeric_string (const char *str);

Parameters

str

the string to examine.

 

Returns

1 if the given str is numeric, otherwise 0.


integer_string ()

int
integer_string (const char *str);

Parameters

str

the string to examine.

 

Returns

1 if the given str represents an integer, otherwise 0.


count_fields ()

int
count_fields (const char *s,
              const char *sep);

Parameters

s

the string to process.

 

sep

string containing the character(s) to count as field separators, or NULL. If sep is NULL only the space character counts.

 

Returns

the number of fields in s .


count_lines ()

int
count_lines (const char *s);

Parameters

s

the string to process.

 

Returns

the number of complete lines (lines ending with the newline character) in s .


dot_atof ()

double
dot_atof (const char *s);

Parameters

s

the string to convert.

 

Returns

the double-precision numeric interpretation of s , where the decimal point character is forced to be '.', regardless of the current locale.


set_atof_point ()

void
set_atof_point (char c);


gretl_dotpos ()

int
gretl_dotpos (const char *str);

Parameters

str

the string to examine.

 

Returns

the integer position of the last "." within str, or strlen(str) in case a dot is not found, or the string ends with a (backward or forward) slash.


gretl_slashpos ()

int
gretl_slashpos (const char *str);

Parameters

str

the string to examine.

 

Returns

the integer position of the last SLASH within str , or 0 in case a SLASH is not found.


gretl_delchar ()

char *
gretl_delchar (int c,
               char *str);

Deletes all instances of c within str .

Parameters

c

the character to delete.

 

str

the string from which to delete c .

 

Returns

the possibly modified string.


gretl_charpos ()

int
gretl_charpos (char c,
               const char *s);

Parameters

c

the character to look for.

 

s

the string to examine.

 

Returns

the first position of c in s , or -1 if c is not found.


ends_with_backslash ()

int
ends_with_backslash (const char *s);

Parameters

s

the string to examine.

 

Returns

1 if the last non-space character in s is a backslash, otherwise 0.


gretl_namechar_spn ()

int
gretl_namechar_spn (const char *s);

Parameters

s

the string to examine.

 

Returns

the length of the initial segment of s which consists of characters that are valid in a gretl variable or object name, namely a-z, A-Z, 0-9 and _, starting with a letter.


double_quote_position ()

int
double_quote_position (const char *s);

Parameters

s

the source string.

 

Returns

the 0-based index of the position of the next unescaped double-quote character in s , or -1 if no such character is found.


gretl_trunc ()

char *
gretl_trunc (char *str,
             size_t n);

Truncates the given str to the specified length.

Parameters

str

the string to truncate.

 

n

the desired length of the truncated string.

 

Returns

the possibly truncated string.


gretl_delete ()

char *
gretl_delete (char *str,
              int idx,
              int count);

Deletes count characters from str , starting at position idx .

Parameters

str

the string to process.

 

idx

the starting point for deleting characters.

 

count

the number of characters to delete.

 

Returns

the modified string.


gretl_unquote ()

char *
gretl_unquote (char *str,
               int *err);

If str begins with the ASCII double-quote character, checks that the last character is also a double-quote, and in that case trims the quotes from both ends. If the first character is a double quote but the last is not, flags an error. If the string is not quoted at all, returns the original string.

Parameters

str

the string to process.

 

err

location to receive error code.

 

Returns

the input string, possibly modified in place.


gretl_strdup ()

char *
gretl_strdup (const char *src);

Parameters

src

the string to duplicate.

 

Returns

an allocated copy of src , or NULL on error.


gretl_strndup ()

char *
gretl_strndup (const char *src,
               size_t n);

Parameters

src

the string to be copied.

 

n

the maximum number of characters to copy.

 

Returns

an allocated copy of at most n characters from src , or NULL on error.


gretl_strdup_printf ()

char *
gretl_strdup_printf (const char *format,
                     ...);

Print the arguments according to format .

Parameters

format

as in printf().

 

Returns

allocated result of the printing, or NULL on failure.


gretl_word_strdup ()

char *
gretl_word_strdup (const char *src,
                   const char **ptr,
                   gretlopt opt,
                   int *err);

Copies the first 'word' found in src , where a word is defined as consisting of alphanumeric characters and the underscore. If ptr is not NULL, on exit it points at the next position in src after the copied word.

Parameters

src

the source string.

 

ptr

location to receive end of word pointer, or NULL.

 

opt

can include OPT_S for "strict" operation: in this case an error is flagged if src contains any characters other than 'word' characters (alphanumeric characters and the underscore), comma and space.

 

err

location to receive error code.

 

Returns

the allocated word or NULL in case no word is found, or on error.


gretl_quoted_string_strdup ()

char *
gretl_quoted_string_strdup (const char *s,
                            const char **ptr);

If s starts with a quote (double or single), return a copy of the portion of s that is enclosed in quotes. That is, from s + 1 up to but not including the next matching quote. If ptr is not NULL, on output it receives a pointer to the next byte in s after the closing quote.

Parameters

s

the source string.

 

ptr

location to receive end pointer, or NULL.

 

Returns

the allocated string or NULL on failure.


gretl_string_split ()

char **
gretl_string_split (const char *s,
                    int *n,
                    const char *sep);

Parses s into a set of zero or more substrings and creates an array of those substrings. On successful exit n holds the number of substrings.

Parameters

s

the source string.

 

n

location to receive the number of substrings.

 

sep

string containing the character(s) to count as field separators, or NULL. If sep is NULL only the space character counts.

 

Returns

the allocated array or NULL in case of failure.


gretl_string_split_quoted ()

char **
gretl_string_split_quoted (const char *s,
                           int *n,
                           const char *sep,
                           int *err);

Similar to gretl_string_split(), except that this variant allows for the presence of double-quoted substrings which may contain spaces. The quotes are removed in the members of the returned array.

Parameters

s

the source string.

 

n

location to receive the number of substrings.

 

sep

string containing the character(s) to count as field separators, or NULL. If sep is NULL only space, tab and newline count.

 

err

location to receive error code.

 

Returns

allocated array of substrings or NULL in case of failure.


gretl_str_expand ()

char *
gretl_str_expand (char **orig,
                  const char *add,
                  const char *sep);

Creates a newly allocated string built by concatenating orig and add , with sep interpolated unless sep is NULL, and replaces the content of orig with the new string. As a special case, if orig is NULL, or if the content of orig is NULL, we just duplicate add .

Parameters

orig

pointer to the base string.

 

add

the string to be added.

 

sep

string to be interpolated, or NULL.

 

Returns

the reallocated string, or NULL on failure. In case of failure the content of orig is freed, if orig is not NULL, to avoid memory leakage.


gretl_charsub ()

char *
gretl_charsub (char *str,
               char find,
               char repl);

Replaces all occurrences of find with repl in str .

Parameters

str

the string to operate on.

 

find

the character to replace.

 

repl

the replacement character.

 

Returns

the (possibly modified) string.


gretl_substring ()

char *
gretl_substring (const char *str,
                 int first,
                 int last,
                 int *err);

Parameters

str

the string to operate on.

 

first

1-based index of initial character.

 

last

1-based index of final character.

 

err

location to receive error code.

 

Returns

a substring of str , from first to last .


comma_separate_numbers ()

char *
comma_separate_numbers (char *s);

Given a string which contains two or more numbers separated by spaces and/or commas, revise the string to ensure that all the numbers are comma-separated.

Parameters

s

the string to operate on.

 

Returns

the (possibly modified) string.


shift_string_left ()

char *
shift_string_left (char *str,
                   size_t move);

Shifts the content of str left by move places, dropping leading bytes as needed.

Parameters

str

the string to process.

 

move

the number of places to shift.

 

Returns

the modified string.


gretl_lower ()

char *
gretl_lower (char *str);

Converts any upper case characters in str to lower case.

Parameters

str

the string to transform.

 

Returns

the possibly modified string.


gretl_strstrip ()

char *
gretl_strstrip (char *str);

Removes leading and trailing white space from a string.

Parameters

str

the string to process.

 

Returns

the possibly modified string.


gretl_strstrip_copy ()

char *
gretl_strstrip_copy (const char *str,
                     int *err);

Parameters

str

the string to process.

 

Returns

a copy of str , from which both leading and trailing white space have been removed.


switch_ext ()

char *
switch_ext (char *targ,
            const char *src,
            const char *ext);

For processing filenames: copies src to targ , minus any existing filename extension, and adds to targ the specified extension.

Parameters

targ

the target or output string (must be pre-allocated).

 

src

the source or input string.

 

ext

the extension or suffix to attach.

 

Returns

the output string, targ .


switch_ext_new ()

char *
switch_ext_new (const char *src,
                const char *ext);

For processing filenames: creates a copy of src in which any existing dot-extension is removed and ext is appended (with a dot automatically inserted).

Parameters

src

the original string.

 

ext

the extension or suffix to attach (without leading '.').

 

Returns

the newly allocated string.


equation_get_lhs_and_rhs ()

int
equation_get_lhs_and_rhs (const char *s,
                          char **plh,
                          char **prh);

Given a string s , parse it into a left-hand side and a right-hand side, separated by an equals sign. Return in plh and prh allocated copies of the respective sides, with any leading or trailing white space trimmed.

Parameters

s

equation in string form.

 

plh

pointer to receive left-hand side expression.

 

prh

pointer to receive right-hand side expression.

 

Returns

0 on success, 1 on error.


top_n_tail ()

int
top_n_tail (char *str,
            size_t maxlen,
            int *err);

Drop leading space and trailing space and newline from string, then replace a trailing backslash (if any) with a space. If str does not end with a newline within the limit set by maxlen , and err is not NULL, then E_TOOLONG is written to err .

Parameters

str

the string to process.

 

maxlen

maximum length of string, including NUL termination.

 

err

location to receive error code, or NULL.

 

Returns

1 if a trailing backslash, comma or left parenthesis was found, otherwise 0.


tailstrip ()

char *
tailstrip (char *str);

Drop trailing space (and newline if any) from string.

Parameters

str

the string to process.

 

Returns

the modified string.


compress_spaces ()

char *
compress_spaces (char *s);

Reduce multiple contiguous space characters to single spaces within s .

Parameters

s

the string to process.

 

Returns

the compressed string.


space_to_score ()

char *
space_to_score (char *s);

Replace any spaces with underscores in s .

Parameters

s

the string to process.

 

Returns

the (possibly) modified string.


strings_array_new ()

char **
strings_array_new (int nstrs);

Allocates storage for nstrs strings and initializes all to NULL.

Parameters

nstrs

number of strings in array.

 

Returns

the allocated array, or NULL on failure.


strings_array_realloc_with_length ()

char **
strings_array_realloc_with_length (char ***pS,
                                   int oldn,
                                   int newn,
                                   int len);

Adjusts the storage in pS to a size of newn strings, each of them len bytes long. The first byte of any additional strings is initialized to 0. This function may be used either to expand or to shrink an existing array of strings.

Parameters

pS

existing array to reallocate.

 

oldn

original number of strings in the array.

 

newn

new number of strings in array.

 

len

number of bytes per string.

 

Returns

the new array, or NULL on failure.


strings_array_add ()

int
strings_array_add (char ***pS,
                   int *n,
                   const char *p);

Allocates storage for an extra member of the array pointed to by pS and adds a copy of string p in the last position. On success, the content of n is incremented by 1.

Parameters

pS

pointer to strings array.

 

n

location of present number of strings in array.

 

p

string to add to array.

 

Returns

0 on success, E_ALLOC on failure.


strings_array_new_with_length ()

char **
strings_array_new_with_length (int nstrs,
                               int len);

Allocates storage for nstrs strings, each of them len bytes long. The first byte of each string is initialized to 0.

Parameters

nstrs

number of strings in array.

 

len

number of bytes per string.

 

Returns

the allocated array, or NULL on failure.


strings_array_dup ()

char **
strings_array_dup (char **strs,
                   int n);

Parameters

strs

array of strings to be copied.

 

n

number of strings in array.

 

Returns

an allocated copy of strs , or NULL on failure.


strings_array_sort ()

int
strings_array_sort (char ***pS,
                    int *n,
                    gretlopt opt);

Sorts an array of strings in ascending lexicographical order. If OPT_U is given, n holds the number of unique strings on exit. It is assumed that storage for the strings array was obtained via strings_array_new() or a similar libgretl function.

Parameters

pS

location of array of strings.

 

n

location of the number of strings in the array.

 

opt

may contain OPT_U to trim the sorted array so that it contains only unique entries.

 

Returns

0 on success, non-zero on error.


strings_array_cmp ()

int
strings_array_cmp (char **strs1,
                   char **strs2,
                   int n);

Compares for equality two arrays of strings, each of which must contain at least n elements. Equality of the arrays means that strcmp returns 0 for each pair of strings strs1 [i], strs2 [i], for i equals 0 to n - 1.

Parameters

strs1

first array of strings.

 

strs2

second array of strings.

 

n

number of strings to examine.

 

Returns

0 if the arrays compare equal, non-zero otherwise.


strings_array_free ()

void
strings_array_free (char **strs,
                    int nstrs);

Frees each allocated string in strs , then frees strs itself. Checks that strs is not NULL before proceeding.

Parameters

strs

array of allocated strings.

 

nstrs

number of strings in array.

 

get_obs_string ()

char *
get_obs_string (char *obs,
                int t,
                const DATASET *dset);

Parameters

obs

char array big enough to hold the observation (OBSLEN).

 

t

zero-based observation number.

 

dset

pointer to dataset information.

 

Returns

the observation string corresponding to t .


obs_str_to_double ()

double
obs_str_to_double (const char *obs);

Parameters

obs

string representation of observation number.

 

Returns

the floating-point counterpart of obs , or NADBL on invalid input.


colonize_obs ()

char *
colonize_obs (char *obs);

Converts a decimal point in obs to a colon.

Parameters

obs

string representation of observation number.

 

Returns

the (possibly) modified obs string.


modify_date_for_csv ()

void
modify_date_for_csv (char *s,
                     int pd);


print_time ()

char *
print_time (char *s);

Parameters

s

string into which to print: must be at least 48 bytes.

 

Returns

s , which will contain a locale-dependent representation of the current time. In English, this will be in the format Y/m/d H:M.


gretl_xml_validate ()

int
gretl_xml_validate (const char *s);

Parameters

s

string to be tested.

 

Returns

1 if s is acceptable for insertion into an XML file as is, 0 if it contains special characters that need to be escaped. See also gretl_xml_encode().


gretl_xml_encode ()

char *
gretl_xml_encode (const char *str);

Parameters

str

NUL-terminated source string.

 

Returns

an allocated re-write of str , with characters that are special in XML encoded as character entities. See also gretl_xml_validate().


gretl_xml_encode_to_buf ()

int
gretl_xml_encode_to_buf (char *targ,
                         const char *src,
                         int n);

Writes into targ a version of src in which characters that are special in XML are encoded as character entities. See also gretl_xml_encode() for the case where the encoding of src is of unknown size at compile time.

Parameters

targ

target buffer.

 

src

NUL-terminated source string.

 

n

size of targ in bytes.

 

Returns

0 on success or 1 on error. An error occurs if (a) the encoded version of src is longer than n bytes (allowing for NUL termination), or (b) src does not validate as UTF-8. On error the conversion is not done.


unescape_url ()

void
unescape_url (char *url);

Parameters

url

string representing a URL.

 

make_varname_unique ()

char *
make_varname_unique (char *vname,
                     int v,
                     DATASET *dset);

Given a tentative name for a new variable, check that it is not a duplicate of an existing varname. If it is, modify the new name so that it becomes unique. The ID number v is required so that, if the variable has already been added to the dataset, its name does not appear to conflict with itself! If the name to be tested is not associated with an existing variable, pass 0 for v .

Parameters

vname

tentative name for variable.

 

v

the ID number for the new variable.

 

dset

dataset information.

 

Returns

the (possibly modified) variable name.


fix_varname_duplicates ()

int
fix_varname_duplicates (DATASET *dset);

Returns


append_dir ()

char *
append_dir (char *fname,
            const char *dir);

Returns


build_path ()

char *
build_path (char *targ,
            const char *dirname,
            const char *fname,
            const char *ext);

Writes to targ a full path composed of dirname , fname and (optionally) ext . This function ensures that an appropriate separator is inserted between dirname and fname , if dirname is not already terminated with such a separator.

Parameters

targ

target string to write to (must be pre-allocated).

 

dirname

first part of path.

 

fname

filename.

 

ext

filename extension to be appended (or NULL).

 

Returns

the target string, targ .


path_last_element ()

const char *
path_last_element (const char *path);

Parameters

path

path to work on.

 

Returns

a pointer to the last element of path , that is, the element following the last path separator character, if any. If path does not contain a separator, path itself is returned. Note that the return value may be the empty string, if path ends with a separator.


trim_slash ()

char *
trim_slash (char *s);

If s ends with SLASH, remove this character.

Parameters

s

string to work on.

 

Returns

the (possibly) modified string.


gretl_string_ends_with ()

int
gretl_string_ends_with (const char *s,
                        const char *test);

Parameters

s

string to examine.

 

test

string to test for.

 

Returns

1 if s ends with test , else 0.


get_column_widths ()

void
get_column_widths (const char **strs,
                   int *widths,
                   int n);

If need be, increases the column widths in widths to accommodate the current translations of strs .

Parameters

strs

array of n strings.

 

widths

array of n default column widths.

 

n

number of columns.

 

gretl_utf8_strncat ()

char *
gretl_utf8_strncat (char *dest,
                    const char *src,
                    size_t n);

Works just like strncat(), except that it ensures that we don't end up with an incomplete UTF-8 character preceding the terminating NUL byte.

Parameters

dest

destination string.

 

src

source string.

 

n

maximum number of bytes to append.

 

Returns

the destination string.


gretl_utf8_strncat_trim ()

char *
gretl_utf8_strncat_trim (char *dest,
                         const char *src,
                         size_t n);

The same as gretl_utf8_strncat(), except that any leading and/or trailing white space is trimmed from dest .

Parameters

dest

destination string.

 

src

source string.

 

n

maximum number of bytes to append.

 

Returns

the destination string.


gretl_scan_varname ()

int
gretl_scan_varname (const char *src,
                    char *targ);

Performs sscanf() on src , using a conversion specifier which allows for writing up to VNAMELEN-1 bytes into targ . The latter must therefore be at least VNAMELEN bytes long.

Parameters

src

source string.

 

targ

target string.

 

Returns

the return value from sscanf().


gretl_regexp_replace ()

char *
gretl_regexp_replace (const char *orig,
                      const char *match,
                      const char *repl,
                      int *err);

Builds a string based on orig but in which all occurrences of match (which is interpreted as a regular expression of the Perl type) are replaced by means of repl (also interpreted as a regular expression).

Parameters

orig

the original string.

 

match

the pattern to match.

 

repl

the replacement expression for match .

 

err

location to receive error code.

 

Returns

newly allocated string or NULL on failure.


gretl_literal_replace ()

char *
gretl_literal_replace (const char *orig,
                       const char *match,
                       const char *repl,
                       int *err);

Builds a string based on orig but in which all occurrences of match (which is interpreted as a straight string literal) are replaced by repl (also a straight string literal).

Parameters

orig

the original string.

 

match

the substring to match.

 

repl

the replacement string for match .

 

err

location to receive error code.

 

Returns

newly allocated string or NULL on failure.

Types and Values

SLASH

#define SLASH '\\'


SLASHSTR

#define SLASHSTR "\\"


CTRLZ

#define CTRLZ 26