Strings

Strings — miscellaneous string-handling utilities

Synopsis

#include <libgretl.h>

#define             SLASH
#define             SLASHSTR
#define             CTRLZ
int                 string_is_blank                     (const char *s);
int                 has_suffix                          (const char *str,
                                                         const char *sfx);
int                 numeric_string                      (const char *str);
int                 integer_string                      (const char *str);
int                 count_fields                        (const char *s,
                                                         const char *sep);
double              dot_atof                            (const char *s);
void                set_atof_point                      (char c);
int                 gretl_dotpos                        (const char *str);
int                 gretl_slashpos                      (const char *str);
char *              gretl_delchar                       (int c,
                                                         char *str);
int                 gretl_charpos                       (char c,
                                                         const char *s);
int                 ends_with_backslash                 (const char *s);
int                 gretl_namechar_spn                  (const char *s);
char *              gretl_trunc                         (char *str,
                                                         size_t n);
char *              gretl_delete                        (char *str,
                                                         int idx,
                                                         int count);
char *              gretl_unquote                       (char *str,
                                                         int *err);
char *              gretl_strdup                        (const char *src);
char *              gretl_strndup                       (const char *src,
                                                         size_t n);
char *              gretl_strdup_printf                 (const char *format,
                                                         ...);
char *              gretl_word_strdup                   (const char *src,
                                                         const char **ptr,
                                                         gretlopt opt,
                                                         int *err);
char *              gretl_quoted_string_strdup          (const char *s,
                                                         const char **ptr);
char **             gretl_string_split                  (const char *s,
                                                         int *n,
                                                         const char *sep);
char **             gretl_string_split_quoted           (const char *s,
                                                         int *n,
                                                         const char *sep,
                                                         int *err);
char *              gretl_str_expand                    (char **orig,
                                                         const char *add,
                                                         const char *sep);
char *              gretl_charsub                       (char *str,
                                                         char find,
                                                         char repl);
char *              comma_separate_numbers              (char *s);
char *              shift_string_left                   (char *str,
                                                         size_t move);
char *              gretl_lower                         (char *str);
char *              gretl_strstrip                      (char *str);
char *              gretl_strstrip_copy                 (const char *str,
                                                         int *err);
char *              switch_ext                          (char *targ,
                                                         const char *src,
                                                         const char *ext);
char *              switch_ext_new                      (const char *src,
                                                         const char *ext);
int                 equation_get_lhs_and_rhs            (const char *s,
                                                         char **plh,
                                                         char **prh);
int                 top_n_tail                          (char *str,
                                                         size_t maxlen,
                                                         int *err);
char *              tailstrip                           (char *str);
char *              compress_spaces                     (char *s);
char *              space_to_score                      (char *s);
char **             strings_array_new                   (int nstrs);
char **             strings_array_realloc_with_length   (char ***pS,
                                                         int oldn,
                                                         int newn,
                                                         int len);
int                 strings_array_add                   (char ***pS,
                                                         int *n,
                                                         const char *p);
char **             strings_array_new_with_length       (int nstrs,
                                                         int len);
char **             strings_array_dup                   (char **strs,
                                                         int n);
int                 strings_array_sort                  (char ***pS,
                                                         int *n,
                                                         gretlopt opt);
int                 strings_array_cmp                   (char **strs1,
                                                         char **strs2,
                                                         int n);
void                strings_array_free                  (char **strs,
                                                         int nstrs);
char *              get_obs_string                      (char *obs,
                                                         int t,
                                                         const DATASET *dset);
double              obs_str_to_double                   (const char *obs);
char *              colonize_obs                        (char *obs);
void                modify_date_for_csv                 (char *s,
                                                         int pd);
char *              print_time                          (char *s);
int                 gretl_xml_validate                  (const char *s);
char *              gretl_xml_encode                    (const char *str);
int                 gretl_xml_encode_to_buf             (char *targ,
                                                         const char *src,
                                                         int n);
void                unescape_url                        (char *url);
char *              make_varname_unique                 (char *vname,
                                                         int v,
                                                         DATASET *dset);
int                 fix_varname_duplicates              (DATASET *dset);
char *              append_dir                          (char *fname,
                                                         const char *dir);
char *              build_path                          (char *targ,
                                                         const char *dirname,
                                                         const char *fname,
                                                         const char *ext);
const char *        path_last_element                   (const char *path);
char *              trim_slash                          (char *s);
int                 gretl_string_ends_with              (const char *s,
                                                         const char *test);
void                get_column_widths                   (const char **strs,
                                                         int *widths,
                                                         int n);
char *              gretl_utf8_strncat                  (char *dest,
                                                         const char *src,
                                                         size_t n);
char *              gretl_utf8_strncat_trim             (char *dest,
                                                         const char *src,
                                                         size_t n);
int                 gretl_scan_varname                  (const char *src,
                                                         char *targ);
char *              gretl_regexp_replace                (const char *orig,
                                                         const char *match,
                                                         const char *repl,
                                                         int *err);
char *              gretl_literal_replace               (const char *orig,
                                                         const char *match,
                                                         const char *repl,
                                                         int *err);

Description

Various functions for creating, testing and manipulating strings and arrays of strings.

Details

SLASH

#define SLASH '\\'


SLASHSTR

#define SLASHSTR "\\"


CTRLZ

#define CTRLZ 26


string_is_blank ()

int                 string_is_blank                     (const char *s);

s :

the string to examine.

Returns :

1 if the string is NULL, of length zero, or contains nothing but space characters, otherwise returns 0.

has_suffix ()

int                 has_suffix                          (const char *str,
                                                         const char *sfx);

str :

the string to check.

sfx :

the suffix to check for, including the leading '.'

Returns :

1 if str ends with sfx (on a case-insensitive comparison), 0 otherwise.

numeric_string ()

int                 numeric_string                      (const char *str);

str :

the string to examine.

Returns :

1 if the given str is numeric, otherwise 0.

integer_string ()

int                 integer_string                      (const char *str);

str :

the string to examine.

Returns :

1 if the given str represents an integer, otherwise 0.

count_fields ()

int                 count_fields                        (const char *s,
                                                         const char *sep);

s :

the string to process.

sep :

string containing the character(s) to count as field separators, or NULL. If sep is NULL only the space character counts.

Returns :

the number of fields in s.

dot_atof ()

double              dot_atof                            (const char *s);

s :

the string to convert.

Returns :

the double-precision numeric interpretation of s, where the decimal point character is forced to be '.', regardless of the current locale.

set_atof_point ()

void                set_atof_point                      (char c);


gretl_dotpos ()

int                 gretl_dotpos                        (const char *str);

str :

the string to examine.

Returns :

the integer position of the last "." within str, or strlen(str) if no dot is found or if the string ends with a (backward or forward) slash.

gretl_slashpos ()

int                 gretl_slashpos                      (const char *str);

str :

the string to examine.

Returns :

the integer position of the last SLASH within str, or 0 in case a SLASH is not found.

gretl_delchar ()

char *              gretl_delchar                       (int c,
                                                         char *str);

Deletes all instances of c within str.

c :

the character to delete.

str :

the string from which to delete c.

Returns :

the possibly modified string.

gretl_charpos ()

int                 gretl_charpos                       (char c,
                                                         const char *s);

c :

the character to look for.

s :

the string to examine.

Returns :

the first position of c in s, or -1 if c is not found.

ends_with_backslash ()

int                 ends_with_backslash                 (const char *s);

s :

the string to examine.

Returns :

1 if the last non-space character in s is a backslash, otherwise 0.

gretl_namechar_spn ()

int                 gretl_namechar_spn                  (const char *s);

s :

the string to examine.

Returns :

the length of the initial segment of s which consists of characters that are valid in a gretl variable or object name, namely a-z, A-Z, 0-9 and _, starting with a letter.

gretl_trunc ()

char *              gretl_trunc                         (char *str,
                                                         size_t n);

Truncates the given str to the specified length.

str :

the string to truncate.

n :

the desired length of the truncated string.

Returns :

the possibly truncated string.

gretl_delete ()

char *              gretl_delete                        (char *str,
                                                         int idx,
                                                         int count);

Deletes count characters from str, starting at position idx.

str :

the string to process.

idx :

the starting point for deleting characters.

count :

the number of characters to delete.

Returns :

the modified string.

gretl_unquote ()

char *              gretl_unquote                       (char *str,
                                                         int *err);

If str begins with the ASCII double-quote character, checks that the last character is also a double-quote, and in that case trims the quotes from both ends. If the first character is a double quote but the last is not, flags an error. If the string is not quoted at all, returns the original string.

str :

the string to process.

err :

location to receive error code.

Returns :

the input string, possibly modified in place.

gretl_strdup ()

char *              gretl_strdup                        (const char *src);

src :

the string to duplicate.

Returns :

an allocated copy of src, or NULL on error.

gretl_strndup ()

char *              gretl_strndup                       (const char *src,
                                                         size_t n);

src :

the string to be copied.

n :

the maximum number of characters to copy.

Returns :

an allocated copy of at most n characters from src, or NULL on error.

gretl_strdup_printf ()

char *              gretl_strdup_printf                 (const char *format,
                                                         ...);

Print the arguments according to format.

format :

as in printf().

Returns :

allocated result of the printing, or NULL on failure.

gretl_word_strdup ()

char *              gretl_word_strdup                   (const char *src,
                                                         const char **ptr,
                                                         gretlopt opt,
                                                         int *err);

Copies the first 'word' found in src, where a word is defined as consisting of alphanumeric characters and the underscore. If ptr is not NULL, on exit it points at the next position in src after the copied word.

src :

the source string.

ptr :

location to receive end of word pointer, or NULL.

opt :

can include OPT_S for "strict" operation: in this case an error is flagged if src contains any characters other than 'word' characters (see above), comma and space.

err :

location to receive error code.

Returns :

the allocated word or NULL in case no word is found, or on error.

gretl_quoted_string_strdup ()

char *              gretl_quoted_string_strdup          (const char *s,
                                                         const char **ptr);

If s starts with a quote (double or single), return a copy of the portion of s that is enclosed in quotes. That is, from s + 1 up to but not including the next matching quote. If ptr is not NULL, on output it receives a pointer to the next byte in s after the closing quote.

s :

the source string.

ptr :

location to receive end pointer, or NULL.

Returns :

the allocated string or NULL on failure.

gretl_string_split ()

char **             gretl_string_split                  (const char *s,
                                                         int *n,
                                                         const char *sep);

Parses s into a set of zero or more substrings and creates an array of those substrings. On successful exit n holds the number of substrings.

s :

the source string.

n :

location to receive the number of substrings.

sep :

string containing the character(s) to count as field separators, or NULL. If sep is NULL only the space character counts.

Returns :

the allocated array or NULL in case of failure.

gretl_string_split_quoted ()

char **             gretl_string_split_quoted           (const char *s,
                                                         int *n,
                                                         const char *sep,
                                                         int *err);

Similar to gretl_string_split(), except that this variant allows for the presence of double-quoted substrings which may contain spaces. The quotes are removed in the members of the returned array.

s :

the source string.

n :

location to receive the number of substrings.

sep :

string containing the character(s) to count as field separators, or NULL. If sep is NULL only space, tab and newline count.

err :

location to receive error code.

Returns :

allocated array of substrings or NULL in case of failure.

gretl_str_expand ()

char *              gretl_str_expand                    (char **orig,
                                                         const char *add,
                                                         const char *sep);

Creates a newly allocated string built by concatenating orig and add, with sep interpolated unless sep is NULL, and replaces the content of orig with the new string. As a special case, if orig is NULL, or if the content of orig is NULL, we just duplicate add.

orig :

pointer to the base string.

add :

the string to be added.

sep :

string to be interpolated, or NULL.

Returns :

the reallocated string, or NULL on failure. In case of failure the content of orig is freed, if orig is not NULL, to avoid memory leakage.

gretl_charsub ()

char *              gretl_charsub                       (char *str,
                                                         char find,
                                                         char repl);

Replaces all occurrences of find with repl in str.

str :

the string to operate on.

find :

the character to replace.

repl :

the replacement character.

Returns :

the (possibly modified) string.

comma_separate_numbers ()

char *              comma_separate_numbers              (char *s);

Given a string which contains two or more numbers separated by spaces and/or commas, revise the string to ensure that all the numbers are comma-separated.

s :

the string to operate on.

Returns :

the (possibly modified) string.

shift_string_left ()

char *              shift_string_left                   (char *str,
                                                         size_t move);

Shifts the content of str left by move places, dropping leading bytes as needed.

str :

the string to process.

move :

the number of places to shift.

Returns :

the modified string.

gretl_lower ()

char *              gretl_lower                         (char *str);

Converts any upper case characters in str to lower case.

str :

the string to transform.

Returns :

the possibly modified string.

gretl_strstrip ()

char *              gretl_strstrip                      (char *str);

Removes leading and trailing white space from a string.

str :

the string to process.

Returns :

the possibly modified string.

gretl_strstrip_copy ()

char *              gretl_strstrip_copy                 (const char *str,
                                                         int *err);

str :

the string to process.

Returns :

a copy of str, from which both leading and trailing white space have been removed.

switch_ext ()

char *              switch_ext                          (char *targ,
                                                         const char *src,
                                                         const char *ext);

For processing filenames: copies src to targ, minus any existing filename extension, and adds to targ the specified extension.

targ :

the target or output string (must be pre-allocated).

src :

the source or input string.

ext :

the extension or suffix to attach.

Returns :

the output string, targ.

switch_ext_new ()

char *              switch_ext_new                      (const char *src,
                                                         const char *ext);

For processing filenames: creates a copy of src in which any existing dot-extension is removed and ext is appended (with a dot automatically inserted).

src :

the original string.

ext :

the extension or suffix to attach (without leading '.').

Returns :

the newly allocated string.

equation_get_lhs_and_rhs ()

int                 equation_get_lhs_and_rhs            (const char *s,
                                                         char **plh,
                                                         char **prh);

Given a string s, parse it into a left-hand side and a right-hand side, separated by an equals sign. Return in plh and prh allocated copies of the respective sides, with any leading or trailing white space trimmed.

s :

equation in string form.

plh :

pointer to receive left-hand side expression.

prh :

pointer to receive right-hand side expression.

Returns :

0 on success, 1 on error.

top_n_tail ()

int                 top_n_tail                          (char *str,
                                                         size_t maxlen,
                                                         int *err);

Drop leading space and trailing space and newline from string, then replace a trailing backslash (if any) with a space. If str does not end with a newline within the limit set by maxlen, and err is not NULL, then E_TOOLONG is written to err.

str :

the string to process.

maxlen :

maximum length of string, including NUL termination.

err :

location to receive error code, or NULL.

Returns :

1 if a trailing backslash, comma or left parenthesis was found, otherwise 0.

tailstrip ()

char *              tailstrip                           (char *str);

Drop trailing space (and newline if any) from string.

str :

the string to process.

Returns :

the modified string.

compress_spaces ()

char *              compress_spaces                     (char *s);

Reduce multiple contiguous space characters to single spaces within s.

s :

the string to process.

Returns :

the compressed string.

space_to_score ()

char *              space_to_score                      (char *s);

Replace any spaces with underscores in s.

s :

the string to process.

Returns :

the (possibly) modified string.

strings_array_new ()

char **             strings_array_new                   (int nstrs);

Allocates storage for nstrs strings and initializes all to NULL.

nstrs :

number of strings in array.

Returns :

the allocated array, or NULL on failure.

strings_array_realloc_with_length ()

char **             strings_array_realloc_with_length   (char ***pS,
                                                         int oldn,
                                                         int newn,
                                                         int len);

Adjusts the storage in pS to a size of newn strings, each of them len bytes long. The first byte of any additional strings is initialized to 0. This function may be used either to expand or to shrink an existing array of strings.

pS :

existing array to reallocate.

oldn :

original number of strings in the array.

newn :

new number of strings in array.

len :

number of bytes per string.

Returns :

the new array, or NULL on failure.

strings_array_add ()

int                 strings_array_add                   (char ***pS,
                                                         int *n,
                                                         const char *p);

Allocates storage for an extra member of the strings array pointed to by pS and adds a copy of string p in the last position. On success, the content of n is incremented by 1.

pS :

pointer to strings array.

n :

location of present number of strings in array.

p :

string to add to array.

Returns :

0 on success, E_ALLOC on failure.

strings_array_new_with_length ()

char **             strings_array_new_with_length       (int nstrs,
                                                         int len);

Allocates storage for nstrs strings, each of them len bytes long. The first byte of each string is initialized to 0.

nstrs :

number of strings in array.

len :

number of bytes per string.

Returns :

the allocated array, or NULL on failure.

strings_array_dup ()

char **             strings_array_dup                   (char **strs,
                                                         int n);

strs :

array of strings to be copied.

n :

number of strings in array.

Returns :

an allocated copy of strs, or NULL on failure.

strings_array_sort ()

int                 strings_array_sort                  (char ***pS,
                                                         int *n,
                                                         gretlopt opt);

Sorts an array of strings in ascending lexicographical order. If OPT_U is given, n holds the number of unique strings on exit. It is assumed that storage for the strings array was obtained via strings_array_new() or a similar libgretl function.

pS :

location of array of strings.

n :

location of the number of strings in the array.

opt :

may contain OPT_U to trim the sorted array so that it contains only unique entries.

Returns :

0 on success, non-zero on error.

strings_array_cmp ()

int                 strings_array_cmp                   (char **strs1,
                                                         char **strs2,
                                                         int n);

Compares two arrays of strings for equality; each array must contain at least n elements. Equality of the arrays means that strcmp returns 0 for each pair of strings strs1[i], strs2[i], for i from 0 to n - 1.

strs1 :

first array of strings.

strs2 :

second array of strings.

n :

number of strings to examine.

Returns :

0 if the arrays compare equal, non-zero otherwise.

strings_array_free ()

void                strings_array_free                  (char **strs,
                                                         int nstrs);

Frees each allocated string in strs, then frees strs itself. Checks that strs is not NULL before proceeding.

strs :

array of allocated strings.

nstrs :

number of strings in array.

get_obs_string ()

char *              get_obs_string                      (char *obs,
                                                         int t,
                                                         const DATASET *dset);

obs :

char array big enough to hold the observation (OBSLEN).

t :

zero-based observation number.

dset :

pointer to dataset information.

Returns :

the observation string corresponding to t.

obs_str_to_double ()

double              obs_str_to_double                   (const char *obs);

obs :

string representation of observation number.

Returns :

the floating-point counterpart of obs, or NADBL on invalid input.

colonize_obs ()

char *              colonize_obs                        (char *obs);

Converts a decimal point in obs to a colon.

obs :

string representation of observation number.

Returns :

the (possibly) modified obs string.

modify_date_for_csv ()

void                modify_date_for_csv                 (char *s,
                                                         int pd);


print_time ()

char *              print_time                          (char *s);

s :

string into which to print: must be at least 48 bytes.

Returns :

s, which will contain a locale-dependent representation of the current time. In English, this will be in the format Y/m/d H:M.

gretl_xml_validate ()

int                 gretl_xml_validate                  (const char *s);

s :

string to be tested.

Returns :

1 if s is acceptable for insertion into an XML file as is, 0 if it contains special characters that need to be escaped. See also gretl_xml_encode().

gretl_xml_encode ()

char *              gretl_xml_encode                    (const char *str);

str :

NUL-terminated source string.

Returns :

an allocated re-write of str, with characters that are special in XML encoded as character entities. See also gretl_xml_validate().

gretl_xml_encode_to_buf ()

int                 gretl_xml_encode_to_buf             (char *targ,
                                                         const char *src,
                                                         int n);

Writes into targ a version of src in which characters that are special in XML are encoded as character entities. See also gretl_xml_encode() for the case where the encoding of src is of unknown size at compile time.

targ :

target buffer.

src :

NUL-terminated source string.

n :

size of targ in bytes.

Returns :

0 on success or 1 on error. An error occurs if (a) the encoded version of src is longer than n bytes (allowing for NUL termination), or (b) src does not validate as UTF-8. On error the conversion is not done.

unescape_url ()

void                unescape_url                        (char *url);

url :

string representing a URL.

make_varname_unique ()

char *              make_varname_unique                 (char *vname,
                                                         int v,
                                                         DATASET *dset);

Given a tentative name for a new variable, check that it is not a duplicate of an existing varname. If it is, modify the new name so that it becomes unique. The ID number v is required so that, if the variable has already been added to the dataset, its name does not appear to conflict with itself! If the name to be tested is not associated with an existing variable, pass 0 for v.

vname :

tentative name for variable.

v :

the ID number for the new variable.

dset :

dataset information.

Returns :

the (possibly modified) variable name.

fix_varname_duplicates ()

int                 fix_varname_duplicates              (DATASET *dset);


append_dir ()

char *              append_dir                          (char *fname,
                                                         const char *dir);


build_path ()

char *              build_path                          (char *targ,
                                                         const char *dirname,
                                                         const char *fname,
                                                         const char *ext);

Writes to targ a full path composed of dirname, fname and (optionally) ext. This function ensures that an appropriate separator is inserted between dirname and fname, if dirname is not already terminated with such a separator.

targ :

target string to write to (must be pre-allocated).

dirname :

first part of path.

fname :

filename.

ext :

filename extension to be appended (or NULL).

Returns :

the target string, targ.

path_last_element ()

const char *        path_last_element                   (const char *path);

path :

path to work on.

Returns :

a pointer to the last element of path, that is, the element following the last path separator character, if any. If path does not contain a separator, path itself is returned. Note that the return value may be the empty string, if path ends with a separator.

trim_slash ()

char *              trim_slash                          (char *s);

If s ends with SLASH, remove this character.

s :

string to work on.

Returns :

the (possibly) modified string.

gretl_string_ends_with ()

int                 gretl_string_ends_with              (const char *s,
                                                         const char *test);

s :

string to examine.

test :

string to test for.

Returns :

1 if s ends with test, else 0.

get_column_widths ()

void                get_column_widths                   (const char **strs,
                                                         int *widths,
                                                         int n);

If need be, increases the column widths in widths to accommodate the current translations of strs.

strs :

array of n strings.

widths :

array of n default column widths.

n :

number of columns.

gretl_utf8_strncat ()

char *              gretl_utf8_strncat                  (char *dest,
                                                         const char *src,
                                                         size_t n);

Works just like strncat(), except that it ensures that we don't end up with an incomplete UTF-8 character preceding the terminating NUL byte.

dest :

destination string.

src :

source string.

n :

maximum number of bytes to append.

Returns :

the destination string.

gretl_utf8_strncat_trim ()

char *              gretl_utf8_strncat_trim             (char *dest,
                                                         const char *src,
                                                         size_t n);

The same as gretl_utf8_strncat(), except that any leading and/or trailing white space is trimmed from dest.

dest :

destination string.

src :

source string.

n :

maximum number of bytes to append.

Returns :

the destination string.

gretl_scan_varname ()

int                 gretl_scan_varname                  (const char *src,
                                                         char *targ);

Performs sscanf() on src, using a conversion specifier which allows for writing up to VNAMELEN-1 bytes into targ. The latter must therefore be at least VNAMELEN bytes long.

src :

source string.

targ :

target string.

Returns :

the return value from sscanf().

gretl_regexp_replace ()

char *              gretl_regexp_replace                (const char *orig,
                                                         const char *match,
                                                         const char *repl,
                                                         int *err);

Builds a string based on orig but in which all occurrences of match (which is interpreted as a regular expression of the Perl type) are replaced by means of repl (also interpreted as a regular expression).

orig :

the original string.

match :

the pattern to match.

repl :

the replacement expression for match.

err :

location to receive error code.

Returns :

newly allocated string or NULL on failure.

gretl_literal_replace ()

char *              gretl_literal_replace               (const char *orig,
                                                         const char *match,
                                                         const char *repl,
                                                         int *err);

Builds a string based on orig but in which all occurrences of match (which is interpreted as a straight string literal) are replaced by repl (also a straight string literal).

orig :

the original string.

match :

the substring to match.

repl :

the replacement string for match.

err :

location to receive error code.

Returns :

newly allocated string or NULL on failure.