Strings

Strings — miscellaneous string-handling utilities

Synopsis

#include <libgretl.h>

#define             SLASH
#define             SLASHSTR
#define             CTRLZ
int                 string_is_blank                     (const char *s);
int                 has_suffix                          (const char *str,
                                                         const char *sfx);
int                 numeric_string                      (const char *str);
int                 integer_string                      (const char *str);
int                 count_fields                        (const char *s);
double              dot_atof                            (const char *s);
void                set_atof_point                      (char c);
int                 dotpos                              (const char *str);
int                 slashpos                            (const char *str);
char *              delchar                             (int c,
                                                         char *str);
int                 charpos                             (char c,
                                                         const char *s);
int                 lastchar                            (char c,
                                                         const char *s);
int                 ends_with_backslash                 (const char *s);
int                 gretl_namechar_spn                  (const char *s);
char *              gretl_trunc                         (char *str,
                                                         size_t n);
char *              gretl_delete                        (char *str,
                                                         int idx,
                                                         int count);
char *              gretl_unquote                       (char *str,
                                                         int *err);
char *              gretl_strdup                        (const char *src);
char *              gretl_strndup                       (const char *src,
                                                         size_t n);
char *              gretl_strdup_printf                 (const char *format,
                                                         ...);
char *              gretl_word_strdup                   (const char *src,
                                                         const char **ptr);
char *              gretl_quoted_string_strdup          (const char *s,
                                                         const char **ptr);
char **             gretl_string_split                  (const char *s,
                                                         int *n);
char **             gretl_string_split_quoted           (const char *s,
                                                         int *n,
                                                         int *err);
char *              gretl_str_expand                    (char **orig,
                                                         const char *add,
                                                         const char *sep);
char *              charsub                             (char *str,
                                                         char find,
                                                         char repl);
char *              comma_separate_numbers              (char *s);
char *              shift_string_left                   (char *str,
                                                         size_t move);
char *              lower                               (char *str);
void                clear                               (char *str,
                                                         int len);
char *              chopstr                             (char *str);
char *              switch_ext                          (char *targ,
                                                         const char *src,
                                                         const char *ext);
char *              switch_ext_new                      (const char *src,
                                                         const char *ext);
int                 get_base                            (char *targ,
                                                         const char *src,
                                                         char c);
int                 equation_get_lhs_and_rhs            (const char *s,
                                                         char **plh,
                                                         char **prh);
int                 top_n_tail                          (char *str,
                                                         size_t maxlen,
                                                         int *err);
char *              tailstrip                           (char *str);
char *              compress_spaces                     (char *s);
char *              space_to_score                      (char *s);
char *              safecpy                             (char *targ,
                                                         const char *src,
                                                         int n);
char **             strings_array_new                   (int nstrs);
char **             strings_array_realloc_with_length   (char ***pS,
                                                         int oldn,
                                                         int newn,
                                                         int len);
int                 strings_array_add                   (char ***pS,
                                                         int *n,
                                                         const char *p);
char **             strings_array_new_with_length       (int nstrs,
                                                         int len);
char **             strings_array_dup                   (char **strs,
                                                         int n);
int                 strings_array_sort                  (char ***pS,
                                                         int *n,
                                                         gretlopt opt);
int                 strings_array_cmp                   (char **strs1,
                                                         char **strs2,
                                                         int n);
void                free_strings_array                  (char **strs,
                                                         int nstrs);
char *              get_obs_string                      (char *obs,
                                                         int t,
                                                         const DATAINFO *pdinfo);
double              obs_str_to_double                   (const char *obs);
char *              colonize_obs                        (char *obs);
void                modify_date_for_csv                 (char *s,
                                                         int pd);
void                csv_obs_to_prn                      (int t,
                                                         const DATAINFO *pdinfo,
                                                         PRN *prn);
char *              print_time                          (char *s);
int                 gretl_xml_validate                  (const char *s);
char *              gretl_xml_encode                    (const char *str);
int                 gretl_xml_encode_to_buf             (char *targ,
                                                         const char *src,
                                                         int n);
void                unescape_url                        (char *url);
char *              make_varname_unique                 (char *vname,
                                                         int v,
                                                         DATAINFO *pdinfo);
int                 fix_varname_duplicates              (DATAINFO *pdinfo);
char *              append_dir                          (char *fname,
                                                         const char *dir);
char *              build_path                          (char *targ,
                                                         const char *dirname,
                                                         const char *fname,
                                                         const char *ext);
const char *        path_last_element                   (const char *path);
char *              trim_slash                          (char *s);
int                 gretl_string_ends_with              (const char *s,
                                                         const char *test);
void                get_column_widths                   (const char **strs,
                                                         int *widths,
                                                         int n);

Description

Various functions for creating, testing and manipulating strings and arrays of strings.

Details

SLASH

#define SLASH '\\'


SLASHSTR

#define SLASHSTR "\\"


CTRLZ

#define CTRLZ 26


string_is_blank ()

int                 string_is_blank                     (const char *s);

s :

the string to examine.

Returns :

1 if the string is NULL, of length zero, or contains nothing but space characters, otherwise returns 0.

has_suffix ()

int                 has_suffix                          (const char *str,
                                                         const char *sfx);

str :

the string to check.

sfx :

the suffix to check for, including the leading '.'

Returns :

1 if str ends with sfx (on a case-insensitive comparison), 0 otherwise.

numeric_string ()

int                 numeric_string                      (const char *str);

str :

the string to examine.

Returns :

1 if the given str is numeric, otherwise 0.

integer_string ()

int                 integer_string                      (const char *str);

str :

the string to examine.

Returns :

1 if the given str represents an integer, otherwise 0.

count_fields ()

int                 count_fields                        (const char *s);

s :

the string to process.

Returns :

the number of space-separated fields in s.

dot_atof ()

double              dot_atof                            (const char *s);

s :

the string to convert.

Returns :

the double-precision numeric interpretation of s, where the decimal point character is forced to be '.', regardless of the current locale.

set_atof_point ()

void                set_atof_point                      (char c);


dotpos ()

int                 dotpos                              (const char *str);

str :

the string to examine.

Returns :

the integer position of the last "." within str, or strlen(str) in case a dot is not found, or the string ends with a (backward or forward) slash.

slashpos ()

int                 slashpos                            (const char *str);

str :

the string to examine.

Returns :

the integer position of the last SLASH within str, or 0 in case a SLASH is not found.

delchar ()

char *              delchar                             (int c,
                                                         char *str);

Deletes all instances of c within str.

c :

the character to delete.

str :

the string from which to delete c.

Returns :

the possibly modified string.

charpos ()

int                 charpos                             (char c,
                                                         const char *s);

c :

the character to look for.

s :

the string to examine.

Returns :

the first position of c in s, or -1 if c is not found.

lastchar ()

int                 lastchar                            (char c,
                                                         const char *s);

c :

the character to look for.

s :

the string to examine.

Returns :

1 if c is the last character in s, 0 otherwise.

ends_with_backslash ()

int                 ends_with_backslash                 (const char *s);

s :

the string to examine.

Returns :

1 if the last non-space character in s is a backslash, otherwise 0.

gretl_namechar_spn ()

int                 gretl_namechar_spn                  (const char *s);

s :

the string to examine.

Returns :

the length of the initial segment of s which consists of characters that are valid in a gretl variable or object name, namely a-z, A-Z, 0-9 and _, starting with a letter.

gretl_trunc ()

char *              gretl_trunc                         (char *str,
                                                         size_t n);

Truncates the given str to the specified length.

str :

the string to truncate.

n :

the desired length of the truncated string.

Returns :

the possibly truncated string.

gretl_delete ()

char *              gretl_delete                        (char *str,
                                                         int idx,
                                                         int count);

Deletes count characters from str, starting at position idx.

str :

the string to process.

idx :

the starting point for deleting characters.

count :

the number of characters to delete.

Returns :

the modified string.

gretl_unquote ()

char *              gretl_unquote                       (char *str,
                                                         int *err);

If str begins with the ASCII double-quote character, checks that the last character is also a double-quote, and in that case trims the quotes from both ends. If the first character is a double quote but the last is not, flags an error. If the string is not quoted at all, returns the original string.

str :

the string to process.

err :

location to receive error code.

Returns :

the input string, possibly modified in place.

gretl_strdup ()

char *              gretl_strdup                        (const char *src);

src :

the string to duplicate.

Returns :

an allocated copy of src, or NULL on error.

gretl_strndup ()

char *              gretl_strndup                       (const char *src,
                                                         size_t n);

src :

the string to be copied.

n :

the maximum number of characters to copy.

Returns :

an allocated copy of at most n characters from src, or NULL on error.

gretl_strdup_printf ()

char *              gretl_strdup_printf                 (const char *format,
                                                         ...);

Print the arguments according to format.

format :

as in printf().

... :

arguments to be printed.

Returns :

allocated result of the printing, or NULL on failure.

gretl_word_strdup ()

char *              gretl_word_strdup                   (const char *src,
                                                         const char **ptr);

Copies the first 'word' found in src, where a word is defined as consisting of alphanumeric characters and the underscore. If ptr is not NULL, on exit it points at the next position in src after the copied word.

src :

the source string.

ptr :

location to receive end of word pointer, or NULL.

Returns :

the allocated word or NULL in case no word is found, or if allocation fails.

gretl_quoted_string_strdup ()

char *              gretl_quoted_string_strdup          (const char *s,
                                                         const char **ptr);

If s starts with a quote (double or single), return a copy of the portion of s that is enclosed in quotes. That is, from s + 1 up to but not including the next matching quote. If ptr is not NULL, on output it receives a pointer to the next byte in s after the closing quote.

s :

the source string.

ptr :

location to receive end pointer, or NULL.

Returns :

the allocated string or NULL on failure.

gretl_string_split ()

char **             gretl_string_split                  (const char *s,
                                                         int *n);

Parses s into a set of zero or more substrings, separated by one or more spaces, and creates an array of those substrings. On successful exit, n holds the number of substrings.

s :

the source string.

n :

location to receive the number of substrings.

Returns :

the allocated array or NULL in case of failure.

gretl_string_split_quoted ()

char **             gretl_string_split_quoted           (const char *s,
                                                         int *n,
                                                         int *err);

Similar to gretl_string_split(), except that for this function the sub-strings are assumed to be delimited by ASCII double-quote characters, and may therefore contain embedded spaces. The quotes are removed in the members of the returned array. Note that this function is not fully general in that it doesn't handle escaped double-quotes.

s :

the source string.

n :

location to receive the number of substrings.

err :

location to receive error code.

Returns :

allocated array of substrings or NULL in case of failure.

gretl_str_expand ()

char *              gretl_str_expand                    (char **orig,
                                                         const char *add,
                                                         const char *sep);

Creates a newly allocated string built by concatenating orig and add, with sep interpolated unless sep is NULL, and replaces the content of orig with the new string. As a special case, if orig is NULL, or if the content of orig is NULL, we just duplicate add.

orig :

pointer to the base string.

add :

the string to be added.

sep :

string to be interpolated, or NULL.

Returns :

the reallocated string, or NULL on failure. In case of failure the content of orig is freed, if orig is not NULL, to avoid memory leakage.

charsub ()

char *              charsub                             (char *str,
                                                         char find,
                                                         char repl);

Replaces all occurrences of find with repl in str.

str :

the string to operate on.

find :

the character to replace.

repl :

the replacement character.

Returns :

the (possibly modified) string.

comma_separate_numbers ()

char *              comma_separate_numbers              (char *s);

Given a string which contains two or more numbers separated by spaces and/or commas, revise the string to ensure that all the numbers are comma-separated.

s :

the string to operate on.

Returns :

the (possibly modified) string.

shift_string_left ()

char *              shift_string_left                   (char *str,
                                                         size_t move);

Shifts the content of str left by move places, dropping leading bytes as needed.

str :

the string to process.

move :

the number of places to shift.

Returns :

the modified string.

lower ()

char *              lower                               (char *str);

Converts any upper case characters in str to lower case.

str :

the string to transform.

Returns :

the possibly modified string.

clear ()

void                clear                               (char *str,
                                                         int len);

Sets all bytes in str to 0.

str :

the string to clear.

len :

the length of the string to be cleared.

chopstr ()

char *              chopstr                             (char *str);

Removes both leading and trailing space from a string.

str :

the string to process.

Returns :

the possibly modified string.

switch_ext ()

char *              switch_ext                          (char *targ,
                                                         const char *src,
                                                         const char *ext);

For processing filenames: copies src to targ, minus any existing filename extension, and adds to targ the specified extension.

targ :

the target or output string (must be pre-allocated).

src :

the source or input string.

ext :

the extension or suffix to attach.

Returns :

the output string, targ.

switch_ext_new ()

char *              switch_ext_new                      (const char *src,
                                                         const char *ext);

For processing filenames: creates a copy of src in which any existing dot-extension is removed and ext is appended (with a dot automatically inserted).

src :

the original string.

ext :

the extension or suffix to attach (without leading '.').

Returns :

the newly allocated string.

get_base ()

int                 get_base                            (char *targ,
                                                         const char *src,
                                                         char c);

If c is found in src, puts into targ the portion of src up to and including the last occurrence of c within src.

targ :

the target or output string (must be pre-allocated).

src :

the source or input string.

c :

the "base marker" character.

Returns :

1 if c is found in src, otherwise 0.

equation_get_lhs_and_rhs ()

int                 equation_get_lhs_and_rhs            (const char *s,
                                                         char **plh,
                                                         char **prh);

Given a string s, parse it into a left-hand side and a right-hand side, separated by an equals sign. Return in plh and prh allocated copies of the respective sides, with any leading or trailing white space trimmed.

s :

equation in string form.

plh :

pointer to receive left-hand side expression.

prh :

pointer to receive right-hand side expression.

Returns :

0 on success, 1 on error.

top_n_tail ()

int                 top_n_tail                          (char *str,
                                                         size_t maxlen,
                                                         int *err);

Drop leading space and trailing space and newline from string, then replace a trailing backslash (if any) with a space. If str does not end with a newline within the limit set by maxlen, and err is not NULL, then E_TOOLONG is written to err.

str :

the string to process.

maxlen :

maximum length of string, including NUL termination.

err :

location to receive error code, or NULL.

Returns :

1 if a trailing backslash or comma was found, otherwise 0.

tailstrip ()

char *              tailstrip                           (char *str);

Drop trailing space (and newline if any) from string.

str :

the string to process.

Returns :

the modified string.

compress_spaces ()

char *              compress_spaces                     (char *s);

Reduce multiple contiguous space characters to single spaces within s.

s :

the string to process.

Returns :

the compressed string.

space_to_score ()

char *              space_to_score                      (char *s);

Replace any spaces with underscores in s.

s :

the string to process.

Returns :

the (possibly) modified string.

safecpy ()

char *              safecpy                             (char *targ,
                                                         const char *src,
                                                         int n);

Copies at most n characters from src to targ, and ensures that targ[n] is a NUL byte.

targ :

target or output string (must be pre-allocated).

src :

source or input string.

n :

maximum length of target string.

Returns :

the output string.

strings_array_new ()

char **             strings_array_new                   (int nstrs);

Allocates storage for nstrs strings and initializes all to NULL.

nstrs :

number of strings in array.

Returns :

the allocated array, or NULL on failure.

strings_array_realloc_with_length ()

char **             strings_array_realloc_with_length   (char ***pS,
                                                         int oldn,
                                                         int newn,
                                                         int len);

Adjusts the storage in pS to a size of newn strings, each of them len bytes long. The first byte of any additional strings is initialized to 0. This function may be used either to expand or to shrink an existing array of strings.

pS :

existing array to reallocate.

oldn :

original number of strings in the array.

newn :

new number of strings in array.

len :

number of bytes per string.

Returns :

the new array, or NULL on failure.

strings_array_add ()

int                 strings_array_add                   (char ***pS,
                                                         int *n,
                                                         const char *p);

Allocates storage for an extra member of the strings array pointed to by pS and adds a copy of string p in the last position. On success, the content of n is incremented by 1.

pS :

pointer to strings array.

n :

location of present number of strings in array.

p :

string to add to array.

Returns :

0 on success, E_ALLOC on failure.

strings_array_new_with_length ()

char **             strings_array_new_with_length       (int nstrs,
                                                         int len);

Allocates storage for nstrs strings, each of them len bytes long. The first byte of each string is initialized to 0.

nstrs :

number of strings in array.

len :

number of bytes per string.

Returns :

the allocated array, or NULL on failure.

strings_array_dup ()

char **             strings_array_dup                   (char **strs,
                                                         int n);

strs :

array of strings to be copied.

n :

number of strings in array.

Returns :

an allocated copy of strs, or NULL on failure.

strings_array_sort ()

int                 strings_array_sort                  (char ***pS,
                                                         int *n,
                                                         gretlopt opt);

Sorts an array of strings in ascending lexicographical order. If OPT_U is given, n holds the number of unique strings on exit. It is assumed that storage for the strings array was obtained via strings_array_new() or a similar libgretl function.

pS :

location of array of strings.

n :

location of the number of strings in the array.

opt :

may contain OPT_U to trim the sorted array so that it contains only unique entries.

strings_array_cmp ()

int                 strings_array_cmp                   (char **strs1,
                                                         char **strs2,
                                                         int n);

Compares for equality two arrays of strings, each of which must contain at least n elements. Equality of the arrays means that strcmp returns 0 for each pair of strings strs1[i], strs2[i], for i equals 0 to n - 1.

strs1 :

first array of strings.

strs2 :

second array of strings.

n :

number of strings to examine.

Returns :

0 if the arrays compare equal, non-zero otherwise.

free_strings_array ()

void                free_strings_array                  (char **strs,
                                                         int nstrs);

Frees each allocated string in strs, then frees strs itself. Checks that strs is not NULL before proceeding.

strs :

array of allocated strings.

nstrs :

number of strings in array.

get_obs_string ()

char *              get_obs_string                      (char *obs,
                                                         int t,
                                                         const DATAINFO *pdinfo);

obs :

char array big enough to hold the observation (OBSLEN).

t :

zero-based observation number.

pdinfo :

pointer to dataset information.

Returns :

the observation string corresponding to t.

obs_str_to_double ()

double              obs_str_to_double                   (const char *obs);

obs :

string representation of observation number.

Returns :

the floating-point counterpart of obs.

colonize_obs ()

char *              colonize_obs                        (char *obs);

Converts a decimal point in obs to a colon.

obs :

string representation of observation number.

Returns :

the (possibly) modified obs string.

modify_date_for_csv ()

void                modify_date_for_csv                 (char *s,
                                                         int pd);


csv_obs_to_prn ()

void                csv_obs_to_prn                      (int t,
                                                         const DATAINFO *pdinfo,
                                                         PRN *prn);


print_time ()

char *              print_time                          (char *s);

s :

string into which to print: must be at least 48 bytes.

Returns :

s, which will contain a locale-dependent representation of the current time. In English, this will be in the format Y/m/d H:M.

gretl_xml_validate ()

int                 gretl_xml_validate                  (const char *s);

s :

string to be tested.

Returns :

1 if s is acceptable for insertion into an XML file as is, 0 if it contains special characters that need to be escaped. See also gretl_xml_encode().

gretl_xml_encode ()

char *              gretl_xml_encode                    (const char *str);

str :

NUL-terminated source string.

Returns :

an allocated re-write of str, with characters that are special in XML encoded as character entities. See also gretl_xml_validate().

gretl_xml_encode_to_buf ()

int                 gretl_xml_encode_to_buf             (char *targ,
                                                         const char *src,
                                                         int n);

Writes into targ a version of src in which characters that are special in XML are encoded as character entities. See also gretl_xml_encode() for the case where the encoding of src is of unknown size at compile time.

targ :

target buffer.

src :

NUL-terminated source string.

n :

size of targ in bytes.

Returns :

0 on success or 1 on error. An error occurs if (a) the encoded version of src is longer than n bytes (allowing for NUL termination), or (b) src does not validate as UTF-8. On error the conversion is not done.

unescape_url ()

void                unescape_url                        (char *url);

url :

string representing a URL.

make_varname_unique ()

char *              make_varname_unique                 (char *vname,
                                                         int v,
                                                         DATAINFO *pdinfo);

Given a tentative name for a new variable, check that it is not a duplicate of an existing varname. If it is, modify the new name so that it becomes unique. The ID number v is required so that, if the variable has already been added to the dataset, its name does not appear to conflict with itself! If the name to be tested is not associated with an existing variable, pass 0 for v.

vname :

tentative name for variable.

v :

the ID number for the new variable.

pdinfo :

dataset information.

Returns :

the (possibly modified) variable name.

fix_varname_duplicates ()

int                 fix_varname_duplicates              (DATAINFO *pdinfo);


append_dir ()

char *              append_dir                          (char *fname,
                                                         const char *dir);


build_path ()

char *              build_path                          (char *targ,
                                                         const char *dirname,
                                                         const char *fname,
                                                         const char *ext);

Writes to targ a full path composed of dirname, fname and (optionally) ext. This function ensures that an appropriate separator is inserted between dirname and fname, if dirname is not already terminated with such a separator.

targ :

target string to write to (must be pre-allocated).

dirname :

first part of path.

fname :

filename.

ext :

filename extension to be appended (or NULL).

Returns :

the target string, targ.

path_last_element ()

const char *        path_last_element                   (const char *path);

path :

path to work on.

Returns :

a pointer to the last element of path, that is, the element following the last path separator character, if any. If path does not contain a separator, path itself is returned. Note that the return value may be the empty string, if path ends with a separator.

trim_slash ()

char *              trim_slash                          (char *s);

If s ends with SLASH, remove this character.

s :

string to work on.

Returns :

the (possibly) modified string.

gretl_string_ends_with ()

int                 gretl_string_ends_with              (const char *s,
                                                         const char *test);

s :

string to examine.

test :

string to test for.

Returns :

1 if s ends with test, else 0.

get_column_widths ()

void                get_column_widths                   (const char **strs,
                                                         int *widths,
                                                         int n);

If need be, increases the column widths in widths to accommodate the current translations of strs.

strs :

array of n strings.

widths :

array of n default column widths.

n :

number of columns.