Data support

Data support — data handling (internal)

Synopsis

#include <gretl/libgretl.h>

enum                GretlFileType;
enum                DataClearCode;
enum                GretlVarnameError;
#define             SPREADSHEET_IMPORT                  (f)
#define             OTHER_IMPORT                        (f)
#define             free_datainfo                       (p)
#define             DBNA
#define             GRETL_SCALAR_DIGITS
int                 dateton                             (const char *date,
                                                         const DATAINFO *pdinfo);
int                 merge_dateton                       (const char *date,
                                                         const DATAINFO *pdinfo);
char *              ntodate                             (char *datestr,
                                                         int t,
                                                         const DATAINFO *pdinfo);
int                 get_subperiod                       (int t,
                                                         const DATAINFO *pdinfo,
                                                         int *err);
int                 get_info                            (const char *hdrfile,
                                                         PRN *prn);
int                 get_precision                       (const double *x,
                                                         int n,
                                                         int placemax);
double              get_date_x                          (int pd,
                                                         const char *obs);
int                 write_data                          (const char *fname,
                                                         int *list,
                                                         const double **Z,
                                                         const DATAINFO *pdinfo,
                                                         gretlopt opt,
                                                         int progress);
int                 is_gzipped                          (const char *fname);
int                 gretl_is_pkzip_file                 (const char *fname);
void                gz_switch_ext                       (char *targ,
                                                         char *src,
                                                         char *ext);
int                 merge_or_replace_data               (double ***pZ0,
                                                         DATAINFO *pdinfo0,
                                                         double ***pZ1,
                                                         DATAINFO **ppdinfo1,
                                                         gretlopt opt,
                                                         PRN *prn);
int                 gretl_get_data                      (char *fname,
                                                         double ***pZ,
                                                         DATAINFO *pdinfo,
                                                         gretlopt opt,
                                                         PRN *prn);
int                 open_nulldata                       (double ***pZ,
                                                         DATAINFO *pdinfo,
                                                         int data_status,
                                                         int length,
                                                         PRN *prn);
int                 import_csv                          (const char *fname,
                                                         double ***pZ,
                                                         DATAINFO *pdinfo,
                                                         gretlopt opt,
                                                         PRN *prn);
int                 import_spreadsheet                  (const char *fname,
                                                         GretlFileType ftype,
                                                         int *list,
                                                         char *sheetname,
                                                         double ***pZ,
                                                         DATAINFO *pdinfo,
                                                         gretlopt opt,
                                                         PRN *prn);
int                 import_other                        (const char *fname,
                                                         GretlFileType ftype,
                                                         double ***pZ,
                                                         DATAINFO *pdinfo,
                                                         gretlopt opt,
                                                         PRN *prn);
int                 add_obs_markers_from_file           (DATAINFO *pdinfo,
                                                         const char *fname);
int                 add_var_labels_from_file            (DATAINFO *pdinfo,
                                                         const char *fname);
int                 save_var_labels_to_file             (const DATAINFO *pdinfo,
                                                         const char *fname);
int                 dataset_has_var_labels              (const DATAINFO *pdinfo);
int                 read_or_write_var_labels            (gretlopt opt,
                                                         DATAINFO *pdinfo,
                                                         PRN *prn);
GretlFileType       detect_filetype                     (char *fname,
                                                         gretlopt opt);
gretlopt            data_save_opt_from_suffix           (const char *fname);
int                 check_varname                       (const char *varname);
int                 check_atof                          (const char *numstr);
int                 check_atoi                          (const char *numstr);
int                 transpose_data                      (double ***pZ,
                                                         DATAINFO *pdinfo);
void                dataset_add_import_info             (DATAINFO *pdinfo,
                                                         const char *fname,
                                                         GretlFileType type);

Description

The following data handling functions are basically internal to gretl and not in a state where they can be readily documented as public APIs.

Details

enum GretlFileType

typedef enum {
    GRETL_NATIVE_DATA,    /* old-style gretl format data file */
    GRETL_XML_DATA,       /* gretl XML data file (.gdt) */
    GRETL_CSV,            /* comma-separated or other plain text data */
    GRETL_OCTAVE,         /* GNU octave ascii data file */
    GRETL_GNUMERIC,       /* gnumeric workbook data */
    GRETL_XLS,            /* MS Excel spreadsheet data */
    GRETL_ODS,            /* Open Document Spreadsheet data */
    GRETL_WF1,            /* Eviews workfile data */
    GRETL_DTA,            /* Stata .dta data */
    GRETL_SAV,            /* SPSS .sav data */
    GRETL_SAS,            /* SAS xport data file */
    GRETL_JMULTI,         /* JMulTi data file */
    GRETL_DATA_MAX,       /* -- place marker -- */
    GRETL_SCRIPT,         /* file containing gretl commands */
    GRETL_SESSION,        /* zipped session file */
    GRETL_NATIVE_DB,      /* gretl database */
    GRETL_NATIVE_DB_WWW,  /* gretl database, accessed via internet */
    GRETL_RATS_DB,        /* RATS 4.0 database */
    GRETL_PCGIVE_DB,      /* PcGive bn7/in7 pair */
    GRETL_ODBC,           /* Open DataBase Connectivity */
    GRETL_UNRECOGNIZED    /* none of the above */
} GretlFileType;


enum DataClearCode

typedef enum {
    CLEAR_FULL,           /* fully clear the dataset */
    CLEAR_SUBSAMPLE       /* dataset is sub-sampled: clear partially */
} DataClearCode;


enum GretlVarnameError

typedef enum {
    VARNAME_RESERVED = 1, /* varname is a gretl reserved name */
    VARNAME_FIRSTCHAR,    /* first character is not alphabetical */
    VARNAME_BADCHAR       /* illegal character in second or subsequent place */
} GretlVarnameError;


SPREADSHEET_IMPORT()

#define             SPREADSHEET_IMPORT(f)


OTHER_IMPORT()

#define             OTHER_IMPORT(f)


free_datainfo()

#define             free_datainfo(p)


DBNA

#define DBNA  -999.0 /* missing value code for gretl databases */


GRETL_SCALAR_DIGITS

#define GRETL_SCALAR_DIGITS 12


dateton ()

int                 dateton                             (const char *date,
                                                         const DATAINFO *pdinfo);

Determines the observation number corresponding to date, relative to pdinfo. It is an error if date represents an observation that lies outside of the full data range specified in pdinfo.

date :

string representation of date for processing.

pdinfo :

pointer to data information struct.

Returns :

zero-based observation number, or -1 on error.

merge_dateton ()

int                 merge_dateton                       (const char *date,
                                                         const DATAINFO *pdinfo);

Works just as dateton(), except that for this function it is not an error if date represents an observation that lies beyond the data range specified in pdinfo. This is intended for use when merging data, or when creating a new dataset.

date :

string representation of date for processing.

pdinfo :

pointer to data information struct.

Returns :

zero-based observation number, or -1 on error.

ntodate ()

char *              ntodate                             (char *datestr,
                                                         int t,
                                                         const DATAINFO *pdinfo);

Prints to datestr (which must be at least OBSLEN bytes) the calendar representation of observation number t.

datestr :

char array to which date is to be printed.

t :

zero-based observation number.

pdinfo :

data information struct.

Returns :

the observation string.

get_subperiod ()

int                 get_subperiod                       (int t,
                                                         const DATAINFO *pdinfo,
                                                         int *err);

For "seasonal" time series data (in a broad sense), determines the sub-period at observation t. The "sub-period" might be a quarter, month, hour or whatever. The value returned is zero-based (e.g. first quarter = 0). If the data are not "seasonal", 0 is returned and if err is non-NULL it receives a non-zero error code.

t :

zero-based observation number.

pdinfo :

data information struct.

err :

location to receive error code, or NULL.

Returns :

the sub-period.

get_info ()

int                 get_info                            (const char *hdrfile,
                                                         PRN *prn);

Prints to prn the informative comments contained in the given data file (if any).

hdrfile :

name of data header file.

prn :

gretl printing struct.

Returns :

0 on successful completion, non-zero on error or if there are no informative comments.

get_precision ()

int                 get_precision                       (const double *x,
                                                         int n,
                                                         int placemax);

Find the number of decimal places required to represent a given data series uniformly.

x :

data vector.

n :

length of x.

placemax :

maximum number of decimal places to try.

Returns :

the required number of decimal places.

get_date_x ()

double              get_date_x                          (int pd,
                                                         const char *obs);

pd :

frequency of data.

obs :

observation string.

Returns :

the floating-point representation of obs.

write_data ()

int                 write_data                          (const char *fname,
                                                         int *list,
                                                         const double **Z,
                                                         const DATAINFO *pdinfo,
                                                         gretlopt opt,
                                                         int progress);

Write out a data file containing the values of the given set of variables.

fname :

name of file to write.

list :

list of variables to write (or NULL to write all series).

Z :

data matrix.

pdinfo :

data information struct.

opt :

option flag indicating format in which to write the data.

progress :

may be 1 when called from gui to display progress bar in case of a large data write; generally should be 0.

Returns :

0 on successful completion, non-zero on error.

is_gzipped ()

int                 is_gzipped                          (const char *fname);

Determine if the given file is gzipped.

fname :

filename to examine.

Returns :

1 in case of a gzipped file, 0 if not gzipped or inaccessible.

gretl_is_pkzip_file ()

int                 gretl_is_pkzip_file                 (const char *fname);

fname :

name of file to examine.

Returns :

1 if fname is readable and is a PKZIP file, else 0.

gz_switch_ext ()

void                gz_switch_ext                       (char *targ,
                                                         char *src,
                                                         char *ext);

Copy src filename to targ, without the existing suffix (if any), and adding the supplied extension or suffix.

targ :

target or "output" filename (must be pre-allocated).

src :

source or "input" filename.

ext :

suffix to add to filename.

merge_or_replace_data ()

int                 merge_or_replace_data               (double ***pZ0,
                                                         DATAINFO *pdinfo0,
                                                         double ***pZ1,
                                                         DATAINFO **ppdinfo1,
                                                         gretlopt opt,
                                                         PRN *prn);

Given a newly-created dataset, pointed to by pZ1 and ppdinfo1, either attempt to merge it with pZ0, if the original dataset is non-NULL, or replace the content of the original pointers with the new dataset. In case merging is not successful, the new dataset is destroyed.

pZ0 :

pointer to original data set.

pdinfo0 :

original dataset information struct.

pZ1 :

new data set.

ppdinfo1 :

pointer to dataset information associated with pZ1.

opt :

may include OPT_T when appending to a panel dataset, to force a time-series interpretation of the added data.

prn :

print struct to accept messages.

Returns :

0 on successful completion, non-zero otherwise.

gretl_get_data ()

int                 gretl_get_data                      (char *fname,
                                                         double ***pZ,
                                                         DATAINFO *pdinfo,
                                                         gretlopt opt,
                                                         PRN *prn);

Read "native" data from file into gretl's work space, allocating space as required. This function handles both the current gretl XML data format and the traditional data format of gretl's precursor, ESL. It also handles incomplete information: it can perform path-searching on fname, and will try adding the .gdt extension to fname if this is not given.

A more straightforward function for reading a current gretl XML data file (.gdt), given the correct path, is gretl_read_gdt().

The only applicable option is that opt may contain OPT_T when appending data to a panel dataset: in that case we try to interpret the new data as time series, in common across all panel units. In most cases, just give OPT_NONE.

fname :

name of file to try.

pZ :

pointer to data set.

pdinfo :

pointer to data information struct.

opt :

option flags.

prn :

where messages should be written.

Returns :

0 on successful completion, non-zero otherwise.

open_nulldata ()

int                 open_nulldata                       (double ***pZ,
                                                         DATAINFO *pdinfo,
                                                         int data_status,
                                                         int length,
                                                         PRN *prn);

Create an empty "dummy" data set, suitable for simulations.

pZ :

pointer to data set.

pdinfo :

data information struct.

data_status :

indicator for whether a data file is currently open in gretl's work space (1) or not (0).

length :

desired length of data series.

prn :

gretl printing struct.

Returns :

0 on successful completion, non-zero otherwise.

import_csv ()

int                 import_csv                          (const char *fname,
                                                         double ***pZ,
                                                         DATAINFO *pdinfo,
                                                         gretlopt opt,
                                                         PRN *prn);


import_spreadsheet ()

int                 import_spreadsheet                  (const char *fname,
                                                         GretlFileType ftype,
                                                         int *list,
                                                         char *sheetname,
                                                         double ***pZ,
                                                         DATAINFO *pdinfo,
                                                         gretlopt opt,
                                                         PRN *prn);

Open a data file of a type that requires a special plugin. Acceptable values for ftype are GRETL_GNUMERIC, GRETL_XLS and GRETL_ODS.

fname :

name of file.

ftype :

type of data file.

list :

list of parameters for spreadsheet import, or NULL.

sheetname :

name of specific worksheet, or NULL.

pZ :

pointer to data set.

pdinfo :

dataset information.

opt :

option flag; see gretl_get_data().

prn :

gretl printing struct.

Returns :

0 on successful completion, non-zero otherwise.

import_other ()

int                 import_other                        (const char *fname,
                                                         GretlFileType ftype,
                                                         double ***pZ,
                                                         DATAINFO *pdinfo,
                                                         gretlopt opt,
                                                         PRN *prn);

Open a data file of a type that requires a special plugin.

fname :

name of file.

ftype :

type of data file.

pZ :

pointer to data set.

pdinfo :

pointer to data information struct.

opt :

option flag; see gretl_get_data().

prn :

gretl printing struct.

Returns :

0 on successful completion, non-zero otherwise.

add_obs_markers_from_file ()

int                 add_obs_markers_from_file           (DATAINFO *pdinfo,
                                                         const char *fname);

Read case markers (strings of OBSLEN - 1 characters or less that identify the observations) from a file, and associate them with the current data set. The file should contain one marker per line, with a number of lines equal to the number of observations in the current data set.

pdinfo :

data information struct.

fname :

name of file containing case markers.

Returns :

0 on successful completion, non-zero otherwise.

add_var_labels_from_file ()

int                 add_var_labels_from_file            (DATAINFO *pdinfo,
                                                         const char *fname);

Read descriptive labels for variables (strings of MAXLABEL - 1 characters or less) from a file, and associate them with the current data set. The file should contain one label per line, with a number of lines equal to the number of variables in the current data set, excluding the constant.

pdinfo :

data information struct.

fname :

name of file containing labels.

Returns :

0 on successful completion, non-zero otherwise.

save_var_labels_to_file ()

int                 save_var_labels_to_file             (const DATAINFO *pdinfo,
                                                         const char *fname);

Writes to fname the descriptive labels for the series in the current dataset.

pdinfo :

data information struct.

fname :

name of file to which the labels are to be written.

Returns :

0 on successful completion, non-zero otherwise.

dataset_has_var_labels ()

int                 dataset_has_var_labels              (const DATAINFO *pdinfo);

pdinfo :

data information struct.

Returns :

1 if at least one variable in the current dataset has a descriptive label, otherwise 0.

read_or_write_var_labels ()

int                 read_or_write_var_labels            (gretlopt opt,
                                                         DATAINFO *pdinfo,
                                                         PRN *prn);


detect_filetype ()

GretlFileType       detect_filetype                     (char *fname,
                                                         gretlopt opt);

Attempt to determine the type of a file to be opened in gretl: data file (of various formats), or command script. If OPT_P is given, the fname argument must be an array of length at least MAXLEN.

fname :

name of file to examine.

opt :

include OPT_P to permit path-searching if fname is not an absolute path; in that case the fname argument may be modified, otherwise it will be left unchanged.

Returns :

integer code indicating the type of file.

data_save_opt_from_suffix ()

gretlopt            data_save_opt_from_suffix           (const char *fname);


check_varname ()

int                 check_varname                       (const char *varname);

Check a variable/object name for legality: the name must start with a letter, and be composed of letters, numbers or the underscore character, and nothing else.

varname :

putative name for variable (or object).

Returns :

0 if name is OK, non-zero if not.

check_atof ()

int                 check_atof                          (const char *numstr);

numstr :

string to check.

Returns :

0 if numstr is blank, or is a valid string representation of a floating point number, else 1.

check_atoi ()

int                 check_atoi                          (const char *numstr);

numstr :

string to check.

Returns :

0 if numstr is blank, or is a valid string representation of an int, else 1.

transpose_data ()

int                 transpose_data                      (double ***pZ,
                                                         DATAINFO *pdinfo);

Attempts to transpose the current dataset, so that each variable becomes interpreted as an observation and each observation as a variable.

pZ :

pointer to data array.

pdinfo :

pointer to dataset information struct.

Returns :

0 on success, non-zero error code on error.

dataset_add_import_info ()

void                dataset_add_import_info             (DATAINFO *pdinfo,
                                                         const char *fname,
                                                         GretlFileType type);

On successful import of data from some "foreign" format, add a note to the "descrip" member of the new dataset saying where it came from and when.

pdinfo :

pointer to dataset information struct.

fname :

the name of a file from which data have been imported.

type :

code representing the type of the file identified by fname.