Data support

Data support — data handling (internal)

Synopsis

#include <gretl/libgretl.h>

enum                GretlFileType;
enum                DataClearCode;
enum                GretlVarnameError;
#define             SPREADSHEET_IMPORT                  (f)
#define             OTHER_IMPORT                        (f)
#define             free_datainfo                       (p)
#define             DBNA
#define             GRETL_SCALAR_DIGITS
int                 dateton                             (const char *date,
                                                         const DATASET *dset);
int                 merge_dateton                       (const char *date,
                                                         const DATASET *dset);
char *              ntodate                             (char *datestr,
                                                         int t,
                                                         const DATASET *dset);
int                 get_subperiod                       (int t,
                                                         const DATASET *dset,
                                                         int *err);
int                 get_info                            (const char *hdrfile,
                                                         PRN *prn);
int                 get_precision                       (const double *x,
                                                         int n,
                                                         int placemax);
double              get_date_x                          (int pd,
                                                         const char *obs);
void                date_maj_min                        (int t,
                                                         const DATASET *dset,
                                                         int *maj,
                                                         int *min);
int                 write_data                          (const char *fname,
                                                         int *list,
                                                         const DATASET *dset,
                                                         gretlopt opt,
                                                         PRN *prn);
int                 gui_write_data                      (const char *fname,
                                                         int *list,
                                                         const DATASET *dset,
                                                         gretlopt opt);
int                 is_gzipped                          (const char *fname);
int                 gretl_is_pkzip_file                 (const char *fname);
int                 merge_or_replace_data               (DATASET *dset0,
                                                         DATASET **pdset1,
                                                         gretlopt opt,
                                                         PRN *prn);
int                 gretl_get_data                      (char *fname,
                                                         DATASET *dset,
                                                         gretlopt opt,
                                                         PRN *prn);
int                 open_nulldata                       (DATASET *dset,
                                                         int data_status,
                                                         int length,
                                                         gretlopt opt,
                                                         PRN *prn);
int                 import_csv                          (const char *fname,
                                                         DATASET *dset,
                                                         gretlopt opt,
                                                         PRN *prn);
int                 import_spreadsheet                  (const char *fname,
                                                         GretlFileType ftype,
                                                         int *list,
                                                         char *sheetname,
                                                         DATASET *dset,
                                                         gretlopt opt,
                                                         PRN *prn);
int                 import_other                        (const char *fname,
                                                         GretlFileType ftype,
                                                         DATASET *dset,
                                                         gretlopt opt,
                                                         PRN *prn);
int                 add_obs_markers_from_file           (DATASET *dset,
                                                         const char *fname);
int                 add_var_labels_from_file            (DATASET *dset,
                                                         const char *fname);
int                 save_var_labels_to_file             (const DATASET *dset,
                                                         const char *fname);
int                 dataset_has_var_labels              (const DATASET *dset);
int                 read_or_write_var_labels            (gretlopt opt,
                                                         DATASET *dset,
                                                         PRN *prn);
int                 read_or_write_obs_markers           (gretlopt opt,
                                                         DATASET *dset,
                                                         PRN *prn);
GretlFileType       detect_filetype                     (char *fname,
                                                         gretlopt opt);
gretlopt            data_save_opt_from_suffix           (const char *fname);
int                 check_varname                       (const char *varname);
int                 check_atof                          (const char *numstr);
int                 check_atoi                          (const char *numstr);
int                 transpose_data                      (DATASET *dset);
void                dataset_add_import_info             (DATASET *dset,
                                                         const char *fname,
                                                         GretlFileType type);

Description

The following data handling functions are basically internal to gretl and not in a state where they can be readily documented as public APIs.

Details

enum GretlFileType

typedef enum {
    GRETL_NATIVE_DATA,    /* old-style gretl format data file */
    GRETL_XML_DATA,       /* gretl XML data file (.gdt) */
    GRETL_CSV,            /* comma-separated or other plain text data */
    GRETL_OCTAVE,         /* GNU octave ascii data file */
    GRETL_GNUMERIC,       /* gnumeric workbook data */
    GRETL_XLS,            /* MS Excel spreadsheet data */
    GRETL_XLSX,           /* MS Office Open XML spreadsheet data */
    GRETL_ODS,            /* Open Document Spreadsheet data */
    GRETL_WF1,            /* Eviews workfile data */
    GRETL_DTA,            /* Stata .dta data */
    GRETL_SAV,            /* SPSS .sav data */
    GRETL_SAS,            /* SAS xport data file */
    GRETL_JMULTI,         /* JMulTi data file */
    GRETL_DATA_MAX,       /* -- place marker -- */
    GRETL_SCRIPT,         /* file containing gretl commands */
    GRETL_SESSION,        /* zipped session file */
    GRETL_NATIVE_DB,      /* gretl database */
    GRETL_NATIVE_DB_WWW,  /* gretl database, accessed via internet */
    GRETL_RATS_DB,        /* RATS 4.0 database */
    GRETL_PCGIVE_DB,      /* PcGive bn7/in7 pair */
    GRETL_ODBC,           /* Open DataBase Connectivity */
    GRETL_UNRECOGNIZED    /* none of the above */
} GretlFileType;

GRETL_NATIVE_DATA

GRETL_XML_DATA

GRETL_CSV

GRETL_OCTAVE

GRETL_GNUMERIC

GRETL_XLS

GRETL_XLSX

GRETL_ODS

GRETL_WF1

GRETL_DTA

GRETL_SAV

GRETL_SAS

GRETL_JMULTI

GRETL_DATA_MAX

GRETL_SCRIPT

GRETL_SESSION

GRETL_NATIVE_DB

GRETL_NATIVE_DB_WWW

GRETL_RATS_DB

GRETL_PCGIVE_DB

GRETL_ODBC

GRETL_UNRECOGNIZED


enum DataClearCode

typedef enum {
    CLEAR_FULL,           /* fully clear the dataset */
    CLEAR_SUBSAMPLE       /* dataset is sub-sampled: clear partially */
} DataClearCode;

CLEAR_FULL

CLEAR_SUBSAMPLE


enum GretlVarnameError

typedef enum {
    VARNAME_RESERVED = 1, /* varname is a gretl reserved name */
    VARNAME_FIRSTCHAR,    /* first character is not alphabetical */
    VARNAME_BADCHAR       /* illegal character in second or subsequent place */
} GretlVarnameError;

VARNAME_RESERVED

VARNAME_FIRSTCHAR

VARNAME_BADCHAR


SPREADSHEET_IMPORT()

#define             SPREADSHEET_IMPORT(f)


OTHER_IMPORT()

#define             OTHER_IMPORT(f)


free_datainfo()

#define             free_datainfo(p)


DBNA

#define DBNA  -999.0 /* missing value code for gretl databases */


GRETL_SCALAR_DIGITS

#define GRETL_SCALAR_DIGITS 12


dateton ()

int                 dateton                             (const char *date,
                                                         const DATASET *dset);

Determines the observation number corresponding to date, relative to dset. It is an error if date represents an observation that lies outside of the full data range specified in dset.

date :

string representation of date for processing.

dset :

pointer to data information struct.

Returns :

zero-based observation number, or -1 on error.

merge_dateton ()

int                 merge_dateton                       (const char *date,
                                                         const DATASET *dset);

Works just as dateton(), except that for this function it is not an error if date represents an observation that lies beyond the data range specified in dset. This is intended for use when merging data, or when creating a new dataset.

date :

string representation of date for processing.

dset :

pointer to data information struct.

Returns :

zero-based observation number, or -1 on error.

ntodate ()

char *              ntodate                             (char *datestr,
                                                         int t,
                                                         const DATASET *dset);

Prints to datestr (which must be at least OBSLEN bytes) the calendar representation of observation number t.

datestr :

char array to which date is to be printed.

t :

zero-based observation number.

dset :

data information struct.

Returns :

the observation string.

get_subperiod ()

int                 get_subperiod                       (int t,
                                                         const DATASET *dset,
                                                         int *err);

For "seasonal" time series data (in a broad sense), determines the sub-period at observation t. The "sub-period" might be a quarter, month, hour or whatever. The value returned is zero-based (e.g. first quarter = 0). If the data are not "seasonal", 0 is returned and if err is non-NULL it receives a non-zero error code.

t :

zero-based observation number.

dset :

data information struct.

err :

location to receive error code, or NULL.

Returns :

the sub-period.

get_info ()

int                 get_info                            (const char *hdrfile,
                                                         PRN *prn);

Prints to prn the informative comments contained in the given data file (if any).

hdrfile :

name of data header file.

prn :

gretl printing struct.

Returns :

0 on successful completion, non-zero on error or if there are no informative comments.

get_precision ()

int                 get_precision                       (const double *x,
                                                         int n,
                                                         int placemax);

Find the number of decimal places required to represent a given data series uniformly and accurately, if possible.

x :

data vector.

n :

length of x.

placemax :

the maximum number of decimal places to try.

Returns :

the required number of decimal places or PMAX_NOT_AVAILABLE if it can't be done.

get_date_x ()

double              get_date_x                          (int pd,
                                                         const char *obs);

pd :

frequency of data.

obs :

observation string.

Returns :

the floating-point representation of obs.

date_maj_min ()

void                date_maj_min                        (int t,
                                                         const DATASET *dset,
                                                         int *maj,
                                                         int *min);


write_data ()

int                 write_data                          (const char *fname,
                                                         int *list,
                                                         const DATASET *dset,
                                                         gretlopt opt,
                                                         PRN *prn);

Write out a data file containing the values of the given set of variables.

fname :

name of file to write.

list :

list of variables to write (or NULL to write all series).

dset :

dataset struct.

opt :

option flag indicating format in which to write the data.

prn :

gretl printer or NULL.

Returns :

0 on successful completion, non-zero on error.

gui_write_data ()

int                 gui_write_data                      (const char *fname,
                                                         int *list,
                                                         const DATASET *dset,
                                                         gretlopt opt);


is_gzipped ()

int                 is_gzipped                          (const char *fname);

Determine if the given file is gzipped.

fname :

filename to examine.

Returns :

1 in case of a gzipped file, 0 if not gzipped or inaccessible.

gretl_is_pkzip_file ()

int                 gretl_is_pkzip_file                 (const char *fname);

fname :

name of file to examine.

Returns :

1 if fname is readable and is a PKZIP file, else 0.

merge_or_replace_data ()

int                 merge_or_replace_data               (DATASET *dset0,
                                                         DATASET **pdset1,
                                                         gretlopt opt,
                                                         PRN *prn);

Given a newly-created dataset, pointed to by pdset1, either attempt to merge it with dset0, if the original data array is non-NULL, or replace the content of the original pointer with the new dataset.

In case merging is not successful, the new dataset is destroyed.

dset0 :

original dataset struct.

pdset1 :

new dataset struct.

opt :

may include OPT_T when appending to a panel dataset, to force a time-series interpretation of the added data.

prn :

print struct to accept messages.

Returns :

0 on successful completion, non-zero otherwise.

gretl_get_data ()

int                 gretl_get_data                      (char *fname,
                                                         DATASET *dset,
                                                         gretlopt opt,
                                                         PRN *prn);

Read "native" data from file into gretl's work space, allocating space as required. This function handles both the current gretl XML data format and the traditional data format of gretl's precursor, ESL. It also handles incomplete information: it can perform path-searching on fname, and will try adding the .gdt extension to fname if this is not given.

Note that a more straightforward function for reading a current gretl XML data file (.gdt), given the correct path, is gretl_read_gdt().

The only applicable option is that opt may contain OPT_T when appending data to a panel dataset: in that case we try to interpret the new data as time series, in common across all panel units. In most cases, just give OPT_NONE.

fname :

name of file to try.

dset :

dataset struct.

opt :

option flags.

prn :

where messages should be written.

Returns :

0 on successful completion, non-zero otherwise.

open_nulldata ()

int                 open_nulldata                       (DATASET *dset,
                                                         int data_status,
                                                         int length,
                                                         gretlopt opt,
                                                         PRN *prn);

Create an empty "dummy" data set, suitable for simulations.

dset :

dataset struct.

data_status :

indicator for whether a data file is currently open in gretl's work space (1) or not (0).

length :

desired length of data series.

opt :

may contain OPT_N to suppress addition of an index series.

prn :

gretl printing struct.

Returns :

0 on successful completion, non-zero otherwise.

import_csv ()

int                 import_csv                          (const char *fname,
                                                         DATASET *dset,
                                                         gretlopt opt,
                                                         PRN *prn);


import_spreadsheet ()

int                 import_spreadsheet                  (const char *fname,
                                                         GretlFileType ftype,
                                                         int *list,
                                                         char *sheetname,
                                                         DATASET *dset,
                                                         gretlopt opt,
                                                         PRN *prn);

Open a data file of a type that requires a special plugin. Acceptable values for ftype are GRETL_GNUMERIC, GRETL_XLS, GRETL_XLSX and GRETL_ODS.

fname :

name of file.

ftype :

type of data file.

list :

list of parameters for spreadsheet import, or NULL.

sheetname :

name of specific worksheet, or NULL.

dset :

dataset struct.

opt :

option flag; see gretl_get_data().

prn :

gretl printing struct.

Returns :

0 on successful completion, non-zero otherwise.

import_other ()

int                 import_other                        (const char *fname,
                                                         GretlFileType ftype,
                                                         DATASET *dset,
                                                         gretlopt opt,
                                                         PRN *prn);

Open a data file of a type that requires a special plugin.

fname :

name of file.

ftype :

type of data file.

dset :

pointer to dataset struct.

opt :

option flag; see gretl_get_data().

prn :

gretl printing struct.

Returns :

0 on successful completion, non-zero otherwise.

add_obs_markers_from_file ()

int                 add_obs_markers_from_file           (DATASET *dset,
                                                         const char *fname);

Read case markers (strings of OBSLEN - 1 characters or less that identify the observations) from a file, and associate them with the current data set. The file should contain one marker per line, with a number of lines equal to the number of observations in the current data set.

dset :

data information struct.

fname :

name of file containing case markers.

Returns :

0 on successful completion, non-zero otherwise.

add_var_labels_from_file ()

int                 add_var_labels_from_file            (DATASET *dset,
                                                         const char *fname);

Read descriptive labels for variables (strings of MAXLABEL - 1 characters or less) from a file, and associate them with the current data set. The file should contain one label per line, with a number of lines equal to the number of variables in the current data set, excluding the constant.

dset :

data information struct.

fname :

name of file containing labels.

Returns :

0 on successful completion, non-zero otherwise.

save_var_labels_to_file ()

int                 save_var_labels_to_file             (const DATASET *dset,
                                                         const char *fname);

Writes to fname the descriptive labels for the series in the current dataset.

dset :

data information struct.

fname :

name of file to which the labels are written.

Returns :

0 on successful completion, non-zero otherwise.

dataset_has_var_labels ()

int                 dataset_has_var_labels              (const DATASET *dset);

dset :

data information struct.

Returns :

1 if at least one variable in the current dataset has a descriptive label, otherwise 0.

read_or_write_var_labels ()

int                 read_or_write_var_labels            (gretlopt opt,
                                                         DATASET *dset,
                                                         PRN *prn);


read_or_write_obs_markers ()

int                 read_or_write_obs_markers           (gretlopt opt,
                                                         DATASET *dset,
                                                         PRN *prn);


detect_filetype ()

GretlFileType       detect_filetype                     (char *fname,
                                                         gretlopt opt);

Attempt to determine the type of a file to be opened in gretl: data file (of various formats), or command script. If OPT_P is given, the fname argument must be an array of length at least MAXLEN.

fname :

name of file to examine.

opt :

include OPT_P to permit path-searching if fname is not an absolute path; in that case the fname argument may be modified, otherwise it will be left unchanged.

Returns :

integer code indicating the type of file.

data_save_opt_from_suffix ()

gretlopt            data_save_opt_from_suffix           (const char *fname);


check_varname ()

int                 check_varname                       (const char *varname);

Check a variable/object name for legality: the name must start with a letter, and be composed of letters, numbers or the underscore character, and nothing else.

varname :

putative name for variable (or object).

Returns :

0 if name is OK, non-zero if not.

check_atof ()

int                 check_atof                          (const char *numstr);

numstr :

string to check.

Returns :

0 if numstr is blank, or is a valid string representation of a floating point number, else 1.

check_atoi ()

int                 check_atoi                          (const char *numstr);

numstr :

string to check.

Returns :

0 if numstr is blank, or is a valid string representation of an int, else 1.

transpose_data ()

int                 transpose_data                      (DATASET *dset);

Attempts to transpose the current dataset, so that each variable becomes interpreted as an observation and each observation as a variable.

dset :

pointer to dataset information struct.

Returns :

0 on success, non-zero error code on error.

dataset_add_import_info ()

void                dataset_add_import_info             (DATASET *dset,
                                                         const char *fname,
                                                         GretlFileType type);

On successful import of data from some "foreign" format, add a note to the "descrip" member of the new dataset saying where it came from and when.

dset :

pointer to dataset information struct.

fname :

the name of a file from which data have been imported.

type :

code representing the type of the file identified by fname.