dataset

dataset —

Synopsis




enum        DatasetMarkerType;
enum        VarinfoFlags;
#define     dataset_is_time_series          (p)
#define     dataset_is_seasonal             (p)
#define     custom_time_series              (p)
#define     dataset_is_daily                (p)
#define     dataset_is_weekly               (p)
#define     dataset_is_hourly               (p)
#define     dataset_is_decennial            (p)
#define     dated_daily_data                (p)
#define     dated_seven_day_data            (p)
#define     dated_weekly_data               (p)
#define     calendar_data                   (p)
#define     quarterly_or_monthly            (p)
#define     dataset_is_panel                (p)
#define     var_is_discrete                 (p, i)
#define     var_is_scalar                   (p, i)
#define     var_is_series                   (p, i)
#define     var_is_hidden                   (p, i)
#define     var_is_generated                (p, i)
#define     var_is_const                    (p, i)
#define     set_var_const                   (p, i)
#define     unset_var_const                 (p, i)
void        free_Z                          (double **Z,
                                             DATAINFO *pdinfo);
DATAINFO*   datainfo_new                    (void);
DATAINFO*   create_new_dataset              (double ***pZ,
                                             int nvar,
                                             int nobs,
                                             int markers);
void        destroy_dataset                 (double **Z,
                                             DATAINFO *pdinfo);
void        clear_datainfo                  (DATAINFO *pdinfo,
                                             int code);
int         allocate_Z                      (double ***pZ,
                                             const DATAINFO *pdinfo);
int         dataset_allocate_varnames       (DATAINFO *pdinfo);
int         dataset_allocate_obs_markers    (DATAINFO *pdinfo);
void        dataset_destroy_obs_markers     (DATAINFO *pdinfo);
int         dataset_allocate_panel_info     (DATAINFO *pdinfo);
void        dataset_destroy_panel_info      (DATAINFO *pdinfo);
int         dataset_add_default_panel_indices
                                            (DATAINFO *pdinfo);
int         dataset_finalize_panel_indices  (DATAINFO *pdinfo);
void        dataset_obs_info_default        (DATAINFO *pdinfo);
void        copy_dataset_obs_info           (DATAINFO *targ,
                                             const DATAINFO *src);
void        copy_varinfo                    (VARINFO *targ,
                                             const VARINFO *src);
void        set_sorted_markers              (DATAINFO *pdinfo,
                                             int v,
                                             char **S);
void        dataset_set_regular_markers     (DATAINFO *pdinfo);
int         start_new_Z                     (double ***pZ,
                                             DATAINFO *pdinfo,
                                             int resample);
int         is_trend_variable               (const double *x,
                                             int n);
int         is_periodic_dummy               (const double *x,
                                             const DATAINFO *pdinfo);
int         dataset_add_observations        (int newobs,
                                             double ***pZ,
                                             DATAINFO *pdinfo,
                                             gretlopt opt);
int         dataset_drop_observations       (int n,
                                             double ***pZ,
                                             DATAINFO *pdinfo);
int         dataset_shrink_obs_range        (double ***pZ,
                                             DATAINFO *pdinfo);
int         dataset_add_series              (int newvars,
                                             double ***pZ,
                                             DATAINFO *pdinfo);
int         dataset_add_allocated_series    (double *x,
                                             double ***pZ,
                                             DATAINFO *pdinfo);
int         dataset_add_scalars             (int n,
                                             double ***pZ,
                                             DATAINFO *pdinfo);
int         dataset_add_scalar              (double ***pZ,
                                             DATAINFO *pdinfo);
int         dataset_add_scalar_as           (double x,
                                             const char *newname,
                                             double ***pZ,
                                             DATAINFO *pdinfo);
int         dataset_add_series_as           (double *x,
                                             const char *newname,
                                             double ***pZ,
                                             DATAINFO *pdinfo);
int         dataset_copy_variable_as        (int v,
                                             const char *newname,
                                             double ***pZ,
                                             DATAINFO *pdinfo);
int         overwrite_err                   (const DATAINFO *pdinfo,
                                             int v);
int         dataset_drop_listed_variables   (int *list,
                                             double ***pZ,
                                             DATAINFO *pdinfo,
                                             int *renumber);
int         dataset_drop_variable           (int v,
                                             double ***pZ,
                                             DATAINFO *pdinfo);
int         dataset_destroy_hidden_variables
                                            (double ***pZ,
                                             DATAINFO *pdinfo,
                                             int vmin);
int         dataset_drop_last_variables     (int delvars,
                                             double ***pZ,
                                             DATAINFO *pdinfo);
int         dataset_stack_variables         (const char *vname,
                                             const char *line,
                                             double ***pZ,
                                             DATAINFO *pdinfo,
                                             PRN *prn);
int         is_log_variable                 (int i,
                                             const DATAINFO *pdinfo,
                                             char *parent);
void        set_var_discrete                (DATAINFO *pdinfo,
                                             int i,
                                             int s);
void        set_var_scalar                  (DATAINFO *pdinfo,
                                             int i,
                                             int s);
void        set_var_hidden                  (DATAINFO *pdinfo,
                                             int i);
void        var_set_linewidth               (DATAINFO *pdinfo,
                                             int i,
                                             int w);
int         var_get_linewidth               (const DATAINFO *pdinfo,
                                             int i);

Description

Details

enum DatasetMarkerType

typedef enum {
    NO_MARKERS = 0,
    REGULAR_MARKERS,
    DAILY_DATE_STRINGS
} DatasetMarkerType;


enum VarinfoFlags

typedef enum {
    VAR_DISCRETE   = 1 << 0,
    VAR_SCALAR     = 1 << 1,
    VAR_HIDDEN     = 1 << 2,
    VAR_GENERATED  = 1 << 3,
    VAR_SETCONST   = 1 << 4
} VarinfoFlags;


dataset_is_time_series()

#define     dataset_is_time_series(p)

Attempt to determine whether a data set contains time series data (1) or not (0).

p : pointer to data information struct.

dataset_is_seasonal()

#define     dataset_is_seasonal(p)

Attempt to determine whether a data set contains seasonal time series data (1) or not (0).

p : pointer to data information struct.

custom_time_series()

#define custom_time_series(p) ((p)->structure == SPECIAL_TIME_SERIES)

Attempt to determine whether a data set contains time series data with custom (non-standard) frequency (1) or not (0).

p : pointer to data information struct.

dataset_is_daily()

#define     dataset_is_daily(p)

Attempt to determine whether a data set contains daily time series data (1) or not (0).

p : pointer to data information struct.

dataset_is_weekly()

#define     dataset_is_weekly(p)

Attempt to determine whether a data set contains weekly time series data (1) or not (0).

p : pointer to data information struct.

dataset_is_hourly()

#define     dataset_is_hourly(p)

Attempt to determine whether a data set contains hourly time series data (1) or not (0).

p : pointer to data information struct.

dataset_is_decennial()

#define     dataset_is_decennial(p)

Attempt to determine whether a data set contains decennial time series data (1) or not (0).

p : pointer to data information struct.

dated_daily_data()

#define     dated_daily_data(p)

Attempt to determine whether a data set contains dated daily time series data (1) or not (0).

p : pointer to data information struct.

dated_seven_day_data()

#define     dated_seven_day_data(p)

Attempt to determine whether a data set contains dated daily (seven-day) time series data (1) or not (0).

p : pointer to data information struct.

dated_weekly_data()

#define     dated_weekly_data(p)

Attempt to determine whether a data set contains dated weekly time series data (1) or not (0).

p : pointer to data information struct.

calendar_data()

#define     calendar_data(p)

Attempt to determine whether a data set uses calendar dates for observation strings (1) or not (0).

p : pointer to data information struct.

quarterly_or_monthly()

#define     quarterly_or_monthly(p)

Attempt to determine whether a data set is a quarterly or monthly time series (1), or something else (0).

p : pointer to data information struct.

dataset_is_panel()

#define dataset_is_panel(p) ((p)->structure == STACKED_TIME_SERIES)

Attempt to determine whether a data set contains panel data (1) or not (0).

p : pointer to data information struct.

var_is_discrete()

#define var_is_discrete(p, i) ((p)->varinfo[i]->flags & VAR_DISCRETE)

Determine whether a variable should be treated as discrete or not.

p : pointer to data information struct.
i : index number of variable.

var_is_scalar()

#define var_is_scalar(p, i) ((p)->varinfo[i]->flags & VAR_SCALAR)

Determine whether or not a variable is a scalar.

p : pointer to data information struct.
i : index number of variable.

var_is_series()

#define var_is_series(p, i) (!((p)->varinfo[i]->flags & VAR_SCALAR))

Determine whether or not a variable is a series (as opposed to a scalar).

p : pointer to data information struct.
i : index number of variable.

var_is_hidden()

#define var_is_hidden(p, i) ((p)->varinfo[i]->flags & VAR_HIDDEN)

Determine whether or not a variable is hidden.

p : pointer to data information struct.
i : index number of variable.

var_is_generated()

#define var_is_generated(p, i) ((p)->varinfo[i]->flags & VAR_GENERATED)

Determine whether or not a variable was generated using a formula or transformation function.

p : pointer to data information struct.
i : index number of variable.

var_is_const()

#define var_is_const(p, i) (i == 0 || ((p)->varinfo[i]->flags & VAR_SETCONST))

Determine whether or not a variable has been marked as "const".

p : pointer to data information struct.
i : index number of variable.

set_var_const()

#define set_var_const(p, i) ((p)->varinfo[i]->flags |= VAR_SETCONST)

Set the "const" flag on the given variable.

p : pointer to data information struct.
i : index number of variable.

unset_var_const()

#define unset_var_const(p, i) ((p)->varinfo[i]->flags &= ~VAR_SETCONST)

Remove the "const" flag from the given variable.

p : pointer to data information struct.
i : index number of variable.

free_Z ()

void        free_Z                          (double **Z,
                                             DATAINFO *pdinfo);

Does a deep free on the data matrix.

Z : data matrix.
pdinfo : data information struct.

datainfo_new ()

DATAINFO*   datainfo_new                    (void);

Creates a new data information struct pointer from scratch, properly initialized as empty.

Returns : pointer to data information struct, or NULL on error.

create_new_dataset ()

DATAINFO*   create_new_dataset              (double ***pZ,
                                             int nvar,
                                             int nobs,
                                             int markers);

Creates a new data information struct corresponding to a given data matrix.

pZ : pointer to data matrix.
nvar : number of variables.
nobs : number of observations per variable.
markers : 1 if there are case markers for the observations, 0 otherwise.
Returns : pointer to data information struct, or NULL on error.

destroy_dataset ()

void        destroy_dataset                 (double **Z,
                                             DATAINFO *pdinfo);

Frees all resources associated with Z and pdinfo.

Z : data array.
pdinfo : dataset information struct.

clear_datainfo ()

void        clear_datainfo                  (DATAINFO *pdinfo,
                                             int code);

Frees the allocated content of a data information struct.

pdinfo : data information struct.
code : either CLEAR_FULL or CLEAR_SUBSAMPLE.

allocate_Z ()

int         allocate_Z                      (double ***pZ,
                                             const DATAINFO *pdinfo);

Allocates the two-dimensional array to which pZ points, based on the v (number of variables) and n (number of observations) members of pdinfo. The variable at position 0 is initialized to all 1s; other variables are initialized to NADBL.

pZ : pointer to data array.
pdinfo : dataset information struct.
Returns : 0 on success, E_ALLOC on error.

dataset_allocate_varnames ()

int         dataset_allocate_varnames       (DATAINFO *pdinfo);

Given a blank pdinfo, which should have been obtained using datainfo_new(), allocate space for the names of variables. The v member of pdinfo (number of variables) must be set before calling this function.

pdinfo : dataset information struct.
Returns : 0 on success, E_ALLOC on failure.

dataset_allocate_obs_markers ()

int         dataset_allocate_obs_markers    (DATAINFO *pdinfo);

Allocates space in pdinfo for strings identifying the observations and initializes all of the markers to empty strings. Note that these strings have a fixed maximum length of OBSLEN - 1.

pdinfo : dataset information struct.
Returns : 0 on success, E_ALLOC on error.

dataset_destroy_obs_markers ()

void        dataset_destroy_obs_markers     (DATAINFO *pdinfo);

Frees any allocated observation markers for pdinfo.

pdinfo : data information struct.

dataset_allocate_panel_info ()

int         dataset_allocate_panel_info     (DATAINFO *pdinfo);

Allocates space in pdinfo for two indices representing the unit or group and time-period, respectively, of each observation in a panel data set.

pdinfo : dataset information struct.
Returns : 0 on success, E_ALLOC on error.

dataset_destroy_panel_info ()

void        dataset_destroy_panel_info      (DATAINFO *pdinfo);

pdinfo :

dataset_add_default_panel_indices ()

int         dataset_add_default_panel_indices
                                            (DATAINFO *pdinfo);

Adds a pair of indices for panel unit and panel period. The default is that both are zero-based and increase consecutively, per unit and per period respectively. This function assumes a balanced panel.

pdinfo : dataset information struct.
Returns : 0 on success, non-zero code on error.

dataset_finalize_panel_indices ()

int         dataset_finalize_panel_indices  (DATAINFO *pdinfo);

Having already added a pair of indices for panel unit and panel period, check these for consistency and calculate the number of panel units and the minimum and maximum observations per unit. If it turns out there's only one unit, or only one period, in the dataset, then it's not really a panel: we destroy the panel info and return E_PDWRONG.

pdinfo : dataset information struct.
Returns : 0 on success, non-zero code on error.

dataset_obs_info_default ()

void        dataset_obs_info_default        (DATAINFO *pdinfo);

Sets the "date" or observations information in pdinfo to a simple default of cross-sectional data, observations 1 to n, where n is the n element (number of observations) in pdinfo.

pdinfo : dataset information struct.

copy_dataset_obs_info ()

void        copy_dataset_obs_info           (DATAINFO *targ,
                                             const DATAINFO *src);

Sets the "date" or observations information in targ to that found in src.

targ : target dataset information struct.
src : source dataset information struct.

copy_varinfo ()

void        copy_varinfo                    (VARINFO *targ,
                                             const VARINFO *src);

Copies all relevant information from src to targ.

targ : target to which to copy.
src : source to copy from.

set_sorted_markers ()

void        set_sorted_markers              (DATAINFO *pdinfo,
                                             int v,
                                             char **S);

pdinfo :
v :
S :

dataset_set_regular_markers ()

void        dataset_set_regular_markers     (DATAINFO *pdinfo);

pdinfo :

start_new_Z ()

int         start_new_Z                     (double ***pZ,
                                             DATAINFO *pdinfo,
                                             int resample);

Initializes the data matrix pointed to by pZ (adding the constant in position 0) and the data information struct pdinfo.

pZ : pointer to data matrix.
pdinfo : data information struct.
resample : 1 if we're sub-sampling from a full data set, 0 otherwise.
Returns : 0 on successful completion, non-zero on error.

is_trend_variable ()

int         is_trend_variable               (const double *x,
                                             int n);

x : array to examine.
n : number of elements in array.
Returns : 1 if x is a simple linear trend variable, with each observation equal to the preceding observation plus 1, or if x is a quadratic trend starting at 1 for the first observation in the data set, and 0 otherwise.

is_periodic_dummy ()

int         is_periodic_dummy               (const double *x,
                                             const DATAINFO *pdinfo);

x : array to examine.
pdinfo : pointer to dataset information struct.
Returns : 1 if x is a periodic dummy variable, 0 otherwise.

dataset_add_observations ()

int         dataset_add_observations        (int newobs,
                                             double ***pZ,
                                             DATAINFO *pdinfo,
                                             gretlopt opt);

Extends all series in the dataset by the specified number of extra observations. The added values are initialized to the missing value code, NADBL, with the exception of simple deterministic variables when OPT_A is given.

newobs : number of observations to add.
pZ : pointer to data array.
pdinfo : dataset information.
opt : use OPT_A to attempt to recognize and automatically extend simple deterministic variables such as a time trend and periodic dummy variables; use OPT_D to drop any observation markers rather than expanding the set of markers and padding it out with dummy values.
Returns : 0 on success, non-zero code on error.

dataset_drop_observations ()

int         dataset_drop_observations       (int n,
                                             double ***pZ,
                                             DATAINFO *pdinfo);

Deletes n observations from the end of each series in the dataset.

n : number of observations to drop.
pZ : pointer to data array.
pdinfo : dataset information.
Returns : 0 on success, non-zero code on error.

dataset_shrink_obs_range ()

int         dataset_shrink_obs_range        (double ***pZ,
                                             DATAINFO *pdinfo);

Truncates the range of observations in the dataset, based on the current values of the t1 and t2 members of pdinfo.

pZ : pointer to data array.
pdinfo : dataset information.
Returns : 0 on success, non-zero code on error.

dataset_add_series ()

int         dataset_add_series              (int newvars,
                                             double ***pZ,
                                             DATAINFO *pdinfo);

Adds space for the specified number of additional series to the dataset. It is the caller's responsibility to initialize the numerical values of the new series.

newvars : number of series to add.
pZ : pointer to data array.
pdinfo : dataset information.
Returns : 0 on success, E_ALLOC on error.

dataset_add_allocated_series ()

int         dataset_add_allocated_series    (double *x,
                                             double ***pZ,
                                             DATAINFO *pdinfo);

Adds x as an additional series in the dataset. The array x is not copied; it should be treated as belonging to pZ after this operation.

x : one-dimensional data array.
pZ : pointer to data array.
pdinfo : dataset information.
Returns : 0 on success, E_ALLOC on error.

dataset_add_scalars ()

int         dataset_add_scalars             (int n,
                                             double ***pZ,
                                             DATAINFO *pdinfo);

Allocates space for n new scalar members of the dataset. The added variables are initialized to zero.

n : number of scalars to add.
pZ : pointer to data array.
pdinfo : dataset information.
Returns : 0 on success, E_ALLOC on error.

dataset_add_scalar ()

int         dataset_add_scalar              (double ***pZ,
                                             DATAINFO *pdinfo);

Allocates space for a new scalar member of the dataset. The added variable is initialized to zero.

pZ : pointer to data array.
pdinfo : dataset information.
Returns : 0 on success, E_ALLOC on error.

dataset_add_scalar_as ()

int         dataset_add_scalar_as           (double x,
                                             const char *newname,
                                             double ***pZ,
                                             DATAINFO *pdinfo);

Adds to the dataset a new scalar with name newname and value given by x. The new variable is added at one level "deeper" (in terms of function execution) than the current level. This is for use with user-defined functions.

x : scalar value.
newname : name to give the new variable.
pZ : pointer to data array.
pdinfo : dataset information.
Returns : 0 on success, E_ALLOC on error.

dataset_add_series_as ()

int         dataset_add_series_as           (double *x,
                                             const char *newname,
                                             double ***pZ,
                                             DATAINFO *pdinfo);

Adds to the dataset a new series with name newname and values given by x. The new variable is added at one level "deeper" (in terms of function execution) than the current level. This is for use with user-defined functions.

x : array to be added.
newname : name to give the new variable.
pZ : pointer to data array.
pdinfo : dataset information.
Returns : 0 on success, E_ALLOC on error.

dataset_copy_variable_as ()

int         dataset_copy_variable_as        (int v,
                                             const char *newname,
                                             double ***pZ,
                                             DATAINFO *pdinfo);

Makes a copy of variable v under the name newname. The copy exists in a variable namespace one level "deeper" (in terms of function execution) than the variable being copied. This is for use with user-defined functions: a variable supplied to a function as an argument is copied into the function's namespace under the name it was given as a parameter.

v : index number of variable to copy.
newname : name to give the copy.
pZ : pointer to data array.
pdinfo : dataset information.
Returns : 0 on success, E_ALLOC on error.

overwrite_err ()

int         overwrite_err                   (const DATAINFO *pdinfo,
                                             int v);

pdinfo :
v :
Returns :

dataset_drop_listed_variables ()

int         dataset_drop_listed_variables   (int *list,
                                             double ***pZ,
                                             DATAINFO *pdinfo,
                                             int *renumber);

Deletes the variables given in list from the dataset. Remaining variables may have their ID numbers changed as a consequence. If renumber is not NULL, this location receives 1 in case variables have been renumbered, 0 otherwise.

list : list of variables to drop, by ID number.
pZ : pointer to data array.
pdinfo : dataset information.
renumber : location for return of information on whether remaining variables have been renumbered as a result, or NULL.
Returns : 0 on success, E_ALLOC on error.

dataset_drop_variable ()

int         dataset_drop_variable           (int v,
                                             double ***pZ,
                                             DATAINFO *pdinfo);

Deletes variable v from the dataset.

v : ID number of variable to drop.
pZ : pointer to data array.
pdinfo : dataset information.
Returns : 0 on success, E_ALLOC on error.

dataset_destroy_hidden_variables ()

int         dataset_destroy_hidden_variables
                                            (double ***pZ,
                                             DATAINFO *pdinfo,
                                             int vmin);

Deletes from the dataset any "hidden" variables that have been added automatically (for example, auto-generated variables used for the x-axis in graph plotting), and that have ID numbers greater than or equal to vmin. Never deletes the automatically generated constant (ID number 0).

pZ : pointer to data array.
pdinfo : dataset information.
vmin : do not drop variables with ID numbers less than this.
Returns : 0 on success, E_ALLOC on error.

dataset_drop_last_variables ()

int         dataset_drop_last_variables     (int delvars,
                                             double ***pZ,
                                             DATAINFO *pdinfo);

Deletes from the dataset the delvars variables that were added most recently (those that have the highest ID numbers).

delvars : number of variables to be dropped.
pZ : pointer to data array.
pdinfo : dataset information.
Returns : 0 on success, E_ALLOC on error.

dataset_stack_variables ()

int         dataset_stack_variables         (const char *vname,
                                             const char *line,
                                             double ***pZ,
                                             DATAINFO *pdinfo,
                                             PRN *prn);

Really for internal use. Don't worry about it.

vname : name for new variable, to be produced by stacking.
line : instructions for stacking existing variables.
pZ : pointer to data array.
pdinfo : dataset information.
prn : printing apparatus.
Returns : 0 on success, non-zero code on error.

is_log_variable ()

int         is_log_variable                 (int i,
                                             const DATAINFO *pdinfo,
                                             char *parent);

Tries to determine if the variable with ID number i is the logarithm of some other variable.

i : ID number of variable.
pdinfo : dataset information.
parent : location to which to write the name of the "parent" variable if any.
Returns : 1 if variable i appears to be a log, else 0.

set_var_discrete ()

void        set_var_discrete                (DATAINFO *pdinfo,
                                             int i,
                                             int s);

Mark a variable as being discrete or not.

pdinfo : pointer to data information struct.
i : index number of variable.
s : non-zero to mark variable as discrete, zero to mark as not discrete.

set_var_scalar ()

void        set_var_scalar                  (DATAINFO *pdinfo,
                                             int i,
                                             int s);

Mark a variable as being a scalar or not.

pdinfo : pointer to data information struct.
i : index number of variable.
s : non-zero to mark variable as a scalar, zero to mark as not scalar (i.e. a series).

set_var_hidden ()

void        set_var_hidden                  (DATAINFO *pdinfo,
                                             int i);

Mark a variable as being "hidden" (an automatically generated variable that will not be shown in the main GUI window).

pdinfo : pointer to data information struct.
i : index number of variable.

var_set_linewidth ()

void        var_set_linewidth               (DATAINFO *pdinfo,
                                             int i,
                                             int w);

Set the line width for use when this variable is displayed in a line graph.

pdinfo : pointer to data information struct.
i : index number of variable.
w : width of plot line.

var_get_linewidth ()

int         var_get_linewidth               (const DATAINFO *pdinfo,
                                             int i);

pdinfo : pointer to data information struct.
i : index number of variable.
Returns : the line width set for use when graphing variable i.