#ifndef lint static char *RCSid() { return RCSid("$Id: datafile.c,v 1.344.2.8 2017/10/09 20:36:01 sfeam Exp $"); } #endif /* GNUPLOT - datafile.c */ /*[ * Copyright 1986 - 1993, 1998, 2004 Thomas Williams, Colin Kelley * * Permission to use, copy, and distribute this software and its * documentation for any purpose with or without fee is hereby granted, * provided that the above copyright notice appear in all copies and * that both that copyright notice and this permission notice appear * in supporting documentation. * * Permission to modify the software is granted, but not the right to * distribute the complete modified source code. Modifications are to * be distributed as patches to the released version. Permission to * distribute binaries produced by compiling modified sources is granted, * provided you * 1. distribute the corresponding source modifications from the * released version in the form of a patch file along with the binaries, * 2. add special version identification to distinguish your version * in addition to the base release version number, * 3. provide your name and address as the primary contact for the * support of your modified version, and * 4. retain our contact information in regard to use of the base * software. * Permission to distribute the released version of the source code along * with corresponding source modifications in the form of a patch file is * granted with same provisions 2 through 4 for binary distributions. * * This software is provided "as is" without express or implied warranty * to the extent permitted by applicable law. ]*/ /* AUTHOR : David Denholm */ /* * this file provides the functions to handle data-file reading.. * takes care of all the pipe / stdin / index / using worries */ /*{{{ notes */ /* * every a:b:c:d:e:f - plot every a'th point from c to e, * in every b lines from d to f * ie for (line=d; line<=f; line+=b) * for (point=c; point >=e; point+=a) * * public variables declared in this file. 
 * int df_no_use_specs - number of columns specified with 'using'
 * int df_no_tic_specs - count of additional ticlabel columns
 * int df_line_number - for error reporting
 * int df_datum - increases with each data point
 * int df_eof - end of file
 *
 * functions
 * int df_open(char *file_name, int max_using, plot_header *plot)
 *   parses thru / index / using on command line
 *   max_using is max no of 'using' columns allowed (obsolete?)
 *   plot_header is NULL if called from fit or set_palette code
 *   returns number of 'using' cols specified, or -1 on error (?)
 *
 * int df_readline(double vector[], int max)
 *   reads a line, does all the 'index' and 'using' manipulation
 *   deposits values into vector[]
 *   returns
 *     number of columns parsed [0 = not a blank line, but no valid data],
 *     DF_EOF - end of file
 *     DF_UNDEFINED - undefined result during eval of extended using spec
 *     DF_MISSING - requested column matched that of 'set missing <string>'
 *     DF_FIRST_BLANK - first consecutive blank line
 *     DF_SECOND_BLANK - second consecutive blank line
 *     DF_FOUND_KEY_TITLE   - only relevant to first line of data
 *     DF_KEY_TITLE_MISSING   and only for 'set key autotitle columnhead'
 *     DF_STRINGDATA - not currently used by anyone
 *     DF_COLUMN_HEADERS - first row used as headers rather than data
 *
 * if a using spec was given, lines not fulfilling spec are ignored.
 * we will always return exactly the number of items specified
 *
 * if no spec given, we return number of consecutive columns we parsed.
 *
 * if we are processing indexes, separated by 'n' blank lines,
 * we will return n-1 blank lines before noticing the index change
 *
 * void df_close()
 *   closes a currently open file.
 *
 * void f_dollars(x)
 * void f_column()    actions for expressions using $i, column(j), etc
 * void f_valid()
 *
 *
 * Line parsing is slightly different from previous versions of gnuplot...
 * given a line containing fewer columns than asked for, gnuplot used to make
 * up values...
Now if I have explicitly said 'using 1:2:3', then if * column 3 doesn't exist, I dont want this point... * */ /*}}} */ /* Daniel Sebald: added general binary 2d data support. (20 August 2004) */ #include "datafile.h" #include "datablock.h" #include "alloc.h" #include "axis.h" #include "command.h" #include "eval.h" #include "gp_time.h" #include "graphics.h" #include "misc.h" #include "parse.h" #include "plot.h" #include "readline.h" #include "util.h" #include "breaders.h" #include "tabulate.h" /* For sanity check inblock != outblock */ #include "variable.h" /* For locale handling */ /* test to see if the end of an inline datafile is reached */ #define is_EOF(c) ((c) == 'e' || (c) == 'E') /* is it a comment line? */ #define is_comment(c) ((c) && (strchr(df_commentschars, (c)) != NULL)) /* Used to skip whitespace but not cross a field boundary */ #define NOTSEP (!df_separators || !strchr(df_separators,*s)) /*{{{ static fns */ static int check_missing __PROTO((char *s)); static void expand_df_column __PROTO((int)); static void clear_df_column_headers __PROTO((void)); static char *df_gets __PROTO((void)); static int df_tokenise __PROTO((char *s)); static float *df_read_matrix __PROTO((int *rows, int *columns)); static void plot_option_every __PROTO((void)); static void plot_option_index __PROTO((void)); static void plot_option_using __PROTO((int)); static TBOOLEAN valid_format __PROTO((const char *)); static void plot_ticlabel_using __PROTO((int)); static void add_key_entry __PROTO((char *temp_string, int df_datum)); static char * df_generate_pseudodata __PROTO((void)); static char * df_generate_ascii_array_entry __PROTO((void)); static int df_skip_bytes __PROTO((off_t nbytes)); #ifdef BACKWARDS_COMPATIBLE static void plot_option_thru __PROTO((void)); #endif /*}}} */ /*{{{ variables */ enum COLUMN_TYPE { CT_DEFAULT, CT_STRING, CT_KEYLABEL, CT_XTICLABEL, CT_X2TICLABEL, CT_YTICLABEL, CT_Y2TICLABEL, CT_ZTICLABEL, CT_CBTICLABEL }; /* public variables client might access */ 
int df_no_use_specs; /* how many using columns were specified */ int df_line_number; int df_datum; /* suggested x value if none given */ int df_last_col = 0; /* visible to user via STATS_columns */ AXIS_INDEX df_axis[MAXDATACOLS]; TBOOLEAN df_matrix = FALSE; /* indicates if data originated from a 2D or 3D format */ void *df_pixeldata; /* pixel data from an external library (e.g. libgd) */ #ifdef BACKWARDS_COMPATIBLE /* jev -- the 'thru' function --- NULL means no dummy vars active */ struct udft_entry ydata_func; #endif /* string representing missing values in ascii datafiles */ char *missing_val = NULL; /* input field separators, NULL if whitespace is the separator */ char *df_separators = NULL; /* comments chars */ char *df_commentschars = 0; /* If any 'inline data' are in use for the current plot, flag this */ TBOOLEAN plotted_data_from_stdin = FALSE; /* Setting this allows the parser to recognize Fortran D or Q */ /* format constants in the input file. But it slows things down */ TBOOLEAN df_fortran_constants = FALSE; /* Setting this disables re-initialization of the floating point exception */ /* handler before every expression evaluation in a using spec. */ TBOOLEAN df_nofpe_trap = FALSE; /* private variables */ /* Bookkeeping for df_fgets() and df_gets(). * Must be initialized before any calls to either function. */ static char *df_line = NULL; static size_t max_line_len = 0; #define DATA_LINE_BUFSIZ 160 static FILE *data_fp = NULL; #if defined(PIPES) static TBOOLEAN df_pipe_open = FALSE; #endif #if defined(HAVE_FDOPEN) static int data_fd = -2; /* only used for file redirection */ #endif static TBOOLEAN mixed_data_fp = FALSE; /* inline data */ char *df_filename = NULL; /* name of data file */ static int df_eof = 0; static int df_no_tic_specs; /* ticlabel columns not counted in df_no_use_specs */ #ifndef MAXINT /* should there be one already defined ? 
*/ # define MAXINT INT_MAX /* from */ #endif /* stuff for implementing index */ static int blank_count = 0; /* how many blank lines recently */ static int df_lower_index = 0; /* first mesh required */ static int df_upper_index = MAXINT; static int df_index_step = 1; /* 'every' for indices */ static int df_current_index; /* current mesh */ /* stuff for named index support */ static char *indexname = NULL; static TBOOLEAN index_found = FALSE; static int df_longest_columnhead = 0; /* stuff for every point:line */ static TBOOLEAN set_every = FALSE; static int everypoint = 1; static int firstpoint = 0; static int lastpoint = MAXINT; static int everyline = 1; static int firstline = 0; static int lastline = MAXINT; static int point_count = -1; /* point counter - preincrement and test 0 */ static int line_count = 0; /* line counter */ /* for ascii file "skip" lines at head of file */ static int df_skip_at_front = 0; /* for pseudo-data (1 if filename = '+'; 2 if filename = '++') */ static int df_pseudodata = 0; static int df_pseudorecord = 0; static int df_pseudospan = 0; static double df_pseudovalue_0 = 0; static double df_pseudovalue_1 = 0; /* for datablocks */ static TBOOLEAN df_datablock = FALSE; static char **df_datablock_line = NULL; /* for arrays */ static int df_array_index = 0; static char *df_arrayname = NULL; /* track dimensions of input matrix/array/image */ static unsigned int df_xpixels; static unsigned int df_ypixels; static TBOOLEAN df_transpose; /* parsing stuff */ struct use_spec_s use_spec[MAXDATACOLS]; static char *df_format = NULL; static char *df_binary_format = NULL; TBOOLEAN evaluate_inside_using = FALSE; TBOOLEAN df_warn_on_missing_columnheader = FALSE; /* rather than three arrays which all grow dynamically, make one * dynamic array of this structure */ typedef struct df_column_struct { double datum; enum DF_STATUS good; char *position; /* points to start of this field in current line */ char *header; /* points to copy of the header for this column 
*/ } df_column_struct; static df_column_struct *df_column = NULL; /* we'll allocate space as needed */ static int df_max_cols = 0; /* space allocated */ static int df_no_cols; /* cols read */ static int fast_columns; /* corey@cac optimization */ char *df_tokens[MAXDATACOLS]; /* filled in by df_tokenise */ static char *df_stringexpression[MAXDATACOLS]; /* filled in after evaluate_at() */ static struct curve_points *df_current_plot; /* used to process histogram labels + key entries */ struct value df_strings[MAXDATACOLS]; /* used only by TABLESTYLE */ static TBOOLEAN df_tabulate_strings = FALSE; /* used only by TABLESTYLE */ /* These control the handling of fields in the first row of a data file. * See also parse_1st_row_as_headers. */ #define NO_COLUMN_HEADER (-99) /* some value that can never be a real column */ static int column_for_key_title = NO_COLUMN_HEADER; static TBOOLEAN df_already_got_headers = FALSE; char *df_key_title = NULL; /* filled in from column header if requested */ /* Binary *read* variables used by df_readbinary(). * There is a confusing difference between the ascii and binary "matrix" keywords. * Ascii matrix data by default is interpreted as having an implicit uniform grid * of x and y coords that are not actually present in the data file. * The equivalent binary data format is called "binary general". * In both of these cases the internal flag df_nonuniform_matrix is FALSE; * Binary matrix data contains explicit y values in the first row, and explicit x * values in the first column. This is signalled by "binary matrix". * In this case the internal flag df_nonuniform_matrix is TRUE. * * EAM May 2011 - Add a keyword "nonuniform matrix" to indicate ascii matrix data * in the same format as "binary matrix", i.e. with explicit x and y coordinates. * EAM Jul 2014 - Add keywords "columnheaders" and "rowheaders" to indicate ascii * matrix data in the uniform grid format containing labels in row 1 and column 1. 
*/ static TBOOLEAN df_read_binary; static TBOOLEAN df_nonuniform_matrix; static TBOOLEAN df_matrix_columnheaders, df_matrix_rowheaders; static int df_plot_mode; static int df_readascii __PROTO((double [], int)); static int df_readbinary __PROTO((double [], int)); static void initialize_use_spec __PROTO((void)); static void initialize_plot_style __PROTO((struct curve_points *)); static void initialize_binary_vars __PROTO((void)); static void df_insert_scanned_use_spec __PROTO((int)); static void adjust_binary_use_spec __PROTO((struct curve_points *)); static void clear_binary_records __PROTO((df_records_type)); static void plot_option_binary_format __PROTO((char *)); static void plot_option_binary __PROTO((TBOOLEAN, TBOOLEAN)); static void plot_option_array __PROTO((void)); static TBOOLEAN rotation_matrix_2D __PROTO((double R[][2], double)); static TBOOLEAN rotation_matrix_3D __PROTO((double P[][3], double *)); static int token2tuple __PROTO((double *, int)); static void df_determine_matrix_info __PROTO((FILE *)); static void df_swap_bytes_by_endianess __PROTO((char *, int, int)); typedef enum df_multivalue_type { DF_DELTA, DF_FLIP_AXIS, DF_FLIP, DF_SCAN, DF_ORIGIN, DF_CENTER, DF_ROTATION, DF_PERPENDICULAR, DF_SKIP } df_multivalue_type; static void plot_option_multivalued __PROTO((df_multivalue_type,int)); char *df_endian[DF_ENDIAN_TYPE_LENGTH] = { "little", "pdp (middle)", "swapped pdp (dimmle)", "big" }; #define SUPPORT_MIDDLE_ENDIAN 1 #if SUPPORT_MIDDLE_ENDIAN /* To generate a swap, take the bit-wise complement of the lowest two bits. */ typedef enum df_byte_read_order_type { DF_0123, DF_1032, DF_2301, DF_3210 } df_byte_read_order_type; /* First argument, this program's endianess. Second argument, file's endianess. * Don't use directly. 
Use 'byte_read_order()' function instead.*/ static char df_byte_read_order_map[4][4] = { {DF_0123, DF_1032, DF_2301, DF_3210}, {DF_1032, DF_0123, DF_1032, DF_2301}, {DF_2301, DF_1032, DF_0123, DF_1032}, {DF_3210, DF_2301, DF_1032, DF_0123} }; static long long_0x2468 = 0x2468; #define TEST_BIG_PDP ( (((char *)&long_0x2468)[0] < 3) ? DF_BIG_ENDIAN : DF_PDP_ENDIAN ) #define THIS_COMPILER_ENDIAN ( (((char *)&long_0x2468)[0] < 5) ? TEST_BIG_PDP : DF_LITTLE_ENDIAN ) /* Argument is file's endianess type. */ static df_byte_read_order_type byte_read_order __PROTO((df_endianess_type)); /* Logical variables indicating information about data file. */ TBOOLEAN df_binary_file; TBOOLEAN df_matrix_file; static int df_M_count; static int df_N_count; static int df_O_count; /* Initially set to default and then possibly altered by command line. */ df_binary_file_record_struct *df_bin_record = 0; /* Default settings. */ df_binary_file_record_struct *df_bin_record_default = 0; /* Settings that are transferred to default upon reset. 
*/ df_binary_file_record_struct df_bin_record_reset = { {-1, 0, 0}, {1, 1, 1}, {1, 1, 1}, DF_TRANSLATE_DEFAULT, {0, 0, 0}, 0, {0, 0, 1}, {DF_SCAN_POINT, DF_SCAN_LINE, DF_SCAN_PLANE}, FALSE, {0, 0, 0}, {0, 0, 0}, {1, 1, 1}, {0, 0, 0}, DF_TRANSLATE_DEFAULT, {0, 0, 0}, NULL /* data_memory */ }; int df_max_num_bin_records = 0, df_num_bin_records, df_bin_record_count; int df_max_num_bin_records_default = 0, df_num_bin_records_default; /* Used to mark the location of a blank line in the original data input file */ struct coordinate blank_data_line = {UNDEFINED, -999, -999, -999, -999, -999, -999, -999}; static void gpbin_filetype_function __PROTO((void)); static void raw_filetype_function __PROTO((void)); static void avs_filetype_function __PROTO((void)); static void (*binary_input_function)(void); /* Will point to one of the above */ static void auto_filetype_function(void){} /* Just a placeholder for auto */ struct gen_ftable df_bin_filetype_table[] = { {"avs", avs_filetype_function}, {"bin", raw_filetype_function}, {"edf", edf_filetype_function}, {"ehf", edf_filetype_function}, {"gif", gif_filetype_function}, {"gpbin", gpbin_filetype_function}, {"jpeg", jpeg_filetype_function}, {"jpg", jpeg_filetype_function}, {"png", png_filetype_function}, {"raw", raw_filetype_function}, {"rgb", raw_filetype_function}, {"auto", auto_filetype_function}, {NULL, NULL} }; #define RAW_FILETYPE 1 /* Initially set to default and then possibly altered by command line. */ static int df_bin_filetype; /* Default setting. */ static int df_bin_filetype_default; static df_endianess_type df_bin_file_endianess_default; /* Setting that is transferred to default upon reset. 
*/ static int df_bin_filetype_reset = -1; #define DF_BIN_FILE_ENDIANESS_RESET THIS_COMPILER_ENDIAN /* This one is needed by breaders.c */ df_endianess_type df_bin_file_endianess; typedef struct df_bin_scan_table_2D_struct { char *string; df_sample_scan_type scan[3]; } df_bin_scan_table_2D_struct; df_bin_scan_table_2D_struct df_bin_scan_table_2D[] = { {"xy", {DF_SCAN_POINT, DF_SCAN_LINE, DF_SCAN_PLANE}}, {"yx", {DF_SCAN_LINE, DF_SCAN_POINT, DF_SCAN_PLANE}}, {"tr", {DF_SCAN_POINT, DF_SCAN_LINE, DF_SCAN_PLANE}}, {"rt", {DF_SCAN_LINE, DF_SCAN_POINT, DF_SCAN_PLANE}} }; #define TRANSPOSE_INDEX 1 typedef struct df_bin_scan_table_3D_struct { char *string; df_sample_scan_type scan[3]; } df_bin_scan_table_3D_struct; df_bin_scan_table_3D_struct df_bin_scan_table_3D[] = { {"xyz", {DF_SCAN_POINT, DF_SCAN_LINE, DF_SCAN_PLANE}}, {"zxy", {DF_SCAN_LINE, DF_SCAN_PLANE, DF_SCAN_POINT}}, {"yzx", {DF_SCAN_PLANE, DF_SCAN_POINT, DF_SCAN_LINE}}, {"yxz", {DF_SCAN_LINE, DF_SCAN_POINT, DF_SCAN_PLANE}}, {"xzy", {DF_SCAN_POINT, DF_SCAN_PLANE, DF_SCAN_LINE}}, {"zyx", {DF_SCAN_PLANE, DF_SCAN_LINE, DF_SCAN_POINT}}, {"trz", {DF_SCAN_POINT, DF_SCAN_LINE, DF_SCAN_PLANE}}, {"ztr", {DF_SCAN_LINE, DF_SCAN_PLANE, DF_SCAN_POINT}}, {"rzt", {DF_SCAN_PLANE, DF_SCAN_POINT, DF_SCAN_LINE}}, {"rtz", {DF_SCAN_LINE, DF_SCAN_POINT, DF_SCAN_PLANE}}, {"tzr", {DF_SCAN_POINT, DF_SCAN_PLANE, DF_SCAN_LINE}}, {"zrt", {DF_SCAN_PLANE, DF_SCAN_LINE, DF_SCAN_POINT}} }; /* Names for machine dependent field sizes. */ char *ch_names[] = {"char","schar","c"}; char *uc_names[] = {"uchar"}; char *sh_names[] = {"short"}; char *us_names[] = {"ushort"}; char *in_names[] = {"int","sint","i","d"}; char *ui_names[] = {"uint","u"}; char *lo_names[] = {"long","ld"}; char *ul_names[] = {"ulong","lu"}; char *fl_names[] = {"float","f"}; char *db_names[] = {"double","lf"}; /* Machine independent names. 
*/ char *byte_names[] = {"int8","byte"}; char *ubyte_names[] = {"uint8","ubyte"}; char *word_names[] = {"int16","word"}; char *uword_names[] = {"uint16","uword"}; char *word2_names[] = {"int32"}; char *uword2_names[] = {"uint32"}; char *word4_names[] = {"int64"}; char *uword4_names[] = {"uint64"}; char *float_names[] = {"float32"}; char *float2_names[] = {"float64"}; typedef struct df_binary_details_struct { char **name; unsigned short no_names; df_binary_type_struct type; } df_binary_details_struct; typedef struct df_binary_tables_struct { df_binary_details_struct *group; unsigned short group_length; } df_binary_tables_struct; df_binary_details_struct df_binary_details[] = { {ch_names,sizeof(ch_names)/sizeof(ch_names[0]),{DF_CHAR,sizeof(char)}}, {uc_names,sizeof(uc_names)/sizeof(uc_names[0]),{DF_UCHAR,sizeof(unsigned char)}}, {sh_names,sizeof(sh_names)/sizeof(sh_names[0]),{DF_SHORT,sizeof(short)}}, {us_names,sizeof(us_names)/sizeof(us_names[0]),{DF_USHORT,sizeof(unsigned short)}}, {in_names,sizeof(in_names)/sizeof(in_names[0]),{DF_INT,sizeof(int)}}, {ui_names,sizeof(ui_names)/sizeof(ui_names[0]),{DF_UINT,sizeof(unsigned int)}}, {lo_names,sizeof(lo_names)/sizeof(lo_names[0]),{DF_LONG,sizeof(long)}}, {ul_names,sizeof(ul_names)/sizeof(ul_names[0]),{DF_ULONG,sizeof(unsigned long)}}, {fl_names,sizeof(fl_names)/sizeof(fl_names[0]),{DF_FLOAT,sizeof(float)}}, {db_names,sizeof(db_names)/sizeof(db_names[0]),{DF_DOUBLE,sizeof(double)}}, {NULL,0, {DF_LONGLONG,sizeof(long long)}}, {NULL,0, {DF_ULONGLONG,sizeof(unsigned long long)}} }; df_binary_details_struct df_binary_details_independent[] = { {byte_names,sizeof(byte_names)/sizeof(byte_names[0]),{SIGNED_TEST(1),1}}, {ubyte_names,sizeof(ubyte_names)/sizeof(ubyte_names[0]),{UNSIGNED_TEST(1),1}}, {word_names,sizeof(word_names)/sizeof(word_names[0]),{SIGNED_TEST(2),2}}, {uword_names,sizeof(uword_names)/sizeof(uword_names[0]),{UNSIGNED_TEST(2),2}}, {word2_names,sizeof(word2_names)/sizeof(word2_names[0]),{SIGNED_TEST(4),4}}, 
{uword2_names,sizeof(uword2_names)/sizeof(uword2_names[0]),{UNSIGNED_TEST(4),4}}, {word4_names,sizeof(word4_names)/sizeof(word4_names[0]),{SIGNED_TEST(8),8}}, {uword4_names,sizeof(uword4_names)/sizeof(uword4_names[0]),{UNSIGNED_TEST(8),8}}, {float_names,sizeof(float_names)/sizeof(float_names[0]),{FLOAT_TEST(4),4}}, {float2_names,sizeof(float2_names)/sizeof(float2_names[0]),{FLOAT_TEST(8),8}} }; int df_no_bin_cols; /* binary columns to read */ df_binary_tables_struct df_binary_tables[] = { {df_binary_details,sizeof(df_binary_details)/sizeof(df_binary_details[0])}, {df_binary_details_independent,sizeof(df_binary_details_independent)/sizeof(df_binary_details_independent[0])} }; /* Information about binary data structure, to be determined by the * using and format options. This should be one greater than df_no_bin_cols. */ static df_column_bininfo_struct *df_column_bininfo = NULL; /* allocate space as needed */ static int df_max_bininfo_cols = 0; /* space allocated */ static const char *matrix_general_binary_conflict_msg = "Conflict between some matrix binary and general binary keywords"; #endif /*}}} */ /* Initialize input buffer used by df_gets and df_fgets. */ /* Called via reset_command() on program entry. 
*/ void df_init() { if (max_line_len < DATA_LINE_BUFSIZ) { max_line_len = DATA_LINE_BUFSIZ; df_line = gp_alloc(max_line_len, "datafile line buffer"); } } /*{{{ static char *df_gets() */ static char * df_gets() { /* HBB 20000526: prompt user for inline data, if in interactive mode */ if (mixed_data_fp && interactive) fputs("input data ('e' ends) > ", stderr); /* Special pseudofiles '+' and '++' return coords of sample */ if (df_pseudodata) return df_generate_pseudodata(); if (df_datablock) return *(df_datablock_line++); if (df_array) return df_generate_ascii_array_entry(); return df_fgets(data_fp); } /*}}} */ /*{{{ char *df_gets() */ /* * This one is shared by df_gets() and by datablock.c:datablock_command */ char * df_fgets( FILE *fin ) { int len = 0; if (!fgets(df_line, max_line_len, fin)) return NULL; if (mixed_data_fp) ++inline_num; for (;;) { len += strlen(df_line + len); if (len > 0 && df_line[len - 1] == '\n') { /* we have read an entire text-file line. * Strip the trailing linefeed and return */ df_line[len - 1] = 0; return df_line; } if ((max_line_len - len) < 32) df_line = gp_realloc(df_line, max_line_len *= 2, "datafile line buffer"); if (!fgets(df_line + len, max_line_len - len, fin)) return df_line; /* unexpected end of file, but we have something to do */ } /* NOTREACHED */ return NULL; } /*}}} */ /*{{{ static int df_tokenise(s) */ static int df_tokenise(char *s) { /* implement our own sscanf that takes 'missing' into account, * and can understand fortran quad format */ TBOOLEAN in_string; int i; /* "here data" lines may end in \n rather than \0. */ /* DOS/Windows lines may end in \r rather than \0. 
*/ if (s[strlen(s)-1] == '\n' || s[strlen(s)-1] == '\r') s[strlen(s)-1] = '\0'; for (i = 0; i 0) && (use_spec[0].column == dfncp1 || (df_no_use_specs > 1 && (use_spec[1].column == dfncp1 || (df_no_use_specs > 2 && (use_spec[2].column == dfncp1 || (df_no_use_specs > 3 && (use_spec[3].column == dfncp1 || (df_no_use_specs > 4 && (use_spec[4].column == dfncp1 || df_no_use_specs > 5) ) ) ) ) ) ) ) ) ) ) { /* This was the [slow] code used through version 4.0 * count = sscanf(s, "%lf%n", &df_column[df_no_cols].datum, &used); */ /* Use strtod() because * - it is faster than sscanf() * - sscanf(... %n ...) may not be portable * - it allows error checking * - atof() does not return a count or new position */ char *next; df_column[df_no_cols].datum = strtod(s, &next); used = next - s; count = (used) ? 1 : 0; } else { /* skip any space at start of column */ while (isspace((unsigned char) *s) && NOTSEP) ++s; count = (*s && NOTSEP) ? 1 : 0; /* skip chars to end of column */ used = 0; if (df_separators != NULL && in_string) { do ++s; while (*s && *s != '"'); in_string = FALSE; } while (!isspace((unsigned char) *s) && (*s != NUL) && NOTSEP) ++s; } /* it might be a fortran double or quad precision. * 'used' is only safe if count is 1 */ if (df_fortran_constants && count == 1 && (s[used] == 'd' || s[used] == 'D' || s[used] == 'q' || s[used] == 'Q')) { /* HBB 20001221: avoid breaking parsing of time/date * strings like 01Dec2000 that would be caused by * overwriting the 'D' with an 'e'... */ char *endptr; char save_char = s[used]; /* might be fortran double */ s[used] = 'e'; /* and try again */ df_column[df_no_cols].datum = strtod(s, &endptr); count = (endptr == s) ? 0 : 1; s[used] = save_char; } df_column[df_no_cols].good = count == 1 ? 
DF_GOOD : DF_BAD; if (isnan(df_column[df_no_cols].datum)) { df_column[df_no_cols].good = DF_UNDEFINED; FPRINTF((stderr,"NaN in column %d\n", df_no_cols)); } } ++df_no_cols; /* If we are in a quoted string, skip to end of quote */ if (in_string) { do s++; while (*s && (unsigned char) *s != '"'); } /* skip to 1st character in the next field */ if (df_separators != NULL) { /* skip to next separator or end of line */ while ((*s != '\0') && (*s != '\n') && NOTSEP) ++s; if ((*s == '\0') || (*s == '\n')) /* End of line; we're done */ break; /* step over field separator */ ++s; /* skip whitespace at start of next field */ while ((*s == ' ' || *s == '\t') && NOTSEP) ++s; if ((*s == '\0') || (*s == '\n')) { /* Last field is empty */ df_column[df_no_cols].good = DF_MISSING; df_column[df_no_cols].datum = not_a_number(); ++df_no_cols; break; } } else { /* skip trash chars remaining in this column */ while ((*s != '\0') && (*s != '\n') && !isspace((unsigned char) *s)) ++s; /* skip whitespace to start of next column */ while (isspace((unsigned char) *s) && *s != '\n') ++s; } } return df_no_cols; } /*}}} */ /*{{{ static float *df_read_matrix() */ /* Reads a matrix from a text file and stores it as floats in allocated * memory. * * IMPORTANT NOTE: The routine returns the memory pointer for that matrix, * but does not retain the pointer. Maintenance of the memory is left to * the calling code. 
*/ static float * df_read_matrix(int *rows, int *cols) { int max_rows = 0; int c; float *linearized_matrix = NULL; int bad_data = 0; char *s; int index = 0; *rows = 0; *cols = 0; for (;;) { if (!(s = df_gets())) { df_eof = 1; /* NULL if we have not read anything yet */ return linearized_matrix; } /* skip leading spaces */ while (isspace((unsigned char) *s) && NOTSEP) ++s; /* skip blank lines and comments */ if (!*s || is_comment(*s)) { /* except that some comments hide an index name */ if (indexname) { while (is_comment(*s) || isspace((unsigned char)*s)) ++s; if (*s && !strncmp(s, indexname, strlen(indexname))) index_found = TRUE; } if (linearized_matrix) return linearized_matrix; else continue; } if (mixed_data_fp && is_EOF(*s)) { df_eof = 1; return linearized_matrix; } c = df_tokenise(s); if (!c) return linearized_matrix; /* If the first row of matrix data contains column headers */ if (!df_already_got_headers && df_matrix_columnheaders && *rows == 0) { int i; char *temp_string; df_already_got_headers = TRUE; for (i = (df_matrix_rowheaders ? 1 :0); i < c; i++) { double xpos = df_matrix_rowheaders ? 
(i-1) : i; if (use_spec[0].at) { struct value a; df_column[0].datum = xpos; df_column[0].good = DF_GOOD; evaluate_inside_using = TRUE; evaluate_at(use_spec[0].at, &a); evaluate_inside_using = FALSE; xpos = real(&a); } temp_string = df_parse_string_field(df_column[i].position); add_tic_user(&axis_array[FIRST_X_AXIS], temp_string, xpos, -1); free(temp_string); } continue; } if (*cols && c != *cols) { /* it's not regular */ if (linearized_matrix) free(linearized_matrix); int_error(NO_CARET, "Matrix does not represent a grid"); } *cols = c; ++*rows; if (*rows > max_rows) { max_rows = GPMAX(2*max_rows,1); linearized_matrix = gp_realloc(linearized_matrix, *cols * max_rows * sizeof(float), "df_matrix"); } /* store data */ { int i; for (i = 0; i < c; ++i) { /* First column in "matrix rowheaders" is a ytic label */ if (df_matrix_rowheaders && i == 0) { char *temp_string; double ypos = *rows - 1; if (use_spec[1].at) { /* The save/restore is to make sure 1:(f($2)):3 works */ struct value a; double save = df_column[1].datum; df_column[1].datum = ypos; evaluate_inside_using = TRUE; evaluate_at(use_spec[1].at, &a); evaluate_inside_using = FALSE; ypos = real(&a); df_column[1].datum = save; } temp_string = df_parse_string_field(df_column[0].position); add_tic_user(&axis_array[FIRST_Y_AXIS], temp_string, ypos, -1); free(temp_string); continue; } if (i < firstpoint && df_column[i].good != DF_GOOD) { /* It's going to be skipped anyhow, so... 
*/ linearized_matrix[index++] = 0; } else linearized_matrix[index++] = (float) df_column[i].datum; if (df_column[i].good != DF_GOOD) { if (bad_data++ == 0) int_warn(NO_CARET,"matrix contains missing or undefined values"); } } } } } /*}}} */ static void initialize_use_spec() { int i; df_no_use_specs = 0; for (i = 0; i < MAXDATACOLS; ++i) { use_spec[i].column = i + 1; /* default column */ use_spec[i].expected_type = CT_DEFAULT; /* no particular expectation */ if (use_spec[i].at) { free_at(use_spec[i].at); use_spec[i].at = NULL; /* no expression */ } df_axis[i] = NO_AXIS; /* no timefmt for this output column */ } } static void initialize_plot_style(struct curve_points *plot) { int save_token = c_token; if (!plot) return; for ( ; !END_OF_COMMAND; c_token++) if (almost_equals(c_token, "w$ith")) { plot->plot_style = get_style(); break; } c_token = save_token; } /*{{{ int df_open(char *file_name, int max_using, plot_header *plot) */ /* open file, parsing using/thru/index stuff return number of using * specs [well, we have to return something !] 
*/ int df_open(const char *cmd_filename, int max_using, struct curve_points *plot) { int name_token = c_token - 1; TBOOLEAN duplication = FALSE; TBOOLEAN set_index = FALSE, set_skip = FALSE; TBOOLEAN set_using = FALSE; TBOOLEAN set_matrix = FALSE; fast_columns = 1; /* corey@cac */ /* close file if necessary */ if (data_fp) { df_close(); data_fp = NULL; } free(df_format); df_format = NULL; /* no format string */ df_no_tic_specs = 0; free(df_key_title); df_key_title = NULL; initialize_use_spec(); clear_df_column_headers(); df_datum = -1; /* it will be preincremented before use */ df_line_number = 0; /* ditto */ df_lower_index = 0; df_index_step = 1; df_upper_index = MAXINT; free(indexname); indexname = NULL; df_current_index = 0; blank_count = 2; /* by initialising blank_count, leading blanks will be ignored */ set_every = FALSE; everypoint = everyline = 1; /* unless there is an every spec */ firstpoint = firstline = 0; lastpoint = lastline = MAXINT; df_binary_file = df_matrix_file = FALSE; df_pixeldata = NULL; df_num_bin_records = 0; df_matrix = FALSE; df_nonuniform_matrix = FALSE; df_matrix_columnheaders = FALSE; df_matrix_rowheaders = FALSE; df_skip_at_front = 0; df_xpixels = 0; df_ypixels = 0; df_transpose = FALSE; df_eof = 0; /* Save for use by df_readline(). */ /* Perhaps it should be a parameter to df_readline? */ df_current_plot = plot; /* If 'set key autotitle columnhead' is in effect we always treat the * first data row as non-data (df_readline() will return DF_COLUMNHEADERS * rather than the column count). This is true even if the key is off * or the data is read from 'stats' or from 'fit' rather than plot. * FIXME: This should probably be controlled by an option to * 'set datafile' rather than 'set key'. Or maybe both? 
*/ column_for_key_title = NO_COLUMN_HEADER; df_already_got_headers = FALSE; if ((&keyT)->auto_titles == COLUMNHEAD_KEYTITLES) parse_1st_row_as_headers = TRUE; else parse_1st_row_as_headers = FALSE; if (!cmd_filename) int_error(c_token, "missing filename"); if (!cmd_filename[0]) { if (!df_filename || !*df_filename) int_error(c_token-1, "No previous filename"); if (!strcmp(df_filename,"@@") && df_arrayname) { df_array = get_udv_by_name(df_arrayname); if (df_array->udv_value.type != ARRAY) int_error(c_token-1, "Array %s invalid", df_arrayname); } } else { free(df_filename); df_filename = gp_strdup(cmd_filename); } /* defer opening until we have parsed the modifiers... */ #ifdef BACKWARDS_COMPATIBLE free_at(ydata_func.at); ydata_func.at = NULL; #endif /* pm 25.11.2001 allow any order of options */ while (!END_OF_COMMAND) { /* look for binary / matrix */ if (almost_equals(c_token, "bin$ary")) { if (df_filename[0] == '$') int_error(c_token, "data blocks cannot be binary"); if (!strcmp(df_filename,"+") || !strcmp(df_filename,"++")) int_error(c_token, "pseudofiles '+' and '++' cannot be binary"); c_token++; if (df_binary_file || set_skip) { duplication=TRUE; break; } gp_expand_tilde(&df_filename); df_binary_file = TRUE; /* Up to the time of adding the general binary code, only matrix * binary for 3d was defined. So, use matrix binary by default. */ df_matrix_file = TRUE; initialize_binary_vars(); plot_option_binary(set_matrix, FALSE); continue; } /* deal with matrix */ if (almost_equals(c_token, "mat$rix")) { c_token++; if (set_matrix) { duplication=TRUE; break; } /* `binary` default is both df_matrix_file and df_binary_file. * So if df_binary_file is true, but df_matrix_file isn't, then * some keyword specific to general binary has been given. 
*/ if (!df_matrix_file && df_binary_file) int_error(c_token, matrix_general_binary_conflict_msg); df_matrix_file = TRUE; set_matrix = TRUE; fast_columns = 0; continue; } /* May 2011 - "nonuniform matrix" indicates an ascii data file * with the same row/column layout as "binary matrix" */ if (almost_equals(c_token, "nonuni$form")) { c_token++; df_matrix_file = TRUE; df_nonuniform_matrix = TRUE; fast_columns = 0; if (df_matrix_rowheaders || df_matrix_columnheaders) duplication = TRUE; continue; } /* Jul 2014 - "matrix columnheaders" indicates an ascii data file * in uniform grid format but with column labels in row 1 */ if (almost_equals(c_token, "columnhead$ers")) { c_token++; df_matrix_file = TRUE; df_matrix_columnheaders = TRUE; if (df_nonuniform_matrix || !set_matrix) duplication = TRUE; continue; } /* Jul 2014 - "matrix rowheaders" indicates an ascii data file * in uniform grid format but with row labels in column 1 */ if (almost_equals(c_token, "rowhead$ers")) { c_token++; df_matrix_file = TRUE; df_matrix_rowheaders = TRUE; if (df_nonuniform_matrix || !set_matrix) duplication = TRUE; continue; } /* deal with index */ if (almost_equals(c_token, "i$ndex")) { if (set_index) { duplication=TRUE; break; } plot_option_index(); set_index = TRUE; continue; } /* deal with every */ if (almost_equals(c_token, "ev$ery")) { if (set_every) { duplication=TRUE; break; } plot_option_every(); set_every = TRUE; continue; } /* deal with skip */ if (equals(c_token, "skip")) { if (set_skip || df_binary_file) { duplication=TRUE; break; } set_skip = TRUE; c_token++; df_skip_at_front = int_expression(); if (df_skip_at_front < 0) df_skip_at_front = 0; continue; } #ifdef BACKWARDS_COMPATIBLE /* deal with thru */ /* jev -- support for passing data from file thru user function */ if (almost_equals(c_token, "thru$")) { plot_option_thru(); continue; } #endif /* deal with using */ if (almost_equals(c_token, "u$sing")) { if (set_using) { duplication=TRUE; break; } plot_option_using(max_using); 
set_using = TRUE; continue; } /* deal with volatile */ if (almost_equals(c_token, "volatile")) { c_token++; volatile_data = TRUE; continue; } /* Allow this plot not to affect autoscaling */ if (almost_equals(c_token, "noauto$scale")) { c_token++; plot->noautoscale = TRUE; continue; } break; /* unknown option */ } /* while (!END_OF_COMMAND) */ if (duplication) int_error(c_token, "duplicated or contradicting arguments in datafile options"); /* Check for auto-generation of key title from column header */ /* Mar 2009: This may no longer be the best place for this! */ if ((&keyT)->auto_titles == COLUMNHEAD_KEYTITLES) { if (df_no_use_specs == 1) column_for_key_title = use_spec[0].column; else if (plot && plot->plot_type == DATA3D) column_for_key_title = use_spec[2].column; else column_for_key_title = use_spec[1].column; } /*{{{ more variable inits */ point_count = -1; /* we preincrement */ line_count = 0; df_pseudodata = 0; df_pseudorecord = 0; df_pseudospan = 0; df_datablock = FALSE; df_datablock_line = NULL; df_tabulate_strings = FALSE; if (plot) { /* Save the matrix/array/image dimensions for binary image plot styles */ plot->image_properties.ncols = df_xpixels; plot->image_properties.nrows = df_ypixels; FPRINTF((stderr,"datafile.c:%d (ncols,nrows) set to (%d,%d)\n", __LINE__, df_xpixels, df_ypixels)); if (set_every && df_xpixels && df_ypixels) { plot->image_properties.ncols = 1 + ((int)(GPMIN(lastpoint,df_xpixels-1)) - firstpoint) / everypoint; plot->image_properties.nrows = 1 + ((int)(GPMIN(lastline,df_ypixels-1)) - firstline) / everyline; FPRINTF((stderr,"datafile.c:%d adjusting to (%d, %d)\n", __LINE__, plot->image_properties.ncols, plot->image_properties.nrows)); } if (df_transpose) { unsigned int temp = plot->image_properties.ncols; plot->image_properties.ncols = plot->image_properties.nrows; plot->image_properties.nrows = temp; FPRINTF((stderr,"datafile.c:%d adjusting to (%d, %d)\n", __LINE__, plot->image_properties.ncols, plot->image_properties.nrows)); } } 
/*}}} */ /*{{{ open file */ #if defined(HAVE_FDOPEN) if (*df_filename == '<' && strlen(df_filename) > 1 && df_filename[1] == '&') { char *substr; /* read from an already open file descriptor */ data_fd = strtol(df_filename + 2, &substr, 10); if (*substr != '\0' || data_fd < 0 || substr == df_filename+2) int_error(name_token, "invalid file descriptor integer"); else if (data_fd == fileno(stdin) || data_fd == fileno(stdout) || data_fd == fileno(stderr)) int_error(name_token, "cannot plot from stdin/stdout/stderr"); else if ((data_fp = fdopen(data_fd, "r")) == (FILE *) NULL) int_error(name_token, "cannot open file descriptor for reading data"); /* if this stream isn't seekable, set it to volatile */ if (fseek(data_fp, 0, SEEK_CUR) < 0) volatile_data = TRUE; } else #endif /* HAVE_FDOPEN */ #if defined(PIPES) if (*df_filename == '<') { restrict_popen(); if ((data_fp = popen(df_filename + 1, "r")) == (FILE *) NULL) os_error(name_token, "cannot create pipe for data"); else df_pipe_open = TRUE; } else #endif /* PIPES */ /* Special filenames '-' '+' '++' '$DATABLOCK' */ if (*df_filename == '-' && strlen(df_filename) == 1) { plotted_data_from_stdin = TRUE; volatile_data = TRUE; data_fp = lf_top(); if (!data_fp) data_fp = stdin; mixed_data_fp = TRUE; /* don't close command file */ } else if (!strcmp(df_filename,"+")) { df_pseudodata = 1; } else if (!strcmp(df_filename,"++")) { df_pseudodata = 2; } else if (df_filename[0] == '$') { df_datablock = TRUE; df_datablock_line = get_datablock(df_filename); /* Better safe than sorry. Check for inblock != outblock */ if (table_var && table_var->udv_value.v.data_array == df_datablock_line) int_error(NO_CARET,"input and output datablock are the same"); } else if (!strcmp(df_filename, "@@") && df_array) { /* df_array was set in string_or_express() */ df_array_index = 0; /* save name so we can refer to it later */ df_arrayname = df_array->udv_name; } else { /* filename cannot be static array! 
*/ gp_expand_tilde(&df_filename); #ifdef HAVE_SYS_STAT_H { struct stat statbuf; if ((stat(df_filename, &statbuf) > -1) && S_ISDIR(statbuf.st_mode)) { os_error(name_token, "\"%s\" is a directory", df_filename); } } #endif /* HAVE_SYS_STAT_H */ if ((data_fp = loadpath_fopen(df_filename, df_binary_file ? "rb" : "r")) == NULL) { int_warn(NO_CARET, "Cannot find or open file \"%s\"", df_filename); df_eof = 1; return DF_EOF; } } /*}}} */ /* Binary file options are handled differently depending on the plot style. */ /* Peek ahead in the command line to see if there is a "with