Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
ctSkennerton committed Mar 10, 2021
2 parents d1f287a + c0f4e97 commit 04e75e8
Show file tree
Hide file tree
Showing 12 changed files with 245 additions and 50 deletions.
31 changes: 31 additions & 0 deletions FIXES
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,37 @@ THIS SOFTWARE.
This file lists all bug fixes, changes, etc., made since the AWK book
was sent to the printers in August, 1987.

February 15, 2021:
Small fix so that awk will compile again with g++. Thanks to
Arnold Robbins.

January 06, 2021:
Fix a decision bug with trailing stuff in lib.c:is_valid_number
after recent changes. Thanks to Ozan Yigit.

December 18, 2020:
Fix problems converting inf and NaN values in lib.c:is_valid_number.
Enhance number to string conversion to do the right thing for
NaN and inf values. Things are now pretty much the same as in
gawk. (Found a gawk bug while we're at it.) Added a torture
test for these values. Thanks to Arnold Robbins. Allows closing
of PR #101.

December 15, 2020:
Merge PR #99, which gets the right header for strcasecmp.
Thanks to GitHub user michaelforney.

December 8, 2020:
Merge PR #98: Disallow hex data. Allow only +nan, -nan,
+inf, -inf (case independent) to give NaN and infinity values.
Improve things so that string to double conversion is only
done once, yielding something of a speedup. This obviates
PR #95. Thanks to Arnold Robbins.

December 3, 2020:
Fix to argument parsing to avoid printing spurious newlines.
Thanks to Todd Miller. Merges PR #97.

October 13, 2020:
Add casts before all the calls to malloc/calloc/realloc in order
to get it to compile with g++. Thanks to Arnold Robbins.
Expand Down
53 changes: 53 additions & 0 deletions bioawk.1
Original file line number Diff line number Diff line change
Expand Up @@ -729,3 +729,56 @@ The scope rules for variables in functions are a botch;
the syntax is worse.
.PP
Only eight-bit character sets are handled correctly.
.SH UNUSUAL FLOATING-POINT VALUES
.I Awk
was designed before IEEE 754 arithmetic defined Not-A-Number (NaN)
and Infinity values, which are supported by all modern floating-point
hardware.
.PP
Because
.I awk
uses
.IR strtod (3)
and
.IR atof (3)
to convert string values to double-precision floating-point values,
modern C libraries also convert strings starting with
.B inf
and
.B nan
into infinity and NaN values respectively. This led to strange results,
with something like this:
.PP
.EX
.nf
echo nancy | awk '{ print $1 + 0 }'
.fi
.EE
.PP
printing
.B nan
instead of zero.
.PP
.I Awk
now follows GNU AWK, and prefilters string values before attempting
to convert them to numbers, as follows:
.TP
.I "Hexadecimal values"
Hexadecimal values (allowed since C99) convert to zero, as they did
prior to C99.
.TP
.I "NaN values"
The two strings
.B +nan
and
.B \-nan
(case independent) convert to NaN. No others do.
(NaNs can have signs.)
.TP
.I "Infinity values"
The two strings
.B +inf
and
.B \-inf
(case independent) convert to positive and negative infinity, respectively.
No others do.
4 changes: 4 additions & 0 deletions bugs-fixed/inf-nan-torture.awk
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# For every input record, walk the fields left to right and print the
# field index, the field text, and the field coerced to a number.
{
	n = 1
	while (n <= NF) {
		print n, $n, $n + 0
		n++
	}
}
1 change: 1 addition & 0 deletions bugs-fixed/inf-nan-torture.in
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
-inf -inform inform -nan -nancy nancy -123 0 123 +123 nancy +nancy +nan inform +inform +inf
16 changes: 16 additions & 0 deletions bugs-fixed/inf-nan-torture.ok
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
1 -inf -inf
2 -inform 0
3 inform 0
4 -nan -nan
5 -nancy 0
6 nancy 0
7 -123 -123
8 0 0
9 123 123
10 +123 123
11 nancy 0
12 +nancy 0
13 +nan +nan
14 inform 0
15 +inform 0
16 +inf +inf
7 changes: 6 additions & 1 deletion lex.c
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,12 @@ int yylex(void)
return word(buf);
if (isdigit(c)) {
char *cp = tostring(buf);
yylval.cp = setsymtab(buf, cp, atof(buf), CON|NUM, symtab);
double result;

if (is_number(cp, & result))
yylval.cp = setsymtab(buf, cp, result, CON|NUM, symtab);
else
yylval.cp = setsymtab(buf, cp, 0.0, STR, symtab);
free(cp);
/* should this also have STR set? */
RET(NUMBER);
Expand Down
102 changes: 80 additions & 22 deletions lib.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,13 @@ THIS SOFTWARE.
#define DEBUG
#include <stdio.h>
#include <string.h>
#include <strings.h>
#include <ctype.h>
#include <errno.h>
#include <stdlib.h>
#include <stdarg.h>
#include <limits.h>
#include <math.h>
#include "awk.h"

char EMPTY[] = { '\0' };
Expand Down Expand Up @@ -183,12 +185,14 @@ int getrec(char **pbuf, int *pbufsize, bool isrecord) /* get next input record *
innew = false;
if (c != 0 || buf[0] != '\0') { /* normal record */
if (isrecord) {
double result;

if (freeable(fldtab[0]))
xfree(fldtab[0]->sval);
fldtab[0]->sval = buf; /* buf == record */
fldtab[0]->tval = REC | STR | DONTFREE;
if (is_number(fldtab[0]->sval)) {
fldtab[0]->fval = atof(fldtab[0]->sval);
if (is_number(fldtab[0]->sval, & result)) {
fldtab[0]->fval = result;
fldtab[0]->tval |= NUM;
}
}
Expand Down Expand Up @@ -295,15 +299,16 @@ void setclvar(char *s) /* set var=value from s */
{
char *p;
Cell *q;
double result;

for (p=s; *p != '='; p++)
;
*p++ = 0;
p = qstring(p, '\0');
q = setsymtab(s, p, 0.0, STR, symtab);
setsval(q, p);
if (is_number(q->sval)) {
q->fval = atof(q->sval);
if (is_number(q->sval, & result)) {
q->fval = result;
q->tval |= NUM;
}
DPRINTF("command line set %s to |%s|\n", s, p);
Expand Down Expand Up @@ -404,9 +409,11 @@ void fldbld(void) /* create fields from current record */
lastfld = i;
donefld = true;
for (j = 1; j <= lastfld; j++) {
double result;

p = fldtab[j];
if(is_number(p->sval)) {
p->fval = atof(p->sval);
if(is_number(p->sval, & result)) {
p->fval = result;
p->tval |= NUM;
}
}
Expand Down Expand Up @@ -671,12 +678,11 @@ void error()
fprintf(stderr, " source line number %d", curnode->lineno);
else if (lineno)
fprintf(stderr, " source line number %d", lineno);
if (compile_time == COMPILING && cursource() != NULL)
fprintf(stderr, " source file %s", cursource());
fprintf(stderr, "\n");
eprint();
}

if (compile_time == COMPILING && cursource() != NULL)
fprintf(stderr, " source file %s", cursource());
fprintf(stderr, "\n");
eprint();
}

void eprint(void) /* try to print context around error */
Expand Down Expand Up @@ -759,24 +765,76 @@ int isclvar(const char *s) /* is s of form var=something ? */
/* strtod is supposed to be a proper test of what's a valid number */
/* appears to be broken in gcc on linux: thinks 0x123 is a valid FP number */
/* wrong: violates 4.10.1.4 of ansi C standard */

/* well, not quite. As of C99, hex floating point is allowed. so this is
 * a bit of a mess. We work around the mess by checking for a hexadecimal
 * value and disallowing it. Similarly, we now follow gawk and allow only
 * +nan, -nan, +inf, and -inf for NaN and infinity values.
 */

#include <math.h>
int is_number(const char *s)
/*
 * is_valid_number: decide whether the string s begins with (or consists
 * entirely of) a number that awk accepts, converting the text to double
 * only once.
 *
 *   s                 - NUL-terminated candidate; leading white space
 *                       is skipped.
 *   trailing_stuff_ok - when true, a valid numeric prefix followed by
 *                       other characters still counts as a number.
 *   no_trailing       - if non-NULL, set to true only when nothing but
 *                       white space follows the number; it is false on
 *                       every other path, including all failures.
 *   result            - if non-NULL, receives the converted value
 *                       whenever the strtod() conversion succeeded
 *                       (even if trailing text then makes the call
 *                       return false).
 *
 * Returns true when s is acceptable as a number under these rules.
 *
 * Hexadecimal input, signed or unsigned, is rejected.  The only
 * spellings accepted for NaN and infinity are +nan, -nan, +inf and
 * -inf (case independent).
 */
bool is_valid_number(const char *s, bool trailing_stuff_ok,
					bool *no_trailing, double *result)
{
	double r;
	char *ep;
	const char *digits;
	bool at_end;

	if (no_trailing != NULL)
		*no_trailing = false;

	/* <ctype.h> functions require a value representable as unsigned char */
	while (isspace((unsigned char)*s))
		s++;

	/* digits points just past an optional sign */
	digits = (s[0] == '+' || s[0] == '-') ? s + 1 : s;

	/* no hex floating point, sorry -- with or without a sign */
	if (digits[0] == '0' && tolower((unsigned char)digits[1]) == 'x')
		return false;

	if (digits != s
	    && (strncasecmp(digits, "nan", 3) == 0
		|| strncasecmp(digits, "inf", 3) == 0)
	    && (digits[3] == '\0' || isspace((unsigned char)digits[3]))) {
		/* explicitly signed nan/inf standing alone: let strtod() convert it */
	} else if (!isdigit((unsigned char)digits[0]) && digits[0] != '.') {
		/* anything else must start with a digit or a decimal point */
		return false;
	}

	errno = 0;
	r = strtod(s, &ep);
	if (ep == s || errno == ERANGE)
		return false;

	/* make sure "-nan" carries a sign bit even if strtod() did not set one */
	if (isnan(r) && s[0] == '-' && signbit(r) == 0)
		r = -r;

	if (result != NULL)
		*result = r;

	/* skip white space after the number, then see whether anything is left */
	while (isspace((unsigned char)*ep))
		ep++;
	at_end = (*ep == '\0');

	if (no_trailing != NULL)
		*no_trailing = at_end;

	/* true if we reached the end, or trailing stuff is allowed */
	return at_end || trailing_stuff_ok;
}
2 changes: 1 addition & 1 deletion main.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
THIS SOFTWARE.
****************************************************************/

const char *version = "version 20201013";
const char *version = "version 20210215";

#define DEBUG
#include <stdio.h>
Expand Down
4 changes: 3 additions & 1 deletion proto.h
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,9 @@ extern void eprint(void);
extern void bclass(int);
extern double errcheck(double, const char *);
extern int isclvar(const char *);
extern int is_number(const char *);
extern bool is_valid_number(const char *s, bool trailing_stuff_ok,
bool *no_trailing, double *result);
#define is_number(s, val) is_valid_number(s, false, NULL, val)

extern int adjbuf(char **pb, int *sz, int min, int q, char **pbp, const char *what);
extern void run(Node *);
Expand Down
Loading

0 comments on commit 04e75e8

Please sign in to comment.