Skip to content

Commit

Permalink
Merge branch 'master' into improve-gototab
Browse files Browse the repository at this point in the history
  • Loading branch information
arnoldrobbins committed Nov 17, 2023
2 parents cf7cbbb + 9e254e5 commit 12793c0
Show file tree
Hide file tree
Showing 12 changed files with 145 additions and 163 deletions.
6 changes: 6 additions & 0 deletions FIXES
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,12 @@ THIS SOFTWARE.
This file lists all bug fixes, changes, etc., made since the
second edition of the AWK book was published in September 2023.

Nov 15, 2023:
Man page edit, regression test fixes. thanks to Arnold Robbins
consolidation of sub and gsub into dosub, removing duplicate
code. thanks to Miguel Pineiro Jr.
gcc replaced with cc everywhere.

Oct 30, 2023:
multiple fixes and a minor code cleanup.
disabled utf-8 for non-multibyte locales, such as C or POSIX.
Expand Down
3 changes: 3 additions & 0 deletions awk.1
Original file line number Diff line number Diff line change
Expand Up @@ -586,6 +586,9 @@ the syntax is worse.
.PP
Input is expected to be UTF-8 encoded. Other multibyte
character sets are not handled.
However, in eight-bit locales,
.I awk
treats each input byte as a separate character.
.SH UNUSUAL FLOATING-POINT VALUES
.I Awk
was designed before IEEE 754 arithmetic defined Not-A-Number (NaN)
Expand Down
2 changes: 1 addition & 1 deletion bugs-fixed/REGRESS
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#! /bin/bash
#! /bin/sh

if [ ! -f ../a.out ]
then
Expand Down
2 changes: 1 addition & 1 deletion main.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
THIS SOFTWARE.
****************************************************************/

const char *version = "version 20231030";
const char *version = "version 20231116";

#define DEBUG
#include <stdio.h>
Expand Down
8 changes: 4 additions & 4 deletions makefile
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,10 @@ CFLAGS =
CFLAGS = -O2

# compiler options
#CC = gcc -Wall -g -Wwrite-strings
#CC = gcc -O4 -Wall -pedantic -fno-strict-aliasing
#CC = gcc -fprofile-arcs -ftest-coverage # then gcov f1.c; cat f1.c.gcov
HOSTCC = gcc -g -Wall -pedantic -Wcast-qual
#CC = cc -Wall -g -Wwrite-strings
#CC = cc -O4 -Wall -pedantic -fno-strict-aliasing
#CC = cc -fprofile-arcs -ftest-coverage # then gcov f1.c; cat f1.c.gcov
HOSTCC = cc -g -Wall -pedantic -Wcast-qual
CC = $(HOSTCC) # change this if cross-compiling.

# By fiat, to make our lives easier, yacc is now defined to be bison.
Expand Down
4 changes: 2 additions & 2 deletions maketab.c
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,8 @@ struct xx
{ ARRAY, "array", NULL },
{ INDIRECT, "indirect", "$(" },
{ SUBSTR, "substr", "substr" },
{ SUB, "sub", "sub" },
{ GSUB, "gsub", "gsub" },
{ SUB, "dosub", "sub" },
{ GSUB, "dosub", "gsub" },
{ INDEX, "sindex", "sindex" },
{ SPRINTF, "awksprintf", "sprintf " },
{ ADD, "arith", " + " },
Expand Down
3 changes: 1 addition & 2 deletions proto.h
Original file line number Diff line number Diff line change
Expand Up @@ -196,8 +196,7 @@ extern FILE *openfile(int, const char *, bool *);
extern const char *filename(FILE *);
extern Cell *closefile(Node **, int);
extern void closeall(void);
extern Cell *sub(Node **, int);
extern Cell *gsub(Node **, int);
extern Cell *dosub(Node **, int);

extern FILE *popen(const char *, const char *);
extern int pclose(FILE *);
Expand Down
270 changes: 122 additions & 148 deletions run.c
Original file line number Diff line number Diff line change
Expand Up @@ -2397,169 +2397,143 @@ static void flush_all(void)

void backsub(char **pb_ptr, const char **sptr_ptr);

Cell *sub(Node **a, int nnn) /* substitute command */
Cell *dosub(Node **a, int subop) /* sub and gsub */
{
const char *sptr, *q;
Cell *x, *y, *result;
char *t, *buf, *pb;
fa *pfa;
int tempstat;
char *repl;
Cell *x;

char *buf = NULL;
char *pb = NULL;
int bufsz = recsize;

if ((buf = (char *) malloc(bufsz)) == NULL)
FATAL("out of memory in sub");
x = execute(a[3]); /* target string */
t = getsval(x);
if (a[0] == NULL) /* 0 => a[1] is already-compiled regexpr */
pfa = (fa *) a[1]; /* regular expression */
else {
y = execute(a[1]);
pfa = makedfa(getsval(y), 1);
tempfree(y);
const char *r, *s;
const char *start;
const char *noempty = NULL; /* empty match disallowed here */
size_t m = 0; /* match count */
size_t whichm; /* which match to select, 0 = global */
int mtype; /* match type */

if (a[0] == NULL) { /* 0 => a[1] is already-compiled regexpr */
pfa = (fa *) a[1];
} else {
x = execute(a[1]);
pfa = makedfa(getsval(x), 1);
tempfree(x);
}
y = execute(a[2]); /* replacement string */
result = False;
if (pmatch(pfa, t)) {
sptr = t;
adjbuf(&buf, &bufsz, 1+patbeg-sptr, recsize, 0, "sub");
pb = buf;
while (sptr < patbeg)
*pb++ = *sptr++;
sptr = getsval(y);
while (*sptr != '\0') {
adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "sub");
if (*sptr == '\\') {
backsub(&pb, &sptr);
} else if (*sptr == '&') {
sptr++;
adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "sub");
for (q = patbeg; q < patbeg+patlen; )
*pb++ = *q++;
} else
*pb++ = *sptr++;

x = execute(a[2]); /* replacement string */
repl = tostring(getsval(x));
tempfree(x);

switch (subop) {
case SUB:
whichm = 1;
x = execute(a[3]); /* source string */
break;
case GSUB:
whichm = 0;
x = execute(a[3]); /* source string */
break;
default:
FATAL("dosub: unrecognized subop: %d", subop);
}

start = getsval(x);
while (pmatch(pfa, start)) {
if (buf == NULL) {
if ((pb = buf = malloc(bufsz)) == NULL)
FATAL("out of memory in dosub");
tempstat = pfa->initstat;
pfa->initstat = 2;
}
*pb = '\0';
if (pb > buf + bufsz)
FATAL("sub result1 %.30s too big; can't happen", buf);
sptr = patbeg + patlen;
if ((patlen == 0 && *patbeg) || (patlen && *(sptr-1))) {
adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "sub");
while ((*pb++ = *sptr++) != '\0')
continue;

/* match types */
#define MT_IGNORE 0 /* unselected or invalid */
#define MT_INSERT 1 /* selected, empty */
#define MT_REPLACE 2 /* selected, not empty */

/* an empty match just after replacement is invalid */

if (patbeg == noempty && patlen == 0) {
mtype = MT_IGNORE; /* invalid, not counted */
} else if (whichm == ++m || whichm == 0) {
mtype = patlen ? MT_REPLACE : MT_INSERT;
} else {
mtype = MT_IGNORE; /* unselected, but counted */
}
if (pb > buf + bufsz)
FATAL("sub result2 %.30s too big; can't happen", buf);
setsval(x, buf); /* BUG: should be able to avoid copy */
result = True;
}
tempfree(x);
tempfree(y);
free(buf);
return result;
}

Cell *gsub(Node **a, int nnn) /* global substitute */
{
Cell *x, *y;
char *rptr, *pb;
const char *q, *t, *sptr;
char *buf;
fa *pfa;
int mflag, tempstat, num;
int bufsz = recsize;
int charlen = 0;
/* leading text: */
if (patbeg > start) {
adjbuf(&buf, &bufsz, (pb - buf) + (patbeg - start),
recsize, &pb, "dosub");
s = start;
while (s < patbeg)
*pb++ = *s++;
}

if ((buf = (char *) malloc(bufsz)) == NULL)
FATAL("out of memory in gsub");
mflag = 0; /* if mflag == 0, can replace empty string */
num = 0;
x = execute(a[3]); /* target string */
t = getsval(x);
if (a[0] == NULL) /* 0 => a[1] is already-compiled regexpr */
pfa = (fa *) a[1]; /* regular expression */
else {
y = execute(a[1]);
pfa = makedfa(getsval(y), 1);
tempfree(y);
}
y = execute(a[2]); /* replacement string */
if (pmatch(pfa, t)) {
tempstat = pfa->initstat;
pfa->initstat = 2;
pb = buf;
rptr = getsval(y);
do {
if (patlen == 0 && *patbeg != '\0') { /* matched empty string */
if (mflag == 0) { /* can replace empty */
num++;
sptr = rptr;
while (*sptr != '\0') {
adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gsub");
if (*sptr == '\\') {
backsub(&pb, &sptr);
} else if (*sptr == '&') {
sptr++;
adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gsub");
for (q = patbeg; q < patbeg+patlen; )
*pb++ = *q++;
} else
*pb++ = *sptr++;
}
}
if (*t == '\0') /* at end */
goto done;
adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gsub");
charlen = u8_nextlen(t);
while (charlen-- > 0)
*pb++ = *t++;
if (pb > buf + bufsz) /* BUG: not sure of this test */
FATAL("gsub result0 %.30s too big; can't happen", buf);
mflag = 0;
if (mtype == MT_IGNORE)
goto matching_text; /* skip replacement text */

r = repl;
while (*r != 0) {
adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "dosub");
if (*r == '\\') {
backsub(&pb, &r);
} else if (*r == '&') {
r++;
adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize,
&pb, "dosub");
for (s = patbeg; s < patbeg+patlen; )
*pb++ = *s++;
} else {
*pb++ = *r++;
}
else { /* matched nonempty string */
num++;
sptr = t;
adjbuf(&buf, &bufsz, 1+(patbeg-sptr)+pb-buf, recsize, &pb, "gsub");
while (sptr < patbeg)
*pb++ = *sptr++;
sptr = rptr;
while (*sptr != '\0') {
adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gsub");
if (*sptr == '\\') {
backsub(&pb, &sptr);
} else if (*sptr == '&') {
sptr++;
adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gsub");
for (q = patbeg; q < patbeg+patlen; )
*pb++ = *q++;
} else
*pb++ = *sptr++;
}
t = patbeg + patlen;
if (patlen == 0 || *t == '\0' || *(t-1) == '\0')
goto done;
if (pb > buf + bufsz)
FATAL("gsub result1 %.30s too big; can't happen", buf);
mflag = 1;
}
} while (pmatch(pfa,t));
sptr = t;
adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gsub");
while ((*pb++ = *sptr++) != '\0')
continue;
done: if (pb < buf + bufsz)
*pb = '\0';
else if (*(pb-1) != '\0')
FATAL("gsub result2 %.30s truncated; can't happen", buf);
setsval(x, buf); /* BUG: should be able to avoid copy + free */
}

matching_text:
if (mtype == MT_REPLACE || *patbeg == '\0')
goto next_search; /* skip matching text */

if (patlen == 0)
patlen = u8_nextlen(patbeg);
adjbuf(&buf, &bufsz, (pb-buf) + patlen, recsize, &pb, "dosub");
s = patbeg;
while (s < patbeg + patlen)
*pb++ = *s++;

next_search:
start = patbeg + patlen;
if (m == whichm || *patbeg == '\0')
break;
if (mtype == MT_REPLACE)
noempty = start;

#undef MT_IGNORE
#undef MT_INSERT
#undef MT_REPLACE
}

xfree(repl);

if (buf != NULL) {
pfa->initstat = tempstat;

/* trailing text */
adjbuf(&buf, &bufsz, 1+strlen(start)+pb-buf, 0, &pb, "dosub");
while ((*pb++ = *start++) != '\0')
;

setsval(x, buf);
free(buf);
}

tempfree(x);
tempfree(y);
x = gettemp();
x->tval = NUM;
x->fval = num;
free(buf);
return(x);
x->fval = m;
return x;
}

void backsub(char **pb_ptr, const char **sptr_ptr) /* handle \\& variations */
Expand Down
2 changes: 1 addition & 1 deletion testdir/Compare.tt
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ oldawk=${oldawk-awk}
awk=${awk-../a.out}

echo compiling time.c
gcc time.c -o time
cc time.c -o time
time=./time

echo time command = $time
Expand Down
2 changes: 1 addition & 1 deletion testdir/REGRESS
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/bin/sh

uname -a
gcc echo.c -o echo && echo echo compiled
cc echo.c -o echo && echo echo compiled

oldawk=${oldawk-awk}
awk=${awk-../a.out}
Expand Down
1 change: 0 additions & 1 deletion testdir/T.csv
Original file line number Diff line number Diff line change
Expand Up @@ -77,5 +77,4 @@ a''b [a''b]
a, [a][]
"", [][]
, [][]
a"b [a"b]
!!!!
5 changes: 3 additions & 2 deletions testdir/T.flags
Original file line number Diff line number Diff line change
Expand Up @@ -20,5 +20,6 @@ grep 'unknown option' foo >/dev/null || echo 'T.flags: bad unknown option'
$awk -F >foo 2>&1
grep 'no field separator' foo >/dev/null || echo 'T.flags: bad missing field separator'

$awk -F '' >foo 2>&1
grep 'field separator FS is empty' foo >/dev/null || echo 'T.flags: bad empty field separator'
### Awk is now like gawk and splits into separate characters if FS = ""
# $awk -F '' >foo 2>&1
# grep 'field separator FS is empty' foo >/dev/null || echo 'T.flags: bad empty field separator'

0 comments on commit 12793c0

Please sign in to comment.