From 0f810ef16781bf0f59690be63af876bddabf68bf Mon Sep 17 00:00:00 2001 From: Mark Nudelman Date: Wed, 25 Nov 2020 15:03:25 -0800 Subject: [PATCH] Make -R support OSC 8 hyperlink escape sequences. --- NEWS.VER | 2 + cvt.c | 7 +-- filename.c | 5 ++- less.h | 6 +++ less.nro.VER | 16 +++++-- line.c | 122 ++++++++++++++++++++++++++++++++++++++------------- version.c | 5 ++- 7 files changed, 121 insertions(+), 42 deletions(-) diff --git a/NEWS.VER b/NEWS.VER index ccb2a510..f02d97d8 100644 --- a/NEWS.VER +++ b/NEWS.VER @@ -20,6 +20,8 @@ * Add ctrl-W search modifier for wrapping search. +* Support OSC 8 hyperlinks when -R is in effect. + * g command with no number will ignore -j and put first line at top of screen. * Editing the same file under different names now creates only diff --git a/cvt.c b/cvt.c index 3271cc94..100a5f61 100644 --- a/cvt.c +++ b/cvt.c @@ -70,6 +70,7 @@ cvt_text(odst, osrc, chpos, lenp, ops) { int src_pos = (int) (src - osrc); int dst_pos = (int) (dst - odst); + struct ansi_state *pansi; ch = step_char(&src, +1, src_end); if ((ops & CVT_BS) && ch == '\b' && dst > odst) { @@ -78,13 +79,13 @@ cvt_text(odst, osrc, chpos, lenp, ops) dst--; } while (dst > odst && utf_mode && !IS_ASCII_OCTET(*dst) && !IS_UTF8_LEAD(*dst)); - } else if ((ops & CVT_ANSI) && IS_CSI_START(ch)) + } else if ((ops & CVT_ANSI) && (pansi = ansi_start(ch)) != NULL) { /* Skip to end of ANSI escape sequence. */ - src++; /* skip the CSI start char */ while (src < src_end) - if (!is_ansi_middle(*src++)) + if (ansi_step(pansi, *src++) != ANSI_MID) break; + ansi_done(pansi); } else { /* Just copy the char to the destination buffer. 
*/ diff --git a/filename.c b/filename.c index 653397a5..e5b76998 100644 --- a/filename.c +++ b/filename.c @@ -479,8 +479,9 @@ bin_file(f) } else { LWCHAR c = step_char(&p, +1, edata); - if (ctldisp == OPT_ONPLUS && IS_CSI_START(c)) - skip_ansi(&p, edata); + struct ansi_state *pansi; + if (ctldisp == OPT_ONPLUS && (pansi = ansi_start(c)) != NULL) + skip_ansi(pansi, &p, edata); else if (binary_char(c)) bin_count++; } diff --git a/less.h b/less.h index b6c40f49..db70bf3f 100644 --- a/less.h +++ b/less.h @@ -383,6 +383,11 @@ struct wchar_range_table #define AT_BINARY (1 << 5) /* LESS*BINFMT representation */ #define AT_HILITE (1 << 6) /* Internal highlights (e.g., for search) */ +/* ANSI states */ +#define ANSI_MID 1 +#define ANSI_ERR 2 +#define ANSI_END 3 + #if '0' == 240 #define IS_EBCDIC_HOST 1 #endif @@ -529,6 +534,7 @@ struct wchar_range_table struct mlist; struct loption; struct hilite_tree; +struct ansi_state; #include "pattern.h" #include "funcs.h" diff --git a/less.nro.VER b/less.nro.VER index 3368a683..71d272ad 100644 --- a/less.nro.VER +++ b/less.nro.VER @@ -791,17 +791,25 @@ each type of control character). Thus, various display problems may result, such as long lines being split in the wrong place. .IP "\-R or \-\-RAW-CONTROL-CHARS" -Like \-r, but only ANSI "color" escape sequences are output in "raw" form. +Like \-r, but only ANSI "color" escape sequences and OSC 8 hyperlink +sequences are output in "raw" form. Unlike \-r, the screen appearance is maintained correctly, provided that there are no escape sequences in the file -other than ANSI color escape sequences. +other than these types of escape sequences. +For the purpose of keeping track of screen appearance, +these escape sequences are assumed to not move the cursor. +OSC 8 hyperlinks are sequences of the form: +.sp + ESC ] 8 ; \&...\& \\7 +.sp +The terminating sequence may be either a BEL character (\\7) +or the two-character sequence "ESC \\". 
+.sp ANSI color escape sequences are sequences of the form: .sp ESC [ \&...\& m .sp where the "...\&" is zero or more color specification characters. -For the purpose of keeping track of screen appearance, -ANSI color escape sequences are assumed to not move the cursor. You can make .I less think that characters other than "m" can end ANSI color escape sequences diff --git a/line.c b/line.c index e46aa39d..dd13387d 100644 --- a/line.c +++ b/line.c @@ -18,6 +18,7 @@ static char *linebuf = NULL; /* Buffer which holds the current output line */ static char *attr = NULL; /* Extension of linebuf to hold attributes */ public int size_linebuf = 0; /* Size of line buffer (and attr buffer) */ +static struct ansi_state *line_ansi = NULL; static int cshift; /* Current left-shift of output line buffer */ public int hshift; /* Desired left-shift of output line buffer */ @@ -68,6 +69,12 @@ static int mbc_buf_len = 0; static int mbc_buf_index = 0; static POSITION mbc_pos; +struct ansi_state { + int hindex; + int hlink; + int prev_esc; +}; + /* * Initialize from environment variables. */ @@ -287,8 +294,9 @@ pshift(shift) */ while (shifted <= shift && from < curr) { + struct ansi_state *pansi; c = linebuf[from]; - if (ctldisp == OPT_ONPLUS && IS_CSI_START(c)) + if (ctldisp == OPT_ONPLUS && (pansi = ansi_start(c)) != NULL) { /* Keep cumulative effect. */ linebuf[to] = c; @@ -297,9 +305,10 @@ pshift(shift) { linebuf[to] = linebuf[from]; attr[to++] = attr[from]; - if (!is_ansi_middle(linebuf[from++])) + if (ansi_step(pansi, linebuf[from++]) != ANSI_MID) break; - } + } + ansi_done(pansi); continue; } @@ -528,29 +537,6 @@ backc(VOID_PARAM) return 0; } -/* - * Are we currently within a recognized ANSI escape sequence? - */ - static int -in_ansi_esc_seq(VOID_PARAM) -{ - char *p; - - /* - * Search backwards for either an ESC (which means we ARE in a seq); - * or an end char (which means we're NOT in a seq). 
- */ - for (p = &linebuf[curr]; p > linebuf; ) - { - LWCHAR ch = step_char(&p, -1, linebuf); - if (IS_CSI_START(ch)) - return (1); - if (!is_ansi_middle(ch)) - return (0); - } - return (0); -} - /* * Is a character the end of an ANSI escape sequence? */ @@ -582,15 +568,82 @@ is_ansi_middle(ch) * pp is initially positioned just after the CSI_START char. */ public void -skip_ansi(pp, limit) +skip_ansi(pansi, pp, limit) + struct ansi_state *pansi; char **pp; constant char *limit; { LWCHAR c; do { c = step_char(pp, +1, limit); - } while (*pp < limit && is_ansi_middle(c)); - /* Note that we discard final char, for which is_ansi_middle is false. */ + } while (*pp < limit && ansi_step(pansi, c) == ANSI_MID); + /* Note that we discard the final char, for which ansi_step is not ANSI_MID. */ +} + +/* + * Determine if a character starts an ANSI escape sequence. + * If so, return a new ansi_state (release it with ansi_done); else NULL. + */ + public struct ansi_state * +ansi_start(ch) + LWCHAR ch; +{ + struct ansi_state *pansi; + + if (!IS_CSI_START(ch)) + return NULL; + pansi = ecalloc(1, sizeof(struct ansi_state)); + pansi->hindex = 0; + pansi->hlink = 0; + pansi->prev_esc = 0; + return pansi; +} + +/* + * Feed the next char of an ANSI escape sequence to the state machine. + * Returns ANSI_MID, ANSI_END or ANSI_ERR. + */ + public int +ansi_step(pansi, ch) + struct ansi_state *pansi; + LWCHAR ch; +{ + if (pansi->hlink) + { + /* Hyperlink ends with \7 or ESC-backslash. */ + if (ch == '\7') + return ANSI_END; + if (pansi->prev_esc && ch == '\\') + return ANSI_END; + pansi->prev_esc = (ch == ESC); + return ANSI_MID; + } + if (pansi->hindex >= 0) + { + static char hlink_prefix[] = "]8;"; + if (ch == hlink_prefix[pansi->hindex++]) + { + if (hlink_prefix[pansi->hindex] == '\0') + pansi->hlink = 1; + return ANSI_MID; + } + pansi->hindex = -1; + } + if (is_ansi_middle(ch)) + return ANSI_MID; + if (is_ansi_end(ch)) + return ANSI_END; + return ANSI_ERR; +} + +/* + * Free an ansi_state structure. 
+ */ + public void +ansi_done(pansi) + struct ansi_state *pansi; +{ + free(pansi); } @@ -637,9 +690,16 @@ store_char(ch, a, rep, pos) } #endif - if (ctldisp == OPT_ONPLUS && in_ansi_esc_seq()) + if (ctldisp == OPT_ONPLUS && line_ansi == NULL) + line_ansi = ansi_start(ch); + else if (ctldisp == OPT_ONPLUS && line_ansi != NULL) { - if (!is_ansi_end(ch) && !is_ansi_middle(ch)) { + int astate = ansi_step(line_ansi, ch); + if (astate == ANSI_END) + { + ansi_done(line_ansi); + line_ansi = NULL; + } else if (astate == ANSI_ERR) { /* Remove whole unrecognized sequence. */ char *p = &linebuf[curr]; LWCHAR bch; diff --git a/version.c b/version.c index 4d7ef024..5896893a 100644 --- a/version.c +++ b/version.c @@ -889,7 +889,8 @@ v562 5/19/20 Update Unicode tables; minor doc formatting. v563 6/13/20 Fix crash due to realpath() incompatibility. v564 8/25/20 Handle realpath consistently; update docs. v565 11/3/20 Add ESC-U command, optimize calls to realpath(). -v566 Fix crash when reopening a file while using LESSOPEN. +v566 11/25/20 Fix crash when reopening a file while using LESSOPEN; + support OSC 8 hyperlinks. */ -char version[] = "566x"; +char version[] = "566";