Skip to content

Commit

Permalink
Refactor a little.
Browse files Browse the repository at this point in the history
  • Loading branch information
gwsw committed Oct 21, 2017
1 parent e863fd8 commit 1566a4f
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 52 deletions.
32 changes: 9 additions & 23 deletions charset.c
Original file line number Diff line number Diff line change
Expand Up @@ -557,33 +557,19 @@ is_utf8_well_formed(ss, slen)
}

/*
* Return number of invalid UTF-8 sequences and binary chars found in a buffer.
* Skip bytes until a UTF-8 lead byte (11xxxxxx) or ASCII byte (0xxxxxxx) is found.
*/
public int
utf_bin_count(data, len)
char *data;
int len;
public void
utf_skip_to_lead(pp, limit)
char **pp;
char *limit;
{
int bin_count = 0;
char *edata = data + len;
while (data < edata)
{
if (is_utf8_well_formed(data, edata-data))
{
if (bin_char_in_string(&data, edata))
bin_count++;
} else /* invalid UTF-8 */
{
/* Skip to next lead byte. */
bin_count++;
do {
++data;
} while (data < edata && !IS_UTF8_LEAD(*data & 0377) && !IS_ASCII_OCTET(*data));
}
}
return (bin_count);
do {
++(*pp);
} while (*pp < limit && !IS_UTF8_LEAD((*pp)[0] & 0377) && !IS_ASCII_OCTET((*pp)[0]));
}


/*
* Get the value of a UTF-8 character.
*/
Expand Down
40 changes: 12 additions & 28 deletions filename.c
Original file line number Diff line number Diff line change
Expand Up @@ -460,7 +460,7 @@ bin_file(f)
int bin_count = 0;
char data[256];
char* p;
char* pend;
char* edata;

if (!seekable(f))
return (0);
Expand All @@ -469,15 +469,19 @@ bin_file(f)
n = read(f, data, sizeof(data));
if (n <= 0)
return (0);
if (utf_mode)
edata = &data[n];
for (p = data; p < edata; )
{
bin_count = utf_bin_count(data, n);
} else
{
pend = &data[n];
for (p = data; p < pend; )
if (utf_mode && !is_utf8_well_formed(p, edata-data))
{
bin_count++;
utf_skip_to_lead(&p, edata);
} else
{
if (bin_char_in_string(&p, pend))
LWCHAR c = step_char(&p, +1, edata);
if (ctldisp == OPT_ONPLUS && IS_CSI_START(c))
skip_ansi(&p, edata);
else if (binary_char(c))
bin_count++;
}
}
Expand All @@ -488,26 +492,6 @@ bin_file(f)
return (bin_count > 5);
}

/*
 * Examine the next char in a string and report whether it is binary.
 * The char is fetched with step_char, using *pp as the cursor and
 * limit as the end of the string.
 * If ctldisp is OPT_ONPLUS and the char starts a CSI sequence, the
 * sequence is stepped over and is not counted as binary.
 * Returns 1 if binary_char() accepts the char, otherwise 0.
 */
public int
bin_char_in_string(pp, limit)
	char **pp;
	constant char *limit;
{
	LWCHAR ch = step_char(pp, +1, limit);

	/* Ordinary case: the answer is just whatever binary_char says. */
	if (ctldisp != OPT_ONPLUS || !IS_CSI_START(ch))
		return (binary_char(ch) ? 1 : 0);

	/*
	 * CSI sequence: keep stepping until we hit the limit or read a
	 * char that cannot appear in the middle of the sequence.
	 */
	for (;;)
	{
		ch = step_char(pp, +1, limit);
		if (*pp >= limit || !is_ansi_middle(ch))
			break;
	}
	return (0);
}

/*
* Try to determine the size of a file by seeking to the end.
*/
Expand Down
19 changes: 18 additions & 1 deletion line.c
Original file line number Diff line number Diff line change
Expand Up @@ -571,7 +571,7 @@ is_ansi_end(ch)
}

/*
*
* Can a char appear in an ANSI escape sequence, before the end char?
*/
public int
is_ansi_middle(ch)
Expand All @@ -584,6 +584,23 @@ is_ansi_middle(ch)
return (strchr(mid_ansi_chars, (char) ch) != NULL);
}

/*
 * Step over the remainder of an ANSI escape sequence.
 * On entry *pp points just past the CSI_START char; limit marks the
 * end of the data. Chars are read with step_char until the limit is
 * reached or a char is read for which is_ansi_middle() is false.
 * That last char is read and thrown away as well, so on return *pp
 * sits after it.
 */
public void
skip_ansi(pp, limit)
	char **pp;
	constant char *limit;
{
	for (;;)
	{
		/* Always read at least one char, as the sequence needs a final char. */
		LWCHAR ch = step_char(pp, +1, limit);
		if (*pp >= limit)
			break;
		if (!is_ansi_middle(ch))
			break;
	}
}


/*
* Append a character and attribute to the line buffer.
*/
Expand Down

0 comments on commit 1566a4f

Please sign in to comment.