Skip to content

Commit

Permalink
Added OSUtf.h header for utf8 conversion.
Browse files Browse the repository at this point in the history
Wrote mbrtowc, mbsrtowcs, wcrtomb, wctob to wrap OSUtf.
  • Loading branch information
Chadderz121 committed Mar 31, 2014
1 parent 955c49a commit e24ee5f
Show file tree
Hide file tree
Showing 3 changed files with 212 additions and 4 deletions.
40 changes: 40 additions & 0 deletions bslug_include/rvl/OSUtf.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
/* OSUtf.h
* by Alex Chadwick
*
* Copyright (C) 2014, Alex Chadwick
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/

/* definitions of symbols inferred to exist in the OSUtf.h header file for
* which the brainslug symbol information is available. */

#ifndef _RVL_OSUTF_H_
#define _RVL_OSUTF_H_

/* Decodes the utf8 encoded character starting at utf8 into a 32 bit utf32
 * integer and stores it at utf32. On success, returns utf8 advanced past the
 * bytes that were read; returns NULL on error. If utf8 points to the NUL
 * terminator ('\0'), nothing is consumed and utf8 itself is returned. */
char *OSUTF8to32(const char *utf8, int *utf32);
/* Converts the utf32 character to a single byte ANSI character. Returns '\0' on
 * error. */
char OSUTF32toANSI(int utf32);

#endif /* _RVL_OSUTF_H_ */
162 changes: 158 additions & 4 deletions bslug_include/wchar.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
#define __need_wint_t
#include <stddef.h>
#include <stdint.h>
#include <rvl/OSUtf.h>

#ifndef NULL
#define NULL 0
Expand All @@ -57,9 +58,162 @@ typedef struct {
} __value; /* Value so far. */
} mbstate_t;

size_t mbrtowc(wchar_t* pwc, const char* pmb, size_t max, mbstate_t* ps);
size_t mbsrtowcs(wchar_t* dest, const char** src, size_t max, mbstate_t* ps);
size_t wcrtomb(char* pmb, wchar_t wc, mbstate_t* ps);
int wctob(wint_t wc);
/* Forward declarations of the restartable multibyte <-> wide character
 * conversion functions. They are defined below as static inline wrappers
 * around the routines declared in rvl/OSUtf.h. */
static inline size_t mbrtowc(
wchar_t *pwc, const char *pmb, size_t max, mbstate_t *ps);
static inline size_t mbsrtowcs(
wchar_t *dest, const char **src, size_t max, mbstate_t *ps);
static inline size_t wcrtomb(char *pmb, wchar_t wc, mbstate_t *ps);
static inline int wctob(wint_t wc);

/* Restartable conversion of one utf8 encoded character to a wide character.
 *
 * pwc - where to store the decoded character; may be NULL to discard it.
 * pmb - the bytes to decode; NULL asks whether the state is initial.
 * max - the maximum number of bytes of pmb that may be inspected.
 * ps  - conversion state holding the bytes of a partially received
 *       character; NULL selects a private static state.
 *
 * Returns the number of bytes of pmb that complete the character, 0 if the
 * character is the NUL terminator, (size_t)-2 if all max bytes were consumed
 * without completing a character (they are remembered in the state), or
 * (size_t)-1 on a conversion error. errno is not set.
 *
 * The state only ever holds the prefix of an incomplete character. Bytes
 * copied beyond the decoded character are dropped rather than kept, because
 * the caller advances by the return value and presents them again. */
static inline size_t mbrtowc(
    wchar_t *pwc, const char *pmb, size_t max, mbstate_t *ps) {
    static mbstate_t internal_state;
    size_t capacity, pending, filled, used, i;
    int saw_nul = 0;
    int wchar;
    char *res;

    if (ps == NULL)
        ps = &internal_state;
    capacity = sizeof(ps->__value.__wchb);

    if (pmb == NULL) {
        /* finish conversion: fine unless a partial character is pending. */
        return ps->__count == 0 ? (size_t)0 : (size_t)-1;
    }

    /* bytes carried over from earlier calls that returned (size_t)-2. */
    pending = (size_t)ps->__count;
    /* We should not have a full (or overfull) buffer. */
    if (pending >= capacity)
        return (size_t)-1;
    /* we need some bytes! */
    if (max == 0)
        return (size_t)-2;

    /* append the new bytes to the pending ones. Stop after a NUL so that pmb
     * is never read beyond the end of a terminated string. */
    filled = pending;
    for (i = 0; i < max && filled < capacity; i++) {
        ps->__value.__wchb[filled++] = pmb[i];
        if (pmb[i] == '\0') {
            saw_nul = 1;
            break;
        }
    }
    /* zero out remaining buffer space. */
    for (i = filled; i < capacity; i++)
        ps->__value.__wchb[i] = 0;
    ps->__count = filled;

    res = OSUTF8to32(ps->__value.__wchb, &wchar);

    if (res == NULL) {
        /* conversion error. If the buffer is not yet full and the input did
         * not end, it is probably because we don't have enough bytes. */
        if (!saw_nul && filled < capacity)
            return (size_t)-2;
        return (size_t)-1;
    }

    used = (size_t)(res - ps->__value.__wchb);

    if (used > filled) {
        /* this should be impossible: OSUTF8to32 is expected never to read
         * beyond 4 bytes, and 0 is never a valid continuation byte, so the
         * zero padding should have caused a conversion error instead. */
        return (size_t)-1;
    }

    if (used == 0) {
        /* we've ended the string. */
        if (pwc)
            *pwc = L'\0';
        /* reset shift state. */
        ps->__count = 0;
        return 0;
    }

    if (used <= pending) {
        /* the state already held a complete character, which this function
         * never leaves behind; treat the state as invalid. */
        return (size_t)-1;
    }

    /* character complete: back to the initial state. Only the bytes taken
     * from pmb during this call count towards the return value. */
    ps->__count = 0;
    if (pwc)
        *pwc = (wchar_t)wchar;
    return used - pending;
}
/* Converts the NUL terminated utf8 string at *src to wide characters.
 *
 * dest - output buffer; if NULL nothing is stored and the characters are
 *        only counted.
 * src  - address of the pointer to the input. When dest is not NULL it is
 *        updated to NULL if the terminator was reached, to the offending
 *        character on error, or to the first unconverted character if max
 *        was exhausted. When dest is NULL it is left untouched.
 * max  - capacity of dest in wide characters; ignored when dest is NULL.
 * ps   - not consulted: every OSUTF8to32 call here is given the start of a
 *        character in a contiguous string, so nothing is carried over.
 *
 * Returns the number of wide characters converted, not counting the
 * terminator, or (size_t)-1 on a conversion error. errno is not set. */
static inline size_t mbsrtowcs(
    wchar_t *dest, const char **src, size_t max, mbstate_t *ps) {
    const char *cur = *src;
    size_t count = 0;

    (void)ps;

    /* without a destination there is no capacity limit; just count. */
    while (dest == NULL || count < max) {
        int utf32;
        const char *end = OSUTF8to32(cur, &utf32);

        if (end == NULL) {
            /* invalid sequence: report where it starts. */
            if (dest != NULL)
                *src = cur;
            return (size_t)-1;
        }
        if (end == cur) {
            /* OSUTF8to32 returns its argument at the NUL terminator. */
            if (dest != NULL) {
                *dest = L'\0';
                *src = NULL;
            }
            return count;
        }

        if (dest != NULL)
            *dest++ = (wchar_t)utf32;
        cur = end;
        count++;
    }

    /* dest is full (only reachable when dest is not NULL). */
    *src = cur;
    return count;
}
/* Encodes the wide character wc as utf8 at pmb.
 *
 * pmb - output buffer with room for up to 4 bytes. If NULL, nothing is
 *       written and 1 is returned, as for encoding L'\0'.
 * wc  - the character to encode.
 * ps  - unused; the encoding is stateless.
 *
 * Returns the number of bytes written (1 to 4), or (size_t)-1 if wc cannot be
 * encoded: surrogates (0xD800-0xDFFF), 0xFFFE, 0xFFFF, and anything that does
 * not fit in a 4 byte sequence (>= 0x200000). errno is not set. */
static inline size_t wcrtomb(char *pmb, wchar_t wc, mbstate_t *ps) {
    /* work on an unsigned copy so that a negative wchar_t is rejected as out
     * of range instead of being mistaken for a small code point. */
    uint32_t cp = (uint32_t)wc;

    (void)ps;

    if (pmb == NULL)
        return 1;

    if ((cp >= 0xD800 && cp <= 0xDFFF)
        || cp == 0xFFFE || cp == 0xFFFF || cp >= 0x200000)
        return (size_t)-1;

    if (cp < 0x80) {
        pmb[0] = (char)(unsigned char)cp;
        return 1;
    }
    if (cp < 0x800) {
        pmb[0] = (char)(unsigned char)(0xC0 | (cp >> 6));
        pmb[1] = (char)(unsigned char)(0x80 | (cp & 0x3F));
        return 2;
    }
    if (cp < 0x10000) {
        pmb[0] = (char)(unsigned char)(0xE0 | (cp >> 12));
        pmb[1] = (char)(unsigned char)(0x80 | ((cp >> 6) & 0x3F));
        pmb[2] = (char)(unsigned char)(0x80 | (cp & 0x3F));
        return 3;
    }

    pmb[0] = (char)(unsigned char)(0xF0 | (cp >> 18));
    pmb[1] = (char)(unsigned char)(0x80 | ((cp >> 12) & 0x3F));
    pmb[2] = (char)(unsigned char)(0x80 | ((cp >> 6) & 0x3F));
    pmb[3] = (char)(unsigned char)(0x80 | (cp & 0x3F));
    return 4;
}
/* Converts the wide character wc to its single byte (ANSI) representation
 * using OSUTF32toANSI.
 *
 * Returns the byte as an unsigned char converted to int, or -1 if wc has no
 * single byte representation. -1 stands in for EOF, which is not available
 * here because this header does not include <stdio.h>. */
static inline int wctob(wint_t wc) {
    char byte;

    /* OSUTF32toANSI reports errors as '\0', so the genuine NUL character has
     * to be handled before calling it. */
    if (wc == 0)
        return 0;

    byte = OSUTF32toANSI((int)wc);
    if (byte == '\0')
        return -1;

    /* never return a negative value for a successful conversion, even where
     * plain char is signed. */
    return (unsigned char)byte;
}

#endif /* _WCHAR_H_ */
14 changes: 14 additions & 0 deletions symbols/OSUtf.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Symbols in the rvl/OSUtf.h header -->
<!-- NOTE(review): each <data> element presumably holds the leading bytes of
     the routine as hexadecimal words, located at the given offset within the
     symbol - confirm against the symbol file format documentation. -->
<symbols>
<!-- char *OSUTF8to32(const char *utf8, int *utf32) -->
<symbol name="OSUTF8to32" size="0x110" offset="0x0" >
<data>
88C30000 2C060000 41820008 38630001
</data>
</symbol>
<!-- char OSUTF32toANSI(int utf32) -->
<symbol name="OSUTF32toANSI" size="0x78" offset="0x0" >
<data>
280300FF 4081000C 38600000 4E800020
</data>
</symbol>
</symbols>

0 comments on commit e24ee5f

Please sign in to comment.