mirror of https://github.com/axmolengine/axmol.git
450 lines
9.4 KiB
C
450 lines
9.4 KiB
C
/*
 * Gbk_Unicode.h
 *
 *  Created on: 2011-9-12
 *      Author: dumganhar
 */
|
||
|
||
#include <stdio.h>
|
||
#include <stdlib.h>
|
||
#include <string.h>
|
||
#include "Gbk_Unicode.h"
|
||
#include "gbk_table.h"
|
||
#include "unicode_table.h"
|
||
|
||
/*
 * Logging hook, compiled out by default.
 *
 * The former definition, "#define LOGI" followed by a line comment holding
 * the word printf, only looked like it commented the call away.  Comments are
 * removed before macro expansion, so LOGI expanded to nothing and each call
 * site became a bare parenthesised expression statement whose arguments were
 * still evaluated.  This function-like form discards the arguments outright.
 *
 * To turn logging on, define it as:  #define LOGI(...) printf(__VA_ARGS__)
 */
#define LOGI(...) ((void)0)
|
||
|
||
/*
 * Releases the heap block referenced by the pointer lvalue `p` and resets `p`
 * to NULL, so that a repeated SAFE_FREE on the same variable is harmless.
 *
 * free(NULL) is defined to do nothing, so no explicit NULL test is needed.
 * `p` is expanded twice: pass a plain lvalue without side effects.
 */
#undef SAFE_FREE
#define SAFE_FREE(p) \
    do               \
    {                \
        free((p));   \
        (p) = NULL;  \
    } while (0)
|
||
|
||
/*
 * UCS-2 ranges covered by the three Unicode -> GBK lookup tables.  Each table
 * is indexed by (code unit - *_BEGIN); *_TOTAL is the inclusive range size.
 */

/* Range 1: Chinese characters. */
#define UNICODE1_BEGIN (0x3000)
#define UNICODE1_END   (0x9FA5)
#define UNICODE1_TOTAL (UNICODE1_END - UNICODE1_BEGIN + 1)

/* Range 2: full-width letters and symbols. */
#define UNICODE2_BEGIN (0xFF00)
#define UNICODE2_END   (0xFFEF)
#define UNICODE2_TOTAL (UNICODE2_END - UNICODE2_BEGIN + 1)

/* Range 3: common punctuation. */
#define UNICODE3_BEGIN (0x2000)
#define UNICODE3_END   (0x206F)
#define UNICODE3_TOTAL (UNICODE3_END - UNICODE3_BEGIN + 1)

/*
 * GBK double-byte grid: rows are lead bytes 129..254 (0x81..0xFE), columns are
 * trail bytes 64..254 (0x40..0xFE).  These bounds match the byte ranges that
 * MyGBKToUnicode accepts; one row holds 191 cells, which is the row stride
 * used when that function indexes the GBK -> Unicode table.
 */
#define FONT_ROW_BEGIN 129
#define FONT_ROW_END   254
#define FONT_COL_BEGIN 64
#define FONT_COL_END   254
#define FONT_TOTAL     (((FONT_ROW_END) - (FONT_ROW_BEGIN) + 1) * ((FONT_COL_END) - (FONT_COL_BEGIN) + 1))
|
||
|
||
/*
 * Returns the number of 16-bit code units that precede the terminating zero
 * unit of `str`.  A NULL pointer is treated as an empty string and yields 0.
 */
static int myWcslen(const unsigned short* str)
{
    int count = 0;

    if (str == NULL)
    {
        return 0;
    }

    while (str[count] != 0)
    {
        ++count;
    }

    return count;
}
|
||
|
||
#define GET_GBK_WORD(table, s, e) \
|
||
if (pUnicodeIn[i] >= s && pUnicodeIn[i] <= e) \
|
||
{ \
|
||
iIndex = pUnicodeIn[i]-s; \
|
||
oneGbk = table[iIndex]; \
|
||
pGbk = (unsigned char*)&oneGbk; \
|
||
pGBKOut[j++] = *(pGbk+1); \
|
||
pGBKOut[j++] = *pGbk; \
|
||
continue; \
|
||
}
|
||
|
||
|
||
int MyUnicodeToGBK(char* pGBKOut, int iGbkBufSize, const unsigned short* pUnicodeIn)
|
||
{
|
||
unsigned char* pOneUnicode = NULL;
|
||
int len = 0;
|
||
int iIndex = 0;
|
||
int i = 0, j = 0;
|
||
|
||
if (pUnicodeIn == NULL || pGBKOut == NULL || iGbkBufSize <= 2)
|
||
{
|
||
LOGI("pUnicodeIn == NULL || pGBKOut == NULL || iGbkBufSize <= 2");
|
||
return 0;
|
||
}
|
||
|
||
memset(pGBKOut, 0, iGbkBufSize);
|
||
|
||
len = myWcslen(pUnicodeIn);
|
||
for (i = 0, j = 0; i < len; i++)
|
||
{
|
||
unsigned char* pGbk = NULL;
|
||
unsigned short oneGbk = 0;
|
||
pOneUnicode = (unsigned char*)&pUnicodeIn[i];
|
||
if (j >= iGbkBufSize-1)
|
||
{
|
||
break;
|
||
}
|
||
|
||
if (pOneUnicode[1] == 0)
|
||
{// °ë½Ç×Öĸ·ûºÅ
|
||
if (pOneUnicode[0] == 0xb7)
|
||
{
|
||
pGBKOut[j++] = 0xa1;
|
||
pGBKOut[j++] = 0xa4;
|
||
}
|
||
else if (pOneUnicode[0] == 0xB0)
|
||
{
|
||
pGBKOut[j++] = 0xA1;
|
||
pGBKOut[j++] = 0xE3;
|
||
}
|
||
else
|
||
{
|
||
pGBKOut[j++] = pOneUnicode[0];
|
||
}
|
||
}
|
||
else
|
||
{
|
||
GET_GBK_WORD(g_gbk_table1_array, UNICODE1_BEGIN, UNICODE1_END);// ºº×Ö
|
||
GET_GBK_WORD(g_gbk_table2_array, UNICODE2_BEGIN, UNICODE2_END);// È«½Ç×Öĸ·ûºÅ
|
||
GET_GBK_WORD(g_gbk_table3_array, UNICODE3_BEGIN, UNICODE3_END);// ³£Óñêµã·ûºÅ
|
||
}
|
||
}
|
||
|
||
j = j > iGbkBufSize-1 ? iGbkBufSize-1 : j;
|
||
|
||
pGBKOut[j] = '\0';
|
||
return 1;
|
||
}
|
||
|
||
int MyGBKToUnicode(unsigned short* pUnicodeOut, int iUniBufSize, const char* pGBKIn)
|
||
{
|
||
int i = 0;
|
||
int j = 0;
|
||
int len = 0;
|
||
int iIndex = 0;
|
||
unsigned char* pUnicode = NULL;
|
||
unsigned short oneGbkHanzi = 0;
|
||
unsigned char* pOneGbkHanzi = (unsigned char*)&oneGbkHanzi;
|
||
const unsigned char* pGBKInU = (const unsigned char*)pGBKIn;
|
||
|
||
if (pUnicodeOut == NULL || pGBKIn == NULL || iUniBufSize <= 3)
|
||
{
|
||
LOGI("pUnicodeOut == NULL || pGBKIn == NULL || iUniBufSize <= 3");
|
||
return 0;
|
||
}
|
||
|
||
memset(pUnicodeOut, 0, iUniBufSize);
|
||
|
||
len = strlen(pGBKIn);
|
||
for (i = 0; i < len; i++)
|
||
{
|
||
if (j >= iUniBufSize/2-1)
|
||
{
|
||
break;
|
||
}
|
||
|
||
pUnicode = (unsigned char*)&pUnicodeOut[j];
|
||
if (pGBKInU[i] >= 0x00 && pGBKInU[i] <= 0x80)
|
||
{// °ë½Ç×Öĸ·ûºÅ
|
||
*(pUnicode++) = pGBKInU[i];
|
||
*pUnicode = 0x00;
|
||
++j;
|
||
}
|
||
else if ((pGBKInU[i] >= 0x81 && pGBKInU[i] <= 0xFE)
|
||
&&(pGBKInU[i+1] >= 0x40 && pGBKInU[i+1] <= 0xFE)
|
||
&& (pGBKInU[i+1] != 0x7F)
|
||
)
|
||
{// ÖÐÎÄ
|
||
pOneGbkHanzi[1] = pGBKInU[i];
|
||
pOneGbkHanzi[0] = pGBKInU[i+1];
|
||
iIndex = (pOneGbkHanzi[1]-0x81)*191+(pOneGbkHanzi[0]-0x40);
|
||
pUnicodeOut[j] = g_uni_table_array[iIndex];
|
||
++i;
|
||
++j;
|
||
}
|
||
}
|
||
// LOGI("unicode len = %d", j);
|
||
|
||
j = j > iUniBufSize/2-1 ? iUniBufSize/2-1 : j;
|
||
|
||
pUnicodeOut[j] = L'\0';
|
||
return 1;
|
||
}
|
||
|
||
/*
 * Returns the number of bytes needed to encode the zero-terminated UCS-2
 * string `pUniStr` as UTF-8, not counting a terminating NUL.  Returns 0 for a
 * NULL pointer or an empty string.
 *
 * A 16-bit code unit never needs more than three UTF-8 bytes, so the former
 * 4-, 5- and 6-byte branches could not be reached and have been removed.
 */
static int GetUtf8Len(const unsigned short* pUniStr)
{
    int total = 0;

    if (pUniStr == NULL)
    {
        return 0;
    }

    for (; *pUniStr != 0; ++pUniStr)
    {
        if (*pUniStr < 0x80)
        {
            total += 1;
        }
        else if (*pUniStr < 0x800)
        {
            total += 2;
        }
        else
        {
            total += 3;
        }
    }

    return total;
}
|
||
|
||
/*
 * Encodes one 16-bit code unit as UTF-8.
 *
 * c       code unit to encode.
 * outbuf  receives the encoded bytes (no terminator is written); may be NULL
 *         when only the length is wanted.  Must have room for 3 bytes.
 *
 * Returns the encoded length: 1, 2 or 3.  Longer forms cannot occur for a
 * 16-bit input, so the former 4-, 5- and 6-byte branches were unreachable and
 * have been removed.
 *
 * NOTE(review): the original comment asked to keep this in sync with a copy in
 * g_string_insert_unichar(); no function of that name appears in this file.
 */
static int unichar_to_utf8(unsigned short c, char* outbuf)
{
    unsigned int bits = c;
    unsigned int leadMark = 0;
    int len = 0;
    int i = 0;

    if (bits < 0x80)
    {
        leadMark = 0x00;
        len = 1;
    }
    else if (bits < 0x800)
    {
        leadMark = 0xC0;
        len = 2;
    }
    else
    {
        leadMark = 0xE0;
        len = 3;
    }

    if (outbuf != NULL)
    {
        /* Fill continuation bytes from the end, six payload bits at a time. */
        for (i = len - 1; i > 0; --i)
        {
            outbuf[i] = (char)((bits & 0x3F) | 0x80);
            bits >>= 6;
        }

        /* Whatever is left goes into the lead byte under its length marker. */
        outbuf[0] = (char)(bits | leadMark);
    }

    return len;
}
|
||
|
||
/*
 * Converts a NUL-terminated GBK string to a newly allocated UTF-8 string.
 *
 * ppszOutUtf8  receives the malloc'ed, NUL-terminated result on success; the
 *              caller must release it with free().  On every failure path it
 *              is set to NULL, so it is never left pointing at stale or
 *              uninitialised memory.
 * pszGbk       source GBK string; must be non-NULL and non-empty.
 *
 * Returns 1 on success, 0 on invalid arguments, allocation failure or a
 * failed GBK -> UCS-2 conversion.
 *
 * The text is first expanded to UCS-2 in a scratch buffer sized for the worst
 * case of one code unit per input byte, then encoded to UTF-8.  Encoded bytes
 * are appended through a running write pointer; the earlier strcat-per-
 * character loop rescanned the whole output for every character.
 */
int MyGBKToUTF8_M(char** ppszOutUtf8, char* pszGbk)
{
    int iRet = 0;
    int i = 0;
    int wideBufBytes = 0;
    int wideLen = 0;
    int utfLen = 0;
    size_t gbkLen = 0;
    unsigned short* pUnicode = NULL;
    char* pUtf8 = NULL;
    char* pWrite = NULL;

    if (ppszOutUtf8 == NULL)
    {
        return 0;
    }
    *ppszOutUtf8 = NULL;

    if (pszGbk == NULL)
    {
        return 0;
    }

    gbkLen = strlen(pszGbk);
    if (gbkLen == 0)
    {
        return 0;
    }

    /* One 16-bit unit per input byte plus the terminator is always enough. */
    wideBufBytes = (int)((gbkLen + 1) * 2);
    pUnicode = (unsigned short*)malloc((size_t)wideBufBytes);
    if (pUnicode == NULL)
    {
        LOGI("int MyGBKToUTF8_M malloc unicode buf fails!");
        return 0;
    }
    memset(pUnicode, 0, (size_t)wideBufBytes);

    if (0 == MyGBKToUnicode(pUnicode, wideBufBytes, pszGbk))
    {
        LOGI("MyGBKToUnicode fails in MyGBKToUTF8_M");
        goto cleanup;
    }

    wideLen = myWcslen(pUnicode);
    utfLen = GetUtf8Len(pUnicode);

    pUtf8 = (char*)malloc((size_t)utfLen + 1);
    if (pUtf8 == NULL)
    {
        goto cleanup;
    }
    memset(pUtf8, 0, (size_t)utfLen + 1);

    pWrite = pUtf8;
    for (i = 0; i < wideLen; i++)
    {
        /* unichar_to_utf8 returns the number of bytes it stored. */
        pWrite += unichar_to_utf8(pUnicode[i], pWrite);
    }
    *pWrite = '\0';

    *ppszOutUtf8 = pUtf8;
    iRet = 1;

cleanup:
    SAFE_FREE(pUnicode);

    return iRet;
}
|
||
|
||
/*************************************************************************************************
 * Converts a UTF-8 string to Unicode (UCS-2).
 *
 * Parameters:
 *   pInput    pointer to the input string (terminated by '\0')
 *   ppOutput  receives a pointer to the newly allocated output string
 *
 * Return value:
 *   The number of bytes in the converted UCS-2 string (terminator excluded),
 *   or -1 on error.  On error *ppOutput is set to NULL and nothing is left
 *   allocated.  An empty or NULL input is reported as an error.
 *
 * Notes:
 *   1. UTF-8 has no byte-order issue, but UCS-2 code units do: they can be
 *      stored big-endian or little-endian.  This routine always stores them
 *      little-endian (low byte at the lower address).
 *   2. The caller must free() the memory returned through *ppOutput,
 *      otherwise it is leaked.
 *   3. Only 1-, 2- and 3-byte UTF-8 sequences are accepted; longer sequences
 *      and malformed continuation bytes are rejected with -1.
 *
 * Fixes over the previous version:
 *   - The buffer now includes room for the two-byte terminator.  It used to
 *     hold 2 * strlen(pInput) bytes, so an all-ASCII input wrote two bytes
 *     past the end of the allocation.
 *   - The allocation result is checked before use.
 *   - Continuation bytes are validated one at a time, so a sequence cut short
 *     by the end of the string is rejected without reading past the '\0'.
 *   - Bytes are handled as unsigned values; shifting a negative plain char
 *     left is undefined behaviour.
 **************************************************************************************************/
static int utf8_to_unicode(const char* pInput, char** ppOutput)
{
    const unsigned char* src = (const unsigned char*)pInput;
    unsigned char* buffer = NULL;
    unsigned char* dst = NULL;
    size_t inputLen = 0;
    size_t bufferBytes = 0;
    int outputSize = 0; /* bytes of UCS-2 payload produced so far */

    if (ppOutput == NULL)
    {
        return -1;
    }
    *ppOutput = NULL;

    if (pInput == NULL)
    {
        return -1;
    }

    inputLen = strlen(pInput);
    if (inputLen == 0)
    {
        return -1;
    }

    /* Worst case: each input byte becomes one 2-byte unit, plus terminator. */
    bufferBytes = (inputLen + 1) * 2;
    buffer = (unsigned char*)malloc(bufferBytes);
    if (buffer == NULL)
    {
        return -1;
    }
    memset(buffer, 0, bufferBytes);
    dst = buffer;

    while (*src != 0)
    {
        unsigned int codePoint = 0;

        if (*src <= 0x7F)
        {   /* single-byte sequence (ASCII letters, digits, ...) */
            codePoint = src[0];
            src += 1;
        }
        else if ((*src & 0xE0) == 0xC0)
        {   /* two-byte sequence */
            if ((src[1] & 0xC0) != 0x80)
            {
                goto invalid;
            }
            codePoint = ((unsigned int)(src[0] & 0x1F) << 6) | (unsigned int)(src[1] & 0x3F);
            src += 2;
        }
        else if ((*src & 0xF0) == 0xE0)
        {   /* three-byte sequence */
            /* The || short-circuits, so src[2] is only read once src[1] is
             * known to be a continuation byte (and therefore not the '\0'). */
            if ((src[1] & 0xC0) != 0x80 || (src[2] & 0xC0) != 0x80)
            {
                goto invalid;
            }
            codePoint = ((unsigned int)(src[0] & 0x0F) << 12)
                      | ((unsigned int)(src[1] & 0x3F) << 6)
                      | (unsigned int)(src[2] & 0x3F);
            src += 3;
        }
        else
        {   /* sequences of any other length are not handled */
            goto invalid;
        }

        /* Little-endian: low byte first, high byte second. */
        *dst++ = (unsigned char)(codePoint & 0xFF);
        *dst++ = (unsigned char)((codePoint >> 8) & 0xFF);
        outputSize += 2;
    }

    /* The buffer was zero-filled, so the two-byte terminator is in place. */
    *ppOutput = (char*)buffer;
    return outputSize;

invalid:
    free(buffer);
    return -1;
}
|
||
|
||
/*
 * Converts a NUL-terminated UTF-8 string to a newly allocated GBK string.
 *
 * ppszOutGbk  receives the malloc'ed, NUL-terminated result on success; the
 *             caller must release it with free().  On every failure path it
 *             is set to NULL, so it is never left pointing at stale or
 *             uninitialised memory.
 * pszUtf8     source UTF-8 string; must be non-NULL and non-empty.
 *
 * Returns 1 on success, 0 on invalid arguments, malformed or unsupported
 * UTF-8, allocation failure or a failed UCS-2 -> GBK conversion.
 *
 * The text is first decoded to UCS-2 and then mapped to GBK.  A GBK character
 * takes at most two bytes per UCS-2 unit, so the UCS-2 byte count plus one
 * byte for the terminator is always a large enough output buffer.
 */
int MyUTF8ToGBK_M(char** ppszOutGbk, char* pszUtf8)
{
    int iRet = 0;
    int unicodeBytes = 0;
    char* pUnicodeBuf = NULL; /* UCS-2 text, received through a char** */
    char* pGbk = NULL;

    if (ppszOutGbk == NULL)
    {
        return 0;
    }
    *ppszOutGbk = NULL;

    if (pszUtf8 == NULL || pszUtf8[0] == '\0')
    {
        return 0;
    }

    /* The decoder is handed a real char* rather than a casted
     * unsigned short**, which avoids writing through a mistyped pointer. */
    unicodeBytes = utf8_to_unicode(pszUtf8, &pUnicodeBuf);
    if (unicodeBytes > 0)
    {
        pGbk = (char*)malloc((size_t)unicodeBytes + 1);
        if (pGbk != NULL)
        {
            memset(pGbk, 0, (size_t)unicodeBytes + 1);

            if (0 == MyUnicodeToGBK(pGbk, unicodeBytes + 1, (const unsigned short*)pUnicodeBuf))
            {
                LOGI("MyUnicodeToGBK fails in MyUTF8ToGBK_M");
                SAFE_FREE(pGbk);
            }
            else
            {
                *ppszOutGbk = pGbk;
                iRet = 1;
            }
        }
    }

    /* Released on every path, including a failed decode that may have left a
     * partially filled buffer behind. */
    SAFE_FREE(pUnicodeBuf);

    return iRet;
}
|