axmol/cocos2dx/platform/bada/Gbk_Unicode.c

541 lines
11 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/*
* Gbk_Unicode.c
*
* Created on: 2011-9-12
* Author: dumganhar
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "Gbk_Unicode.h"
#define LOGI //printf
#define USE_MEMORY
#ifdef USE_MEMORY
#include "gbk_table.h"
#include "unicode_table.h"
#endif
/* Drop any earlier definition of SAFE_FREE before defining the local one. */
#undef SAFE_FREE
/* Frees (p) when it is non-NULL and then resets it to NULL.
 * (p) is evaluated more than once, so it must be a side-effect-free lvalue. */
#define SAFE_FREE(p) do \
{\
if ((p) != NULL) \
{ \
free((p)); \
(p) = NULL; \
} \
}while(0)
/* Unicode range served by g_gbk_table1 (0x3000..0x9FA5, inclusive). */
#define UNICODE1_BEGIN (0x3000)
#define UNICODE1_END (0x9FA5)
#define UNICODE1_TOTAL (UNICODE1_END-UNICODE1_BEGIN+1)
/* Unicode range served by g_gbk_table2 (0xFF00..0xFFEF, inclusive). */
#define UNICODE2_BEGIN (0xFF00)
#define UNICODE2_END (0xFFEF)
#define UNICODE2_TOTAL (UNICODE2_END-UNICODE2_BEGIN+1)
/* Unicode range served by g_gbk_table3 (0x2000..0x206F, inclusive). */
#define UNICODE3_BEGIN (0x2000)
#define UNICODE3_END (0x206F)
#define UNICODE3_TOTAL (UNICODE3_END-UNICODE3_BEGIN+1)
/* Row/column bounds of the GBK->Unicode table. They equal the first-byte
 * (0x81..0xFE) and second-byte (0x40..0xFE) limits that MyGBKToUnicode checks
 * before indexing g_uni_table. */
#define FONT_ROW_BEGIN 129
#define FONT_ROW_END 254
#define FONT_COL_BEGIN 64
#define FONT_COL_END 254
/* Number of 16-bit entries in the GBK->Unicode table (rows * columns). */
#define FONT_TOTAL (((FONT_ROW_END)-(FONT_ROW_BEGIN)+1)*((FONT_COL_END)-(FONT_COL_BEGIN)+1))
/* GBK->Unicode table; MyGBKToUnicode indexes it with
 * (firstByte-0x81)*191 + (secondByte-0x40). */
static unsigned short* g_uni_table = NULL;
/* Unicode->GBK tables, one per UNICODEn range, indexed by (code - UNICODEn_BEGIN). */
static unsigned short* g_gbk_table1 = NULL;
static unsigned short* g_gbk_table2 = NULL;
static unsigned short* g_gbk_table3 = NULL;
/* Non-zero once InitGbkUnicodeTable has set up the tables; the converters
 * return 0 while it is zero. */
static int s_bInit = 0;
/* Returns the number of 16-bit units in `str` that precede the terminating
 * zero unit. `str` must be non-NULL and zero-terminated. */
static int myWcslen(const unsigned short* str)
{
    const unsigned short* cursor = str;

    while (*cursor != 0)
    {
        ++cursor;
    }
    return (int)(cursor - str);
}
#ifndef USE_MEMORY
/* Allocates a zeroed table of `count` 16-bit entries and fills it from `fp`.
 * Returns NULL (nothing left allocated) when allocation or the read fails. */
static unsigned short* ReadTableFromFile(FILE* fp, size_t count)
{
    unsigned short* table = (unsigned short*)calloc(count, sizeof(unsigned short));

    if (table != NULL && fread(table, sizeof(unsigned short), count, fp) != count)
    {
        free(table);
        table = NULL;
    }
    return table;
}
#endif

/*
 * Prepares the conversion tables used by MyUnicodeToGBK / MyGBKToUnicode.
 *
 * With USE_MEMORY defined the tables are the arrays compiled in from
 * gbk_table.h / unicode_table.h and szTablePath is ignored.
 * Without USE_MEMORY the four tables are read, in order, from the binary
 * file szTablePath: GBK->Unicode, then the three Unicode->GBK ranges.
 *
 * Returns 1 when the tables are ready (including when they already were),
 * 0 when the path is NULL, the file cannot be opened, memory runs out or the
 * file is too short. On failure no table stays allocated.
 */
int InitGbkUnicodeTable(const char* szTablePath)
{
#ifndef USE_MEMORY
    FILE* fp = NULL;
    int loaded = 0;

    /* Already initialised: report success, as the USE_MEMORY build does. */
    if (s_bInit)
        return 1;
    if (szTablePath == NULL)
        return 0;

    fp = fopen(szTablePath, "rb");
    if (fp == NULL)
    {
        return 0;
    }

    /* The tables are stored back to back, so stop at the first failure. */
    loaded = (g_uni_table = ReadTableFromFile(fp, FONT_TOTAL)) != NULL
          && (g_gbk_table1 = ReadTableFromFile(fp, UNICODE1_TOTAL)) != NULL
          && (g_gbk_table2 = ReadTableFromFile(fp, UNICODE2_TOTAL)) != NULL
          && (g_gbk_table3 = ReadTableFromFile(fp, UNICODE3_TOTAL)) != NULL;
    fclose(fp);

    if (!loaded)
    {
        /* Roll back whatever was loaded so a later retry starts clean. */
        free(g_uni_table);
        g_uni_table = NULL;
        free(g_gbk_table1);
        g_gbk_table1 = NULL;
        free(g_gbk_table2);
        g_gbk_table2 = NULL;
        free(g_gbk_table3);
        g_gbk_table3 = NULL;
        return 0;
    }
    s_bInit = 1;
#else
    (void)szTablePath; /* unused when the tables are compiled in */
    g_uni_table = g_uni_table_array;
    g_gbk_table1 = g_gbk_table1_array;
    g_gbk_table2 = g_gbk_table2_array;
    g_gbk_table3 = g_gbk_table3_array;
    s_bInit = 1;
#endif
    return 1;
}
/*
 * Releases the tables loaded by InitGbkUnicodeTable and clears the
 * initialised flag. Does nothing when the tables were never loaded, and
 * compiles to an empty function when USE_MEMORY is defined.
 */
void ReleaseGbkUnicodeTable(void)
{
#ifndef USE_MEMORY
    if (!s_bInit)
        return;

    /* free(NULL) is a no-op, so no per-pointer guard is needed. */
    free(g_uni_table);
    g_uni_table = NULL;

    free(g_gbk_table1);
    g_gbk_table1 = NULL;

    free(g_gbk_table2);
    g_gbk_table2 = NULL;

    free(g_gbk_table3);
    g_gbk_table3 = NULL;

    s_bInit = 0;
#endif
}
/*
 * Looks `code` up in one Unicode->GBK table that covers [first, last].
 * On a hit stores the 16-bit GBK word (first byte in the high 8 bits) in
 * *pGbkWord and returns 1. Returns 0 when `code` is outside the range or the
 * entry is 0: a GBK first byte is never 0, so a zero word cannot be a
 * mapping and emitting it would only plant NUL bytes in the output.
 */
static int LookupGbkWord(const unsigned short* table, unsigned short first,
                         unsigned short last, unsigned short code,
                         unsigned short* pGbkWord)
{
    if (code < first || code > last)
        return 0;
    if (table[code - first] == 0)
        return 0;
    *pGbkWord = table[code - first];
    return 1;
}

/*
 * Converts the zero-terminated UCS-2 string pUnicodeIn to GBK.
 *
 * pGBKOut      receives the NUL-terminated GBK bytes.
 * iGbkBufSize  size of pGBKOut in bytes; must be greater than 2.
 * pUnicodeIn   zero-terminated array of 16-bit code units (host byte order).
 *
 * Returns 1 on success, 0 when the tables are not initialised or an argument
 * is invalid. Output is truncated at a character boundary when the buffer is
 * too small. Code units with no GBK mapping are silently dropped.
 */
int MyUnicodeToGBK(char* pGBKOut, int iGbkBufSize, const unsigned short* pUnicodeIn)
{
    int len = 0;
    int i = 0;
    int j = 0;

    if (!s_bInit)
        return 0;
    if (pUnicodeIn == NULL || pGBKOut == NULL || iGbkBufSize <= 2)
    {
        LOGI("pUnicodeIn == NULL || pGBKOut == NULL || iGbkBufSize <= 2");
        return 0;
    }
    memset(pGBKOut, 0, iGbkBufSize);
    len = myWcslen(pUnicodeIn);
    for (i = 0; i < len; i++)
    {
        /* Work on the value, not its bytes, so host byte order does not matter. */
        const unsigned short code = pUnicodeIn[i];
        unsigned short gbkWord = 0;
        int byteCount = 2;

        if (code <= 0xFF)
        {
            if (code == 0xB7)
            {
                gbkWord = 0xA1A4; /* U+00B7 -> GBK A1 A4 */
            }
            else if (code == 0xB0)
            {
                gbkWord = 0xA1E3; /* U+00B0 -> GBK A1 E3 */
            }
            else
            {
                gbkWord = code;   /* every other value below 0x100 is copied as one byte */
                byteCount = 1;
            }
        }
        else if (!LookupGbkWord(g_gbk_table1, UNICODE1_BEGIN, UNICODE1_END, code, &gbkWord)
              && !LookupGbkWord(g_gbk_table2, UNICODE2_BEGIN, UNICODE2_END, code, &gbkWord)
              && !LookupGbkWord(g_gbk_table3, UNICODE3_BEGIN, UNICODE3_END, code, &gbkWord))
        {
            continue; /* no mapping: drop the character */
        }

        /* Keep one byte for the terminator and never split a double-byte character. */
        if (j + byteCount > iGbkBufSize - 1)
        {
            break;
        }
        if (byteCount == 2)
        {
            pGBKOut[j++] = (char)(gbkWord >> 8);
        }
        pGBKOut[j++] = (char)(gbkWord & 0xFF);
    }
    pGBKOut[j] = '\0';
    return 1;
}
/*
 * Converts the NUL-terminated GBK string pGBKIn to UCS-2.
 *
 * pUnicodeOut  receives zero-terminated 16-bit code units (host byte order).
 * iUniBufSize  size of pUnicodeOut in BYTES; must be greater than 3.
 * pGBKIn       NUL-terminated GBK input.
 *
 * Returns 1 on success, 0 when the tables are not initialised or an argument
 * is invalid. Bytes 0x00..0x80 are copied as single code units. A first byte
 * 0x81..0xFE followed by a second byte 0x40..0xFE (except 0x7F) is looked up
 * in g_uni_table. Any other byte is skipped. Output is truncated when the
 * buffer is full.
 */
int MyGBKToUnicode(unsigned short* pUnicodeOut, int iUniBufSize, const char* pGBKIn)
{
    enum
    {
        GBK_LEAD_MIN = 0x81,
        GBK_LEAD_MAX = 0xFE,
        GBK_TRAIL_MIN = 0x40,
        GBK_TRAIL_MAX = 0xFE,
        GBK_TRAIL_HOLE = 0x7F,
        GBK_ROW_WIDTH = GBK_TRAIL_MAX - GBK_TRAIL_MIN + 1 /* 191 entries per first byte */
    };
    const unsigned char* pGBKInU = (const unsigned char*)pGBKIn;
    int maxUnits = 0;
    int len = 0;
    int i = 0;
    int j = 0;

    if (!s_bInit)
        return 0;
    if (pUnicodeOut == NULL || pGBKIn == NULL || iUniBufSize <= 3)
    {
        LOGI("pUnicodeOut == NULL || pGBKIn == NULL || iUniBufSize <= 3");
        return 0;
    }
    memset(pUnicodeOut, 0, iUniBufSize);
    maxUnits = iUniBufSize / 2 - 1; /* keep one unit for the terminator */
    len = (int)strlen(pGBKIn);

    for (i = 0; i < len && j < maxUnits; i++)
    {
        const unsigned char lead = pGBKInU[i];

        if (lead < GBK_LEAD_MIN)
        {
            /* Single byte: store the value itself rather than poking bytes,
             * so the result does not depend on host byte order. */
            pUnicodeOut[j++] = lead;
        }
        else if (lead <= GBK_LEAD_MAX)
        {
            /* i + 1 is at worst the NUL terminator, which fails the range test. */
            const unsigned char trail = pGBKInU[i + 1];

            if (trail >= GBK_TRAIL_MIN && trail <= GBK_TRAIL_MAX && trail != GBK_TRAIL_HOLE)
            {
                pUnicodeOut[j++] =
                    g_uni_table[(lead - GBK_LEAD_MIN) * GBK_ROW_WIDTH + (trail - GBK_TRAIL_MIN)];
                ++i; /* the second byte has been consumed */
            }
        }
        /* Anything else (0xFF, or a first byte without a valid second byte) is skipped. */
    }
    pUnicodeOut[j] = 0;
    return 1;
}
/*
 * Returns the number of bytes needed to encode the zero-terminated UCS-2
 * string pUniStr as UTF-8, not counting a terminating NUL.
 * Returns 0 for NULL or an empty string.
 *
 * A 16-bit code unit never needs more than three UTF-8 bytes, so only the
 * 1-, 2- and 3-byte cases exist.
 */
static int GetUtf8Len(unsigned short* pUniStr)
{
    const unsigned short* p = pUniStr;
    int len = 0;

    if (p == NULL)
        return 0;

    for (; *p != 0; ++p)
    {
        if (*p < 0x80)
        {
            len += 1;
        }
        else if (*p < 0x800)
        {
            len += 2;
        }
        else
        {
            len += 3;
        }
    }
    return len;
}
/*
 * Encodes one 16-bit code unit as UTF-8.
 *
 * c       code unit to encode.
 * outbuf  receives 1..3 bytes, or NULL to only query the length.
 *         No terminating NUL is written.
 *
 * Returns the number of bytes the encoding takes (1, 2 or 3). A 16-bit value
 * cannot reach the 4-, 5- or 6-byte forms, so they are not handled.
 */
static int unichar_to_utf8 (unsigned short c, char* outbuf)
{
    int len = 0;
    int first = 0;
    int i = 0;

    if (c < 0x80)
    {
        first = 0;
        len = 1;
    }
    else if (c < 0x800)
    {
        first = 0xc0;
        len = 2;
    }
    else
    {
        first = 0xe0;
        len = 3;
    }

    if (outbuf)
    {
        /* Fill continuation bytes from the end, six payload bits each. */
        for (i = len - 1; i > 0; --i)
        {
            outbuf[i] = (char)((c & 0x3f) | 0x80);
            c >>= 6;
        }
        /* What is left of c fits under the leading-byte marker. */
        outbuf[0] = (char)(c | first);
    }
    return len;
}
/*
 * Converts the NUL-terminated GBK string pszGbk to a newly allocated UTF-8
 * string.
 *
 * ppszOutUtf8  on success receives a malloc'ed, NUL-terminated UTF-8 string
 *              that the caller must free().
 * pszGbk       NUL-terminated, non-empty GBK input.
 *
 * Returns 1 on success, 0 on invalid arguments, allocation failure or when
 * the GBK->Unicode step fails.
 */
int MyGBKToUTF8_M(char** ppszOutUtf8, char* pszGbk)
{
    int i = 0;
    int iRet = 0;
    int wideLenGuess = 0;
    int wideLen = 0;
    int utfLen = 0;
    size_t gbkLen = 0;
    unsigned short* pUnicode = NULL;
    char* pWrite = NULL;

    if (ppszOutUtf8 == NULL || pszGbk == NULL)
        return 0;
    gbkLen = strlen(pszGbk);
    if (gbkLen == 0)
        return 0;

    /* Each GBK byte yields at most one 16-bit unit; +1 for the terminator. */
    wideLenGuess = (int)(gbkLen + 1) * 2;
    pUnicode = (unsigned short*)malloc(wideLenGuess);
    if (pUnicode == NULL)
    {
        LOGI("int MyGBKToUTF8_M malloc unicode buf fails!");
        return 0;
    }

    do {
        /* MyGBKToUnicode zeroes and terminates the buffer itself. */
        if (0 == MyGBKToUnicode(pUnicode, wideLenGuess, pszGbk))
        {
            LOGI("MyGBKToUnicode fails in MyGBKToUTF8_M");
            break;
        }
        wideLen = myWcslen(pUnicode);
        utfLen = GetUtf8Len(pUnicode);
        *ppszOutUtf8 = (char*)malloc(utfLen+1);
        if (*ppszOutUtf8 == NULL)
        {
            break;
        }

        /* Append through a moving cursor; strcat would rescan the output each time. */
        pWrite = *ppszOutUtf8;
        for (i = 0; i < wideLen; i++)
        {
            pWrite += unichar_to_utf8(pUnicode[i], pWrite);
        }
        *pWrite = '\0';
        iRet = 1;
    }while (0);

    SAFE_FREE(pUnicode);
    return iRet;
}
/*************************************************************************************************
 * Converts a UTF-8 string to Unicode (UCS-2).
 * Parameters:
 *   char*  pInput    NUL-terminated UTF-8 input string
 *   char** ppOutput  receives a pointer to the newly allocated output string
 * Return value:
 *   The size in bytes of the converted string (terminator not counted), or -1
 *   on error (NULL/empty input, out of memory, malformed UTF-8, or a sequence
 *   longer than three bytes, which does not fit in UCS-2).
 * Notes:
 *   1. The output is an array of 16-bit code units in host byte order,
 *      followed by one zero unit, so it can be read as unsigned short*.
 *   2. On success the caller must free() *ppOutput. On failure after
 *      allocation the buffer is already released and *ppOutput is NULL.
 *      For NULL/empty input *ppOutput is not touched.
 **************************************************************************************************/
static int utf8_to_unicode(char* pInput, char** ppOutput)
{
    /* Unsigned bytes: masking and shifting plain char would be UB for values >= 0x80. */
    const unsigned char* in = (const unsigned char*)pInput;
    unsigned short* out = NULL;
    size_t inLen = 0;
    int units = 0;

    if (ppOutput == NULL || pInput == NULL)
        return -1;
    inLen = strlen(pInput);
    if (inLen == 0)
        return -1;

    /* Each input byte yields at most one unit; +1 leaves room for the terminator. */
    out = (unsigned short*)calloc(inLen + 1, sizeof(unsigned short));
    if (out == NULL)
    {
        *ppOutput = NULL;
        return -1;
    }

    while (*in != 0)
    {
        unsigned short code = 0;

        if (in[0] <= 0x7F) /* single-byte sequence (ASCII) */
        {
            code = in[0];
            in += 1;
        }
        else if ((in[0] & 0xE0) == 0xC0) /* two-byte sequence */
        {
            if ((in[1] & 0xC0) != 0x80)
            {
                goto invalid;
            }
            code = (unsigned short)(((in[0] & 0x1F) << 6) | (in[1] & 0x3F));
            in += 2;
        }
        else if ((in[0] & 0xF0) == 0xE0) /* three-byte sequence */
        {
            /* Short-circuit keeps in[2] unread when in[1] is already the terminator. */
            if ((in[1] & 0xC0) != 0x80 || (in[2] & 0xC0) != 0x80)
            {
                goto invalid;
            }
            code = (unsigned short)(((in[0] & 0x0F) << 12) | ((in[1] & 0x3F) << 6) | (in[2] & 0x3F));
            in += 3;
        }
        else /* stray continuation byte, or a sequence of four or more bytes */
        {
            goto invalid;
        }
        out[units++] = code;
    }

    out[units] = 0;
    *ppOutput = (char*)out;
    return units * (int)sizeof(unsigned short);

invalid:
    free(out);
    *ppOutput = NULL;
    return -1;
}
/*
 * Converts the NUL-terminated UTF-8 string pszUtf8 to a newly allocated GBK
 * string.
 *
 * ppszOutGbk  on success receives a malloc'ed, NUL-terminated GBK string that
 *             the caller must free().
 * pszUtf8     NUL-terminated, non-empty UTF-8 input.
 *
 * Returns 1 on success and 0 on any failure. When the Unicode->GBK step
 * fails, the output buffer is released and *ppszOutGbk is reset to NULL.
 */
int MyUTF8ToGBK_M(char** ppszOutGbk, char* pszUtf8)
{
    unsigned short* wide = NULL;
    int wideBytes = 0;
    int gbkCapacity = 0;
    int succeeded = 0;

    if (ppszOutGbk == NULL || pszUtf8 == NULL || pszUtf8[0] == '\0')
        return 0;

    /* Step 1: UTF-8 -> UCS-2 (the helper allocates `wide`). */
    wideBytes = utf8_to_unicode(pszUtf8, (char**)&wide);
    if (wideBytes > 0)
    {
        /* Step 2: UCS-2 -> GBK. Each 16-bit unit needs at most two bytes. */
        gbkCapacity = wideBytes + 1;
        *ppszOutGbk = (char*)malloc(gbkCapacity);
        if (*ppszOutGbk != NULL)
        {
            memset(*ppszOutGbk, 0, gbkCapacity);
            if (MyUnicodeToGBK(*ppszOutGbk, gbkCapacity, wide) != 0)
            {
                succeeded = 1;
            }
            else
            {
                LOGI("MyUnicodeToGBK fails in MyUTF8ToGBK_M");
                SAFE_FREE(*ppszOutGbk);
            }
        }
    }

    SAFE_FREE(wide);
    return succeeded;
}