axmol/cocos2dx/platform/bada/Gbk_Unicode.c

450 lines
9.4 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/*
* Gbk_Unicode.c
*
* Conversions between GBK, UCS-2 and UTF-8 strings based on static lookup tables.
*
* Created on: 2011-9-12
* Author: dumganhar
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "Gbk_Unicode.h"
#include "gbk_table.h"
#include "unicode_table.h"
/* Logging hook. The replacement text is a line comment, and comments are
 * removed before the macro is defined, so LOGI expands to nothing: every
 * LOGI("...") call site is left as a parenthesised string expression that
 * has no effect. */
#define LOGI //printf
#undef SAFE_FREE
/* free() the pointer if it is non-NULL, then reset it to NULL.
 * The argument is evaluated several times, so it must be free of side effects. */
#define SAFE_FREE(p) do \
{\
if ((p) != NULL) \
{ \
free((p)); \
(p) = NULL; \
} \
}while(0)
/* Unicode range looked up in g_gbk_table1_array by MyUnicodeToGBK
 * (labelled "Hanzi" at the use site). */
#define UNICODE1_BEGIN (0x3000)
#define UNICODE1_END (0x9FA5)
#define UNICODE1_TOTAL (UNICODE1_END-UNICODE1_BEGIN+1)
/* Unicode range looked up in g_gbk_table2_array by MyUnicodeToGBK
 * (labelled "full-width letters and symbols" at the use site). */
#define UNICODE2_BEGIN (0xFF00)
#define UNICODE2_END (0xFFEF)
#define UNICODE2_TOTAL (UNICODE2_END-UNICODE2_BEGIN+1)
/* Unicode range looked up in g_gbk_table3_array by MyUnicodeToGBK
 * (labelled "common punctuation" at the use site). */
#define UNICODE3_BEGIN (0x2000)
#define UNICODE3_END (0x206F)
#define UNICODE3_TOTAL (UNICODE3_END-UNICODE3_BEGIN+1)
/* Row/column limits: 129..254 and 64..254 equal the 0x81..0xFE lead-byte and
 * 0x40..0xFE trail-byte ranges that MyGBKToUnicode checks. The FONT_* and
 * UNICODEn_TOTAL macros are not referenced by the code in this file.
 * NOTE(review): FONT_TOTAL (126 * 191) is presumably the entry count of
 * g_uni_table_array - confirm against unicode_table.h. */
#define FONT_ROW_BEGIN 129
#define FONT_ROW_END 254
#define FONT_COL_BEGIN 64
#define FONT_COL_END 254
#define FONT_TOTAL (((FONT_ROW_END)-(FONT_ROW_BEGIN)+1)*((FONT_COL_END)-(FONT_COL_BEGIN)+1))
/*
 * Length of a zero-terminated string of 16-bit units.
 *
 * str: first unit of the string; must not be NULL.
 * Returns the number of units that precede the terminating 0 unit.
 */
static int myWcslen(const unsigned short* str)
{
    const unsigned short* cursor = str;

    for (; *cursor != 0; ++cursor)
    {
        /* walk forward to the terminator */
    }
    return (int)(cursor - str);
}
/*
 * Convert a zero-terminated UCS-2 string to a NUL-terminated GBK string.
 *
 * pGBKOut      destination buffer; it is zero-filled before conversion.
 * iGbkBufSize  size of pGBKOut in bytes; must be greater than 2.
 * pUnicodeIn   zero-terminated source string of 16-bit units.
 *
 * Returns 0 when an argument is rejected, 1 otherwise (also when the output
 * had to be truncated because the buffer is too small).
 *
 * Mapping rules:
 *  - units below 0x100 are copied as a single byte, except U+00B7 and U+00B0,
 *    which are emitted as the two-byte codes 0xA1A4 and 0xA1E3;
 *  - units inside the UNICODE1/2/3 ranges are looked up in the matching
 *    g_gbk_tableN_array and emitted high byte first;
 *  - any other unit has no mapping and is silently skipped.
 *
 * A two-byte code is only written when both bytes fit in front of the
 * terminator, so a truncated result never ends in a lone lead byte. Code
 * units and table entries are split with shifts rather than byte pointers,
 * which makes the result independent of host byte order.
 */
int MyUnicodeToGBK(char* pGBKOut, int iGbkBufSize, const unsigned short* pUnicodeIn)
{
    int i = 0;
    int j = 0;

    if (pUnicodeIn == NULL || pGBKOut == NULL || iGbkBufSize <= 2)
    {
        LOGI("pUnicodeIn == NULL || pGBKOut == NULL || iGbkBufSize <= 2");
        return 0;
    }

    memset(pGBKOut, 0, (size_t)iGbkBufSize);

    for (i = 0; pUnicodeIn[i] != 0; i++)
    {
        const unsigned short uni = pUnicodeIn[i];
        unsigned short gbk = 0;

        /* Keep the last byte free for the terminator. */
        if (j >= iGbkBufSize - 1)
        {
            break;
        }

        if (uni < 0x100)
        {
            /* Half-width letters and symbols. */
            if (uni == 0xB7)
            {
                gbk = 0xA1A4;
            }
            else if (uni == 0xB0)
            {
                gbk = 0xA1E3;
            }
            else
            {
                pGBKOut[j++] = (char)uni;
                continue;
            }
        }
        else if (uni >= UNICODE1_BEGIN && uni <= UNICODE1_END)
        {
            /* Hanzi. */
            gbk = g_gbk_table1_array[uni - UNICODE1_BEGIN];
        }
        else if (uni >= UNICODE2_BEGIN && uni <= UNICODE2_END)
        {
            /* Full-width letters and symbols. */
            gbk = g_gbk_table2_array[uni - UNICODE2_BEGIN];
        }
        else if (uni >= UNICODE3_BEGIN && uni <= UNICODE3_END)
        {
            /* Common punctuation. */
            gbk = g_gbk_table3_array[uni - UNICODE3_BEGIN];
        }
        else
        {
            /* No table covers this unit: drop it. */
            continue;
        }

        /* Stop instead of writing half of a double-byte character. */
        if (j + 2 > iGbkBufSize - 1)
        {
            break;
        }
        pGBKOut[j++] = (char)(gbk >> 8);
        pGBKOut[j++] = (char)(gbk & 0xFF);
    }

    pGBKOut[j] = '\0';
    return 1;
}
/*
 * Convert a NUL-terminated GBK string to a zero-terminated UCS-2 string.
 *
 * pUnicodeOut  destination buffer; it is zero-filled before conversion.
 * iUniBufSize  size of pUnicodeOut in BYTES (not units); must be greater than 3.
 * pGBKIn       NUL-terminated GBK source string.
 *
 * Returns 0 when an argument is rejected, 1 otherwise (also when the output
 * had to be truncated because the buffer is too small).
 *
 * Bytes 0x01..0x80 are copied as one unit each. A lead byte 0x81..0xFE
 * followed by a trail byte 0x40..0xFE (0x7F excluded) is looked up in
 * g_uni_table_array, which is indexed as 191 entries per lead byte. Any other
 * byte is skipped on its own. Units are stored as whole 16-bit values, so the
 * result does not depend on host byte order.
 */
int MyGBKToUnicode(unsigned short* pUnicodeOut, int iUniBufSize, const char* pGBKIn)
{
    enum
    {
        kSingleByteMax = 0x80,
        kLeadMin       = 0x81,
        kLeadMax       = 0xFE,
        kTrailMin      = 0x40,
        kTrailMax      = 0xFE,
        kTrailHole     = 0x7F,
        kRowWidth      = 191   /* kTrailMax - kTrailMin + 1 */
    };
    const unsigned char* in = (const unsigned char*)pGBKIn;
    int capacity = 0;
    int i = 0;
    int j = 0;

    if (pUnicodeOut == NULL || pGBKIn == NULL || iUniBufSize <= 3)
    {
        LOGI("pUnicodeOut == NULL || pGBKIn == NULL || iUniBufSize <= 3");
        return 0;
    }

    memset(pUnicodeOut, 0, (size_t)iUniBufSize);

    /* Units available for characters; one unit is reserved for the terminator. */
    capacity = iUniBufSize / 2 - 1;

    for (i = 0; in[i] != 0 && j < capacity; i++)
    {
        const unsigned char lead = in[i];

        if (lead <= kSingleByteMax)
        {
            /* Half-width letters and symbols. */
            pUnicodeOut[j++] = lead;
        }
        else if (lead <= kLeadMax)
        {
            /* in[i + 1] is at worst the terminating NUL, which fails the range test. */
            const unsigned char trail = in[i + 1];

            if (trail >= kTrailMin && trail <= kTrailMax && trail != kTrailHole)
            {
                /* Chinese (double-byte) character. */
                pUnicodeOut[j++] =
                    g_uni_table_array[(lead - kLeadMin) * kRowWidth + (trail - kTrailMin)];
                i++; /* the trail byte has been consumed as well */
            }
        }
        /* Anything else (0xFF, or a lead byte without a valid trail byte) is skipped. */
    }

    pUnicodeOut[j] = 0;
    return 1;
}
/*
 * Number of bytes needed to encode a zero-terminated UCS-2 string as UTF-8.
 *
 * pUniStr: zero-terminated string of 16-bit units, or NULL.
 * Returns the byte count without the terminating NUL; 0 for NULL or an
 * empty string.
 *
 * A 16-bit unit never needs more than three UTF-8 bytes, so only the 1-, 2-
 * and 3-byte cases exist. The string is walked once, up to its terminator.
 */
static int GetUtf8Len(unsigned short* pUniStr)
{
    const unsigned short* p = NULL;
    int len = 0;

    if (pUniStr == NULL)
    {
        return 0;
    }

    for (p = pUniStr; *p != 0; ++p)
    {
        if (*p < 0x80)
        {
            len += 1;
        }
        else if (*p < 0x800)
        {
            len += 2;
        }
        else
        {
            len += 3;
        }
    }
    return len;
}
/*
 * Encode one 16-bit code unit as UTF-8.
 *
 * c       code unit to encode.
 * outbuf  receives the encoded bytes (no NUL is appended); needs room for up
 *         to 3 bytes. May be NULL to query the length only.
 *
 * Returns the number of bytes in the encoding (1, 2 or 3).
 *
 * NOTE(review): the original carried the remark "If this gets modified, also
 * update the copy in g_string_insert_unichar()"; no such function exists in
 * this file, so the routine was presumably adapted from GLib.
 */
static int unichar_to_utf8 (unsigned short c, char* outbuf)
{
    int len = 0;
    int first = 0;
    int i = 0;

    /* A 16-bit value never needs more than three bytes. */
    if (c < 0x80)
    {
        first = 0;
        len = 1;
    }
    else if (c < 0x800)
    {
        first = 0xc0;
        len = 2;
    }
    else
    {
        first = 0xe0;
        len = 3;
    }

    if (outbuf != NULL)
    {
        /* Fill continuation bytes from the end, six payload bits at a time. */
        for (i = len - 1; i > 0; --i)
        {
            outbuf[i] = (char)((c & 0x3f) | 0x80);
            c >>= 6;
        }
        /* The remaining high bits go into the lead byte with its length marker. */
        outbuf[0] = (char)(c | first);
    }
    return len;
}
/*
 * Convert a NUL-terminated GBK string to a newly allocated UTF-8 string.
 *
 * ppszOutUtf8  receives the malloc()ed, NUL-terminated result; the caller
 *              must free() it.
 * pszGbk       NUL-terminated GBK source string.
 *
 * Returns 1 on success, 0 on failure. When ppszOutUtf8 is NULL, or pszGbk is
 * NULL or empty, the function returns 0 without touching *ppszOutUtf8. On any
 * later failure *ppszOutUtf8 is set to NULL.
 *
 * The string is converted GBK -> UCS-2 -> UTF-8. Each encoded character is
 * written directly at the current output offset, so assembling the result is
 * linear in its length.
 */
int MyGBKToUTF8_M(char** ppszOutUtf8, char* pszGbk)
{
    int iRet = 0;
    int i = 0;
    int wideLenGuess = 0;
    int wideLen = 0;
    int utfLen = 0;
    int outPos = 0;
    size_t gbkLen = 0;
    unsigned short* pUnicode = NULL;
    char* pUtf8 = NULL;

    if (ppszOutUtf8 == NULL || pszGbk == NULL)
        return 0;
    gbkLen = strlen(pszGbk);
    if (gbkLen == 0)
        return 0;

    *ppszOutUtf8 = NULL;

    /* Every GBK byte yields at most one 16-bit unit; +1 unit for the terminator. */
    wideLenGuess = (int)((gbkLen + 1) * 2);
    pUnicode = (unsigned short*)malloc((size_t)wideLenGuess);
    if (pUnicode == NULL)
    {
        LOGI("int MyGBKToUTF8_M malloc unicode buf fails!");
        return 0;
    }
    memset(pUnicode, 0, (size_t)wideLenGuess);

    do {
        if (0 == MyGBKToUnicode(pUnicode, wideLenGuess, pszGbk))
        {
            LOGI("MyGBKToUnicode fails in MyGBKToUTF8_M");
            break;
        }

        wideLen = myWcslen(pUnicode);
        utfLen = GetUtf8Len(pUnicode);

        pUtf8 = (char*)malloc((size_t)utfLen + 1);
        if (pUtf8 == NULL)
        {
            break;
        }

        /* GetUtf8Len and unichar_to_utf8 use the same size thresholds, so the
         * encoded bytes add up to exactly utfLen. */
        for (i = 0; i < wideLen; i++)
        {
            outPos += unichar_to_utf8(pUnicode[i], pUtf8 + outPos);
        }
        pUtf8[outPos] = '\0';

        *ppszOutUtf8 = pUtf8;
        iRet = 1;
    } while (0);

    SAFE_FREE(pUnicode);
    return iRet;
}
/*************************************************************************************************
 * Convert a UTF-8 string to Unicode (UCS-2).
 * Parameters:
 *   char*  pInput    NUL-terminated input string
 *   char** ppOutput  receives a pointer to the malloc()ed output buffer
 * Return value:
 *   Number of bytes in the converted string (terminator not counted),
 *   or -1 on error.
 * Notes:
 *   1. UTF-8 has no byte-order issue, but UCS-2 units do. The output is written
 *      little-endian (low byte at the lower address).
 *   2. On success the caller must free() *ppOutput. On error nothing is left
 *      allocated and *ppOutput is set to NULL.
 *   3. Only 1-, 2- and 3-byte sequences are accepted. 4-byte sequences, stray
 *      continuation bytes and truncated sequences return -1, as do a NULL or
 *      empty input and an allocation failure.
 *   4. The buffer holds one 2-byte unit per input byte plus the 2-byte
 *      terminator, which is the worst case (pure ASCII input).
 **************************************************************************************************/
static int utf8_to_unicode(char* pInput, char** ppOutput)
{
    /* Work on unsigned bytes so the result does not depend on char signedness. */
    const unsigned char* src = (const unsigned char*)pInput;
    unsigned char* out = NULL;
    unsigned char* dst = NULL;
    int outputSize = 0; /* bytes written so far, terminator excluded */

    if (ppOutput == NULL)
        return -1;
    *ppOutput = NULL;
    if (pInput == NULL || *pInput == '\0')
        return -1;

    out = (unsigned char*)malloc((strlen(pInput) + 1) * 2);
    if (out == NULL)
        return -1;
    dst = out;

    while (*src != 0)
    {
        unsigned int unit = 0;

        if (*src <= 0x7F)
        {
            /* Single-byte sequence (ASCII). */
            unit = src[0];
            src += 1;
        }
        else if ((*src & 0xE0) == 0xC0)
        {
            /* Two-byte sequence: 110xxxxx 10xxxxxx. */
            if ((src[1] & 0xC0) != 0x80)
            {
                free(out);
                return -1;
            }
            unit = ((unsigned int)(src[0] & 0x1F) << 6) | (unsigned int)(src[1] & 0x3F);
            src += 2;
        }
        else if ((*src & 0xF0) == 0xE0)
        {
            /* Three-byte sequence: 1110xxxx 10xxxxxx 10xxxxxx.
             * src[2] is read only after src[1] proved to be a continuation
             * byte, so a sequence cut off by the NUL is never read past. */
            if ((src[1] & 0xC0) != 0x80 || (src[2] & 0xC0) != 0x80)
            {
                free(out);
                return -1;
            }
            unit = ((unsigned int)(src[0] & 0x0F) << 12)
                 | ((unsigned int)(src[1] & 0x3F) << 6)
                 | (unsigned int)(src[2] & 0x3F);
            src += 3;
        }
        else
        {
            /* Sequences of any other length are not handled. */
            free(out);
            return -1;
        }

        dst[0] = (unsigned char)(unit & 0xFF); /* low byte first: little-endian */
        dst[1] = (unsigned char)(unit >> 8);
        dst += 2;
        outputSize += 2;
    }

    /* Two-byte terminator. */
    dst[0] = 0;
    dst[1] = 0;

    *ppOutput = (char*)out;
    return outputSize;
}
/*
 * Convert a NUL-terminated UTF-8 string to a newly allocated GBK string.
 *
 * ppszOutGbk  receives the malloc()ed, NUL-terminated result; the caller
 *             must free() it.
 * pszUtf8     NUL-terminated UTF-8 source string.
 *
 * Returns 1 on success, 0 on failure. When ppszOutGbk is NULL, or pszUtf8 is
 * NULL or empty, the function returns 0 without touching *ppszOutGbk. On any
 * later failure *ppszOutGbk is set to NULL.
 *
 * The string is converted UTF-8 -> UCS-2 -> GBK. A 2-byte UCS-2 unit becomes
 * at most 2 GBK bytes, so the UCS-2 byte count plus one byte for the NUL is
 * always a large enough output buffer.
 */
int MyUTF8ToGBK_M(char** ppszOutGbk, char* pszUtf8)
{
    int iRet = 0;
    int unicodeBytes = 0;
    char* pUnicodeRaw = NULL; /* buffer returned by utf8_to_unicode */
    char* pGbk = NULL;

    if (ppszOutGbk == NULL || pszUtf8 == NULL || pszUtf8[0] == '\0')
        return 0;

    *ppszOutGbk = NULL;

    do {
        unicodeBytes = utf8_to_unicode(pszUtf8, &pUnicodeRaw);
        if (unicodeBytes <= 0)
        {
            break;
        }

        pGbk = (char*)malloc((size_t)unicodeBytes + 1);
        if (pGbk == NULL)
        {
            break;
        }
        memset(pGbk, 0, (size_t)unicodeBytes + 1);

        /* The buffer comes from malloc(), so it is suitably aligned to be
         * read as 16-bit units. */
        if (0 == MyUnicodeToGBK(pGbk, unicodeBytes+1, (const unsigned short*)pUnicodeRaw))
        {
            LOGI("MyUnicodeToGBK fails in MyUTF8ToGBK_M");
            SAFE_FREE(pGbk);
            break;
        }

        *ppszOutGbk = pGbk;
        iRet = 1;
    } while(0);

    /* Also releases a buffer that utf8_to_unicode left behind on a decode error. */
    SAFE_FREE(pUnicodeRaw);
    return iRet;
}