2011-09-20 02:19:26 +08:00
|
|
|
|
/*
 * Gbk_Unicode.h
 *
 *  Created on: 2011-9-12
 *      Author: dumganhar
 */
|
|
|
|
|
|
|
|
|
|
#include <stdio.h>
|
|
|
|
|
#include <stdlib.h>
|
|
|
|
|
#include <string.h>
|
|
|
|
|
#include "Gbk_Unicode.h"
|
|
|
|
|
#include "gbk_table.h"
|
|
|
|
|
#include "unicode_table.h"
|
2011-09-26 17:50:31 +08:00
|
|
|
|
|
|
|
|
|
/* Debug logging hook, compiled out by default: every LOGI(...) call expands to
 * a no-op expression and its arguments are not evaluated.
 * To enable logging, define it as  #define LOGI(...) printf(__VA_ARGS__)  instead. */
#define LOGI(...) ((void)0)
|
2011-09-20 02:19:26 +08:00
|
|
|
|
|
|
|
|
|
/* Releases the heap block referenced by the pointer lvalue `p` and resets `p`
 * to NULL so that a later SAFE_FREE (or NULL check) on the same variable is safe.
 * `p` is evaluated twice, so it must be a side-effect-free lvalue.
 * free(NULL) is defined to do nothing, so no NULL guard is needed. */
#undef SAFE_FREE
#define SAFE_FREE(p) do \
{ \
    free((p)); \
    (p) = NULL; \
} while (0)
|
|
|
|
|
|
|
|
|
|
/* Inclusive UCS-2 ranges that MyUnicodeToGBK() translates through lookup
 * tables; each *_TOTAL is the number of code points in the range. */

/* Range 1, looked up in g_gbk_table1_array: U+3000 (start of CJK Symbols and
 * Punctuation) through U+9FA5 (inside CJK Unified Ideographs). */
#define UNICODE1_BEGIN (0x3000)
#define UNICODE1_END (0x9FA5)
#define UNICODE1_TOTAL (UNICODE1_END-UNICODE1_BEGIN+1)

/* Range 2, looked up in g_gbk_table2_array: the Halfwidth and Fullwidth Forms block. */
#define UNICODE2_BEGIN (0xFF00)
#define UNICODE2_END (0xFFEF)
#define UNICODE2_TOTAL (UNICODE2_END-UNICODE2_BEGIN+1)

/* Range 3, looked up in g_gbk_table3_array: the General Punctuation block. */
#define UNICODE3_BEGIN (0x2000)
#define UNICODE3_END (0x206F)
#define UNICODE3_TOTAL (UNICODE3_END-UNICODE3_BEGIN+1)

/* GBK double-byte grid: lead bytes ("rows") 0x81..0xFE and trail bytes
 * ("columns") 0x40..0xFE, i.e. 126 rows of 191 cells.  These are the same
 * bounds MyGBKToUnicode() uses for its index arithmetic.
 * NOTE(review): FONT_TOTAL is presumably the length of g_uni_table_array -- confirm. */
#define FONT_ROW_BEGIN 129
#define FONT_ROW_END 254
#define FONT_COL_BEGIN 64
#define FONT_COL_END 254
#define FONT_TOTAL (((FONT_ROW_END)-(FONT_ROW_BEGIN)+1)*((FONT_COL_END)-(FONT_COL_BEGIN)+1))
|
|
|
|
|
|
|
|
|
|
/* Counts the unsigned short code units in `str` that precede the terminating
 * zero unit.  `str` must be non-NULL and zero-terminated; callers in this file
 * check for NULL before calling. */
static int myWcslen(const unsigned short* str)
{
    int count = 0;

    while (str[count] != 0)
    {
        ++count;
    }
    return count;
}
|
|
|
|
|
|
|
|
|
|
#define GET_GBK_WORD(table, s, e) \
|
|
|
|
|
if (pUnicodeIn[i] >= s && pUnicodeIn[i] <= e) \
|
|
|
|
|
{ \
|
|
|
|
|
iIndex = pUnicodeIn[i]-s; \
|
|
|
|
|
oneGbk = table[iIndex]; \
|
|
|
|
|
pGbk = (unsigned char*)&oneGbk; \
|
|
|
|
|
pGBKOut[j++] = *(pGbk+1); \
|
|
|
|
|
pGBKOut[j++] = *pGbk; \
|
|
|
|
|
continue; \
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
int MyUnicodeToGBK(char* pGBKOut, int iGbkBufSize, const unsigned short* pUnicodeIn)
|
|
|
|
|
{
|
|
|
|
|
unsigned char* pOneUnicode = NULL;
|
|
|
|
|
int len = 0;
|
|
|
|
|
int iIndex = 0;
|
|
|
|
|
int i = 0, j = 0;
|
|
|
|
|
|
|
|
|
|
if (pUnicodeIn == NULL || pGBKOut == NULL || iGbkBufSize <= 2)
|
|
|
|
|
{
|
|
|
|
|
LOGI("pUnicodeIn == NULL || pGBKOut == NULL || iGbkBufSize <= 2");
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
memset(pGBKOut, 0, iGbkBufSize);
|
|
|
|
|
|
|
|
|
|
len = myWcslen(pUnicodeIn);
|
|
|
|
|
for (i = 0, j = 0; i < len; i++)
|
|
|
|
|
{
|
|
|
|
|
unsigned char* pGbk = NULL;
|
|
|
|
|
unsigned short oneGbk = 0;
|
|
|
|
|
pOneUnicode = (unsigned char*)&pUnicodeIn[i];
|
|
|
|
|
if (j >= iGbkBufSize-1)
|
|
|
|
|
{
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (pOneUnicode[1] == 0)
|
|
|
|
|
{// <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ĸ<EFBFBD><C4B8><EFBFBD><EFBFBD>
|
|
|
|
|
if (pOneUnicode[0] == 0xb7)
|
|
|
|
|
{
|
|
|
|
|
pGBKOut[j++] = 0xa1;
|
|
|
|
|
pGBKOut[j++] = 0xa4;
|
|
|
|
|
}
|
|
|
|
|
else if (pOneUnicode[0] == 0xB0)
|
|
|
|
|
{
|
|
|
|
|
pGBKOut[j++] = 0xA1;
|
|
|
|
|
pGBKOut[j++] = 0xE3;
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
pGBKOut[j++] = pOneUnicode[0];
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
2011-09-26 17:50:31 +08:00
|
|
|
|
GET_GBK_WORD(g_gbk_table1_array, UNICODE1_BEGIN, UNICODE1_END);// <20><><EFBFBD><EFBFBD>
|
|
|
|
|
GET_GBK_WORD(g_gbk_table2_array, UNICODE2_BEGIN, UNICODE2_END);// ȫ<><C8AB><EFBFBD><EFBFBD>ĸ<EFBFBD><C4B8><EFBFBD><EFBFBD>
|
|
|
|
|
GET_GBK_WORD(g_gbk_table3_array, UNICODE3_BEGIN, UNICODE3_END);// <20><><EFBFBD>ñ<EFBFBD><C3B1><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
2011-09-20 02:19:26 +08:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
j = j > iGbkBufSize-1 ? iGbkBufSize-1 : j;
|
|
|
|
|
|
|
|
|
|
pGBKOut[j] = '\0';
|
|
|
|
|
return 1;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
int MyGBKToUnicode(unsigned short* pUnicodeOut, int iUniBufSize, const char* pGBKIn)
|
|
|
|
|
{
|
|
|
|
|
int i = 0;
|
|
|
|
|
int j = 0;
|
|
|
|
|
int len = 0;
|
|
|
|
|
int iIndex = 0;
|
|
|
|
|
unsigned char* pUnicode = NULL;
|
|
|
|
|
unsigned short oneGbkHanzi = 0;
|
|
|
|
|
unsigned char* pOneGbkHanzi = (unsigned char*)&oneGbkHanzi;
|
|
|
|
|
const unsigned char* pGBKInU = (const unsigned char*)pGBKIn;
|
|
|
|
|
|
|
|
|
|
if (pUnicodeOut == NULL || pGBKIn == NULL || iUniBufSize <= 3)
|
|
|
|
|
{
|
|
|
|
|
LOGI("pUnicodeOut == NULL || pGBKIn == NULL || iUniBufSize <= 3");
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
memset(pUnicodeOut, 0, iUniBufSize);
|
|
|
|
|
|
|
|
|
|
len = strlen(pGBKIn);
|
|
|
|
|
for (i = 0; i < len; i++)
|
|
|
|
|
{
|
|
|
|
|
if (j >= iUniBufSize/2-1)
|
|
|
|
|
{
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pUnicode = (unsigned char*)&pUnicodeOut[j];
|
|
|
|
|
if (pGBKInU[i] >= 0x00 && pGBKInU[i] <= 0x80)
|
|
|
|
|
{// <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ĸ<EFBFBD><C4B8><EFBFBD><EFBFBD>
|
|
|
|
|
*(pUnicode++) = pGBKInU[i];
|
|
|
|
|
*pUnicode = 0x00;
|
|
|
|
|
++j;
|
|
|
|
|
}
|
|
|
|
|
else if ((pGBKInU[i] >= 0x81 && pGBKInU[i] <= 0xFE)
|
|
|
|
|
&&(pGBKInU[i+1] >= 0x40 && pGBKInU[i+1] <= 0xFE)
|
|
|
|
|
&& (pGBKInU[i+1] != 0x7F)
|
|
|
|
|
)
|
|
|
|
|
{// <20><><EFBFBD><EFBFBD>
|
|
|
|
|
pOneGbkHanzi[1] = pGBKInU[i];
|
|
|
|
|
pOneGbkHanzi[0] = pGBKInU[i+1];
|
|
|
|
|
iIndex = (pOneGbkHanzi[1]-0x81)*191+(pOneGbkHanzi[0]-0x40);
|
2011-09-26 17:50:31 +08:00
|
|
|
|
pUnicodeOut[j] = g_uni_table_array[iIndex];
|
2011-09-20 02:19:26 +08:00
|
|
|
|
++i;
|
|
|
|
|
++j;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
// LOGI("unicode len = %d", j);
|
|
|
|
|
|
|
|
|
|
j = j > iUniBufSize/2-1 ? iUniBufSize/2-1 : j;
|
|
|
|
|
|
|
|
|
|
pUnicodeOut[j] = L'\0';
|
|
|
|
|
return 1;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static int GetUtf8Len(unsigned short* pUniStr)
|
|
|
|
|
{
|
|
|
|
|
int wideLen = 0;
|
|
|
|
|
int len = 0;
|
|
|
|
|
int i = 0;
|
|
|
|
|
|
|
|
|
|
if (pUniStr == NULL || (wideLen = myWcslen(pUniStr)) <= 0)
|
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
|
|
for (i = 0; i < wideLen; i++)
|
|
|
|
|
{
|
|
|
|
|
unsigned short c = pUniStr[i];
|
|
|
|
|
if (c < 0x80)
|
|
|
|
|
{
|
|
|
|
|
len += 1;
|
|
|
|
|
}
|
|
|
|
|
else if (c < 0x800)
|
|
|
|
|
{
|
|
|
|
|
len += 2;
|
|
|
|
|
}
|
|
|
|
|
else if (c < 0x10000)
|
|
|
|
|
{
|
|
|
|
|
len += 3;
|
|
|
|
|
}
|
|
|
|
|
else if (c < 0x200000)
|
|
|
|
|
{
|
|
|
|
|
len += 4;
|
|
|
|
|
}
|
|
|
|
|
else if (c < 0x4000000)
|
|
|
|
|
{
|
|
|
|
|
len += 5;
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
len += 6;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return len;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
 * Encodes one UCS-2 code unit as UTF-8.
 *
 * c       the code unit to encode.
 * outbuf  receives 1 to 3 bytes; may be NULL to query the length only.
 *         No terminating NUL is written.
 *
 * Returns the number of bytes the encoding takes (1, 2 or 3).  A 16-bit code
 * unit never needs the 4-, 5- or 6-byte forms.
 */
static int unichar_to_utf8 (unsigned short c, char* outbuf)
{
    int len;
    int first; /* marker bits of the lead byte */
    int i;

    if (c < 0x80)
    {
        first = 0;
        len = 1;
    }
    else if (c < 0x800)
    {
        first = 0xc0;
        len = 2;
    }
    else
    {
        first = 0xe0;
        len = 3;
    }

    if (outbuf)
    {
        /* Fill continuation bytes from the end, six payload bits at a time. */
        for (i = len - 1; i > 0; --i)
        {
            outbuf[i] = (char)((c & 0x3f) | 0x80);
            c >>= 6;
        }
        /* Whatever is left of c fits next to the lead-byte marker. */
        outbuf[0] = (char)(c | first);
    }

    return len;
}
|
|
|
|
|
|
|
|
|
|
/*
 * Converts a NUL-terminated GBK string to a newly allocated UTF-8 string.
 *
 * ppszOutUtf8  on success receives a malloc'ed, NUL-terminated UTF-8 string
 *              that the caller must free().
 * pszGbk       NUL-terminated GBK source string; must not be empty.
 *
 * Returns 1 on success and 0 on failure (NULL or empty argument, allocation
 * failure, or failure of MyGBKToUnicode).  When the arguments are rejected or
 * the intermediate conversion fails, *ppszOutUtf8 is not modified.
 */
int MyGBKToUTF8_M(char** ppszOutUtf8, char* pszGbk)
{
    int i = 0;
    int iRet = 0;
    int wideLenGuess = 0;
    int wideLen = 0;
    int utfLen = 0;
    int utfPos = 0;
    unsigned short* pUnicode = NULL;

    if (ppszOutUtf8 == NULL || pszGbk == NULL || pszGbk[0] == '\0')
        return 0;

    /* Every GBK byte yields at most one code unit; +1 for the terminator,
     * times two because the size is expressed in bytes. */
    wideLenGuess = ((int)strlen(pszGbk)+1)*2;
    pUnicode = (unsigned short*)malloc(wideLenGuess);
    if (pUnicode == NULL)
    {
        LOGI("int MyGBKToUTF8_M malloc unicode buf fails!");
        return 0;
    }

    memset(pUnicode, 0, wideLenGuess);

    do {
        if (0 == MyGBKToUnicode(pUnicode, wideLenGuess, pszGbk))
        {
            LOGI("MyGBKToUnicode fails in MyGBKToUTF8_M");
            break;
        }

        wideLen = myWcslen(pUnicode);
        utfLen = GetUtf8Len(pUnicode);
        *ppszOutUtf8 = (char*)malloc(utfLen+1);
        if (*ppszOutUtf8 == NULL)
        {
            break;
        }

        /* Encode each code unit straight into its final position instead of
         * strcat-ing a temporary, which rescanned the output for every
         * character.  unichar_to_utf8() returns the number of bytes written. */
        for (i = 0; i < wideLen; i++)
        {
            utfPos += unichar_to_utf8(pUnicode[i], *ppszOutUtf8 + utfPos);
        }
        (*ppszOutUtf8)[utfPos] = '\0';

        iRet = 1;
    } while (0);

    SAFE_FREE(pUnicode);

    return iRet;
}
|
|
|
|
|
|
|
|
|
|
/*************************************************************************************************
 * Converts a UTF-8 string to a Unicode (UCS-2) string.
 *
 * Parameters:
 *   char*  pInput    pointer to the input string (terminated by '\0')
 *   char** ppOutput  receives a pointer to the newly allocated output string
 *
 * Return value:
 *   The size in bytes of the converted string (2 per code unit, terminator not
 *   counted), or -1 on error: NULL or empty input, allocation failure, a
 *   malformed or truncated sequence, or a sequence of four or more bytes.
 *
 * Notes:
 *   1. UTF-8 has no byte-order issue but UCS-2 does.  The output is written as
 *      unsigned short code units in the byte order of the host, which is what
 *      the caller needs when it reads the buffer through an unsigned short
 *      pointer.  On little-endian hosts this is the little-endian layout
 *      (low byte at the low address).
 *   2. After calling this function the caller must free() the memory that
 *      *ppOutput points to, otherwise it leaks.  This also applies when -1 is
 *      returned because of malformed input: the partially filled buffer stays
 *      in *ppOutput.  *ppOutput is NULL after an allocation failure and is
 *      left untouched when the input is NULL or empty.
 **************************************************************************************************/
static int utf8_to_unicode(char* pInput, char** ppOutput)
{
    /* Work on unsigned bytes so that lead bytes >= 0x80 never become negative. */
    const unsigned char* in = (const unsigned char*)pInput;
    unsigned short* out = NULL;
    size_t inLen = 0;
    int outputSize = 0; /* bytes produced so far */

    if (pInput == NULL || ppOutput == NULL)
        return -1;

    inLen = strlen(pInput);
    if (inLen == 0)
        return -1;

    /* Worst case is one code unit per input byte (pure ASCII), plus one unit
     * for the terminator. */
    out = (unsigned short*)malloc((inLen + 1) * sizeof(unsigned short));
    *ppOutput = (char*)out;
    if (out == NULL)
        return -1;
    memset(out, 0, (inLen + 1) * sizeof(unsigned short));

    while (*in)
    {
        unsigned short unit = 0;

        if (*in <= 0x7F)
        {
            /* Single-byte sequence. */
            unit = *in;
        }
        else if ((*in & 0xE0) == 0xC0)
        {
            /* Two-byte sequence: 110xxxxx 10xxxxxx. */
            unsigned char high = *in++;
            unsigned char low = *in;

            if ((low & 0xC0) != 0x80)
            {
                return -1;
            }

            unit = (unsigned short)(((high & 0x1F) << 6) | (low & 0x3F));
        }
        else if ((*in & 0xF0) == 0xE0)
        {
            /* Three-byte sequence: 1110xxxx 10xxxxxx 10xxxxxx. */
            unsigned char high = *in++;
            unsigned char middle = *in;
            unsigned char low = 0;

            /* Validate the second byte before touching the third: if the
             * string ends here, the third byte lies beyond the terminator. */
            if ((middle & 0xC0) != 0x80)
            {
                return -1;
            }
            low = *++in;
            if ((low & 0xC0) != 0x80)
            {
                return -1;
            }

            unit = (unsigned short)(((high & 0x0F) << 12) | ((middle & 0x3F) << 6) | (low & 0x3F));
        }
        else
        {
            /* Stray continuation byte, or a sequence of four or more bytes. */
            return -1;
        }

        ++in;
        *out++ = unit;
        outputSize += 2;
    }

    *out = 0;

    return outputSize;
}
|
|
|
|
|
|
|
|
|
|
/*
 * Converts a NUL-terminated UTF-8 string to a newly allocated GBK string.
 *
 * ppszOutGbk  on success receives a malloc'ed, NUL-terminated GBK string that
 *             the caller must free().
 * pszUtf8     NUL-terminated UTF-8 source string; must not be empty.
 *
 * Returns 1 on success and 0 on failure (NULL or empty argument, failure of
 * utf8_to_unicode, allocation failure, or failure of MyUnicodeToGBK).
 * If MyUnicodeToGBK fails, the output buffer is freed and *ppszOutGbk is set
 * to NULL.
 */
int MyUTF8ToGBK_M(char** ppszOutGbk, char* pszUtf8)
{
    int iRet = 0;
    int unicodeBytes = 0;
    /* utf8_to_unicode() hands its buffer back through a char**.  Receive it in
     * a real char* and cast the value afterwards, rather than casting the
     * address of an unsigned short* variable, which is a type-punned store. */
    char* pUnicodeBuf = NULL;

    if (ppszOutGbk == NULL || pszUtf8 == NULL || pszUtf8[0] == '\0')
        return 0;

    do {
        unicodeBytes = utf8_to_unicode(pszUtf8, &pUnicodeBuf);
        if (unicodeBytes <= 0)
        {
            break;
        }

        /* A GBK character takes at most two bytes per UCS-2 code unit, so
         * unicodeBytes plus the terminator is always enough. */
        *ppszOutGbk = (char*)malloc(unicodeBytes+1);
        if (*ppszOutGbk == NULL)
        {
            break;
        }
        memset(*ppszOutGbk, 0, unicodeBytes+1);

        if (0 == MyUnicodeToGBK(*ppszOutGbk, unicodeBytes+1, (const unsigned short*)pUnicodeBuf))
        {
            LOGI("MyUnicodeToGBK fails in MyUTF8ToGBK_M");
            SAFE_FREE(*ppszOutGbk);
            break;
        }

        iRet = 1;
    } while(0);

    /* The intermediate buffer is owned here, including after a failed conversion. */
    SAFE_FREE(pUnicodeBuf);

    return iRet;
}
|