常见字符编码转Unicode16L小端-CFANZ编程社区

常见字符编码转Unicode16L小端

用于判断不带BOM的utf-8和ANSI
// Reports whether the NUL-terminated byte string at p is well-formed UTF-8.
// Intended for telling BOM-less UTF-8 text apart from ANSI (code-page) text.
//
// p: non-null pointer to a NUL-terminated byte sequence.
// Returns true when every byte belongs to a valid UTF-8 sequence as defined by
// RFC 3629 (pure ASCII and the empty string therefore also return true), and
// false at the first malformed sequence.
//
// Rejected as malformed: stray continuation bytes, 5/6-byte forms (lead bytes
// 0xF8..0xFF), truncated sequences, overlong encodings, UTF-16 surrogates and
// code points above U+10FFFF. Being strict matters for detection: several
// double-byte ANSI characters look like overlong two-byte UTF-8 sequences.
bool CheckUtf8(LPCBYTE p)
{
  for (; *p; ++p)
  {
    const unsigned lead = *p;
    if (lead < 0x80)  // ASCII
      continue;

    int trailing = 0;           // number of continuation bytes that must follow
    unsigned minCodePoint = 0;  // smallest value this length may legally encode
    unsigned codePoint = 0;
    if ((lead & 0xE0) == 0xC0)
    {
      trailing = 1;
      minCodePoint = 0x80;
      codePoint = lead & 0x1F;
    }
    else if ((lead & 0xF0) == 0xE0)
    {
      trailing = 2;
      minCodePoint = 0x800;
      codePoint = lead & 0x0F;
    }
    else if ((lead & 0xF8) == 0xF0)
    {
      trailing = 3;
      minCodePoint = 0x10000;
      codePoint = lead & 0x07;
    }
    else
    {
      return false;  // continuation byte in lead position, or 0xF8..0xFF
    }

    for (; trailing > 0; --trailing)
    {
      const unsigned next = *++p;
      // A terminating NUL fails this test too, so a truncated sequence stops
      // here and the scan never advances past the end of the string.
      if ((next & 0xC0) != 0x80)
        return false;
      codePoint = (codePoint << 6) | (next & 0x3F);
    }

    if (codePoint < minCodePoint)  // overlong encoding
      return false;
    if (codePoint > 0x10FFFF)  // beyond the Unicode range
      return false;
    if (codePoint >= 0xD800 && codePoint <= 0xDFFF)  // UTF-16 surrogate
      return false;
  }
  return true;
}


// Converts a NUL-terminated string in the system ANSI code page (CP_ACP) into
// a newly allocated, NUL-terminated wide-character string.
//
// str: source text. A null pointer is treated as a failed conversion.
// Returns a buffer allocated with new[]; the caller must release it with
// delete[]. The result is never null: if the conversion fails, an empty
// (terminator-only) string is returned instead of an uninitialised buffer.
wchar_t* ANSIToUnicode(const char* str)
{
  // With an output size of 0 the API only reports the required length in
  // wchar_t units; the terminator is included because the input length is -1.
  // A result of 0 signals failure.
  const int required =
      (str != nullptr) ? MultiByteToWideChar(CP_ACP, 0, str, -1, nullptr, 0) : 0;

  // Always reserve room for at least the terminator so the result is usable.
  const int capacity = (required > 0) ? required : 1;
  wchar_t* wide = new wchar_t[capacity];

  if (required <= 0 ||
      MultiByteToWideChar(CP_ACP, 0, str, -1, wide, capacity) <= 0)
  {
    wide[0] = L'\0';
  }
  return wide;
}

// Converts a NUL-terminated UTF-8 string into a newly allocated,
// NUL-terminated wide-character string.
//
// Even when the input starts with a UTF-8 BOM there is no need to skip those
// three bytes first: the input is passed to the conversion unchanged.
//
// str: source text. A null pointer is treated as a failed conversion.
// Returns a buffer allocated with new[]; the caller must release it with
// delete[]. The result is never null: if the conversion fails, an empty
// (terminator-only) string is returned instead of an uninitialised buffer.
wchar_t* UTF8ToUnicode(const char* str)
{
  // With an output size of 0 the API only reports the required length in
  // wchar_t units; the terminator is included because the input length is -1.
  // A result of 0 signals failure.
  const int required =
      (str != nullptr) ? MultiByteToWideChar(CP_UTF8, 0, str, -1, nullptr, 0) : 0;

  // Always reserve room for at least the terminator so the result is usable.
  const int capacity = (required > 0) ? required : 1;
  wchar_t* wide = new wchar_t[capacity];

  if (required <= 0 ||
      MultiByteToWideChar(CP_UTF8, 0, str, -1, wide, capacity) <= 0)
  {
    wide[0] = L'\0';
  }
  return wide;
}


// Byte-order conversion: swaps the two bytes of every 16-bit unit in place.
// The operation is its own inverse, so it converts big-endian to little-endian
// and back again.
//
// p:      buffer to convert; a null pointer is ignored.
// nCount: length of the buffer in bytes. Only complete byte pairs are swapped;
//         with an odd count the final byte is left untouched rather than being
//         exchanged with the byte that lies past the end of the buffer.
void CovnertB2L(char* p, int nCount)
{
  if (p == nullptr)
    return;

  for (; nCount >= 2; nCount -= 2, p += 2)
  {
    const char first = p[0];
    p[0] = p[1];
    p[1] = first;
  }
}

0 条评论