How to read and write files in UTF-8 format in win32

1. Write data

My first attempt was to write the three-byte UTF-8 byte order mark (BOM: 0xEF, 0xBB, 0xBF) to the file before writing the data.

// First attempt (kept for illustration): write the UTF-8 BOM, then the raw
// TCHAR buffer. The text is written byte-for-byte and is NOT converted to
// UTF-8 here; only the three marker bytes in front of it say "UTF-8".
BYTE btHead[] = { 0xEF, 0xBB, 0xBF };
::WriteFile(hFile, btHead, sizeof(btHead), &dwWrite, 0);
::WriteFile(hFile, str, lstrlen(str) * sizeof(TCHAR), &dwWrite, 0);

The resulting file looks like this:

The file is detected as UTF-8, but it starts with a BOM, and the Chinese characters are garbled (the TCHAR data was written as-is, without being converted to UTF-8).
Next, I tested writing UTF-8 data using the C runtime library.

//Open the file according to the specified encoding format
FILE* fp=fopen("UTF_8Test.txt", "wt + ,ccs=UTF-8");

//data input
TCHAR str[] = TEXT("Test file,1234,abcd,ABCD");
fwrite(str, sizeof(TCHAR), lstrlen(str), fp);

//closure
fclose(fp);

The Chinese characters are no longer garbled, but the file is still saved as UTF-8 with a BOM.

This is still not the result I want.

After some more searching, I finally found an approach that works:
1. First, convert the string to be saved into a UTF-8 encoded char string.
2. Then write that char string to the file.

//lpBuffer is the pointer to the wide string passed
//nBufferLeng is the number of characters in the string (the length calculated using lstrlen)

//Find the character length of the string converted to UTF-8
int size = ::WideCharToMultiByte(CP_UTF8, 0, (LPTSTR)lpBuffer, nBufferLeng, NULL, 0, NULL,
NULL);
\t
//Convert to char string
char* pTem = new char[size + 1];
memset(pTem, 0, size);
::WideCharToMultiByte(CP_UTF8, 0, (LPTSTR)lpBuffer, nBufferLeng, pTem, size, nullptr, nullptr);
pTem[size] = '\0';

//Finally write the char string
bResult = ::WriteFile(m_hFile, pTem, size, & amp;dwWrite, 0);
delete[] pTem;

The effect of writing a file like this:


This way the Chinese characters are not garbled, the file is saved as UTF-8, and no BOM is written at the start of the file.

2. Read data

Reading mirrors writing: first read the raw bytes of the file into a char array, then convert those UTF-8 bytes into a wide string.

//Get the file byte length
int n = GetFileSize();

//Write into char array
char* pChar = new char[n + 1];
bResult = ::ReadFile(m_hFile, pChar, n, & amp;dwRead, 0);
pChar[n] = '\0';

//Calculate the character length of the string converted to UTF-8
int len = MultiByteToWideChar(CP_UTF8, 0, pChar, strlen(pChar), nullptr, 0);

//Convert to TCHAR character array
TCHAR* pWChar = new TCHAR[len + 1];
memset(pWChar, 0, len);
MultiByteToWideChar(CP_UTF8, 0, pChar, strlen(pChar), pWChar, len);
pWChar[len] = '\0';

//Copy the character pointer outside the function
lstrcpy((LPTSTR)lpBuffer, pWChar);

//Clear memory
delete[] pWChar;
delete[] pChar;

The effect of reading data:

4. Complete code snippet

QFile.h file

//File operation class
// For Ascii, Unicode, UTF_8, Binary
// Reading and writing of four file formats
classQFile
{<!-- -->
public:
QFile();
~QFile();
public:
enum OpenModule //File opening mode
{<!-- -->
Read = GENERIC_READ, //read-only mode
Write = GENERIC_WRITE, //write-only mode
WriteRead = GENERIC_WRITE | GENERIC_READ //Read and write mode
};

enum FileEncoded //File encoding
{<!-- -->
enAnsii=0, //Ansi encoding
enUnicode, //Unicode encoding
enUTF_8, //UTF-8 encoding
enBinary //Binary encoding
};

public:
//Open the file in the specified mode
BOOL Open(const TCHAR* pFileName, OpenModule openModule);
void Close();
BOOL FileExists(const TCHAR* pFileName);//Check whether the file exists
BOOL CreateNewFile(const TCHAR* pFileName); //Create a new file
//Write data in the specified encoding format
int WriteFile(void* lpBuffer, int nBufferLeng, FileEncoded encodedType);
//Read data in the specified encoding format
int ReadFile(void* lpBuffer, int nBufferLeng, FileEncoded encodedType);
//Get the byte length of the file
int GetFileSize()const;
public:
HANDLE m_hFile;
};

QFile.cpp file

//Write data
int QFile::WriteFile(void* lpBuffer, int nBufferLeng, FileEncoded encodedType)
{<!-- -->
if (lpBuffer == nullptr)
return -1;

if (m_hFile == INVALID_HANDLE_VALUE)
return -1;

DWORD dwWrite = 0;
BOOL bResult = FALSE;

//Encoding UTF-8 writing
if (encodedType == enUTF_8)
{<!-- -->
int size = ::WideCharToMultiByte(CP_UTF8, 0, (LPTSTR)lpBuffer, nBufferLeng, NULL, 0, NULL,
NULL);
char* pTem = new char[size];
memset(pTem, 0, size);
::WideCharToMultiByte(CP_UTF8, 0, (LPTSTR)lpBuffer, nBufferLeng, pTem, size, nullptr, nullptr);
pTem[size] = '\0';
bResult = ::WriteFile(m_hFile, pTem, size, & amp;dwWrite, 0);
return bResult? dwWrite: -1;
}
\t

//Unicode encoding first writes two bytes, and then writes the data
if (encodedType == enUnicode)
{<!-- -->
BYTE btHead[] = {<!-- --> 0xFF,0xFE };
bResult = ::WriteFile(m_hFile, btHead, 2, & amp;dwWrite, 0);
}

//Others are written directly to data
bResult = ::WriteFile(m_hFile, lpBuffer, nBufferLeng, & amp;dwWrite, 0);

return bResult? dwWrite: -1;
}

//Read data
int QFile::ReadFile(void* lpBuffer, int nBufferLeng, FileEncoded encodedType)
{<!-- -->
if (lpBuffer == nullptr)
return -1;

if (m_hFile == INVALID_HANDLE_VALUE)
return -1;

DWORD dwRead = 0;
BOOL bResult = FALSE;

//Read encoding format UTF-8
if (encodedType == enUTF_8)
{<!-- -->
char* pChar = new char[nBufferLeng + 1];
bResult = ::ReadFile(m_hFile, pChar, nBufferLeng, & amp;dwRead, 0);
pChar[nBufferLeng] = '\0';

int len = MultiByteToWideChar(CP_UTF8, 0, pChar, strlen(pChar), nullptr, 0);

TCHAR* pWChar = new TCHAR[len + 1];
memset(pWChar, 0, len);
MultiByteToWideChar(CP_UTF8, 0, pChar, strlen(pChar), pWChar, len);
pWChar[len] = '\0';

lstrcpy((LPTSTR)lpBuffer, pWChar);
delete[] pWChar;
delete[] pChar;

return bResult ? dwRead : -1;
}

//Encode Unicode, skip the first two bytes and then read the data
if (encodedType == enUnicode)
{<!-- -->
SetFilePointer(m_hFile, 2, 0, FILE_BEGIN);
}
bResult = ::ReadFile(m_hFile, lpBuffer, nBufferLeng, & amp;dwRead, 0);
\t
return bResult ? dwRead : -1;
}

//…………

5. Use

//1.Write data
QFile f;
f.Open(Utf_8Txt, QFile::Write);

TCHAR str[] = TEXT("Test data,1234,ABCD");
f.WriteFile(str, lstrlen(str), QFile::enUTF_8);

f.Close();

//2.Read data
QFile f;
f.Open(Utf_8Txt, QFile::Read);

TCHAR str[20] = {<!-- --> 0 };
int nLeng = f.GetFileSize();

f.ReadFile(str, nLeng, QFile::enUTF_8);

f.Close();

MessageBox(nullptr, str, TEXT("Reading data"), 0);