Hi all,
I am having a problem detecting the encoding of a .txt/.csv file. I need to distinguish between ANSI, UTF-8 with a BOM, and UTF-8 without a BOM, but ANSI and UTF-8 without a BOM are reported as the same encoding. I checked with the function below and saw that it returns the same encoding for both. So how can I detect a UTF-8 file that has no BOM? I need to handle this case in my code.
Thank you.
///////////////////////////////////////////////////////////////////
/// <summary>
/// Determines a text file's encoding by inspecting the byte order mark (BOM)
/// at the start of the file.
/// </summary>
/// <param name="srcFile">Path of the file to inspect.</param>
/// <returns>
/// The encoding that corresponds to the BOM found, or
/// <see cref="Encoding.Default"/> when the file does not start with one of the
/// BOMs checked here. A UTF-8 file saved without a BOM therefore also yields
/// <see cref="Encoding.Default"/>; this method looks only at the BOM.
/// </returns>
public Encoding GetFileEncoding(string srcFile)
{
    // The longest BOM checked below is four bytes long.
    byte[] buffer = new byte[4];
    int bytesRead = 0;

    // Open read-only so that read-only files can be inspected; the using
    // block closes the stream even when Read throws.
    using (FileStream file = new FileStream(srcFile, FileMode.Open, FileAccess.Read, FileShare.Read))
    {
        // Read may return fewer bytes than requested, so keep reading until
        // the buffer is full or the end of the file is reached.
        int chunk;
        while (bytesRead < buffer.Length
            && (chunk = file.Read(buffer, bytesRead, buffer.Length - bytesRead)) > 0)
        {
            bytesRead += chunk;
        }
    }

    // Every check is guarded by bytesRead so that the zero padding of a short
    // file is never mistaken for BOM bytes.

    // EF BB BF: UTF-8.
    if (bytesRead >= 3 && buffer[0] == 0xEF && buffer[1] == 0xBB && buffer[2] == 0xBF)
    {
        return Encoding.UTF8;
    }

    // 00 00 FE FF: UTF-32, big-endian.
    if (bytesRead >= 4 && buffer[0] == 0x00 && buffer[1] == 0x00 && buffer[2] == 0xFE && buffer[3] == 0xFF)
    {
        return new UTF32Encoding(true, true);
    }

    // FF FE 00 00: UTF-32, little-endian. This must be tested before the
    // UTF-16 little-endian BOM (FF FE), which is a prefix of it.
    if (bytesRead >= 4 && buffer[0] == 0xFF && buffer[1] == 0xFE && buffer[2] == 0x00 && buffer[3] == 0x00)
    {
        return Encoding.UTF32;
    }

    // 2B 2F 76: UTF-7.
    if (bytesRead >= 3 && buffer[0] == 0x2B && buffer[1] == 0x2F && buffer[2] == 0x76)
    {
        return Encoding.UTF7;
    }

    // FE FF: UTF-16, big-endian (code page 1201).
    if (bytesRead >= 2 && buffer[0] == 0xFE && buffer[1] == 0xFF)
    {
        return Encoding.BigEndianUnicode;
    }

    // FF FE: UTF-16, little-endian (code page 1200).
    if (bytesRead >= 2 && buffer[0] == 0xFF && buffer[1] == 0xFE)
    {
        return Encoding.Unicode;
    }

    // No recognised BOM: fall back to the default encoding.
    return Encoding.Default;
}
//////////////////////////////////////////////
0 Response to "How To Remove Bom From Utf-8 File"
Post a Comment