js javascript UTF-8 GB2312编码转换
时间:2023-05-29 13:07:00
在用js做项目时,接收方需要使用GB2312汉字编码,发送方使用UTF-8汉字编码。
这里要做汉字编码转换。
在网上找了一个下午,没有找到通用的方法,只能自己做一个汉字编码对应表。
关键是要自己做一个汉字编码对应表
先按GB2312汉字的编码顺序生成GB2312表,然后把GB2312表转换成UTF-8、UCS2-BigEndian、UCS2-LittleEndian表。所以各个表中同一个位置是同一个汉字的编码。
var _GB2312_1_87 = [0xA1A1, 0xA1A2, /* ... SECTOR */ ];
var _UTF8_1_87 = [0xE38080, 0xE38081, /* ... SECTOR */ ];
var _UCS2_Big_1_87 = [0x3000, 0x3001, /* ... SECTOR */ ];
var _UCS2_Little_1_87 = [0x0030, 0x0130, /* ... SECTOR */ ];
有了编码表,可以直接查表:
/**
 * Swap the high and low bytes of a 16-bit value
 * (big-endian <-> little-endian conversion).
 *
 * @param {number} uincodeLE - 16-bit value; any bits above bit 15 are discarded.
 * @returns {number} The value with its two bytes exchanged.
 */
function ConvetEndian(uincodeLE) {
    var lowByte = uincodeLE & 0x00FF;
    var highByte = (uincodeLE >> 8) & 0x00FF;
    return (lowByte << 8) | highByte;
}

/**
 * Look up the UCS-2 (big-endian) code for a GB2312 code.
 *
 * Relies on the file-level tables `_GB2312_1_87` and `_UCS2_Big_1_87`, which
 * hold the same character at the same index.
 *
 * @param {number} gcode - Two-byte GB2312 code, e.g. 0xCAE4.
 * @returns {number} The UCS-2 code, or -1 when `gcode` is not in the table.
 */
function GB2312_TO_Unicode(gcode) {
    var index = _GB2312_1_87.indexOf(gcode);
    return index === -1 ? -1 : _UCS2_Big_1_87[index];
}

/**
 * Look up the GB2312 code for a UCS-2 (big-endian) code.
 *
 * Relies on the file-level tables `_UCS2_Big_1_87` and `_GB2312_1_87`, which
 * hold the same character at the same index.
 *
 * @param {number} ucode - UCS-2 code, e.g. 0x8F93.
 * @returns {number} The GB2312 code, or -1 when `ucode` is not in the table.
 */
function Unicode_TO_GB2312(ucode) {
    var index = _UCS2_Big_1_87.indexOf(ucode);
    return index === -1 ? -1 : _GB2312_1_87[index];
}

/**
 * Decode one variable-length UTF-8 sequence (up to 6 bytes) into its code.
 *
 * UTF-8 layout handled here:
 *   0000 0000 - 0000 007F | 0XXXXXXX
 *   0000 0080 - 0000 07FF | 110XXXXX 10XXXXXX
 *   0000 0800 - 0000 FFFF | 1110XXXX 10XXXXXX 10XXXXXX
 *   0001 0000 - 001F FFFF | 11110XXX 10XXXXXX 10XXXXXX 10XXXXXX
 *   0020 0000 - 03FF FFFF | 111110XX 10XXXXXX 10XXXXXX 10XXXXXX 10XXXXXX
 *   0400 0000 - 7FFF FFFF | 1111110X 10XXXXXX 10XXXXXX 10XXXXXX 10XXXXXX 10XXXXXX
 *
 * @param {ArrayLike<number>} indata - Bytes of the sequence, lead byte first.
 * @returns {number} The decoded code, or -1 when the input is empty, starts
 *     with an invalid lead byte, is truncated, or has a malformed
 *     continuation byte.
 */
function utf8ToUnicode(indata) {
    if (!indata || indata.length === 0) {
        return -1;
    }

    var lead = indata[0];
    var byteCount = 0;
    var value = 0;

    // The lead byte tells how many bytes follow and carries the top payload bits.
    if (lead >= 0xFE) {
        // 0xFE and 0xFF never start a sequence.
        return -1;
    } else if (lead >= 0xFC) {
        byteCount = 6;
        value = lead & 0x01;
    } else if (lead >= 0xF8) {
        byteCount = 5;
        value = lead & 0x03;
    } else if (lead >= 0xF0) {
        byteCount = 4;
        value = lead & 0x07;
    } else if (lead >= 0xE0) {
        byteCount = 3;
        value = lead & 0x0F;
    } else if (lead >= 0xC0) {
        byteCount = 2;
        value = lead & 0x1F;
    } else if (lead >= 0x80) {
        // 10XXXXXX is a continuation byte and cannot start a sequence.
        return -1;
    } else {
        return lead;
    }

    if (indata.length < byteCount) {
        return -1; // truncated sequence
    }

    for (var i = 1; i < byteCount; i++) {
        var next = indata[i];
        if ((next & 0xC0) !== 0x80) {
            return -1; // not a 10XXXXXX continuation byte
        }
        value <<= 6;             // make room for the next 6 payload bits
        value |= (next & 0x3F);  // append the low 6 bits of this byte
    }
    return value;
}

/**
 * Encode a code as UTF-8 and return the bytes packed into one number
 * (first byte in the most significant position, e.g. 0x8F93 -> 0xE8BE93).
 *
 * The byte layout is the same 1- to 6-byte scheme decoded by utf8ToUnicode.
 * Bytes are packed arithmetically rather than with `<<`, so results of four
 * or more bytes stay positive instead of wrapping in 32-bit integer math.
 *
 * @param {number} code - Code to encode, 0 .. 0x7FFFFFFF.
 * @returns {number} The packed UTF-8 bytes, or -1 when `code` is out of range.
 */
function Unicode_TO_UTF8(code) {
    if (code < 0 || code > 0x7FFFFFFF) {
        return -1; // does not fit in 6 UTF-8 bytes
    }

    var byteCount;
    if (code > 0x03FFFFFF) {
        byteCount = 6;
    } else if (code > 0x001FFFFF) {
        byteCount = 5;
    } else if (code > 0x0000FFFF) {
        byteCount = 4;
    } else if (code > 0x000007FF) {
        byteCount = 3;
    } else if (code > 0x0000007F) {
        byteCount = 2;
    } else {
        // Single byte: the UTF-8 encoding is the code itself.
        return code;
    }

    var bytes = [];
    var remaining = code;
    var leadMarker = 0x80; // grows one leading "1" bit per continuation byte

    for (var pos = byteCount - 1; pos > 0; pos--) {
        bytes[pos] = 0x80 | (remaining & 0x3F); // take the lowest 6 bits
        remaining >>= 6;                        // drop those 6 bits
        leadMarker = (leadMarker >> 1) | 0x80;
    }
    bytes[0] = leadMarker | remaining;

    var packed = 0;
    for (var j = 0; j < byteCount; j++) {
        packed = packed * 256 + bytes[j];
    }
    return packed;
}

/**
 * Convert every character of a string to the requested encoding and render
 * the result as C-style hex literals, one character per output line, each
 * followed by a `//<char> <codeType>` comment.
 *
 * NOTE(review): the function header, the per-character loop, the codeType
 * dispatch and the first branches of the byte-wise output were lost in the
 * published listing. They are reconstructed here from the surviving tail and
 * from the documented call TextCodec("GB2312","long","...") — confirm the
 * accepted `codeType` / `outType` strings against the original script.
 *
 * @param {string} codeType - Target encoding: 'GB2312', 'UTF-8',
 *     'UCS2-LittleEndian'; any other value emits the UTF-16 code unit as is.
 * @param {string} outType - 'byte' emits one `0xNN,` per byte; 'long' (and
 *     any other value) emits the whole code as a single `0xNNNN,`.
 * @param {string} inputStr - Text to convert.
 * @returns {string} The generated listing.
 */
function TextCodec(codeType, outType, inputStr) {
    var OutputStr = "";

    for (var i = 0; i < inputStr.length; i++) {
        var inputChar = inputStr.charAt(i);
        var ucode = inputStr.charCodeAt(i);
        var targetCode;

        switch (codeType) {
            case 'GB2312':
                // ASCII passes through unchanged (the sample output shows 0x61 for "a").
                targetCode = ucode < 0x80 ? ucode : Unicode_TO_GB2312(ucode);
                break;
            case 'UTF-8':
                targetCode = Unicode_TO_UTF8(ucode);
                break;
            case 'UCS2-LittleEndian':
                targetCode = ConvetEndian(ucode);
                break;
            default:
                targetCode = ucode;
                break;
        }

        switch (outType) {
            case 'byte':
                var byteCount;
                if (targetCode & 0xff000000) { //4bytes
                    byteCount = 4;
                } else if (targetCode & 0xff0000) { //3bytes
                    byteCount = 3;
                } else if (targetCode & 0xff00) { //2bytes
                    byteCount = 2;
                } else { //1byte
                    byteCount = 1;
                }
                for (var k = byteCount - 1; k >= 0; k--) {
                    var _byte = (targetCode >> (8 * k)) & 0xff;
                    OutputStr += '0x' + _byte.toString(16).toUpperCase() + ',';
                }
                OutputStr += "//" + inputChar + " " + codeType + "\n";
                break;
            case 'long':
            default:
                OutputStr += '0x' + targetCode.toString(16).toUpperCase() + ',' + "//" + inputChar + " " + codeType + "\n";
                break;
        }
    }
    return OutputStr;
}
运行:TextCodec("GB2312","long","输入要转换的中文abc");结果: 0xCAE4,//输 GB2312 0xC8EB,//入 GB2312 0xD2AA,//要 GB2312 0xD7AA,//转 GB2312 0xBBBB,//换 GB2312 0xB5C4,//的 GB2312 0xD6D0,//中 GB2312 0xCEC4,//文 GB2312 0x61,//a GB2312 0x62,//b GB2312 0x63,//c GB2312
完整的编码表,每个表有7614个编码.
var _GB2312_1_87 = [ //GB2312字库开始 0xA1A1, 0xA1A2, 0xA1A3, 0xA1A4, 0xA1A5, 0xA1A6, 0xA1A7, 0xA1A8, 0xA1A9, 0xA1AA, 0xA1AB, 0xA1AC, 0xA1AD, 0xA1AE, 0xA1AF, //第1区: A1A0 : ,、,。,·,ˉ,ˇ,¨,〃,々,—,~,‖,…,‘,’, 0xA1B0, 0xA1B1, 0xA1B2, 0xA1B3, 0xA1B4, 0xA1B5, 0xA1B6, 0xA1B7, 0xA1B8, 0xA1B9, 0xA1BA, 0xA1BB, 0xA1BC, 0xA1BD, 0xA1BE, 0xA1BF, //第1区: A1B0 : “,”,〔,〕,〈,〉,《,》,「,」,『,』,〖,〗,【,】, 0xA1C0, 0xA1C1, 0xA1C2, 0xA1C3, 0xA1C4, 0xA1C5, 0xA1C6, 0xA1C7, 0xA1C8, 0xA1C9, 0xA1CA, 0xA1CB, 0xA1CC, 0xA1CD, 0xA1CE, 0xA1CF, //第1区: A1C0 : ±,×,÷,∶,∧,∨,∑,∏,∪,∩,∈,∷,√,⊥,∥,∠, 0xA1D0, 0xA1D1, 0xA1D2, 0xA1D3, 0xA1D4, 0xA1D5, 0xA1D6, 0xA1D7, 0xA1D8, 0xA1D9, 0xA1DA, 0xA1DB, 0xA1DC, 0xA1DD, 0xA1DE, 0xA1DF, //第1区: A1D0 : ⌒,⊙,∫,∮,≡,≌,≈,∽,∝,≠,≮,≯,≤,≥,∞,∵, 0xA1E0, 0xA1E1, 0xA1E2, 0xA1E3, 0xA1E4, 0xA1E5, 0xA1E6, 0xA1E7, 0xA1E8, 0xA1E9, 0xA1EA, 0xA1EB, 0xA1EC, 0xA1ED, 0xA1EE, 0xA1EF, //第1区: A1E0 : ∴,♂,♀,°,′,″,℃,$,¤,¢,£,‰,§,№,☆,★, 0xA1F0, 0xA1F1, 0xA1F2, 0xA1F3, 0xA1F4, 0xA1F5, 0xA1F6, 0xA1F7, 0xA1F8, 0xA1F9, 0xA1FA, 0xA1FB, 0xA1FC, 0xA1FD, 0xA1FE, //第1区: A1F0 : ○,●,◎,◇,◆,□,■,△,▲,※,→,←,↑,↓,〓, 0xA2A1, 0xA2A2, 0xA2A3, 0xA2A4, 0xA2A5, 0xA2A6, 0xA2A7, 0xA2A8, 0xA2A9, 0xA2AA, 0xA2AB, 0xA2AC, 0xA2AD, 0xA2AE, 0xA2AF, //第2区: A2A0 : ⅰ,ⅱ,ⅲ,ⅳ,ⅴ,ⅵ,ⅶ,ⅷ,ⅸ,ⅹ,,,,,, 0xA2B0, 0xA2B1, 0xA2B2, 0xA2B3, 0xA2B4, 0xA2B5, 0xA2B6, 0xA2B7, 0xA2B8, 0xA2B9, 0xA2BA, 0xA2BB, 0xA2BC, 0xA2BD, 0xA2BE, 0xA2BF, //第2区: A2B0 : ,⒈,⒉,⒊,⒋,⒌,⒍,⒎,⒏,⒐,⒑,⒒,⒓,⒔,⒕,⒖, 0xA2C0, 0xA2C1, 0xA2C2, 0xA2C3, 0xA2C4, 0xA2C5, 0xA2C6, 0xA2C7, 0xA2C8, 0xA2C9, 0xA2CA, 0xA2CB, 0xA2CC, 0xA2CD, 0xA2CE, 0xA2CF, //第2区: A2C0 : ⒗,⒘,⒙,⒚,⒛,⑴,⑵,⑶,⑷,⑸,⑹,⑺,⑻,⑼,⑽,⑾, 0xA2D0, 0xA2D1, 0xA2D2, 0xA2D3, 0xA2D4, 0xA2D5, 0xA2D6, 0xA2D7, 0xA2D8, 0xA2D9, 0xA2DA, 0xA2DB, 0xA2DC, 0xA2DD, 0xA2DE, 0xA2DF, //第2区: A2D0 : ⑿,⒀,⒁,⒂,⒃,⒄,⒅,⒆,⒇,①,②,③,④,⑤,⑥,⑦, 0xA2E0, 0xA2E1, 0xA2E2, 0xA2E3, 0xA2E4, 0xA2E5, 0xA2E6, 0xA2E7, 0xA2E8, 0xA2E9, 0xA2EA, 0xA2EB, 0xA2EC, 0xA2ED, 0xA2EE, 0xA2EF, //第2区: A2E0 : ⑧,⑨,⑩,€,,㈠,㈡,㈢,㈣,㈤,㈥,㈦,㈧,㈨,㈩,, 0xA2F0, 0xA2F1, 0xA2F2, 0xA2F3, 0xA2F4, 0xA2F5, 0xA2F6, 0xA2F7, 0xA2F8, 0xA2F9, 
0xA2FA, 0xA2FB, 0xA2FC, 0xA2FD, 0xA2FE, //第2区: A2F0 : ,Ⅰ,Ⅱ,Ⅲ,Ⅳ,Ⅴ,Ⅵ,Ⅶ,Ⅷ,Ⅸ,Ⅹ,Ⅺ,Ⅻ,,, 0xA3A1, 0xA3A2, 0xA3A3, 0xA3A4, 0xA3A5, 0xA3A6, 0xA3A7, 0xA3A8, 0xA3A9, 0xA3AA, 0xA3AB, 0xA3AC, 0xA3AD, 0xA3AE, 0xA3AF, //第3区: A3A0 : !,",#,¥,%,&,',(,),*,+,,,-,.,/, 0xA3B0, 0xA3B1, 0xA3B2, 0xA3B3, 0xA3B4, 0xA3B5, 0xA3B6, 0xA3B7, 0xA3B8, 0xA3B9, 0xA3BA, 0xA3BB, 0xA3BC, 0xA3BD, 0xA3BE, 0xA3BF, //第3区: A3B0 : 0,1,2,3,4,5,6,7,8,9,:,;,<,=,>,?, 0xA3C0, 0xA3C1, 0xA3C2, 0xA3C3, 0xA3C4, 0xA3C5, 0xA3C6, 0xA3C7, 0xA3C8, 0xA3C9, 0xA3CA, 0xA3CB, 0xA3CC, 0xA3CD, 0xA3CE, 0xA3CF, //第3区: A3C0 : @,A,B,C,D,E,F,G,H,I,J,K,L,M,N,O, 0xA3D0, 0xA3D1, 0xA3D2, 0xA3D3, 0xA3D4, 0xA3D5, 0xA3D6, 0xA3D7, 0xA3D8, 0xA3D9, 0xA3DA, 0xA3DB, 0xA3DC, 0xA3DD, 0xA3DE, 0xA3DF, //第3区: A3D0 : P,Q,R,S,T,U,V,W,X,Y,Z,[,\,],^,_, 0xA3E0, 0xA3E1, 0xA3E2, 0xA3E3, 0xA3E4, 0xA3E5, 0xA3E6, 0xA3E7, 0xA3E8, 0xA3E9, 0xA3EA, 0xA3EB, 0xA3EC, 0xA3ED, 0xA3EE, 0xA3EF, //第3区: A3E0 : `,a,b,c,d,e,f,g,h,i,j,k,l,m,n,o, 0xA3F0, 0xA3F1, 0xA3F2, 0xA3F3, 0xA3F4, 0xA3F5, 0xA3F6, 0xA3F7, 0xA3F8, 0xA3F9, 0xA3FA, 0xA3FB, 0xA3FC, 0xA3FD, 0xA3FE, //第3区: A3F0 : p,q,r,s,t,u,v,w,x,y,z,{,|,}, ̄, 0xA4A1, 0xA4A2, 0xA4A3, 0xA4A4, 0xA4A5, 0xA4A6, 0xA4A7, 0xA4A8, 0xA4A9, 0xA4AA, 0xA4AB, 0xA4AC, 0xA4AD, 0xA4AE, 0xA4AF, //第4区: A4A0 : ぁ,あ,ぃ,い,ぅ,う,ぇ,え,ぉ,お,か,が,き,ぎ,く, 0xA4B0, 0xA4B1, 0xA4B2, 0xA4B3, 0xA4B4, 0xA4B5, 0xA4B6, 0xA4B7, 0xA4B8, 0xA4B9, 0xA4BA, 0xA4BB, 0xA4BC, 0xA4BD, 0xA4BE, 0xA4BF, //第4区: A4B0 : ぐ,け,げ,こ,ご,さ,ざ,し,じ,す,ず,せ,ぜ,そ,ぞ,た, 0xA4C0, 0xA4C1, 0xA4C2, 0xA4C3, 0xA4C4, 0xA4C5, 0xA4C6, 0xA4C7, 0xA4C8, 0xA4C9, 0xA4CA, 0xA4CB, 0xA4CC, 0xA4CD, 0xA4CE, 0xA4CF, //第4区: A4C0 : だ,ち,ぢ,っ,つ,づ,て,で,と,ど,な,に,ぬ,ね,の,は, 0xA4D0, 0xA4D1, 0xA4D2, 0xA4D3, 0xA4D4, 0xA4D5, 0xA4D6, 0xA4D7, 0xA4D8, 0xA4D9, 0xA4DA, 0xA4DB, 0xA4DC, 0xA4DD, 0xA4DE, 0xA4DF, //第4区: A4D0 : ば,ぱ,ひ,び,ぴ,ふ,ぶ,ぷ,へ,べ,ぺ,ほ,ぼ,ぽ,ま,み, 0xA4E0, 0xA4E1, 0xA4E2, 0xA4E3, 0xA4E4, 0xA4E5, 0xA4E6, 0xA4E7, 0xA4E8, 0xA4E9, 0xA4EA, 0xA4EB, 0xA4EC, 0xA4ED, 0xA4EE, 0xA4EF, //第4区: A4E0 : む,め,も,ゃ,や,ゅ,ゆ,ょ,よ,ら,り,る,れ,ろ,ゎ,わ, 0xA4F0, 0xA4F1, 
0xA4F2, 0xA4F3, 0xA4F4, 0xA4F5, 0xA4F6, 0xA4F7, 0xA4F8, 0xA4F9, 0xA4FA, 0xA4FB, 0xA4FC, 0xA4FD, 0xA4FE, //第4区: A4F0 : ゐ,ゑ,を,ん,,,,,,,,,,,, 0xA5A1, 0xA5A2, 0xA5A3, 0xA5A4, 0xA5A5, 0xA5A6, 0xA5A7, 0xA5A8, 0xA5A9, 0xA5AA, 0xA5AB, 0xA5AC, 0xA5AD, 0xA5AE, 0xA5AF, //第5区: A5A0 : ァ,ア,ィ,イ,ゥ,ウ,ェ,エ,ォ,オ,カ,ガ,キ,ギ,ク, 0xA5B0, 0xA5B1, 0xA5B2, 0xA5B3, 0xA5B4, 0xA5B5, 0xA5B6, 0xA5B7, 0xA5B8, 0xA5B9, 0xA5BA, 0xA5BB, 0xA5BC, 0xA5BD, 0xA5BE, 0xA5BF, //第5区: A5B0 : グ,ケ,ゲ,コ,ゴ,サ,ザ,シ,ジ,ス,ズ,セ,ゼ,ソ,ゾ,タ, 0xA5C0, 0xA5C1, 0xA5C2, 0xA5C3, 0xA5C4, 0xA5C5, 0xA5C6, 0xA5C7, 0xA5C8, 0xA5C9, 0xA5CA, 0xA5CB, 0xA5CC, 0xA5CD, 0xA5CE, 0xA5CF, //第5区: A5C0 : ダ,チ,ヂ,ッ,ツ,ヅ,テ,デ,ト,ド,ナ,ニ,ヌ,ネ,ノ,ハ, 0xA5D0, 0xA5D1, 0xA5D2, 0xA5D3, 0xA5D4, 0xA5D5, 0xA5D6, 0xA5D7, 0xA5D8, 0xA5D9, 0xA5DA, 0xA5DB, 0xA5DC, 0xA5DD, 0xA5DE, 0xA5DF, //第5区: A5D0 : バ,パ,ヒ,ビ,ピ,フ,ブ,プ,ヘ,ベ,ペ,ホ,ボ,ポ,マ,ミ, 0xA5E0, 0xA5E1, 0xA5E2, 0xA5E3, 0xA5E4, 0xA5E5, 0xA5E6, 0xA5E7, 0xA5E8, 0xA5E9, 0xA5EA, 0xA5EB, 0xA5EC, 0xA5ED, 0xA5EE, 0xA5EF, //第5区: A5E0 : ム,メ,モ,ャ,ヤ,ュ,ユ,ョ,ヨ,ラ,リ,ル,レ,ロ,ヮ,ワ, 0xA5F0, 0xA5F1, 0xA5F2, 0xA5F3, 0xA5F4, 0xA5F5, 0xA5F6, 0xA5F7, 0xA5F8, 0xA5F9, 0xA5FA, 0xA5FB, 0xA5FC, 0xA5FD, 0xA5FE, //第5区: A5F0 : ヰ,ヱ,ヲ,ン,ヴ,ヵ,ヶ,,,,,,,,, 0xA6A1, 0xA6A2, 0xA6A3, 0xA6A4, 0xA6A5, 0xA6A6, 0xA6A7, 0xA6A8, 0xA6A9, 0xA6AA, 0xA6AB, 0xA6AC, 0xA6AD, 0xA6AE, 0xA6AF, //第6区: A6A0 : Α,Β,Γ,Δ,Ε,Ζ,Η,Θ,Ι,Κ,Λ,Μ,Ν,Ξ,Ο, 0xA6B0, 0xA6B1, 0xA6B2, 0xA6B3, 0xA6B4, 0xA6B5, 0xA6B6, 0xA6B7, 0xA6B8, 0xA6B9, 0xA6BA, 0xA6BB, 0xA6BC, 0xA6BD, 0xA6BE, 0xA6BF, //第6区: A6B0 : Π,Ρ,Σ,Τ,Υ,Φ,Χ,Ψ,Ω,,,,,,,, 0xA6C0, 0xA6C1, 0xA6C2, 0xA6C3, 0xA6C4, 0xA6C5, 0xA6C6, 0xA6C7, 0xA6C8, 0xA6C9, 0xA6CA, 0xA6CB, 0xA6CC, 0xA6CD, 0xA6CE, 0xA6CF, //第6区: A6C0 : ,α,β,γ,δ,ε,ζ,η,θ,ι,κ,λ,μ,ν,ξ,ο, 0xA6D0, 0xA6D1, 0xA6D2, 0xA6D3, 0xA6D4, 0xA6D5, 0xA6D6, 0xA6D7, 0xA6D8, 0xA6D9, 0xA6DA, 0xA6DB, 0xA6DC, 0xA6DD, 0xA6DE, 0xA6DF, //第6区: A6D0 : π,ρ,σ,τ,υ,φ,χ,ψ,ω,,,,,,,, 0xA6E0, 0xA6E1, 0xA6E2, 0