diff options
| author | Jules Laplace <jules@okfoc.us> | 2015-07-16 18:05:47 -0400 |
|---|---|---|
| committer | Jules Laplace <jules@okfoc.us> | 2015-07-16 18:05:47 -0400 |
| commit | 1863819e9862bf22a2b9a990b7062cb0ade44b0f (patch) | |
| tree | 62747731346db057ea74610cd3ae499809a5b372 | |
| parent | e7c81c0763d4c4261dc5c0b2502a551b4db69bb0 (diff) | |
escape unicode as escaped bytes (\x)
| -rw-r--r-- | js/matrix.js | 20 | ||||
| -rw-r--r-- | js/unicode.js | 124 |
2 files changed, 127 insertions, 17 deletions
diff --git a/js/matrix.js b/js/matrix.js index b6fb765..5c56bd4 100644 --- a/js/matrix.js +++ b/js/matrix.js @@ -258,22 +258,8 @@ Matrix.prototype.irssi = function(){ .replace(/\x02/g, '\\x02') .replace(/\x03/g, '\\x03') // console.log(txt.length) - var escaped_txt = "", kode - for (var i = 0; i < txt.length; i++) { - kode = txt.charCodeAt(i) - if (kode > 0x7f) { - kode = kode.toString(16) - switch (kode.length) { - case 2: - kode = "0" + kode - case 3: - kode = "0" + kode - } - escaped_txt += "\\u" + kode - } - else { - escaped_txt += txt[i] - } - } + + var escaped_txt = unicode.escapeToEscapedBytes(txt) + return '/exec -out printf "%b" "' + escaped_txt + '"\n' } diff --git a/js/unicode.js b/js/unicode.js index cbab781..700434d 100644 --- a/js/unicode.js +++ b/js/unicode.js @@ -305,6 +305,129 @@ var unicode = (function(){ } return groups } + + // encodes unicode characters as escaped utf16 - \xFFFF + // encodes ONLY non-ascii characters + function escapeToUtf16 (txt) { + var escaped_txt = "", kode + for (var i = 0; i < txt.length; i++) { + kode = txt.charCodeAt(i) + if (kode > 0x7f) { + kode = kode.toString(16) + switch (kode.length) { + case 2: + kode = "0" + kode + case 3: + kode = "0" + kode + } + escaped_txt += "\\u" + kode + } + else { + escaped_txt += txt[i] + } + } + return escaped_txt + } + + // encodes unicode characters as escaped bytes - \xFF + // encodes ONLY non-ascii characters + function escapeToEscapedBytes (txt) { + var escaped_txt = "", kode, utf8_bytes + for (var i = 0; i < txt.length; i++) { + kode = txt.charCodeAt(i) + if (kode > 0x7f) { + utf8_bytes = convertUnicodeCodePointToUtf8Bytes(kode) + escaped_txt += convertBytesToEscapedString(utf8_bytes, 16) + } + else { + escaped_txt += txt[i] + } + } + return escaped_txt + } + + // encodes unicode characters as escaped bytes - \xFF + // encodes an ENTIRE string + function escapeAllToEscapedBytes(str, base) { + var unicode_codes = convertStringToUnicodeCodePoints(str); + var data_bytes = 
convertUnicodeCodePointsToBytes(unicode_codes); + return convertBytesToEscapedString(data_bytes, 16); + } + // [ 0xE3, 0x81, 0x82, 0xE3, 0x81, 0x84 ] => '\xE3\x81\x82\xE3\x81\x84' + // [ 0343, 0201, 0202, 0343, 0201, 0204 ] => '\343\201\202\343\201\204' + function convertBytesToEscapedString(data_bytes, base) { + var escaped = ''; + for (var i = 0; i < data_bytes.length; ++i) { + var prefix = (base == 16 ? "\\x" : "\\"); + var num_digits = base == 16 ? 2 : 3; + var escaped_byte = prefix + formatNumber(data_bytes[i], base, num_digits) + escaped += escaped_byte; + } + return escaped; + } + // [ 0x3042, 0x3044 ] => [ 0xE3, 0x81, 0x82, 0xE3, 0x81, 0x84 ] + function convertUnicodeCodePointsToBytes(unicode_codes) { + var utf8_bytes = []; + for (var i = 0; i < unicode_codes.length; ++i) { + var bytes = convertUnicodeCodePointToUtf8Bytes(unicode_codes[i]); + utf8_bytes = utf8_bytes.concat(bytes); + } + return utf8_bytes; + } + // 0x3042 => [ 0xE3, 0x81, 0x82 ] + function convertUnicodeCodePointToUtf8Bytes(unicode_code) { + var utf8_bytes = []; + if (unicode_code < 0x80) { // 1-byte + utf8_bytes.push(unicode_code); + } else if (unicode_code < (1 << 11)) { // 2-byte + utf8_bytes.push((unicode_code >>> 6) | 0xC0); + utf8_bytes.push((unicode_code & 0x3F) | 0x80); + } else if (unicode_code < (1 << 16)) { // 3-byte + utf8_bytes.push((unicode_code >>> 12) | 0xE0); + utf8_bytes.push(((unicode_code >> 6) & 0x3f) | 0x80); + utf8_bytes.push((unicode_code & 0x3F) | 0x80); + } else if (unicode_code < (1 << 21)) { // 4-byte + utf8_bytes.push((unicode_code >>> 18) | 0xF0); + utf8_bytes.push(((unicode_code >> 12) & 0x3F) | 0x80); + utf8_bytes.push(((unicode_code >> 6) & 0x3F) | 0x80); + utf8_bytes.push((unicode_code & 0x3F) | 0x80); + } + return utf8_bytes; + } + // "あい" => [ 0x3042, 0x3044 ] + function convertStringToUnicodeCodePoints(str) { + var surrogate_1st = 0; + var unicode_codes = []; + for (var i = 0; i < str.length; ++i) { + var utf16_code = str.charCodeAt(i); + if 
(surrogate_1st != 0) { + if (utf16_code >= 0xDC00 && utf16_code <= 0xDFFF) { + var surrogate_2nd = utf16_code; + var unicode_code = (surrogate_1st - 0xD800) * (1 << 10) + (1 << 16) + + (surrogate_2nd - 0xDC00); + unicode_codes.push(unicode_code); + } else { + // Malformed surrogate pair ignored. + } + surrogate_1st = 0; + } else if (utf16_code >= 0xD800 && utf16_code <= 0xDBFF) { + surrogate_1st = utf16_code; + } else { + unicode_codes.push(utf16_code); + } + } + return unicode_codes; + } + // 0xff => "ff" + // 0xff => "377" + function formatNumber(number, base, num_digits) { + var str = number.toString(base).toUpperCase(); + for (var i = str.length; i < num_digits; ++i) { + str = "0" + str; + } + return str; + } + return { raw: UNICODE_BLOCK_LIST, lookup: UNICODE_LOOKUP, @@ -313,5 +436,6 @@ var unicode = (function(){ block: block, findGroups: findGroups, paginate: paginate, + escapeToEscapedBytes: escapeToEscapedBytes, } })() |
