Path: csiph.com!usenet.pasdenom.info!aioe.org!.POSTED!not-for-mail From: qwertmonkey@syberianoutpost.ru Newsgroups: comp.lang.java.programmer Subject: retriving escape unicode sequences from files ... Date: Sat, 4 Aug 2012 17:47:23 +0000 (UTC) Organization: Aioe.org NNTP Server Lines: 84 Message-ID: NNTP-Posting-Host: z9Rq+Ge+SLJPEkk9TGcnaw.user.speranza.aioe.org X-Complaints-To: abuse@aioe.org X-Notice: Filtered by postfilter v. 0.8.2 X-Newsreader: NetComponents Xref: csiph.com comp.lang.java.programmer:17117 Arne, ~ I would use your pattern matcher but instead of "Character.toString((char)Integer.parseInt" ... stuff, I would use a look-up table ~ Here is the outline of my code: ~ // __ private HashMap HMHex2Int; // __ private final String aRegXPtrn = "\\\\u([0-9a-f]{4})"; private final Pattern UKdRegX = Pattern.compile(aRegXPtrn, Pattern.CASE_INSENSITIVE); // __ private final String[] aHex2ByteTbl = new String[]{ "00", "01", "02", "03", "04", "05", "06", "07", "08", "09", "0a", "0b", "0c", "0d", "0e", "0f", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "1a", "1b", "1c", "1d", "1e", "1f", "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", "2a", "2b", "2c", "2d", "2e", "2f", "30", "31", "32", "33", "34", "35", "36", "37", "38", "39", "3a", "3b", "3c", "3d", "3e", "3f", "40", "41", "42", "43", "44", "45", "46", "47", "48", "49", "4a", "4b", "4c", "4d", "4e", "4f", "50", "51", "52", "53", "54", "55", "56", "57", "58", "59", "5a", "5b", "5c", "5d", "5e", "5f", "60", "61", "62", "63", "64", "65", "66", "67", "68", "69", "6a", "6b", "6c", "6d", "6e", "6f", "70", "71", "72", "73", "74", "75", "76", "77", "78", "79", "7a", "7b", "7c", "7d", "7e", "7f", "80", "81", "82", "83", "84", "85", "86", "87", "88", "89", "8a", "8b", "8c", "8d", "8e", "8f", "90", "91", "92", "93", "94", "95", "96", "97", "98", "99", "9a", "9b", "9c", "9d", "9e", "9f", "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8", "a9", "aa", "ab", "ac", "ad", "ae", "af", "b0", "b1", "b2", "b3", "b4", "b5", "b6", "b7", "b8", "b9", "ba", "bb", "bc", "bd", "be", "bf", "c0", "c1", "c2", "c3", "c4", "c5", "c6", "c7", "c8", "c9", "ca", "cb", "cc", "cd", "ce", "cf", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d8", "d9", "da", "db", "dc", "dd", "de", "df", "e0", "e1", "e2", "e3", "e4", "e5", "e6", "e7", "e8", "e9", "ea", "eb", "ec", "ed", "ee", "ef", "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", "f8", "f9", "fa", "fb", "fc", "fd", "fe", "ff" }; ~ // __ ctor ~ HMHex2Int = new HashMap(); for(int i = 0; (i < aHex2ByteTbl.length); ++i){ HMHex2Int.put(aHex2ByteTbl[i], HMHex2Int.size()); } ~ then: ~ // __ converts from \(\)u#### (front slash u sequences not turn to strings by the compiler) to unikd public String unescapeHex2String(String aFSU) throws UnsupportedEncodingException{ StringBuilder aBldr = null; // __ int iFSUL; if((aFSU != null) && ((iFSUL = aFSU.length()) > 0)){ int[] iHex = new int[2]; int iHexArL = iHex.length; String aUKdS; aBldr = new StringBuilder(); // __ Matcher UKdRegXMtx = UKdRegX.matcher(aFSU); // __ while (UKdRegXMtx.find()){ aUKdS = aFSU.substring((UKdRegXMtx.start() + 2), UKdRegXMtx.end()); // __ for(int j = 0; (j < iHexArL); ++j){ iHex[j] = HMHex2Byte.get(aUKdS.substring(2*j, 2*(j + 1)).toLowerCase()).intValue(); }// j [0, iHexArL) // __ aBldr.append((char)(16*iHex[0] + iHex[1])); } }// ((aFSU != null) && ((iFSUL = aFSU.length()) > 0)) // __ return(aBldr.toString()); } ~ lbrtchx