Path: csiph.com!v102.xanadu-bbs.net!xanadu-bbs.net!news.glorb.com!news-out.readnews.com!transit4.readnews.com!news-out.news.tds.net!newsreading01.news.tds.net!53ab2750!not-for-mail From: "Arne Vajhøj" Subject: Re: retriving escape unicode sequences from files ... Message-ID: <50254C51.56579.calajapr@time.synchro.net> X-Comment-To: qwertmonkey Newsgroups: comp.lang.java.programmer X-FTN-AREA: COMP.LANG.JAVA.PROGRAMMER X-FTN-MSGID: 1:261/38 35673a7c Content-Type: text/plain; charset=IBM437 Content-Transfer-Encoding: 8bit X-Gateway: time.synchro.net [Synchronet 3.16a-Win32 NewsLink 1.98] Lines: 132 Date: Fri, 10 Aug 2012 18:39:02 GMT NNTP-Posting-Host: 69.21.70.65 X-Complaints-To: news@tds.net X-Trace: newsreading01.news.tds.net 1344623942 69.21.70.65 (Fri, 10 Aug 2012 13:39:02 CDT) NNTP-Posting-Date: Fri, 10 Aug 2012 13:39:02 CDT Organization: tds.net Xref: csiph.com comp.lang.java.programmer:17635 To: qwertmonkey From: "Arne Vajhoj" To: qwertmonkey From: "Arne Vajhoj" To: qwertmonkey From: "Arne Vajhoj" To: qwertmonkey From: Arne Vajhoj On 8/4/2012 1:47 PM, qwertmonkey@syberianoutpost.ru wrote: > ~ > I would use your pattern matcher but instead of > "Character.toString((char)Integer.parseInt" ... stuff, I would use a look-up > table > ~ You could. But I am not sure that it is practical. 
> Here is the outline of my code: > ~ > // __ > private HashMap HMHex2Int; > // __ > private final String aRegXPtrn = "\\\\u([0-9a-f]{4})"; > private final Pattern UKdRegX = Pattern.compile(aRegXPtrn, > Pattern.CASE_INSENSITIVE); > // __ > private final String[] aHex2ByteTbl = new String[]{ > "00", "01", "02", "03", "04", "05", "06", "07", > "08", "09", "0a", "0b", "0c", "0d", "0e", "0f", > "10", "11", "12", "13", "14", "15", "16", "17", > "18", "19", "1a", "1b", "1c", "1d", "1e", "1f", > "20", "21", "22", "23", "24", "25", "26", "27", > "28", "29", "2a", "2b", "2c", "2d", "2e", "2f", > "30", "31", "32", "33", "34", "35", "36", "37", > "38", "39", "3a", "3b", "3c", "3d", "3e", "3f", > "40", "41", "42", "43", "44", "45", "46", "47", > "48", "49", "4a", "4b", "4c", "4d", "4e", "4f", > "50", "51", "52", "53", "54", "55", "56", "57", > "58", "59", "5a", "5b", "5c", "5d", "5e", "5f", > "60", "61", "62", "63", "64", "65", "66", "67", > "68", "69", "6a", "6b", "6c", "6d", "6e", "6f", > "70", "71", "72", "73", "74", "75", "76", "77", > "78", "79", "7a", "7b", "7c", "7d", "7e", "7f", > "80", "81", "82", "83", "84", "85", "86", "87", > "88", "89", "8a", "8b", "8c", "8d", "8e", "8f", > "90", "91", "92", "93", "94", "95", "96", "97", > "98", "99", "9a", "9b", "9c", "9d", "9e", "9f", > "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", > "a8", "a9", "aa", "ab", "ac", "ad", "ae", "af", > "b0", "b1", "b2", "b3", "b4", "b5", "b6", "b7", > "b8", "b9", "ba", "bb", "bc", "bd", "be", "bf", > "c0", "c1", "c2", "c3", "c4", "c5", "c6", "c7", > "c8", "c9", "ca", "cb", "cc", "cd", "ce", "cf", > "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", > "d8", "d9", "da", "db", "dc", "dd", "de", "df", > "e0", "e1", "e2", "e3", "e4", "e5", "e6", "e7", > "e8", "e9", "ea", "eb", "ec", "ed", "ee", "ef", > "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", > "f8", "f9", "fa", "fb", "fc", "fd", "fe", "ff" > }; > ~ > // __ ctor > ~ > HMHex2Int = new HashMap(); > for(int i = 0; (i < aHex2ByteTbl.length); ++i){ 
HMHex2Int.put(aHex2ByteTbl[i], HMHex2Int.size()); } > ~ > then: > ~ > // __ converts from \(\)u#### (front slash u sequences not turn to strings by the compiler) to unikd > public String unescapeHex2String(String aFSU) throws UnsupportedEncodingException{ > StringBuilder aBldr = null; > // __ > int iFSUL; > if((aFSU != null) && ((iFSUL = aFSU.length()) > 0)){ > int[] iHex = new int[2]; > int iHexArL = iHex.length; > String aUKdS; > aBldr = new StringBuilder(); > // __ > Matcher UKdRegXMtx = UKdRegX.matcher(aFSU); > // __ > while (UKdRegXMtx.find()){ > aUKdS = aFSU.substring((UKdRegXMtx.start() + 2), UKdRegXMtx.end()); > // __ > for(int j = 0; (j < iHexArL); ++j){ iHex[j] = HMHex2Byte.get(aUKdS.substring(2*j, 2*(j + 1)).toLowerCase()).intValue(); }// j [0, iHexArL) > // __ > aBldr.append((char)(16*iHex[0] + iHex[1])); > } > }// ((aFSU != null) && ((iFSUL = aFSU.length()) > 0)) > // __ > return(aBldr.toString()); > } > ~ But: 1) the code is difficult to read; 2) HMHex2Byte is not declared - it is probably meant to be HMHex2Int; 3) it seems as if you look up 4-bit values in an 8-bit table?? (each two-character substring is a full byte from the 256-entry table, yet the two results are combined as 16*iHex[0] + iHex[1], as if they were single hex digits); 4) the code does not handle code points > 255. Arne -+- BBBS/Li6 v4.10 Dada-1 + Origin: Prism bbs (1:261/38) -+- Synchronet 3.16a-Win32 NewsLink 1.98 Time Warp of the Future BBS - telnet://time.synchro.net:24 -+- BBBS/Li6 v4.10 Dada-1 + Origin: Prism bbs (1:261/38) -+- Synchronet 3.16a-Win32 NewsLink 1.98 Time Warp of the Future BBS - telnet://time.synchro.net:24 -+- BBBS/Li6 v4.10 Dada-1 + Origin: Prism bbs (1:261/38) -+- Synchronet 3.16a-Win32 NewsLink 1.98 Time Warp of the Future BBS - telnet://time.synchro.net:24 --- BBBS/Li6 v4.10 Dada-1 * Origin: Prism bbs (1:261/38) --- Synchronet 3.16a-Win32 NewsLink 1.98 Time Warp of the Future BBS - telnet://time.synchro.net:24