Path: csiph.com!newsfeed.hal-mli.net!feeder3.hal-mli.net!newsfeed.hal-mli.net!feeder2.hal-mli.net!newsfeed.xs4all.nl!newsfeed2.news.xs4all.nl!xs4all!post.news.xs4all.nl!not-for-mail Return-Path: X-Original-To: python-list@python.org Delivered-To: python-list@mail.python.org X-Spam-Status: OK 0.001 X-Spam-Evidence: '*H*': 1.00; '*S*': 0.00; '"""': 0.05; '%s"': 0.07; 'inserts': 0.07; 'params': 0.07; 'try:': 0.07; 'utf-8': 0.07; 'python': 0.09; 'cursor': 0.09; 'name)': 0.09; 'name):': 0.09; 'spec': 0.09; 'url:%s': 0.09; 'def': 0.10; '"""gets': 0.16; '"insert': 0.16; 'description)': 0.16; 'guys,': 0.16; 'spec)': 0.16; 'spec,': 0.16; 'subject:Unicode': 0.16; 'url:mi': 0.16; 'unicode': 0.17; 'working.': 0.17; 'insert': 0.23; 'linux': 0.24; 'tried': 0.25; 'values': 0.26; 'message-id:@mail.gmail.com': 0.27; "i'm": 0.29; 'error': 0.30; 'could': 0.32; 'skip:s 30': 0.33; 'to:addr:python-list': 0.33; 'received:google.com': 0.34; 'thanks': 0.34; 'received:209.85': 0.35; 'except': 0.36; 'skip:u 20': 0.36; 'but': 0.36; 'skip:g 30': 0.36; 'skip:m 40': 0.36; 'skip:p 20': 0.36; 'correctly': 0.37; 'received:209': 0.37; 'data': 0.37; 'some': 0.38; 'things': 0.38; 'description': 0.39; 'to:addr:python.org': 0.39; 'header:Received:5': 0.40; 'help': 0.40; 'skip:u 10': 0.60; 'url:index': 0.61; 'dont': 0.64; 'french': 0.64; 'url:cgi': 0.65; 'opener': 0.84; 'url:lang': 0.84; 'url:biz': 0.91; 'url:fr': 0.95 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20120113; h=mime-version:date:message-id:subject:from:to:content-type; bh=JwSCsRrTI7zc3f6vFBw3h0Ia9gFKTIkgOGe5tMJiuW4=; b=WDRiKLNzup82eEdqLD+6BZ4CTEzU+ssFWXkqWF5qhhy1ZWT2ugeN3GlGnuk93Knjo8 jgKHjjz4MinCYzvwHa00mCe1xFfxGnWO/+b/WasEd8hXvd3hCxXmqql1y9tHhdU+ATiY Zmglkql2kCDRT7PNOsrCqggNVFJDdWZWOOmG05c5V0dAO++BbYej2d9ir/utzQ/PVdX0 WllT5XzykUWpEQy1xsbFV3PINr45Fh64S7Jv6D5UMt7CWRHcYZPQLXFNNG0n7m5j8v6l 9e/3OiyjP1EaGBOxqAVoeIdgkDn57p7EZjar3mqwzXPJLFKde7xD3W6ldvCd/kb51wYi X4yQ== MIME-Version: 1.0 Date: Sun, 16 Dec 2012 22:10:37 +0100 
Subject: Unicode From: Anatoli Hristov To: python-list@python.org Content-Type: text/plain; charset=UTF-8 X-BeenThere: python-list@python.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: General discussion list for the Python programming language List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Newsgroups: comp.lang.python Message-ID: Lines: 73 NNTP-Posting-Host: 2001:888:2000:d::a6 X-Trace: 1355692240 news.xs4all.nl 6884 [2001:888:2000:d::a6]:38376 X-Complaints-To: abuse@xs4all.nl Xref: csiph.com comp.lang.python:34949 Hello guys, I'm using Linux CentOS and Python 2.4 with MySQL 5.xx, and I get an error with Unicode. I tried many things that I found on the net, but none of them is working. If I don't use UTF-8 it inserts the data into the DB, but some French characters are not correctly decoded. Could you please help me? Thanks def PrepareSpecs(product_id, icecat_prod_id, icecat_image_url, name): """Gets the specifications of a product from Icecat.biz and insert them into the DB """ specs = {3:GetSpecsNL(icecat_prod_id),2:GetSpecsFR(icecat_prod_id).decode('utf-8'),1:GetSpecsEN(icecat_prod_id)} SpecsToSQL(product_id,specs,name) CategorySQL(product_id) StoreSQL(product_id) GetIMG(icecat_image_url,icecat_prod_id) return def GetSpecsFR(icecat_prod_id): opener = urllib.FancyURLopener({}) ffr = opener.open("http://prf.icecat.biz/index.cgi?product_id=%s;mi=start;smi=product;shopname=openICEcat-url;lang=fr" % icecat_prod_id) specsfr = ffr.read() #specsfr = specsfr.decode('utf-8') specsfr = RemoveHTML(specsfr) ##specsfr = "%r" % specsfr ## if specsfr: ## try: ## specsfr = str(specsfr) ## except UnicodeEncodeError: ## specsfr = str(specsfr.encode('utf-16')) return specsfr def RemoveHTML(specs): specs = specs.replace("","") specs = specs.replace("","") specs = specs.replace("","") specs = specs.replace("","") specs = specs.replace("","") specs = specs.replace("","") specs = specs.replace("","") specs = specs.replace("","") specs = specs.replace("","") specs =
specs.replace("","") specs = specs.replace("","") specs = specs.replace("","") specs = specs.replace("","") specs = specs.replace("","") specs = specs.replace("","") specs = specs.replace("","") specs = specs.replace("

","") specs = specs.replace("

","") return specs def SpecsToSQL(product_id, specs, name): for lang, spec in specs.iteritems(): InsertSpecsDB(product_id, spec, lang, name) return def InsertSpecsDB(product_id, spec, name, lang): db = MySQLdb.connect("localhost","getit","opencart") cursor = db.cursor() sql = "INSERT INTO product_description (product_id, language_id, name, description) VALUES (%s,%s,%s,%s)" params = (product_id, lang, name, spec) cursor.execute(sql, params) id = cursor.lastrowid print"Updated ID %s description %s" %(int(id), lang) return