Path: csiph.com!usenet.pasdenom.info!weretis.net!feeder4.news.weretis.net!feeds.phibee-telecom.net!newsfeed.xs4all.nl!newsfeed3.news.xs4all.nl!xs4all!post.news.xs4all.nl!not-for-mail Return-Path: X-Original-To: python-list@python.org Delivered-To: python-list@mail.python.org X-Spam-Status: OK 0.000 X-Spam-Evidence: '*H*': 1.00; '*S*': 0.00; 'else:': 0.03; 'encoding': 0.05; 'subject:Python': 0.06; '"""': 0.07; '-*-': 0.07; 'args': 0.07; 'failing': 0.07; 'filename:fname piece:py': 0.07; 'only,': 0.07; 'sys': 0.07; 'utf-8': 0.07; '128': 0.09; '[],': 0.09; 'ascii': 0.09; 'attributes': 0.09; 'coding:': 0.09; 'encode': 0.09; 'happen.': 0.09; 'iterate': 0.09; 'method,': 0.09; 'msg': 0.09; 'received:212.227.126': 0.09; 'subject:set': 0.09; 'thrown': 0.09; 'python': 0.11; 'def': 0.12; '"""helper': 0.16; '"""tests': 0.16; '%r"': 0.16; "%s'": 0.16; "'''": 0.16; "'b',": 0.16; "'c',": 0.16; "'d',": 0.16; "'e',": 0.16; "'utf8',": 0.16; '()))': 0.16; '**kwargs):': 0.16; '66,': 0.16; "['a',": 0.16; 'basestring):': 0.16; 'classname': 0.16; 'codec': 0.16; 'dict': 0.16; 'differs': 0.16; 'dump': 0.16; 'emit': 0.16; 'expected,': 0.16; 'factory,': 0.16; 'normal,': 0.16; 'ordinal': 0.16; 'reason.': 0.16; 'record,': 0.16; 'set,': 0.16; 'traceback.': 0.16; 'tup': 0.16; 'elements': 0.16; 'basically': 0.19; 'commit': 0.19; '(the': 0.22; 'code,': 0.22; 'import': 0.22; 'header:User-Agent:1': 0.23; 'example.': 0.24; 'refers': 0.24; 'skip:% 10': 0.24; 'skip:l 30': 0.24; 'string,': 0.24; 'unicode': 0.24; 'visible': 0.24; 'non': 0.24; 'question': 0.24; 'logging': 0.26; 'skip:" 40': 0.26; 'values': 0.27; 'record': 0.27; 'tried': 0.27; 'appear': 0.29; 'rest': 0.29; "doesn't": 0.30; 'characters': 0.30; 'see,': 0.30; 'skip:( 20': 0.30; "i'm": 0.30; 'code': 0.31; "skip:' 10": 0.31; 'usually': 0.31; 'assert': 0.31; 'crash': 0.31; 'keys': 0.31; 'file': 0.32; 'class': 0.32; 'compatible': 0.32; 'handled': 0.32; '(most': 0.33; 'not.': 0.33; 'skip:# 10': 0.33; 'skip:_ 10': 0.34; 'could': 0.34; "can't": 0.35; 'classes': 0.35; 'except': 0.35; 'skip:s 30': 0.35; 'case,': 0.35; 'usual': 0.35; 'but': 0.35; 'keyword': 0.36; 'returning': 0.36; 'charset:us-ascii': 0.36; 'thanks': 0.36; 'hi,': 0.36; 'similar': 0.36; 'should': 0.36; 'two': 0.37; 'level': 0.37; 'starting': 0.37; 'being': 0.38; 'represent': 0.38; 'to:addr:python-list': 0.38; 'recent': 0.39; 'does': 0.39; 'quote': 0.39; 'structure': 0.39; 'to:addr:python.org': 0.39; 'called': 0.40; 'skip:u 10': 0.60; 'read': 0.60; 'easy': 0.60; 'logged': 0.60; 'logs': 0.60; 'name': 0.63; 'kind': 0.63; 'finish': 0.65; 'effectively': 0.66; 'default': 0.69; 'records,': 0.69; 'soul': 0.74; 'other.': 0.75; 'behavior': 0.77; 'bla': 0.84; 'subject:being': 0.84; 'dealt': 0.91; 'good,': 0.91; 'involved.': 0.91; 'subject:skip:s 20': 0.91; 'thereafter': 0.93 X-Virus-Scanned: amavisd-new at lisa.loc From: Hans-Peter Jansen To: python-list@python.org Subject: Python 2.7.5: Strange and differing behavior depending on sys.setdefaultencoding being set Date: Tue, 03 Dec 2013 23:32:25 +0100 User-Agent: KMail/4.11.2 (Linux/3.11.6-4-desktop; KDE/4.11.2; x86_64; ; ) MIME-Version: 1.0 Content-Type: multipart/mixed; boundary="nextPart4922744.ChyCnRIKBJ" Content-Transfer-Encoding: 7Bit X-Provags-ID: V02:K0:KkE66U0qRV2bUoJO6jnpwdVA5swXKMuRwCDyKiYsoqN WgmL3z8Dj2JoC6RSPKBOZb0irdYbjje2ZVHAxguTGCZjN7ATBf WzXU+DVjSBlT7JCl8c6Bva4evMcrmoIxTqges0AfF1sJ8vuGl3 8CNB8nZ2S11NUGQnz2B1QKcRFmO9XjrpctLAlIBa1AN2+9y+p1 WcSai3RYjcgA+DyRgT1Br7d4rxLuyqbr+03Ze5ciBGtlXppZV7 LWu8MeXP3S6/eb8/HHGgtutZS3DVRiQDgF+k1QnCsX/UE7PK5G yd4nHUGgTVc1khbQefEYiQkgiuJ3t6XCNvQ1puWzR10Z6bl3Q= = X-BeenThere: python-list@python.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: General discussion list for the Python programming language List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Newsgroups: comp.lang.python Message-ID: Lines: 204 NNTP-Posting-Host: 2001:888:2000:d::a6 X-Trace: 1386109953 news.xs4all.nl 2888 [2001:888:2000:d::a6]:46233 X-Complaints-To: abuse@xs4all.nl Xref: csiph.com comp.lang.python:60966 This is a multi-part message in MIME format. --nextPart4922744.ChyCnRIKBJ Content-Transfer-Encoding: 7Bit Content-Type: text/plain; charset="us-ascii" Hi, I'm experiencing strange behavior with attached code, that differs depending on sys.setdefaultencoding being set or not. If it is set, the code works as expected, if not - what should be the usual case - the code fails with some non-sensible traceback. I tried to boil it down to a comprehensible state, but some LOC are still involved. I use similar code to represent database records, where the repr's appear in user visible logs for example. It is greatly appreciated, if some kind soul could shed some light on this ungrateful behavior. Thanks in advance, Pete --nextPart4922744.ChyCnRIKBJ Content-Disposition: inline; filename="reprtest.py" Content-Transfer-Encoding: quoted-printable Content-Type: text/x-python; charset="UTF-8"; name="reprtest.py" #!/usr/bin/env python # -*- coding: utf-8 -*- u""" # this code will usually crash with python 2.7.5, except some poor soul= has thrown # a file called sitecustomize.py into systems site-packages containing:= import sys sys.setdefaultencoding('utf-8') # question is, why does it crash without defining a default encoding? # these are basically two classes with a __repr__ method, where one cla= ss # refers to the other. On a repr, a two level recursion should happen. # from the log traces, one can see, that in the good case, this is deal= t # well, in the normal case, the recursion isn't handled well: when it # should continue, it restarts the loop for some reason. # with default encoding set (comments starting the #): DEBUG: ['a', 'b', 'c', 'd', 'e', 'f'] # iterate over these el= ements of D2Rec DEBUG: a DEBUG: b DEBUG: c DEBUG: d DEBUG: ['bla', 'ho', 'hu', 'pi', 'tup'] # iterate over these el= ements of D1Rec DEBUG: bla DEBUG: ho DEBUG: hu DEBUG: pi DEBUG: tup # D1Rec exhausted DEBUG: e # continue with rest of= D2Rec DEBUG: f # finish DEBUG: D2Rec( # dump structure a: 1 b: 2 c: 3 d: [D1Rec( bla: [] ho: u'=C3=A4=C3=B6=C3=BC' hu: 'hu' pi: 3.14 tup: () )] e: u'f=C3=BCnf' f: 'sechs' ) # all is good, without (the normal, but failing way): DEBUG: ['a', 'b', 'c', 'd', 'e', 'f'] # iterate over these el= ements of D2Rec DEBUG: a DEBUG: b DEBUG: c DEBUG: d DEBUG: ['bla', 'ho', 'hu', 'pi', 'tup'] # iterate over these el= ements of D1Rec DEBUG: bla DEBUG: ho DEBUG: hu DEBUG: pi DEBUG: tup # D1Rec exhausted DEBUG: ['a', 'b', 'c', 'd', 'e', 'f'] # it should continue wi= th rest of D1Rec DEBUG: a # but restarts the proc= ess for unknown reasons DEBUG: b # effectively processin= g these items twice DEBUG: c # only to commit suicid= e thereafter DEBUG: d DEBUG: ['bla', 'ho', 'hu', 'pi', 'tup'] DEBUG: bla DEBUG: ho DEBUG: hu DEBUG: pi DEBUG: tup Traceback (most recent call last): File "/usr/lib64/python2.7/logging/__init__.py", line 851, in emit msg =3D self.format(record) File "/usr/lib64/python2.7/logging/__init__.py", line 724, in format return fmt.format(record) File "/usr/lib64/python2.7/logging/__init__.py", line 467, in format s =3D self._fmt % record.__dict__ File "reprtest.py", line 80, in __repr__ return u'%s(\n%s\n)' % (self.__class__.__name__, frec(self.__dict__= )) File "reprtest.py", line 66, in frec ret.append(u'%*s: %s' % (maxklen, key, reprstr(rec[key]))) File "reprtest.py", line 53, in reprstr s =3D repr(s) UnicodeEncodeError: 'ascii' codec can't encode characters in position 2= 2-24: ordinal not in range(128) Logged from file reprtest.py, line 108 # Consequently, the traceback doesn't make any sense.. """ import sys import logging logconfig =3D { 'level': logging.DEBUG, 'format': '%(levelname)s: %(message)s', 'encoding': 'utf8', } logging.basicConfig(**logconfig) log =3D logging.getLogger(__name__) def isascii(s): """tests a string, if it can be represented as pure ascii""" return all(ord(c) < 128 for c in s) def reprstr(s): """helper to format values in a python 2 compatible way, using unicode only, where necessary, and quote strings """ if isinstance(s, basestring): if isascii(s): s =3D repr(str(s)) else: assert isinstance(s, unicode), "only unicode for non ascii = strings allowed: %r" % s s =3D "u'%s'" % s.replace("'", "\\'") else: s =3D repr(s) return s def frec(rec): '''format a dict in a easy to read sorted record presentation ''' ret =3D [] keys =3D [key for key in rec] maxklen =3D len(keys) and max([len(key) for key in keys]) or 0 log.debug(sorted(keys)) for key in sorted(keys): log.debug(key) ret.append(u'%*s: %s' % (maxklen, key, reprstr(rec[key]))) return u'\n'.join(ret) def recordfactory(classname, **kwargs): """record factory, returning a class name classname, and keyword args assigned as class members """ class Record(object): """represent a Record, carrying its attributes as class members= """ def __init__(self, **kwargs): self.__dict__.update(kwargs) def __repr__(self): return u'%s(\n%s\n)' % (self.__class__.__name__, frec(self.= __dict__)) record =3D Record(**kwargs) record.__class__.__name__ =3D classname return record d1rec =3D recordfactory('D1Rec', **dict(hu =3D 'hu', ho =3D u'=C3=A4=C3= =B6=C3=BC', pi =3D 3.14, bla =3D [], tup =3D ())) d2rec =3D recordfactory('D2Rec', **dict(a =3D 1, b =3D 2, c =3D 3, d =3D= [d1rec], e =3D u'f=C3=BCnf', f =3D 'sechs')) log.debug(d2rec) --nextPart4922744.ChyCnRIKBJ--