Path: csiph.com!x330-a1.tempe.blueboxinc.net!usenet.pasdenom.info!gegeweb.org!de-l.enfer-du-nord.net!feeder1.enfer-du-nord.net!newsfeed.eweka.nl!eweka.nl!feeder3.eweka.nl!newsfeed.xs4all.nl!newsfeed5.news.xs4all.nl!xs4all!post.news.xs4all.nl!not-for-mail Return-Path: X-Original-To: python-list@python.org Delivered-To: python-list@mail.python.org X-Spam-Status: OK 0.000 X-Spam-Evidence: '*H*': 1.00; '*S*': 0.00; 'url:sourceforge': 0.02; '(using': 0.05; '3.2': 0.05; 'sys': 0.05; 'python': 0.08; '(it': 0.09; '__name__': 0.09; 'collections': 0.09; 'filename': 0.09; 'ioerror:': 0.09; 'newest': 0.09; 'utf-8': 0.09; 'def': 0.12; 'win32': 0.12; 'received:209.85.214.174': 0.14; 'received:mail- iw0-f174.google.com': 0.14; "'__main__':": 0.16; "'w')": 0.16; '-*-': 0.16; 'code),': 0.16; 'coding:': 0.16; 'confused.': 0.16; 'except:': 0.16; 'indexerror:': 0.16; 'self.url': 0.16; 'url:revision': 0.16; 'compiled': 0.17; 'subject:problem': 0.22; 'reason,': 0.23; 'load': 0.24; "doesn't": 0.25; 'match': 0.26; "i'm": 0.27; 'raise': 0.28; 'keeps': 0.28; 'received:209.85.214': 0.28; 'character': 0.29; 'import': 0.29; 'matches': 0.29; 'class': 0.29; 'version': 0.29; 'code,': 0.29; 'config': 0.30; 'queue': 0.30; 'supposed': 0.31; "skip:' 10": 0.32; 'determined': 0.32; 'to:addr:python-list': 0.33; 'page.': 0.33; '[1]': 0.34; 'characters': 0.34; 'there': 0.35; 'header:User-Agent:1': 0.35; 'fails': 0.35; 'file:': 0.35; 'try:': 0.35; 'using': 0.35; 'message-id:@gmail.com': 0.36; 'skip:o 20': 0.37; 'received:google.com': 0.37; 'received:209.85': 0.37; 'logging': 0.37; 'skip:e 20': 0.37; 'case': 0.37; 'could': 0.38; 'but': 0.38; 'received:192': 0.38; 'skip:s 20': 0.39; 'should': 0.39; 'received:209': 0.39; 'subject:with': 0.39; 'add': 0.39; 'to:addr:python.org': 0.39; 'really': 0.40; 'received:192.168.1': 0.40; 'url:net': 0.63; 'webpage': 0.67; 'url:php': 0.81; '1995': 0.84; 'exc:': 0.84; 'skip:q 30': 0.84; 'url:%0a': 0.91; 'url:exe': 0.91 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; 
d=gmail.com; s=gamma; h=domainkey-signature:message-id:date:from:user-agent:mime-version:to :subject:x-enigmail-version:content-type:content-transfer-encoding; bh=sUJoXbZVaQ3ji1sI0flUgK6UUsXbKr7ZBHBcFP9g6Ig=; b=NbxdE01l5S/klIzOu5Us4cZx4P7Owg+0QpF+bvICmzxsoNHy8TjU3gjPJfzTaWatyx aQoD8TS3QAiRxY4YT2clFDmQzindPHqzKTfsFOHD/sNSyv7BOc3WkBAqi/QzM+52m6Sd 4nMvRYQ9Bvwg0+rAokvxb9Qyl/4m+a7fkBnjw= DomainKey-Signature: a=rsa-sha1; c=nofws; d=gmail.com; s=gamma; h=message-id:date:from:user-agent:mime-version:to:subject :x-enigmail-version:content-type:content-transfer-encoding; b=enmMJVEJHRUA7UeOo1tjpZTDRM6jjAoqTSgagMyy7M4HiWjoI3XzY1jTOGTj9++ZfW zfpsmfdiacbQoF6cB/2uHEKTFlef8wxYOLo81pFwwo0kJW/9x2MDrns9tmGTDuosu9MI xLe2cFmaUO9c2bKuLPl0FWiXPNv20FHknHTQI= Date: Sun, 29 May 2011 06:45:30 -0500 From: Andrew Berg User-Agent: Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.2.15) Gecko/20110303 Thunderbird/3.1.9 ThunderBrowse/3.3.5 MIME-Version: 1.0 To: python-list@python.org Subject: Weird problem matching with REs X-Enigmail-Version: 1.1.1 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 7bit X-BeenThere: python-list@python.org X-Mailman-Version: 2.1.12 Precedence: list List-Id: General discussion list for the Python programming language List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Newsgroups: comp.lang.python Message-ID: Lines: 116 NNTP-Posting-Host: 82.94.164.166 X-Trace: 1306669538 news.xs4all.nl 49178 [::ffff:82.94.164.166]:55845 X-Complaints-To: abuse@xs4all.nl Xref: x330-a1.tempe.blueboxinc.net comp.lang.python:6529 I have an RE that should work (it even works in Kodos [1], but not in my code), but it keeps failing to match characters after a newline. I'm writing a little program that scans the webpage of an arbitrary application and gets the newest version advertised on the page. 
test3.py:
> # -*- coding: utf-8 -*-
>
> import configparser
> import re
> import urllib.request
> import os
> import sys
> import logging
> import collections
>
>
> class CouldNotFindVersion(Exception):
>     def __init__(self, app_name, reason, exc_value):
>         self.value = 'The latest version of ' + app_name + ' could not
> be determined because ' + reason
>         self.cause = exc_value
>     def __str__(self):
>         return repr(self.value)
>
> class AppUpdateItem():
>     def __init__(self, config_file_name, config_file_section):
>         self.section = config_file_section
>         self.name = self.section['Name']
>         self.url = self.section['URL']
>         self.filename = self.section['Filename']
>         self.file_re = re.compile(self.section['FileURLRegex'])
>         self.ver_re = re.compile(self.section['VersionRegex'])
>         self.prev_ver = self.section['CurrentVersion']
>         try:
>             self.page = str(urllib.request.urlopen(self.url).read(),
> encoding='utf-8')
>             self.file_URL = self.file_re.findall(self.page)[0] #here
> is where it fails
>             self.last_ver = self.ver_re.findall(self.file_URL)[0]
>         except urllib.error.URLError:
>             self.error = str(sys.exc_info()[1])
>             logging.info('[' + self.name + ']' + ' Could not load URL:
> ' + self.url + ' : ' + self.error)
>             self.success = False
>             raise CouldNotFindVersion(self.name, self.error,
> sys.exc_info()[0])
>         except IndexError:
>             logging.warning('Regex did not return a match.')
>     def update_ini(self):
>         self.section['CurrentVersion'] = self.last_ver
>         with open(config_file_name, 'w') as configfile:
>             config.write(configfile)
>     def rollback_ini(self):
>         self.section['CurrentVersion'] = self.prev_ver
>         with open(config_file_name, 'w') as configfile:
>             config.write(configfile)
>     def download_file(self):
>         self.__filename = self.section['Filename']
>         with open(self.__filename, 'wb') as file:
>             self.__file_req = urllib.request.urlopen(self.file_URL).read()
>             file.write(self.__file_req)
>
>
> if __name__ == '__main__':
>     config = configparser.ConfigParser()
>     config_file = 'checklist.ini'
>     config.read(config_file)
>     queue = collections.deque()
>     for section in config.sections():
>         try:
>             queue.append(AppUpdateItem(config_file, config[section]))
>         except CouldNotFindVersion as exc:
>             logging.warning(exc.value)
>     for elem in queue:
>         if elem.last_ver != elem.prev_ver:
>             elem.update_ini()
>             try:
>                 elem.download_file()
>             except IOError:
>                 logging.warning('[' + elem.name + '] Download failed.')
>             except:
>                 elem.rollback_ini()
>             print(elem.name + ' succeeded.')

checklist.ini:
> [x264_64]
> name = x264 (64-bit)
> filename = x264.exe
> url = http://x264.nl/x264_main.php
> fileurlregex =
> http://x264.nl/x264/64bit/8bit_depth/revision\n{0,3}[0-9]{4}\n{0,3}/x264\n{0,3}.exe
> versionregex = [0-9]{4}
> currentversion = 1995

The part it's supposed to match in http://x264.nl/x264_main.php:
>