Path: csiph.com!newsfeed.hal-mli.net!feeder3.hal-mli.net!newsfeed.hal-mli.net!feeder1.hal-mli.net!news.tele.dk!feed118.news.tele.dk!news.tele.dk!small.news.tele.dk!newsgate.cistron.nl!newsgate.news.xs4all.nl!post.news.xs4all.nl!not-for-mail Return-Path: X-Original-To: python-list@python.org Delivered-To: python-list@mail.python.org X-Spam-Status: OK 0.013 X-Spam-Evidence: '*H*': 0.97; '*S*': 0.00; 'received:209.85.223': 0.03; 'ascii': 0.07; 'skip:/ 10': 0.07; 'sub': 0.09; 'subject:method': 0.09; 'to:addr:comp.lang.python': 0.09; 'cc:addr :python-list': 0.10; 'increment': 0.16; 'printf': 0.16; 'received:209.85.223.185': 0.16; 'skip:@ 20': 0.16; 'workspace': 0.16; 'wrote:': 0.17; 'subject:page': 0.17; 'skip:% 10': 0.22; 'cc:2**0': 0.23; 'cc:no real name:2**0': 0.24; 'cc:addr:python.org': 0.25; 'header:In-Reply-To:1': 0.25; 'header :User-Agent:1': 0.26; 'values': 0.26; 'right.': 0.27; 'chris': 0.28; 'cat': 0.29; 'hash': 0.29; 'perl': 0.29; 'character': 0.29; 'that.': 0.30; 'push': 0.30; 'gets': 0.32; 'asking': 0.32; 'subject: .': 0.33; 'received:google.com': 0.34; 'received:209.85': 0.35; 'something': 0.35; 'there': 0.35; 'add': 0.36; 'method': 0.36; 'enough': 0.36; 'received:209': 0.37; 'subject:: ': 0.38; 'store': 0.38; 'save': 0.61; 'map': 0.61; 'different': 0.63; 'our': 0.65; '8bit%:100': 0.70; '8bit%:92': 0.70; '2013': 0.84; 'increasingly': 0.84; 'otten': 0.84; 'subject:Using': 0.84; 'wrong...': 0.84 X-Received: by 10.50.242.3 with SMTP id wm3mr91082igc.2.1358925946389; Tue, 22 Jan 2013 23:25:46 -0800 (PST) Newsgroups: comp.lang.python Date: Tue, 22 Jan 2013 23:25:45 -0800 (PST) In-Reply-To: Complaints-To: groups-abuse@google.com Injection-Info: glegroupsg2000goo.googlegroups.com; posting-host=94.68.70.179; posting-account=DYJQ-woAAACEPH85Au2BhUVfFTfSfVa4 References: <50fe787e$0$30003$c3e8da3$5496439d@news.astraweb.com> <50fe8e69$0$30003$c3e8da3$5496439d@news.astraweb.com> <0459659d-4ec2-4c7d-bee3-b4e363c916dd@googlegroups.com> 
<12a22c5b-88a9-4577-a642-abe1e56cce5e@googlegroups.com> <8ad4a124-37a8-41fc-938d-9535b8affcbf@googlegroups.com> User-Agent: G2/1.0 X-Google-Web-Client: true X-Google-IP: 94.68.70.179 MIME-Version: 1.0 Subject: Re: Using filepath method to identify an .html page From: Ferrous Cranus To: comp.lang.python@googlegroups.com Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: quoted-printable Cc: python-list@python.org X-BeenThere: python-list@python.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: General discussion list for the Python programming language List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Message-ID: Lines: 175 NNTP-Posting-Host: 2001:888:2000:d::a6 X-Trace: 1358925949 news.xs4all.nl 6862 [2001:888:2000:d::a6]:46799 X-Complaints-To: abuse@xs4all.nl Xref: csiph.com comp.lang.python:37421 =CE=A4=CE=B7 =CE=A4=CF=81=CE=AF=CF=84=CE=B7, 22 =CE=99=CE=B1=CE=BD=CE=BF=CF= =85=CE=B1=CF=81=CE=AF=CE=BF=CF=85 2013 9:16:34 =CE=BC.=CE=BC. UTC+2, =CE=BF= =CF=87=CF=81=CE=AE=CF=83=CF=84=CE=B7=CF=82 Peter Otten =CE=AD=CE=B3=CF=81= =CE=B1=CF=88=CE=B5: > Ferrous Cranus wrote: >=20 >=20 >=20 > > =CE=A4=CE=B7 =CE=A4=CF=81=CE=AF=CF=84=CE=B7, 22 =CE=99=CE=B1=CE=BD=CE= =BF=CF=85=CE=B1=CF=81=CE=AF=CE=BF=CF=85 2013 6:11:20 =CE=BC.=CE=BC. UTC+2, = =CE=BF =CF=87=CF=81=CE=AE=CF=83=CF=84=CE=B7=CF=82 Chris Angelico >=20 > > =CE=AD=CE=B3=CF=81=CE=B1=CF=88=CE=B5: >=20 >=20 >=20 > >> all of it. You are asking something that is fundamentally >=20 > >> impossible[1]. There simply are not enough numbers to go around. >=20 >=20 >=20 > > Fundamentally impossible? >=20 > >=20 >=20 > > Well.... 
>=20 > >=20 >=20 > > OK: How about this in Perl: >=20 > >=20 >=20 > > $ cat testMD5.pl >=20 > > use strict; >=20 > >=20 >=20 > > foreach my $url(qw@ /index.html /about/time.html @){ >=20 > > hashit($url); >=20 > > } >=20 > >=20 >=20 > > sub hashit { >=20 > > my $url=3Dshift; >=20 > > my @ltrs=3Dsplit(//,$url); >=20 > > my $hash =3D 0; >=20 > >=20 >=20 > > foreach my $ltr(@ltrs){ >=20 > > $hash =3D ( $hash + ord($ltr)) %10000; >=20 > > } >=20 > > printf "%s: %0.4d\n",$url,$hash >=20 > > =20 >=20 > > } >=20 > >=20 >=20 > >=20 >=20 > > which yields: >=20 > > $ perl testMD5.pl >=20 > > /index.html: 1066 >=20 > > /about/time.html: 1547 >=20 >=20 >=20 > $ cat clashes.pl=20 >=20 > use strict; >=20 >=20 >=20 > foreach my $url(qw@=20 >=20 > /public/fails.html >=20 > /large/cannot.html >=20 > /number/being.html >=20 > /hope/already.html >=20 > /being/really.html >=20 > /index/breath.html >=20 > /can/although.html >=20 > @){ >=20 > hashit($url); >=20 > } >=20 >=20 >=20 > sub hashit { >=20 > my $url=3Dshift; >=20 > my @ltrs=3Dsplit(//,$url); >=20 > my $hash =3D 0; >=20 >=20 >=20 > foreach my $ltr(@ltrs){ >=20 > $hash =3D ( $hash + ord($ltr)) %10000; >=20 > } >=20 > printf "%s: %0.4d\n",$url,$hash >=20 > =20 >=20 > } >=20 > $ perl clashes.pl=20 >=20 > /public/fails.html: 1743 >=20 > /large/cannot.html: 1743 >=20 > /number/being.html: 1743 >=20 > /hope/already.html: 1743 >=20 > /being/really.html: 1743 >=20 > /index/breath.html: 1743 >=20 > /can/although.html: 1743 >=20 >=20 >=20 > Hm, I must be holding it wrong... 
my @i =3D split(//,$url); # put each letter in its own bin my $j=3D0; # Initialize our=20 my $k=3D1; # hashing increment values my @m=3D(); # workspace foreach my $n(@i){ my $q=3Dord($n); # ASCII for character $k +=3D $j; # Increment our hash offset $q +=3D $k; # add our "old" value $j =3D $k; # store that.=20 push @m,$q; # save the offset value=20 } =20 my $hashval=3D0; #initialize our hash value # Generate that map { $hashval =3D ($hashval + $_) % 10000} @m; Using that method, ABC.html and CBA.html now have different values because each letter position's value gets bumped up increasingly from left to right.