Groups | Search | Server Info | Keyboard shortcuts | Login | Register [http] [https] [nntp] [nntps]
Groups > comp.lang.python > #197602
| Path | csiph.com!eternal-september.org!feeder.eternal-september.org!nntp.eternal-september.org!eternal-september.org!.POSTED!not-for-mail |
|---|---|
| From | Lawrence D’Oliveiro <ldo@nz.invalid> |
| Newsgroups | comp.lang.python |
| Subject | valid_identifiers |
| Date | Wed, 17 Dec 2025 02:11:27 -0000 (UTC) |
| Organization | A noiseless patient Spider |
| Lines | 100 |
| Message-ID | <10ht3ge$36mjt$1@dont-email.me> (permalink) |
| MIME-Version | 1.0 |
| Content-Type | text/plain; charset=UTF-8 |
| Content-Transfer-Encoding | 8bit |
| Injection-Date | Wed, 17 Dec 2025 02:11:27 +0000 (UTC) |
| Injection-Info | dont-email.me; posting-host="a62dc5638bcf09dbe941e6b59284c084"; logging-data="3365501"; mail-complaints-to="abuse@eternal-september.org"; posting-account="U2FsdGVkX1+50BEvFRd6nDafK79z5uzP" |
| User-Agent | Pan/0.164 (Kupiansk) |
| Cancel-Lock | sha1:jyqdz7wm2eDyuUfbR/Js1SVM3gE= |
| Xref | csiph.com comp.lang.python:197602 |
Show key headers only | View raw
#!/usr/bin/python3
#+
# Which characters are valid in identifiers?
# See details at <https://docs.python.org/3/reference/lexical_analysis.html#identifiers>.
#-
import sys
import unicodedata as ud
class SeqRuns :
    """Collapses a sequence of integers into runs of consecutive values.

    Each run is stored as a tuple (start, end) with BOTH ends inclusive, so
    a value with no adjacent neighbour becomes the run (v, v). A new run is
    begun whenever an element is not exactly one greater than the element
    before it. The runs are kept, in input order, in the list self.runs.
    """

    def __init__(self, seq) :
        """Builds the run list from seq, an iterable of integers.

        seq is consumed once, so a generator is acceptable.
        """
        runs = []
        start = end = None
        # A plain for-loop (rather than next() with a None sentinel) keeps
        # "input exhausted" distinct from any value the input might hold.
        for elt in seq :
            if start is not None and elt != end + 1 :
                # elt does not extend the current run: close it off.
                runs.append((start, end))
                start = None
            #end if
            if start is None :
                start = elt
            #end if
            end = elt
        #end for
        if start is not None :
            # Flush the run still open when the input ran out.
            runs.append((start, end))
        #end if
        self.runs = runs
    #end __init__

    def nrelts(self) :
        """Returns the total number of elements covered by all the runs."""
        # Both ends of a run are inclusive, hence the + 1 per run.
        return sum(end - start + 1 for start, end in self.runs)
    #end nrelts

    def __len__(self) :
        """Returns the number of runs (not the number of elements)."""
        return len(self.runs)
    #end __len__

    def __iter__(self) :
        """Iterates over the (start, end) tuples in order."""
        return iter(self.runs)
    #end __iter__

#end SeqRuns
# All code points from 0 up to and including sys.maxunicode.
UNICODE_RANGE = range(0, sys.maxunicode + 1)

# Special cases taken from
# <https://www.unicode.org/Public/13.0.0/ucd/PropList.txt>:
# the code points listed there as Other_ID_Start ...
OTHER_ID_START = set((0x1885, 0x1886, 0x2118, 0x212E, 0x309B, 0x309C))
# ... and those listed as Other_ID_Continue (0x1369 through 0x1371 inclusive).
OTHER_ID_CONTINUE = {0x00B7, 0x0387, *range(0x1369, 0x1372), 0x19DA}

# The underscore: added to the identifier-start characters in addition to
# the ones selected by general category.
ID_START_EXTRA = {ord("_")}
# Identifier-start code points that are admitted explicitly rather than by
# general category. The union is built once here instead of being rebuilt
# inside the generator condition for each candidate code point.
_ID_START_SPECIALS = OTHER_ID_START | ID_START_EXTRA

# Runs of code points that may begin an identifier: general category
# Lu, Ll, Lt, Lm, Lo or Nl, plus the explicit special cases above.
# Categories come from the interpreter's own unicodedata tables, so the
# result depends on the Unicode version that Python was built with.
ID_START = SeqRuns \
  (
    c for c in UNICODE_RANGE
    if
            ud.category(chr(c)) in {"Lu", "Ll", "Lt", "Lm", "Lo", "Nl"}
        or
            c in _ID_START_SPECIALS
  )

# Runs of the ADDITIONAL code points allowed after the first character:
# general category Mn, Mc, Nd or Pc, plus Other_ID_Continue.
# NOTE: the Python language reference defines id_continue as including
# every id_start character as well; those are not repeated in this set.
ID_CONTINUE = SeqRuns \
  (
    c for c in UNICODE_RANGE
    if
            ud.category(chr(c)) in {"Mn", "Mc", "Nd", "Pc"}
        or
            c in OTHER_ID_CONTINUE
  )
# identifiers are compared according to NFKC normalization
# Write one line per set: its label, the element count reported by
# nrelts(), and its runs in hex -- "0XSTART..0XEND" for a multi-element
# run, a lone "0XCODE" for a run of one.
for label, runs in (("start", ID_START), ("continue", ID_CONTINUE)) :
    formatted = []
    for first, last in runs :
        if first == last :
            formatted.append("%#04X" % first)
        else :
            formatted.append("%#04X..%#04X" % (first, last))
        #end if
    #end for
    sys.stdout.write \
      (
        "%s[%d]: {%s}\n" % (label, runs.nrelts(), ", ".join(formatted))
      )
#end for
Back to comp.lang.python | Previous | Next | Find similar | Unroll thread
valid_identifiers Lawrence D’Oliveiro <ldo@nz.invalid> - 2025-12-17 02:11 +0000
csiph-web