import io import html import token import tokenize import keyword import builtins import re import sys PALETTE = { 'kw': 'text-pink-400', 'builtin': 'text-violet-300', 'name': 'text-sky-300', 'func': 'text-sky-300', 'attr': 'text-green-300', 'str': 'text-amber-300', 'num': 'text-cyan-300', 'op': 'text-red-400', 'punct': 'text-red-400', 'cmt': 'text-gray-500 italic', 'err': 'bg-red-900 text-red-300', 'ht_tag': 'text-fuchsia-300', 'ht_attr': 'text-green-300', 'ht_eq': 'text-red-400', 'ht_str': 'text-amber-300', 'ht_comment': 'text-gray-500 italic', 'ht_text': 'text-slate-300' } _builtin_names = set(dir(builtins)) def esc(s): return html.escape(s).replace(' ', ' ').replace('\t', ' '*4) def py_highlight(src): out = [] prev = None gen = tokenize.generate_tokens(io.StringIO(src).readline) for ttype, val, *_ in gen: cls = '' if ttype == token.NAME: if keyword.iskeyword(val): cls = 'kw' elif prev and prev[0] == token.OP and prev[1] == '.': cls = 'attr' elif val in _builtin_names: cls = 'builtin' else: cls = 'name' elif ttype == token.OP: cls = 'op' if re.match(r'[+\-*/%=<>!^|&~]', val) else 'punct' elif ttype == token.STRING: cls = 'str' elif ttype == token.NUMBER: cls = 'num' elif ttype == token.COMMENT: cls = 'cmt' elif ttype == token.ERRORTOKEN: cls = 'err' prev = (ttype, val) piece = esc(val).replace('\n', '\n') # keep newlines out.append( f'{piece}' if cls else piece) return ''.join(out) # HTML regexes FULL_TAG = r'<\s*/?\s*[A-Za-z0-9:-]+[!?]?(?:\s[^<>]*?)?>' HT_RE = re.compile(r'(?s)()|(' + FULL_TAG + r')|([^<]+)') TAG_NAME_RE = re.compile(r'(?s)(<\s*/?\s*)([A-Za-z0-9:-]+[!?]?)(.*?)(>)') ATTR_RE = re.compile(r'([A-Za-z0-9:-]+)(\s*=\s*)?', re.S) QSTR_RE = re.compile(r'(".*?"|\'.*?\')', re.S) PY_OPEN = re.compile(r'(?i)^<\s*([A-Za-z0-9:-]+[!?]?)') # capture tag name PY_NAMES = {'py!', 'p?'} def process_fulltag(fulltag): m = TAG_NAME_RE.match(fulltag) if not m: return f'{esc(fulltag)}' pre, name, rest, gt = m.groups() out = [esc(pre), f'{esc(name)}'] if rest: i = 0 while i < len(rest): ma = ATTR_RE.match(rest, i) if ma: an, eq = ma.groups() out.append( f'{esc(an)}') if eq: out.append(esc(eq)) i = ma.end() continue mq = QSTR_RE.match(rest, i) if mq: q = mq.group(1) out.append( f'{esc(q)}') i = mq.end() continue out.append(esc(rest[i])) i += 1 out.append(esc(gt)) return ''.join(out) def _html_tokens(src): out = [] i = 0 L = len(src) # iterate through matches but manage consumption manually to support multi-line py containers for m in HT_RE.finditer(src): if m.start() < i: continue com, fulltag, text = m.groups() if com: out.append( f'{esc(com)}') i = m.end() continue if fulltag: # detect tag name mt = TAG_NAME_RE.match(fulltag) name = mt.group(2) if mt else '' lname = name.lower() is_open = not re.match(r'<\s*/', fulltag) # if opening py-container, find the matching closing tag (first occurrence) if lname in PY_NAMES and is_open: out.append(process_fulltag(fulltag)) # opening # closing tag pattern, case-insensitive close_re = re.compile(rf'(?i)\s*{re.escape(name)}\s*>') mclose = close_re.search(src, m.end()) if mclose: inner = src[m.end():mclose.start()] out.append(py_highlight(inner)) out.append(process_fulltag(mclose.group(0))) # closing i = mclose.end() # continue scanning after closing continue else: # no closing found: just output opening and continue i = m.end() continue else: out.append(process_fulltag(fulltag)) i = m.end() continue if text: out.append( f'{esc(text)}') i = m.end() return ''.join(out) def highlight(src, lang='py'): return _html_tokens(src) if lang and lang.lower().startswith('h') else py_highlight(src) def code_comp(code, lang='py'): code = code.split('\n') lines = '\n'.join( [f'{highlight(code_line, lang)}' for line, code_line in enumerate(code, 1)]) return f'''