# PyWPR/www/docs/components/code.py

import io
import html
import token
import tokenize
import keyword
import builtins
import re
import sys

# CSS class string applied to the <span> of each token category.
PALETTE = {
    # Python token categories
    'kw': 'text-pink-400',
    'builtin': 'text-violet-300',
    'name': 'text-sky-300',
    'func': 'text-sky-300',
    'attr': 'text-green-300',
    'str': 'text-amber-300',
    'num': 'text-cyan-300',
    'op': 'text-red-400',
    'punct': 'text-red-400',
    'cmt': 'text-gray-500 italic',
    'err': 'bg-red-900 text-red-300',
    # HTML token categories
    'ht_tag': 'text-fuchsia-300',
    'ht_attr': 'text-green-300',
    'ht_eq': 'text-red-400',
    'ht_str': 'text-amber-300',
    'ht_comment': 'text-gray-500 italic',
    'ht_text': 'text-slate-300',
}

# Every name exported by the builtins module, for classifying identifiers.
_builtin_names = {*dir(builtins)}


def esc(s):
    """Escape *s* for HTML and make its spaces and tabs non-collapsing.

    ``html.escape`` takes care of ``&``, ``<``, ``>`` and both quote
    characters; afterwards each space becomes ``&nbsp;`` and each tab becomes
    four ``&nbsp;``. Newlines are left untouched.
    """
    nbsp = '&nbsp;'
    # '&nbsp;' contains neither a space nor a tab, so one translate pass gives
    # the same result as replacing spaces and then tabs.
    whitespace = str.maketrans({' ': nbsp, '\t': nbsp * 4})
    return html.escape(s).translate(whitespace)


def py_highlight(src):
    """Return *src* as HTML with each Python token wrapped in a coloured span.

    The text is tokenized with ``tokenize``. Names, operators, strings,
    numbers, comments and error tokens are wrapped in a ``<span>`` whose class
    comes from ``PALETTE``. Text the tokenizer does not report as a token
    (whitespace between tokens, backslash continuations) is copied through
    escaped, so the visible text of the result matches *src*.

    Fragments that are not complete Python (an unclosed bracket or
    triple-quoted string, for example one line cut out of a larger program)
    make the tokenizer raise. The tokens produced up to that point are kept
    and the remainder is emitted as plain escaped text.

    Args:
        src: Python source text; may be an incomplete fragment.

    Returns:
        The highlighted HTML string.
    """
    total = len(src)
    # Index in src where each line starts, so the tokenizer's (row, col)
    # positions can be converted to offsets into src.
    line_starts = [0]
    line_starts.extend(nl.end() for nl in re.finditer(r'\n', src))

    def offset(pos):
        # Positions past the text (e.g. the ENDMARKER row) clamp to the end.
        row, col = pos
        if row > len(line_starts):
            return total
        return min(line_starts[row - 1] + col, total)

    # Python 3.12+ reports f-strings as FSTRING_* tokens instead of STRING;
    # colour those as strings too when the running interpreter defines them.
    string_types = {token.STRING}
    for extra in ('FSTRING_START', 'FSTRING_MIDDLE', 'FSTRING_END'):
        if hasattr(token, extra):
            string_types.add(getattr(token, extra))

    out = []
    prev = None
    cursor = 0  # everything in src[:cursor] has already been emitted
    try:
        tokens = tokenize.generate_tokens(io.StringIO(src).readline)
        for ttype, val, start, end, _ in tokens:
            begin = max(offset(start), cursor)
            if begin > cursor:
                # Inter-token text the tokenizer skipped (spaces, "\\\n", ...).
                out.append(esc(src[cursor:begin]))
            cursor = max(begin, offset(end))
            text = src[begin:cursor]

            cls = ''
            if ttype == token.NAME:
                if keyword.iskeyword(val):
                    cls = 'kw'
                elif prev and prev[0] == token.OP and prev[1] == '.':
                    cls = 'attr'
                elif val in _builtin_names:
                    cls = 'builtin'
                else:
                    cls = 'name'
            elif ttype == token.OP:
                cls = 'op' if re.match(r'[+\-*/%=<>!^|&~]', val) else 'punct'
            elif ttype in string_types:
                cls = 'str'
            elif ttype == token.NUMBER:
                cls = 'num'
            elif ttype == token.COMMENT:
                cls = 'cmt'
            elif ttype == token.ERRORTOKEN:
                cls = 'err'
            prev = (ttype, val)

            if not text:
                # Zero-width tokens (DEDENT, ENDMARKER, implicit NEWLINE).
                continue
            piece = esc(text)
            out.append(
                f'<span class="{PALETTE.get(cls, "")}">{piece}</span>' if cls else piece)
    except (tokenize.TokenError, SyntaxError):
        # Incomplete or invalid fragment: keep what was tokenized; the
        # untokenized tail is emitted as plain text below.
        pass
    if cursor < total:
        out.append(esc(src[cursor:]))
    return ''.join(out)


# Regular expressions used by the HTML highlighter.

# One complete tag: '<', optional '/', a name made of letters, digits, ':' or
# '-' with an optional trailing '!' or '?', optional attribute text that
# contains no '<' or '>', then '>'.
FULL_TAG = r'<\s*/?\s*[A-Za-z0-9:-]+[!?]?(?:\s[^<>]*?)?>'
# Top-level tokenizer with three alternatives, in priority order:
# group 1 = an HTML comment, group 2 = a full tag, group 3 = a run of text up
# to the next '<'. (?s) lets a comment span newlines.
HT_RE = re.compile(r'(?s)(<!--.*?-->)|(' + FULL_TAG + r')|([^<]+)')
# Splits a tag into: opening '<' (with optional '/'), tag name, everything up
# to the first '>', and that '>'.
TAG_NAME_RE = re.compile(r'(?s)(<\s*/?\s*)([A-Za-z0-9:-]+[!?]?)(.*?)(>)')
# An attribute name, optionally followed by '=' with surrounding whitespace.
ATTR_RE = re.compile(r'([A-Za-z0-9:-]+)(\s*=\s*)?', re.S)
# A double- or single-quoted string (shortest match, may span newlines).
QSTR_RE = re.compile(r'(".*?"|\'.*?\')', re.S)
# Captures the tag name at the start of a tag.
# NOTE(review): not referenced in the code visible here; confirm it is used elsewhere.
PY_OPEN = re.compile(r'(?i)^<\s*([A-Za-z0-9:-]+[!?]?)')  # capture tag name

# Lower-cased tag names whose content _html_tokens highlights as Python.
PY_NAMES = {'py!', 'p?'}


def process_fulltag(fulltag):
    """Return highlighted HTML for a single tag such as ``<a href="x">``.

    The tag name is wrapped in the ``ht_tag`` colour, attribute names in
    ``ht_attr`` and quoted values in ``ht_str``; every other character (the
    angle brackets, ``=``, whitespace, ``/``) is only escaped. If the tag
    cannot be split by ``TAG_NAME_RE`` the whole text is coloured as a tag.
    """
    tag_cls = PALETTE["ht_tag"]
    parts = TAG_NAME_RE.match(fulltag)
    if parts is None:
        return f'<span class="{tag_cls}">{esc(fulltag)}</span>'

    opener, tag_name, attrs, closer = parts.groups()
    pieces = [esc(opener), f'<span class="{tag_cls}">{esc(tag_name)}</span>']

    # Walk the attribute text left to right: attribute name first, then a
    # quoted string, otherwise pass a single character through.
    cursor, limit = 0, len(attrs)
    while cursor < limit:
        attr_hit = ATTR_RE.match(attrs, cursor)
        if attr_hit is not None:
            attr_name, equals = attr_hit.groups()
            pieces.append(
                f'<span class="{PALETTE["ht_attr"]}">{esc(attr_name)}</span>')
            if equals:
                pieces.append(esc(equals))
            cursor = attr_hit.end()
        else:
            quoted = QSTR_RE.match(attrs, cursor)
            if quoted is not None:
                pieces.append(
                    f'<span class="{PALETTE["ht_str"]}">{esc(quoted.group(1))}</span>')
                cursor = quoted.end()
            else:
                pieces.append(esc(attrs[cursor]))
                cursor += 1

    pieces.append(esc(closer))
    return ''.join(pieces)


def _html_tokens(src):
    """Return *src* (HTML text) as highlighted HTML.

    The text is scanned left to right with ``HT_RE``: comments get the
    ``ht_comment`` colour, tags go through ``process_fulltag`` and text runs
    get the ``ht_text`` colour. When an opening tag whose lower-cased name is
    in ``PY_NAMES`` is followed by a matching closing tag, the text between
    them is highlighted with ``py_highlight``.

    A ``<`` that starts neither a comment nor a complete tag (``a < b``,
    ``<!DOCTYPE html>``, a tag cut off at the end of the text) is emitted as
    plain text together with the text that follows it, so no input character
    is dropped from the output.

    Args:
        src: HTML source text.

    Returns:
        The highlighted HTML string.
    """
    out = []
    i = 0
    L = len(src)
    # Consume the text manually (instead of finditer) so that scanning can
    # jump past a py-container and so that unmatched characters are not lost.
    while i < L:
        m = HT_RE.match(src, i)
        if m is None:
            # Only a stray '<' can fail every alternative; show it as text,
            # merged with whatever follows up to the next '<'.
            nxt = src.find('<', i + 1)
            stop = L if nxt == -1 else nxt
            out.append(
                f'<span class="{PALETTE["ht_text"]}">{esc(src[i:stop])}</span>')
            i = stop
            continue

        com, fulltag, text = m.groups()
        i = m.end()
        if com:
            out.append(
                f'<span class="{PALETTE["ht_comment"]}">{esc(com)}</span>')
        elif fulltag:
            mt = TAG_NAME_RE.match(fulltag)
            name = mt.group(2) if mt else ''
            is_open = not re.match(r'<\s*/', fulltag)
            out.append(process_fulltag(fulltag))
            if name.lower() in PY_NAMES and is_open:
                # Opening py-container: find the first matching closing tag
                # (case-insensitive) and highlight what lies between.
                close_re = re.compile(rf'(?i)</\s*{re.escape(name)}\s*>')
                mclose = close_re.search(src, i)
                if mclose:
                    out.append(py_highlight(src[i:mclose.start()]))
                    out.append(process_fulltag(mclose.group(0)))
                    i = mclose.end()
                # Without a closing tag only the opening tag is special; the
                # rest keeps being scanned as HTML.
        elif text:
            out.append(
                f'<span class="{PALETTE["ht_text"]}">{esc(text)}</span>')
    return ''.join(out)


def highlight(src, lang='py'):
    """Highlight *src* as HTML or Python depending on *lang*.

    A non-empty *lang* starting with ``h`` (case-insensitive) selects the HTML
    highlighter; anything else, including an empty or ``None`` value, selects
    the Python highlighter.
    """
    wants_html = bool(lang) and lang.lower().startswith('h')
    if wants_html:
        return _html_tokens(src)
    return py_highlight(src)


def code_comp(code, lang='py'):
    """Render *code* as an HTML code-window component with line numbers.

    The text is split on ``\\n`` and each line is highlighted on its own via
    ``highlight(line, lang)``. Every line becomes a ``<span>`` whose CSS
    ``before:content`` carries the 1-based line number, zero-padded to the
    width of the largest line number.

    Args:
        code: Source text to display.
        lang: Language selector passed through to ``highlight``.

    Returns:
        The HTML markup of the component as a string.
    """
    source_lines = code.split('\n')
    # Width of the largest line number; all labels are padded to it.
    width = len(str(len(source_lines)))
    rendered = []
    for number, source_line in enumerate(source_lines, 1):
        label = str(number).zfill(width)
        rendered.append(
            f'<span class="before:content-[\'{label}\'] ">{highlight(source_line, lang)}</span>')
    lines = '\n'.join(rendered)
    return f'''
<div class="border border-slate-500 border-4 rounded-xl p-5 bg-gradient-to-br from-zinc-900 to-slate-800 w-full text-center">
    <div class="flex gap-1.5 *:w-[12px] *:h-[12px] *:bg-white *:rounded-full *:transition pb-3">
        <span class="hover:bg-red-600"></span>
        <span class="hover:bg-yellow-300"></span>
        <span class="hover:bg-green-500"></span>
    </div>
    <div class="*:text-white font-mono flex flex-col gap-0 my-2 *:before:mr-3 *:text-sm *:text-left *:before:px-4 *:before:border-r *:before:border-white/20 *:flex">
        {lines}
    </div>
</div>
        '''