"""
This module contains support functions for more advanced unicode operations.
This is not a public API and is for Numba internal use only. Most of the
functions are relatively straightforward translations of the functions with the
same name in CPython.
"""
from collections import namedtuple
from enum import IntEnum

import llvmlite.ir
import numpy as np

from numba.core import types, cgutils
from numba.core.imputils import (impl_ret_untracked)

from numba.core.extending import overload, intrinsic, register_jitable
from numba.core.errors import TypingError

# This is equivalent to the struct `_PyUnicode_TypeRecord` defined in CPython's
# Objects/unicodectype.c
typerecord = namedtuple(
    'typerecord',
    ['upper', 'lower', 'title', 'decimal', 'digit', 'flags'],
)

# The Py_UCS4 type from CPython, aliased here to Numba's unsigned 32-bit
# integer type. It is used below as the code point type in intrinsic
# signatures and for casting code points before the lookup. See:
# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Include/unicodeobject.h#L112    # noqa: E501
_Py_UCS4 = types.uint32

# ------------------------------------------------------------------------------
# Start code related to/from CPython's unicodectype impl
#
# NOTE: the original source at:
# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodectype.c   # noqa: E501
# contains this statement:
#
# /*
#   Unicode character type helpers.
#
#   Written by Marc-Andre Lemburg (mal@lemburg.com).
#   Modified for Python 2.0 by Fredrik Lundh (fredrik@pythonware.com)
#
#   Copyright (c) Corporation for National Research Initiatives.
#
# */


# This enum contains the values defined in CPython's Objects/unicodectype.c that
# provide masks for use against the various members of the typerecord
#
# See: https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodectype.c#L13-L27    # noqa: E501
#


# Code points of the ASCII whitespace characters, spelled via their character
# literals.
_Py_TAB = ord('\t')
_Py_LINEFEED = ord('\n')
_Py_CARRIAGE_RETURN = ord('\r')
_Py_SPACE = ord(' ')


class _PyUnicode_TyperecordMasks(IntEnum):
    """
    Single-bit masks that are tested against the `flags` member of a
    `typerecord`. Each member occupies its own bit, from bit 0 (ALPHA_MASK)
    up to bit 14 (EXTENDED_CASE_MASK).
    """
    ALPHA_MASK = 1 << 0
    DECIMAL_MASK = 1 << 1
    DIGIT_MASK = 1 << 2
    LOWER_MASK = 1 << 3
    LINEBREAK_MASK = 1 << 4
    SPACE_MASK = 1 << 5
    TITLE_MASK = 1 << 6
    UPPER_MASK = 1 << 7
    XID_START_MASK = 1 << 8
    XID_CONTINUE_MASK = 1 << 9
    PRINTABLE_MASK = 1 << 10
    NUMERIC_MASK = 1 << 11
    CASE_IGNORABLE_MASK = 1 << 12
    CASED_MASK = 1 << 13
    EXTENDED_CASE_MASK = 1 << 14


def _PyUnicode_gettyperecord(a):
    """
    Pure Python stub; calling it always raises RuntimeError. The usable
    implementation is registered for this function through `@overload`
    elsewhere in this module.
    """
    message = "Calling the Python definition is invalid"
    raise RuntimeError(message)


@intrinsic
def _gettyperecord_impl(typingctx, codepoint):
    """
    Intrinsic that binds to the external function `numba_gettyperecord` and
    returns its six outputs as a `typerecord` named tuple.

    `codepoint` must be an integer type, otherwise a TypingError is raised.
    The returned signature takes a `_Py_UCS4` argument.
    """
    if not isinstance(codepoint, types.Integer):
        raise TypingError("codepoint must be an integer")

    def details(context, builder, signature, args):
        value_type = context.get_value_type
        intc_ty = value_type(types.intc)
        uchar_ty = value_type(types.uchar)
        ushort_ty = value_type(types.ushort)

        # Name and LLVM type of every by-reference output, in the order in
        # which `numba_gettyperecord` takes them.
        outputs = (
            ('upper', intc_ty),
            ('lower', intc_ty),
            ('title', intc_ty),
            ('decimal', uchar_ty),
            ('digit', uchar_ty),
            ('flags', ushort_ty),
        )

        # void numba_gettyperecord(code, *upper, *lower, *title, *decimal,
        #                          *digit, *flags)
        c_signature = llvmlite.ir.FunctionType(
            value_type(types.void),
            [value_type(_Py_UCS4)] + [ty.as_pointer() for _, ty in outputs],
        )
        c_func = cgutils.get_or_insert_function(
            builder.module,
            c_signature, name="numba_gettyperecord")

        # One stack slot per output; the callee fills them in.
        slots = [cgutils.alloca_once(builder, ty, name=label)
                 for label, ty in outputs]
        builder.call(c_func, [args[0]] + slots)

        # Read the slots back and pack them into the named tuple result.
        fields = tuple(builder.load(slot) for slot in slots)
        ret_type = signature.return_type
        record = context.make_tuple(builder, ret_type, fields)
        return impl_ret_untracked(context, builder, ret_type, record)

    member_types = [types.intc, types.intc, types.intc, types.uchar,
                    types.uchar, types.ushort]
    record_type = types.NamedTuple(member_types, typerecord)
    return record_type(_Py_UCS4), details


@overload(_PyUnicode_gettyperecord)
def gettyperecord_impl(a):
    """
    Provides a _PyUnicode_gettyperecord binding, for convenience it will accept
    single character strings and code points.

    For a unicode string argument the implementation raises ValueError unless
    the string holds exactly one character, then looks up the typerecord of
    that character's code point. For an integer argument the value is cast to
    `_Py_UCS4` and looked up directly. No implementation is returned for any
    other argument type.
    """
    if isinstance(a, types.UnicodeType):
        # Imported locally rather than at module level (presumably to avoid
        # a circular import with numba.cpython.unicode -- TODO confirm).
        from numba.cpython.unicode import _get_code_point

        def impl(a):
            # Reject empty strings as well as multi-character ones: an empty
            # string has no character at index 0 to look up.
            if len(a) != 1:
                msg = "gettyperecord takes a single unicode character"
                raise ValueError(msg)
            code_point = _get_code_point(a, 0)
            data = _gettyperecord_impl(_Py_UCS4(code_point))
            return data
        return impl
    if isinstance(a, types.Integer):
        return lambda a: _gettyperecord_impl(_Py_UCS4(a))


# whilst it's possible to grab the _PyUnicode_ExtendedCase symbol as it's global
# it is safer to use a defined api:
@intrinsic
def _PyUnicode_ExtendedCase(typingctx, index):
    """
    Intrinsic giving access to the _PyUnicode_ExtendedCase array through the
    external function numba_get_PyUnicode_ExtendedCase, which wraps the array
    and performs the lookup.

    `index` must be an integer type, otherwise a TypingError is raised. The
    returned signature takes an `intc` and yields a `_Py_UCS4`.
    """
    if not isinstance(index, types.Integer):
        raise TypingError("Expected an index")

    def details(context, builder, signature, args):
        # Py_UCS4 numba_get_PyUnicode_ExtendedCase(int)
        result_ty = context.get_value_type(_Py_UCS4)
        index_ty = context.get_value_type(types.intc)
        c_signature = llvmlite.ir.FunctionType(result_ty, [index_ty])
        c_func = cgutils.get_or_insert_function(
            builder.module,
            c_signature, name="numba_get_PyUnicode_ExtendedCase")
        position = args[0]
        return builder.call(c_func, [position])

    return _Py_UCS4(types.intc), details

# The following functions are replications of the functions with the same name
# in CPython's Objects/unicodectype.c


# From: https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodectype.c#L64-L71    # noqa: E501
@register_jitable
def _PyUnicode_ToTitlecase(ch):
    """
    Return the titlecase code point for `ch`.

    When EXTENDED_CASE_MASK is set in the typerecord flags, the low 16 bits of
    the `title` member index the extended case array; otherwise `title` is
    added to `ch`.
    """
    record = _PyUnicode_gettyperecord(ch)
    extended = record.flags & _PyUnicode_TyperecordMasks.EXTENDED_CASE_MASK
    if extended == 0:
        return ch + record.title
    return _PyUnicode_ExtendedCase(record.title & 0xFFFF)


# From: https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodectype.c#L76-L81    # noqa: E501
@register_jitable
def _PyUnicode_IsTitlecase(ch):
    """
    Return True when TITLE_MASK is set in the typerecord flags of `ch`.
    """
    flags = _PyUnicode_gettyperecord(ch).flags
    return (flags & _PyUnicode_TyperecordMasks.TITLE_MASK) != 0


# From: https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodectype.c#L86-L91    # noqa: E501
@register_jitable
def _PyUnicode_IsXidStart(ch):
    """
    Return True when XID_START_MASK is set in the typerecord flags of `ch`.
    """
    flags = _PyUnicode_gettyperecord(ch).flags
    return (flags & _PyUnicode_TyperecordMasks.XID_START_MASK) != 0


# From: https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodectype.c#L96-L101    # noqa: E501
@register_jitable
def _PyUnicode_IsXidContinue(ch):
    """
    Return True when XID_CONTINUE_MASK is set in the typerecord flags of `ch`.
    """
    flags = _PyUnicode_gettyperecord(ch).flags
    return (flags & _PyUnicode_TyperecordMasks.XID_CONTINUE_MASK) != 0


@register_jitable
def _PyUnicode_ToDecimalDigit(ch):
    """
    Return the `decimal` member of the typerecord of `ch` when DECIMAL_MASK is
    set in its flags, and -1 otherwise.
    """
    record = _PyUnicode_gettyperecord(ch)
    is_decimal = record.flags & _PyUnicode_TyperecordMasks.DECIMAL_MASK
    if is_decimal == 0:
        return -1
    return record.decimal


# From: https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodectype.c#L123-L128  # noqa: E501
@register_jitable
def _PyUnicode_ToDigit(ch):
    """
    Return the `digit` member of the typerecord of `ch` when DIGIT_MASK is set
    in its flags, and -1 otherwise.
    """
    record = _PyUnicode_gettyperecord(ch)
    is_digit = record.flags & _PyUnicode_TyperecordMasks.DIGIT_MASK
    if is_digit == 0:
        return -1
    return record.digit


# From: https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodectype.c#L140-L145    # noqa: E501
@register_jitable
def _PyUnicode_IsNumeric(ch):
    """
    Return True when NUMERIC_MASK is set in the typerecord flags of `ch`.
    """
    flags = _PyUnicode_gettyperecord(ch).flags
    return (flags & _PyUnicode_TyperecordMasks.NUMERIC_MASK) != 0


# From: https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodectype.c#L160-L165    # noqa: E501
@register_jitable
def _PyUnicode_IsPrintable(ch):
    """
    Return True when PRINTABLE_MASK is set in the typerecord flags of `ch`.
    """
    flags = _PyUnicode_gettyperecord(ch).flags
    return (flags & _PyUnicode_TyperecordMasks.PRINTABLE_MASK) != 0


# From: https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodectype.c#L170-L175    # noqa: E501
@register_jitable
def _PyUnicode_IsLowercase(ch):
    """
    Return True when LOWER_MASK is set in the typerecord flags of `ch`.
    """
    flags = _PyUnicode_gettyperecord(ch).flags
    return (flags & _PyUnicode_TyperecordMasks.LOWER_MASK) != 0


# From: https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodectype.c#L180-L185    # noqa: E501
@register_jitable
def _PyUnicode_IsUppercase(ch):
    """
    Return True when UPPER_MASK is set in the typerecord flags of `ch`.
    """
    flags = _PyUnicode_gettyperecord(ch).flags
    return (flags & _PyUnicode_TyperecordMasks.UPPER_MASK) != 0


@register_jitable
def _PyUnicode_IsLineBreak(ch):
    """
    Return True when LINEBREAK_MASK is set in the typerecord flags of `ch`.
    """
    flags = _PyUnicode_gettyperecord(ch).flags
    return (flags & _PyUnicode_TyperecordMasks.LINEBREAK_MASK) != 0


@register_jitable
def _PyUnicode_ToUppercase(ch):
    """
    Placeholder with no implementation: always raises NotImplementedError.
    """
    raise NotImplementedError


@register_jitable
def _PyUnicode_ToLowercase(ch):
    """
    Placeholder with no implementation: always raises NotImplementedError.
    """
    raise NotImplementedError


# From: https://github.com/python/cpython/blob/201c8f79450628241574fba940e08107178dc3a5/Objects/unicodectype.c#L211-L225    # noqa: E501
@register_jitable
def _PyUnicode_ToLowerFull(ch, res):
    """
    Write the full lowercase mapping of `ch` into `res`, starting at index 0,
    and return the number of code points written.

    Without EXTENDED_CASE_MASK the mapping is the single value
    `ch + lower`. With it, the low 16 bits of `lower` give the start position
    in the extended case array and `lower >> 24` gives the number of entries.
    """
    record = _PyUnicode_gettyperecord(ch)
    extended = record.flags & _PyUnicode_TyperecordMasks.EXTENDED_CASE_MASK
    if extended == 0:
        res[0] = ch + record.lower
        return 1
    start = record.lower & 0xFFFF
    count = record.lower >> 24
    for offset in range(count):
        res[offset] = _PyUnicode_ExtendedCase(start + offset)
    return count


# From: https://github.com/python/cpython/blob/201c8f79450628241574fba940e08107178dc3a5/Objects/unicodectype.c#L227-L241    # noqa: E501
@register_jitable
def _PyUnicode_ToTitleFull(ch, res):
    """
    Write the full titlecase mapping of `ch` into `res`, starting at index 0,
    and return the number of code points written.

    Without EXTENDED_CASE_MASK the mapping is the single value
    `ch + title`. With it, the low 16 bits of `title` give the start position
    in the extended case array and `title >> 24` gives the number of entries.
    """
    record = _PyUnicode_gettyperecord(ch)
    extended = record.flags & _PyUnicode_TyperecordMasks.EXTENDED_CASE_MASK
    if extended == 0:
        res[0] = ch + record.title
        return 1
    start = record.title & 0xFFFF
    count = record.title >> 24
    for offset in range(count):
        res[offset] = _PyUnicode_ExtendedCase(start + offset)
    return count


# From: https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodectype.c#L243-L257    # noqa: E501
@register_jitable
def _PyUnicode_ToUpperFull(ch, res):
    """
    Write the full uppercase mapping of `ch` into `res`, starting at index 0,
    and return the number of code points written.

    Without EXTENDED_CASE_MASK the mapping is the single value
    `ch + upper`. With it, the low 16 bits of `upper` give the start position
    in the extended case array and `upper >> 24` gives the number of entries.
    """
    record = _PyUnicode_gettyperecord(ch)
    extended = record.flags & _PyUnicode_TyperecordMasks.EXTENDED_CASE_MASK
    if extended == 0:
        res[0] = ch + record.upper
        return 1
    start = record.upper & 0xFFFF
    count = record.upper >> 24
    for offset in range(count):
        # Perhaps needed to use unicode._set_code_point() here
        res[offset] = _PyUnicode_ExtendedCase(start + offset)
    return count


# From: https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodectype.c#L259-L272    # noqa: E501
@register_jitable
def _PyUnicode_ToFoldedFull(ch, res):
    """
    Write the full case-folded mapping of `ch` into `res`, starting at index
    0, and return the number of code points written.

    A dedicated folding is used only when EXTENDED_CASE_MASK is set and bits
    20-22 of `lower` (the folding length) are non-zero; its entries start in
    the extended case array at `(lower & 0xFFFF) + (lower >> 24)`. In every
    other case the result is that of `_PyUnicode_ToLowerFull`.
    """
    record = _PyUnicode_gettyperecord(ch)
    fold_count = (record.lower >> 20) & 7
    extended = record.flags & _PyUnicode_TyperecordMasks.EXTENDED_CASE_MASK
    if extended and fold_count:
        start = (record.lower & 0xFFFF) + (record.lower >> 24)
        for offset in range(fold_count):
            res[offset] = _PyUnicode_ExtendedCase(start + offset)
        return fold_count
    return _PyUnicode_ToLowerFull(ch, res)


# From: https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodectype.c#L274-L279    # noqa: E501
@register_jitable
def _PyUnicode_IsCased(ch):
    """
    Return True when CASED_MASK is set in the typerecord flags of `ch`.
    """
    flags = _PyUnicode_gettyperecord(ch).flags
    return (flags & _PyUnicode_TyperecordMasks.CASED_MASK) != 0


# From: https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodectype.c#L281-L286    # noqa: E501
@register_jitable
def _PyUnicode_IsCaseIgnorable(ch):
    """
    Return True when CASE_IGNORABLE_MASK is set in the typerecord flags of
    `ch`.
    """
    flags = _PyUnicode_gettyperecord(ch).flags
    return (flags & _PyUnicode_TyperecordMasks.CASE_IGNORABLE_MASK) != 0


# From: https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodectype.c#L123-L135    # noqa: E501
@register_jitable
def _PyUnicode_IsDigit(ch):
    """
    Return 1 when `_PyUnicode_ToDigit(ch)` yields a non-negative value and 0
    otherwise.
    """
    digit = _PyUnicode_ToDigit(ch)
    if digit >= 0:
        return 1
    return 0


# From: https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodectype.c#L106-L118    # noqa: E501
@register_jitable
def _PyUnicode_IsDecimalDigit(ch):
    """
    Return 1 when `_PyUnicode_ToDecimalDigit(ch)` yields a non-negative value
    and 0 otherwise.
    """
    decimal = _PyUnicode_ToDecimalDigit(ch)
    if decimal >= 0:
        return 1
    return 0


# From: https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodectype.c#L291-L296    # noqa: E501
@register_jitable
def _PyUnicode_IsSpace(ch):
    """
    Return True when SPACE_MASK is set in the typerecord flags of `ch`.
    """
    flags = _PyUnicode_gettyperecord(ch).flags
    return (flags & _PyUnicode_TyperecordMasks.SPACE_MASK) != 0


@register_jitable
def _PyUnicode_IsAlpha(ch):
    """
    Return True when ALPHA_MASK is set in the typerecord flags of `ch`.
    """
    flags = _PyUnicode_gettyperecord(ch).flags
    return (flags & _PyUnicode_TyperecordMasks.ALPHA_MASK) != 0


# End code related to/from CPython's unicodectype impl
# ------------------------------------------------------------------------------


# ------------------------------------------------------------------------------
# Start code related to/from CPython's pyctype

# From the definition in CPython's Include/pyctype.h
# From: https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Include/pyctype.h#L5-L11    # noqa: E501
class _PY_CTF(IntEnum):
    """
    Character class flags stored in `_Py_ctype_table`. ALPHA and ALNUM are
    combinations of the single-bit flags rather than bits of their own.
    """
    LOWER = 1 << 0
    UPPER = 1 << 1
    ALPHA = LOWER | UPPER
    DIGIT = 1 << 2
    ALNUM = ALPHA | DIGIT
    SPACE = 1 << 3
    XDIGIT = 1 << 4


# From the definition in CPython's Python/pyctype.c
# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Python/pyctype.c#L5    # noqa: E501
# 256 entry table of `_PY_CTF` flags indexed by byte value. Only the ASCII
# whitespace, digit and letter ranges carry flags; every other entry,
# including all of 0x80-0xff, is zero.
_Py_ctype_table = np.zeros(256, dtype=np.intc)
_Py_ctype_table[0x09:0x0e] = _PY_CTF.SPACE  # '\t' '\n' '\v' '\f' '\r'
_Py_ctype_table[0x20] = _PY_CTF.SPACE  # ' '
_Py_ctype_table[0x30:0x3a] = _PY_CTF.DIGIT | _PY_CTF.XDIGIT  # '0'-'9'
_Py_ctype_table[0x41:0x47] = _PY_CTF.UPPER | _PY_CTF.XDIGIT  # 'A'-'F'
_Py_ctype_table[0x47:0x5b] = _PY_CTF.UPPER  # 'G'-'Z'
_Py_ctype_table[0x61:0x67] = _PY_CTF.LOWER | _PY_CTF.XDIGIT  # 'a'-'f'
_Py_ctype_table[0x67:0x7b] = _PY_CTF.LOWER  # 'g'-'z'


# From the definition in CPython's Python/pyctype.c
# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Python/pyctype.c#L145    # noqa: E501
# Byte-to-byte lowercase mapping: the identity everywhere except 'A'-'Z'
# (0x41-0x5a), which map onto 'a'-'z' (0x61-0x7a).
_Py_ctype_tolower = np.arange(256, dtype=np.uint8)
_Py_ctype_tolower[0x41:0x5b] += 0x20


# From the definition in CPython's Python/pyctype.c
# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Python/pyctype.c#L180
# Byte-to-byte uppercase mapping: the identity everywhere except 'a'-'z'
# (0x61-0x7a), which map onto 'A'-'Z' (0x41-0x5a).
_Py_ctype_toupper = np.arange(256, dtype=np.uint8)
_Py_ctype_toupper[0x61:0x7b] -= 0x20


class _PY_CTF_LB(IntEnum):
    """
    Single-bit flags stored in `_Py_ctype_islinebreak`.
    """
    LINE_BREAK = 1 << 0
    LINE_FEED = 1 << 1
    CARRIAGE_RETURN = 1 << 2


# 256 entry table of `_PY_CTF_LB` flags indexed by byte value. Every entry not
# assigned below is zero.
_Py_ctype_islinebreak = np.zeros(256, dtype=np.intc)
_Py_ctype_islinebreak[0x0a] = (
    _PY_CTF_LB.LINE_BREAK | _PY_CTF_LB.LINE_FEED)  # '\n'
_Py_ctype_islinebreak[0x0b:0x0d] = _PY_CTF_LB.LINE_BREAK  # '\v' '\f'
_Py_ctype_islinebreak[0x0d] = (
    _PY_CTF_LB.LINE_BREAK | _PY_CTF_LB.CARRIAGE_RETURN)  # '\r'
_Py_ctype_islinebreak[0x1c:0x1f] = _PY_CTF_LB.LINE_BREAK  # 0x1c 0x1d 0x1e
_Py_ctype_islinebreak[0x85] = _PY_CTF_LB.LINE_BREAK  # '\x85'


# Translation of:
# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Include/pymacro.h#L25    # noqa: E501
@register_jitable
def _Py_CHARMASK(ch):
    """
    Equivalent to the CPython macro `Py_CHARMASK()`: converts `ch` to an
    unsigned 8-bit value and masks it with 0xff, i.e. keeps only the lowest
    8 bits (a value in the range 0-255).
    """
    low_byte_mask = types.uint8(0xff)
    return types.uint8(ch) & low_byte_mask


# Translation of:
# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Include/pyctype.h#L30    # noqa: E501
@register_jitable
def _Py_TOUPPER(ch):
    """
    Equivalent to the CPython macro `Py_TOUPPER()`: looks up the entry of
    `_Py_ctype_toupper` for the low byte of `ch`.
    """
    position = _Py_CHARMASK(ch)
    return _Py_ctype_toupper[position]


# Translation of:
# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Include/pyctype.h#L29    # noqa: E501
@register_jitable
def _Py_TOLOWER(ch):
    """
    Equivalent to the CPython macro `Py_TOLOWER()`: looks up the entry of
    `_Py_ctype_tolower` for the low byte of `ch`.
    """
    position = _Py_CHARMASK(ch)
    return _Py_ctype_tolower[position]


# Translation of:
# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Include/pyctype.h#L18    # noqa: E501
@register_jitable
def _Py_ISLOWER(ch):
    """
    Equivalent to the CPython macro `Py_ISLOWER()`. Returns the table entry
    for the low byte of `ch` masked with `_PY_CTF.LOWER`, so the result is
    non-zero when the flag is set and 0 otherwise.
    """
    entry = _Py_ctype_table[_Py_CHARMASK(ch)]
    return entry & _PY_CTF.LOWER


# Translation of:
# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Include/pyctype.h#L19    # noqa: E501
@register_jitable
def _Py_ISUPPER(ch):
    """
    Equivalent to the CPython macro `Py_ISUPPER()`. Returns the table entry
    for the low byte of `ch` masked with `_PY_CTF.UPPER`, so the result is
    non-zero when the flag is set and 0 otherwise.
    """
    entry = _Py_ctype_table[_Py_CHARMASK(ch)]
    return entry & _PY_CTF.UPPER


# Translation of:
# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Include/pyctype.h#L20    # noqa: E501
@register_jitable
def _Py_ISALPHA(ch):
    """
    Equivalent to the CPython macro `Py_ISALPHA()`. Returns the table entry
    for the low byte of `ch` masked with `_PY_CTF.ALPHA`, so the result is
    non-zero when any of those flag bits is set and 0 otherwise.
    """
    entry = _Py_ctype_table[_Py_CHARMASK(ch)]
    return entry & _PY_CTF.ALPHA


# Translation of:
# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Include/pyctype.h#L21    # noqa: E501
@register_jitable
def _Py_ISDIGIT(ch):
    """
    Equivalent to the CPython macro `Py_ISDIGIT()`. Returns the table entry
    for the low byte of `ch` masked with `_PY_CTF.DIGIT`, so the result is
    non-zero when the flag is set and 0 otherwise.
    """
    entry = _Py_ctype_table[_Py_CHARMASK(ch)]
    return entry & _PY_CTF.DIGIT


# Translation of:
# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Include/pyctype.h#L22    # noqa: E501
@register_jitable
def _Py_ISXDIGIT(ch):
    """
    Equivalent to the CPython macro `Py_ISXDIGIT()`. Returns the table entry
    for the low byte of `ch` masked with `_PY_CTF.XDIGIT`, so the result is
    non-zero when the flag is set and 0 otherwise.
    """
    entry = _Py_ctype_table[_Py_CHARMASK(ch)]
    return entry & _PY_CTF.XDIGIT


# Translation of:
# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Include/pyctype.h#L23    # noqa: E501
@register_jitable
def _Py_ISALNUM(ch):
    """
    Equivalent to the CPython macro `Py_ISALNUM()`. Returns the table entry
    for the low byte of `ch` masked with `_PY_CTF.ALNUM`, so the result is
    non-zero when any of those flag bits is set and 0 otherwise.
    """
    entry = _Py_ctype_table[_Py_CHARMASK(ch)]
    return entry & _PY_CTF.ALNUM


# Translation of:
# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Include/pyctype.h#L24    # noqa: E501
@register_jitable
def _Py_ISSPACE(ch):
    """
    Equivalent to the CPython macro `Py_ISSPACE()`. Returns the table entry
    for the low byte of `ch` masked with `_PY_CTF.SPACE`, so the result is
    non-zero when the flag is set and 0 otherwise.
    """
    entry = _Py_ctype_table[_Py_CHARMASK(ch)]
    return entry & _PY_CTF.SPACE


@register_jitable
def _Py_ISLINEBREAK(ch):
    """
    Check whether the low byte of `ch` is flagged as a line break. Returns
    its `_Py_ctype_islinebreak` entry masked with `_PY_CTF_LB.LINE_BREAK`.
    """
    entry = _Py_ctype_islinebreak[_Py_CHARMASK(ch)]
    return entry & _PY_CTF_LB.LINE_BREAK


@register_jitable
def _Py_ISLINEFEED(ch):
    """
    Check whether the low byte of `ch` is the line feed character (0x0a).
    Returns its `_Py_ctype_islinebreak` entry masked with
    `_PY_CTF_LB.LINE_FEED`.
    """
    entry = _Py_ctype_islinebreak[_Py_CHARMASK(ch)]
    return entry & _PY_CTF_LB.LINE_FEED


@register_jitable
def _Py_ISCARRIAGERETURN(ch):
    """
    Check whether the low byte of `ch` is the carriage return character
    (0x0d). Returns its `_Py_ctype_islinebreak` entry masked with
    `_PY_CTF_LB.CARRIAGE_RETURN`.
    """
    entry = _Py_ctype_islinebreak[_Py_CHARMASK(ch)]
    return entry & _PY_CTF_LB.CARRIAGE_RETURN


# End code related to/from CPython's pyctype
# ------------------------------------------------------------------------------
