Source code for reactpy.utils

from __future__ import annotations

import re
from collections.abc import Iterable
from itertools import chain
from typing import Any, Callable, Generic, TypeVar, cast

from lxml import etree
from lxml.html import fromstring, tostring

from reactpy.core.types import VdomDict
from reactpy.core.vdom import vdom

# Type variable standing for the kind of value a ``Ref`` holds
_RefValue = TypeVar("_RefValue")
# Shape of the transform callables accepted by ``html_to_vdom``: each one is
# called with a VDOM dict
_ModelTransform = Callable[[VdomDict], Any]
# Sentinel default for ``Ref.__init__``; it is compared by identity, so every
# other value (including ``None``) counts as an explicit initial value
_UNDEFINED: Any = object()


class Ref(Generic[_RefValue]):
    """A mutable container holding a single value in its ``current`` attribute

    Refs exist for imperative code that needs to mutate shared state in order
    to cause side effects. Prefer other approaches where you can, but reach for
    a ref when nothing else will do.

    Notes:
        Two ``Ref`` objects compare equal with ``==`` when their contents are
        equal. A ref that was never given a value is not equal to anything.
    """

    __slots__ = ("current",)

    def __init__(self, initial_value: _RefValue = _UNDEFINED) -> None:
        # Leave ``current`` unset when no value was supplied; reading it then
        # raises AttributeError, which ``__eq__`` and ``__repr__`` handle.
        if initial_value is _UNDEFINED:
            return
        #: The present value
        self.current = initial_value

    def set_current(self, new: _RefValue) -> _RefValue:
        """Store ``new`` as the current value and hand back the previous one

        Being a single expression-friendly call, this is convenient inside
        ``lambda`` functions.
        """
        # The right-hand side is evaluated first, so an uninitialized ref still
        # raises AttributeError before anything is assigned.
        previous, self.current = self.current, new
        return previous

    def __eq__(self, other: object) -> bool:
        if not isinstance(other, Ref):
            return False
        try:
            return other.current == self.current
        except AttributeError:
            # either ref has never been assigned a value
            return False

    def __repr__(self) -> str:
        class_name = type(self).__name__
        try:
            return f"{class_name}({self.current!r})"
        except AttributeError:
            # the ref has never been assigned a value
            return f"{class_name}(<undefined>)"
def vdom_to_html(vdom: VdomDict) -> str:
    """Render a VDOM dictionary as an HTML string

    Only these keys take part in the conversion:

    - ``tagName``
    - ``attributes``
    - ``children`` (must be strings or more VDOM dicts)

    Parameters:
        vdom: The VdomDict element to convert to HTML

    Returns:
        The serialized HTML markup for ``vdom``.
    """
    # The element tree is built beneath a throwaway wrapper element so that it
    # can be serialized in one call.
    wrapper_open, wrapper_close = "<__temp__>", "</__temp__>"
    wrapper = etree.Element("__temp__")
    _add_vdom_to_etree(wrapper, vdom)
    serialized = cast(bytes, tostring(wrapper)).decode()
    # Drop the wrapper's opening and closing tags from the result
    return serialized[len(wrapper_open) : -len(wrapper_close)]
def html_to_vdom(
    html: str, *transforms: _ModelTransform, strict: bool = True
) -> VdomDict:
    """Parse an HTML string and convert it into a VDOM model.

    A ``key=...`` attribute inside an HTML tag can be used to give that element
    a unique key.

    Parameters:
        html:
            The raw HTML as a string
        transforms:
            Functions of the form ``transform(old) -> new`` where ``old`` is a
            VDOM dictionary which will be replaced by ``new``. A transform
            could, for instance, add highlighting to a ``<code/>`` block.
        strict:
            If ``True``, raise an exception if the HTML does not perfectly
            follow HTML5 syntax. If ``False``, the parser runs in recovery mode.

    Raises:
        TypeError: If ``html`` is not a string.
        HTMLParseError: If ``strict`` is true and the parser reports a syntax error.
    """
    if not isinstance(html, str):  # nocov
        msg = f"Expected html to be a string, not {type(html).__name__}"
        raise TypeError(msg)

    # Comments, processing instructions and blank text are dropped while parsing
    parser = etree.HTMLParser(
        remove_comments=True,
        remove_pis=True,
        remove_blank_text=True,
        recover=not strict,
    )

    # Parse the markup into a single lxml root element
    try:
        root_node: etree._Element = fromstring(html.strip(), parser=parser)
    except etree.XMLSyntaxError as e:
        if strict:
            msg = (
                "An error has occurred while parsing the HTML.\n\n"
                "This HTML may be malformatted, or may not perfectly adhere to HTML5.\n"
                "If you believe the exception above was due to something intentional, "
                "you can disable the strict parameter on html_to_vdom().\n"
                "Otherwise, repair your broken HTML and try again."
            )
            raise HTMLParseError(msg) from e
        raise e  # nocov

    return _etree_to_vdom(root_node, transforms)
class HTMLParseError(etree.LxmlSyntaxError):  # type: ignore[misc]
    """Signals that strict parsing could not parse an HTML document."""
def _etree_to_vdom(
    node: etree._Element, transforms: Iterable[_ModelTransform]
) -> VdomDict:
    """Convert an lxml element, together with its whole subtree, into a VDOM dict

    Parameters:
        node:
            The ``lxml.etree._Element`` node
        transforms:
            Functions of the form ``transform(old) -> new`` where ``old`` is a
            VDOM dictionary which will be replaced by ``new``. They are applied
            in order to the dict built for this node, and (through recursion)
            to the dict built for every descendant.

    Raises:
        TypeError: If ``node`` is not an ``lxml.etree._Element``.
    """
    if not isinstance(node, etree._Element):  # nocov
        msg = f"Expected node to be a etree._Element, not {type(node).__name__}"
        raise TypeError(msg)

    # Descendants are converted first so this node's dict is created complete
    child_models = _generate_vdom_children(node, transforms)
    model = vdom(node.tag, dict(node.items()), *child_models)

    # Bring the attributes in line with the VDOM spec before user transforms run
    _mutate_vdom(model)

    for apply_transform in transforms:
        model = apply_transform(model)
    return model


def _add_vdom_to_etree(parent: etree._Element, vdom: VdomDict | dict[str, Any]) -> None:
    """Append the element tree described by ``vdom`` beneath ``parent``

    A falsy ``tagName`` creates no element of its own; its children are
    attached directly to ``parent``.

    Raises:
        TypeError: If ``vdom`` has no ``tagName`` key.
    """
    try:
        tag = vdom["tagName"]
    except KeyError as e:
        msg = f"Expected a VDOM dict, not {vdom}"
        raise TypeError(msg) from e
    vdom = cast(VdomDict, vdom)

    if not tag:
        element = parent
    else:
        element = etree.SubElement(parent, tag)
        for attr_name, attr_value in vdom.get("attributes", {}).items():
            html_name, html_value = _vdom_attr_to_html_str(attr_name, attr_value)
            element.set(html_name, html_value)

    for child in vdom.get("children", []):
        if isinstance(child, dict):
            _add_vdom_to_etree(element, child)
            continue

        # LXML handles string children by storing them under `text` and `tail`
        # attributes of Element objects. The `text` attribute, if present,
        # effectively becomes that element's first child. Then the `tail`
        # attribute, if present, becomes a sibling that follows that element.
        # For example, consider the following HTML:
        #
        #     <p><a>hello</a>world</p>
        #
        # Here "hello" is the `text` attribute of the `<a>` element and "world"
        # is the `tail` attribute of that same `<a>` element.
        #
        # So a string goes into `text` while the element under construction
        # has no child elements yet, and is otherwise appended to the `tail`
        # of the most recently added child element.
        if len(element) == 0:
            element.text = f"{element.text or ''}{child}"
        else:
            preceding = element[-1]
            preceding.tail = f"{preceding.tail or ''}{child}"


def _mutate_vdom(vdom: VdomDict) -> None:
    """Adjust the VDOM attributes in place so that they meet the VDOM spec.

    Currently the only adjustment is to the ``style`` attribute: when it is a
    string of ``name: value`` declarations it is replaced by a dictionary whose
    keys are the property names with ``-`` swapped for ``_``.

    This function may be extended in the future.
    """
    if "attributes" not in vdom:
        return
    attributes = vdom["attributes"]
    if "style" not in attributes:
        return
    inline_style = attributes["style"]
    if not isinstance(inline_style, str):
        # Anything that is not a string is left untouched
        return

    # Convince type checker that it's safe to mutate attributes
    assert isinstance(attributes, dict)  # noqa: S101

    declarations = {}
    for part in inline_style.split(";"):
        # Pieces without a colon (e.g. what follows a trailing ";") are skipped
        if ":" not in part:
            continue
        prop, prop_value = part.split(":", 1)
        declarations[prop.strip().replace("-", "_")] = prop_value.strip()
    attributes["style"] = declarations


def _generate_vdom_children(
    node: etree._Element, transforms: Iterable[_ModelTransform]
) -> list[VdomDict | str]:
    """Build the list of VDOM children for an lxml node.

    The node's own leading text (if any) comes first, then each child element
    converted to VDOM, each followed by its tail text when it has some.
    """
    children: list[VdomDict | str] = []
    if node.text:
        children.append(node.text)
    for child in node.iterchildren(None):
        children.append(_etree_to_vdom(child, transforms))
        if child.tail:
            children.append(child.tail)
    return children
def del_html_head_body_transform(vdom: VdomDict) -> VdomDict:
    """Transform intended for use with `html_to_vdom`.

    Removes `<html>`, `<head>`, and `<body>` while preserving their children.

    Parameters:
        vdom:
            The VDOM dictionary to transform.

    Returns:
        For an ``html``, ``head`` or ``body`` element, a new dict with an empty
        ``tagName`` that carries the element's children (an empty list when it
        has none). Any other element is returned unchanged.
    """
    if vdom["tagName"] in {"html", "body", "head"}:
        # ``children`` is optional in a VDOM dict, so an element without any
        # must not raise KeyError here.
        return {"tagName": "", "children": vdom.get("children", [])}
    return vdom
def _vdom_attr_to_html_str(key: str, value: Any) -> tuple[str, str]: if key == "style": if isinstance(value, dict): value = ";".join( # We lower only to normalize - CSS is case-insensitive: # https://www.w3.org/TR/css-fonts-3/#font-family-casing f"{_CAMEL_CASE_SUB_PATTERN.sub('-', k).lower()}:{v}" for k, v in value.items() ) elif ( # camel to data-* attributes key.startswith("data_") # camel to aria-* attributes or key.startswith("aria_") # handle special cases or key in DASHED_HTML_ATTRS ): key = key.replace("_", "-") elif ( # camel to data-* attributes key.startswith("data") # camel to aria-* attributes or key.startswith("aria") # handle special cases or key in DASHED_HTML_ATTRS ): key = _CAMEL_CASE_SUB_PATTERN.sub("-", key) if callable(value): # nocov raise TypeError(f"Cannot convert callable attribute {key}={value} to HTML") # Again, we lower the attribute name only to normalize - HTML is case-insensitive: # http://w3c.github.io/html-reference/documents.html#case-insensitivity return key.lower(), str(value) # see list of HTML attributes with dashes in them: # https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes#attribute_list DASHED_HTML_ATTRS = {"accept_charset", "acceptCharset", "http_equiv", "httpEquiv"} # Pattern for delimitting camelCase names (e.g. camelCase to camel-case) _CAMEL_CASE_SUB_PATTERN = re.compile(r"(?<!^)(?=[A-Z])")