跳转至

RNA

multimolecule.utils.rna.contact_map_to_dot_bracket

Python
contact_map_to_dot_bracket(
    contact_map: Tensor | ndarray,
    *,
    unsafe: bool = False,
    threshold: float | None = None,
    matching: Matching = "greedy"
) -> str

Convert a contact map (NumPy array, Torch tensor, or nested list) to a dot-bracket notation string.

Examples:

Torch input

Python Console Session
1
2
3
4
5
6
7
>>> import torch
>>> contact_map_tensor = torch.tensor([[0, 0, 0, 1],
...                                    [0, 0, 1, 0],
...                                    [0, 1, 0, 0],
...                                    [1, 0, 0, 0]])
>>> contact_map_to_dot_bracket(contact_map_tensor)
'(())'

NumPy input

Python Console Session
1
2
3
4
5
6
7
>>> import numpy as np
>>> contact_map = np.array([[0, 0, 0, 1],
...                          [0, 0, 1, 0],
...                          [0, 1, 0, 0],
...                          [1, 0, 0, 0]])
>>> contact_map_to_dot_bracket(contact_map)
'(())'

List input

Python Console Session
>>> contact_map_to_dot_bracket([[0, 1], [1, 0]])
'()'
Source code in multimolecule/utils/rna/secondary_structure/notations.py
Python
def contact_map_to_dot_bracket(
    contact_map: Tensor | np.ndarray | Sequence,
    *,
    unsafe: bool = False,
    threshold: float | None = None,
    matching: Matching = "greedy",
) -> str:
    """
    Convert a contact map (NumPy array, Torch tensor, or nested sequence) to a dot-bracket notation string.

    The contact map is first decoded into base pairs with ``contact_map_to_pairs`` and the
    resulting pairs are rendered with ``pairs_to_dot_bracket``. The length of the output
    string is ``len(contact_map)``.

    Args:
        contact_map: Contact map to decode; accepted input kinds are those of ``contact_map_to_pairs``.
        unsafe: Forwarded to both ``contact_map_to_pairs`` and ``pairs_to_dot_bracket``.
        threshold: Forwarded to ``contact_map_to_pairs``.
        matching: Forwarded to ``contact_map_to_pairs``.

    Returns:
        The dot-bracket string describing the decoded pairs.

    Examples:
        Torch input
        >>> import torch
        >>> contact_map_tensor = torch.tensor([[0, 0, 0, 1],
        ...                                    [0, 0, 1, 0],
        ...                                    [0, 1, 0, 0],
        ...                                    [1, 0, 0, 0]])
        >>> contact_map_to_dot_bracket(contact_map_tensor)
        '(())'

        NumPy input
        >>> import numpy as np
        >>> contact_map = np.array([[0, 0, 0, 1],
        ...                          [0, 0, 1, 0],
        ...                          [0, 1, 0, 0],
        ...                          [1, 0, 0, 0]])
        >>> contact_map_to_dot_bracket(contact_map)
        '(())'

        List input
        >>> contact_map_to_dot_bracket([[0, 1], [1, 0]])
        '()'
    """
    # Decode first so that invalid inputs are rejected before ``len`` is taken.
    pairs = contact_map_to_pairs(contact_map, unsafe=unsafe, threshold=threshold, matching=matching)
    return pairs_to_dot_bracket(pairs, length=len(contact_map), unsafe=unsafe)

multimolecule.utils.rna.contact_map_to_pairs

Python
contact_map_to_pairs(
    contact_map: Tensor,
    *,
    unsafe: bool = False,
    threshold: float | None = None,
    matching: Matching = "greedy"
) -> Tensor
Python
contact_map_to_pairs(
    contact_map: ndarray,
    *,
    unsafe: bool = False,
    threshold: float | None = None,
    matching: Matching = "greedy"
) -> ndarray
Python
contact_map_to_pairs(
    contact_map: Sequence,
    *,
    unsafe: bool = False,
    threshold: float | None = None,
    matching: Matching = "greedy"
) -> PairsList
Python
contact_map_to_pairs(
    contact_map: Tensor | ndarray | Sequence,
    *,
    unsafe: bool = False,
    threshold: float | None = None,
    matching: Matching = "greedy"
) -> Tensor | ndarray | PairsList

Convert a contact map to a list of base pairs.

If contact_map is a torch tensor, returns a (K, 2) torch.LongTensor. Otherwise, returns a numpy (K, 2) int array (list inputs return a list of tuples).

For integer/bool contact maps, any non-zero entry is treated as a contact and the map is expected to represent a binary (symmetric) adjacency matrix.

For floating-point contact maps, values are interpreted as pairing probabilities in [0, 1] (or logits/scores in unsafe mode), and conflicting pairs are decoded using either greedy NMS-style matching or exact blossom matching.

Examples:

Torch input

Python Console Session
1
2
3
4
5
6
7
>>> import torch
>>> contact_map_tensor = torch.tensor([[0, 0, 0, 1],
...                                   [0, 0, 1, 0],
...                                   [0, 1, 0, 0],
...                                   [1, 0, 0, 0]])
>>> contact_map_to_pairs(contact_map_tensor).tolist()
[[0, 3], [1, 2]]

NumPy input

Python Console Session
1
2
3
4
5
6
7
8
9
>>> import numpy as np
>>> contact_map_array = np.array([[0, 0, 0, 1],
...                               [0, 0, 1, 0],
...                               [0, 1, 0, 0],
...                               [1, 0, 0, 0]])
>>> contact_map_to_pairs(contact_map_array).tolist()
[[0, 3], [1, 2]]
>>> contact_map_to_pairs(np.array([[0.0, 0.8], [0.8, 0.0]]), threshold=0.5).tolist()
[[0, 1]]

List input

Python Console Session
>>> contact_map_to_pairs([[0, 1], [1, 0]])
[(0, 1)]
Source code in multimolecule/utils/rna/secondary_structure/notations.py
Python
def contact_map_to_pairs(
    contact_map: Tensor | np.ndarray | Sequence,
    *,
    unsafe: bool = False,
    threshold: float | None = None,
    matching: Matching = "greedy",
) -> Tensor | np.ndarray | PairsList:
    """
    Decode a square contact map into base pairs.

    The return backend follows the input: a torch tensor input is handled by the torch
    helpers, a numpy array input by the numpy helpers, and any other sequence is converted
    with ``np.asarray`` and returned as a list of tuples.

    Floating-point maps are dispatched to the ``*_float`` helpers together with ``threshold``
    and ``matching``; all other dtypes go to the ``*_binary`` helpers, which do not receive
    ``threshold``.

    Args:
        contact_map: Square 2D contact map.
        unsafe: Forwarded to the backend helper.
        threshold: Cut-off forwarded to the floating-point helpers; ``None`` means ``0.5``.
        matching: Matching strategy, validated with ``_validate_matching`` before use.

    Returns:
        The decoded pairs in the backend matching the input.

    Raises:
        TypeError: If ``threshold`` is a bool or not a real number, or if ``contact_map`` is
            not a torch.Tensor, numpy.ndarray, or sequence.
        ValueError: If ``contact_map`` is not a square 2D matrix.

    Examples:
        Torch input
        >>> import torch
        >>> contact_map_tensor = torch.tensor([[0, 0, 0, 1],
        ...                                   [0, 0, 1, 0],
        ...                                   [0, 1, 0, 0],
        ...                                   [1, 0, 0, 0]])
        >>> contact_map_to_pairs(contact_map_tensor).tolist()
        [[0, 3], [1, 2]]

        NumPy input
        >>> import numpy as np
        >>> contact_map_array = np.array([[0, 0, 0, 1],
        ...                               [0, 0, 1, 0],
        ...                               [0, 1, 0, 0],
        ...                               [1, 0, 0, 0]])
        >>> contact_map_to_pairs(contact_map_array).tolist()
        [[0, 3], [1, 2]]
        >>> contact_map_to_pairs(np.array([[0.0, 0.8], [0.8, 0.0]]), threshold=0.5).tolist()
        [[0, 1]]

        List input
        >>> contact_map_to_pairs([[0, 1], [1, 0]])
        [(0, 1)]
    """
    cutoff = 0.5 if threshold is None else threshold
    # bool is a subclass of int (and therefore Real), so it is rejected explicitly.
    if isinstance(cutoff, bool) or not isinstance(cutoff, Real):
        raise TypeError("threshold must be a real number")
    cutoff = float(cutoff)
    matching = _validate_matching(matching)

    is_tensor = isinstance(contact_map, Tensor)
    from_sequence = False
    if not is_tensor and not isinstance(contact_map, np.ndarray):
        if not isinstance(contact_map, Sequence):
            raise TypeError("contact_map must be a torch.Tensor, numpy.ndarray, or sequence")
        contact_map = np.asarray(contact_map)
        from_sequence = True

    # The ndim test short-circuits, so the shape is only indexed for 2D inputs.
    if contact_map.ndim != 2 or contact_map.shape[0] != contact_map.shape[1]:
        raise ValueError("Contact map must be a square 2D matrix.")

    if is_tensor:
        if contact_map.is_floating_point():
            return _torch_contact_map_to_pairs_float(contact_map, unsafe=unsafe, threshold=cutoff, matching=matching)
        return _torch_contact_map_to_pairs_binary(contact_map, unsafe=unsafe, matching=matching)

    if np.issubdtype(contact_map.dtype, np.floating):
        decoded = _numpy_contact_map_to_pairs_float(contact_map, unsafe=unsafe, threshold=cutoff, matching=matching)
    else:
        decoded = _numpy_contact_map_to_pairs_binary(contact_map, unsafe=unsafe, matching=matching)
    if from_sequence:
        return [tuple(pair) for pair in decoded.tolist()]
    return decoded

multimolecule.utils.rna.crossing_mask

Python
crossing_mask(
    pairs: Tensor | ndarray | Pairs,
) -> Tensor | ndarray | List[bool]

Return a boolean mask for pairs that cross any other pair.

Parameters:

Name Type Description Default

pairs

Tensor | ndarray | Pairs

torch.Tensor, numpy.ndarray, or array-like with shape (n, 2) and 0-based indices.

required

Returns:

Type Description
Tensor | ndarray | List[bool]

Boolean mask for the input pairs using the same backend as input.

Raises:

Type Description
ValueError

If pairs has invalid shape for the selected backend.

TypeError

If pairs is not a torch.Tensor, numpy.ndarray, or array-like with shape (n, 2).

Examples:

Torch input

Python Console Session
1
2
3
>>> import torch
>>> crossing_mask(torch.tensor([[0, 2], [1, 3]])).tolist()
[True, True]

NumPy input

Python Console Session
1
2
3
>>> import numpy as np
>>> crossing_mask(np.array([[0, 2], [1, 3]])).tolist()
[True, True]

List input

Python Console Session
1
2
3
4
>>> crossing_mask([(0, 2), (1, 3)])
[True, True]
>>> crossing_mask([(0, 3), (1, 2)])
[False, False]
Source code in multimolecule/utils/rna/secondary_structure/pseudoknot.py
Python
def crossing_mask(pairs: Tensor | np.ndarray | Pairs) -> Tensor | np.ndarray | List[bool]:
    """
    Compute, for every pair, whether it crosses at least one other pair.

    Args:
        pairs: torch.Tensor, numpy.ndarray, or array-like with shape (n, 2) and 0-based indices.

    Returns:
        Boolean mask for the input pairs using the same backend as input
        (sequence inputs return a Python list; an empty sequence returns ``[]``).

    Raises:
        ValueError: If pairs has invalid shape for the selected backend.
        TypeError: If pairs is not a torch.Tensor, numpy.ndarray, or array-like with shape (n, 2).

    Examples:
        Torch input
        >>> import torch
        >>> crossing_mask(torch.tensor([[0, 2], [1, 3]])).tolist()
        [True, True]

        NumPy input
        >>> import numpy as np
        >>> crossing_mask(np.array([[0, 2], [1, 3]])).tolist()
        [True, True]

        List input
        >>> crossing_mask([(0, 2), (1, 3)])
        [True, True]
        >>> crossing_mask([(0, 3), (1, 2)])
        [False, False]
    """
    if isinstance(pairs, Tensor):
        if pairs.ndim == 2 and pairs.shape[1] == 2:
            return _torch_crossing_mask(pairs)
        raise ValueError("pairs must be a torch.Tensor with shape (n, 2)")

    if isinstance(pairs, np.ndarray):
        if pairs.ndim == 2 and pairs.shape[1] == 2:
            return _numpy_crossing_mask(pairs)
        raise ValueError("pairs must be a numpy.ndarray with shape (n, 2)")

    if not isinstance(pairs, Sequence):
        raise TypeError("pairs must be a torch.Tensor, numpy.ndarray, or sequence of (i, j) pairs")

    # An empty sequence has no pairs to compare, so skip the array conversion entirely.
    if not pairs:
        return []
    pair_array = np.asarray(pairs, dtype=int)
    if pair_array.ndim == 2 and pair_array.shape[1] == 2:
        return _numpy_crossing_mask(pair_array).tolist()
    raise ValueError("pairs must be an array-like with shape (n, 2)")

multimolecule.utils.rna.crossing_pairs

Python
crossing_pairs(pairs: ndarray) -> ndarray
Python
crossing_pairs(pairs: Pairs) -> PairsList
Python
crossing_pairs(pairs: Tensor) -> Tensor
Python
crossing_pairs(
    pairs: Tensor | ndarray | Pairs,
) -> Tensor | ndarray | PairsList

Return pairs from segments that cross any other segment (no-heuristic PK).

Pairs are expected to be normalized (unique, sorted with i < j). Use normalize_pairs if you need to normalize raw inputs.

Parameters:

Name Type Description Default

pairs

Tensor | ndarray | Pairs

torch.Tensor, numpy.ndarray, or array-like with shape (n, 2) and 0-based indices.

required

Returns:

Type Description
Tensor | ndarray | PairsList

Crossing pairs using the same backend as input.

Raises:

Type Description
ValueError

If pairs has invalid shape for the selected backend.

TypeError

If pairs is not a torch.Tensor, numpy.ndarray, or array-like with shape (n, 2).

Examples:

Torch input

Python Console Session
1
2
3
>>> import torch
>>> crossing_pairs(torch.tensor([[0, 2], [1, 3]])).tolist()
[[0, 2], [1, 3]]

NumPy input

Python Console Session
1
2
3
>>> import numpy as np
>>> crossing_pairs(np.array([[0, 2], [1, 3]])).tolist()
[[0, 2], [1, 3]]

List input

Python Console Session
>>> crossing_pairs([(0, 2), (1, 3)])
[(0, 2), (1, 3)]
Source code in multimolecule/utils/rna/secondary_structure/pseudoknot.py
Python
def crossing_pairs(pairs: Tensor | np.ndarray | Pairs) -> Tensor | np.ndarray | PairsList:
    """
    Return pairs from segments that cross any other segment (no-heuristic PK).

    Inputs are expected to already be normalized (unique, sorted with i < j);
    run ``normalize_pairs`` first when working with raw inputs.

    Args:
        pairs: torch.Tensor, numpy.ndarray, or array-like with shape (n, 2) and 0-based indices.

    Returns:
        Crossing pairs using the same backend as input
        (sequence inputs return a list of tuples; an empty sequence returns ``[]``).

    Raises:
        ValueError: If pairs has invalid shape for the selected backend.
        TypeError: If pairs is not a torch.Tensor, numpy.ndarray, or array-like with shape (n, 2).

    Examples:
        Torch input
        >>> import torch
        >>> crossing_pairs(torch.tensor([[0, 2], [1, 3]])).tolist()
        [[0, 2], [1, 3]]

        NumPy input
        >>> import numpy as np
        >>> crossing_pairs(np.array([[0, 2], [1, 3]])).tolist()
        [[0, 2], [1, 3]]

        List input
        >>> crossing_pairs([(0, 2), (1, 3)])
        [(0, 2), (1, 3)]
    """
    if isinstance(pairs, Tensor):
        if pairs.ndim == 2 and pairs.shape[1] == 2:
            return _torch_crossing_pairs(pairs)
        raise ValueError("pairs must be a torch.Tensor with shape (n, 2)")

    if isinstance(pairs, np.ndarray):
        if pairs.ndim == 2 and pairs.shape[1] == 2:
            return _numpy_crossing_pairs(pairs)
        raise ValueError("pairs must be a numpy.ndarray with shape (n, 2)")

    if not isinstance(pairs, Sequence):
        raise TypeError("pairs must be a torch.Tensor, numpy.ndarray, or sequence of (i, j) pairs")

    # Nothing can cross in an empty sequence; return before converting to an array.
    if not pairs:
        return []
    pair_array = np.asarray(pairs, dtype=int)
    if pair_array.ndim == 2 and pair_array.shape[1] == 2:
        return [tuple(row) for row in _numpy_crossing_pairs(pair_array).tolist()]
    raise ValueError("pairs must be an array-like with shape (n, 2)")

multimolecule.utils.rna.dot_bracket_to_contact_map

Python
dot_bracket_to_contact_map(dot_bracket: str) -> ndarray

Convert a dot-bracket notation string to a numpy contact map.

Examples:

Python Console Session
>>> dot_bracket_to_contact_map('(())').astype(int).tolist()
[[0, 0, 0, 1], [0, 0, 1, 0], [0, 1, 0, 0], [1, 0, 0, 0]]
Source code in multimolecule/utils/rna/secondary_structure/notations.py
Python
def dot_bracket_to_contact_map(dot_bracket: str) -> np.ndarray:
    """
    Build a numpy contact map from a dot-bracket notation string.

    The string is parsed with ``dot_bracket_to_pairs`` and the pairs are expanded with
    ``pairs_to_contact_map`` using ``len(dot_bracket)`` as the map length.

    Examples:
        >>> dot_bracket_to_contact_map('(())').astype(int).tolist()
        [[0, 0, 0, 1], [0, 0, 1, 0], [0, 1, 0, 0], [1, 0, 0, 0]]
    """
    parsed_pairs = dot_bracket_to_pairs(dot_bracket)
    return pairs_to_contact_map(parsed_pairs, length=len(dot_bracket))

multimolecule.utils.rna.dot_bracket_to_pairs

Python
dot_bracket_to_pairs(dot_bracket: str) -> ndarray

Convert a dot-bracket notation string to a list of base-pair indices.

Parameters:

Name Type Description Default

dot_bracket

str

Dot-bracket notation. Supports pseudoknots via multiple bracket types, including (), [], {}, <>, and A-Z/a-z. Unpaired tokens (., +, _, ,) are treated as unpaired positions.

required

Returns:

Type Description
ndarray

A numpy array of shape (n, 2) with pairs (i, j) where 0 <= i < j < len(dot_bracket).

Raises:

Type Description
ValueError

On unmatched or invalid symbols.

Examples:

Python Console Session
1
2
3
4
5
6
>>> dot_bracket_to_pairs("((.))").tolist()
[[0, 4], [1, 3]]
>>> dot_bracket_to_pairs("([)]").tolist()
[[0, 2], [1, 3]]
>>> dot_bracket_to_pairs("...").tolist()
[]
Source code in multimolecule/utils/rna/secondary_structure/notations.py
Python
def dot_bracket_to_pairs(dot_bracket: str) -> np.ndarray:
    """
    Parse a dot-bracket notation string into base-pair indices.

    Each closing symbol is matched with the most recently opened, still unmatched
    symbol of its own bracket type, so different bracket types may interleave.

    Args:
        dot_bracket: Dot-bracket notation. Supports pseudoknots via multiple
            bracket types, including (), [], {}, <>, and A-Z/a-z. Unpaired
            tokens (`.`, `+`, `_`, `,`) are treated as unpaired positions.

    Returns:
        A numpy array of shape (n, 2) with pairs ``(i, j)`` where ``0 <= i < j < len(dot_bracket)``,
        sorted in ascending order. An input without pairs yields an empty ``(0, 2)`` array.

    Raises:
        ValueError: On unmatched or invalid symbols.

    Examples:
        >>> dot_bracket_to_pairs("((.))").tolist()
        [[0, 4], [1, 3]]
        >>> dot_bracket_to_pairs("([)]").tolist()
        [[0, 2], [1, 3]]
        >>> dot_bracket_to_pairs("...").tolist()
        []
    """
    # One stack of open positions per opening symbol.
    open_positions: defaultdict[str, List[int]] = defaultdict(list)
    matched: PairsList = []
    for position, token in enumerate(dot_bracket):
        if token in _DOT_BRACKET_PAIR_TABLE:
            open_positions[token].append(position)
            continue
        if token in _REVERSE_DOT_BRACKET_PAIR_TABLE:
            partner = _REVERSE_DOT_BRACKET_PAIR_TABLE[token]
            try:
                start = open_positions[partner].pop()
            except IndexError:
                raise ValueError(f"Unmatched symbol {token} at position {position} in sequence {dot_bracket}") from None
            matched.append((start, position))
            continue
        if token not in _UNPAIRED_TOKENS:
            raise ValueError(f"Invalid symbol {token} at position {position} in sequence {dot_bracket}")

    # Any position still on a stack is an opener that never got closed.
    for opener, leftover in open_positions.items():
        if leftover:
            raise ValueError(f"Unmatched symbol {opener} at position {leftover[0]} in sequence {dot_bracket}")

    if matched:
        return np.asarray(sorted(matched), dtype=int)
    return np.empty((0, 2), dtype=int)

multimolecule.utils.rna.has_pseudoknot

Python
has_pseudoknot(pairs: Tensor | ndarray | Pairs) -> bool

Return True if any pseudoknot pairs are present under segment-MWIS split.

Pair inputs are expected to be normalized.

Parameters:

Name Type Description Default

pairs

Tensor | ndarray | Pairs

torch.Tensor, numpy.ndarray, or array-like with shape (n, 2) and 0-based indices.

required

Returns:

Type Description
bool

True if pseudoknot pairs exist, otherwise False.

Raises:

Type Description
ValueError

If pairs has invalid shape for the selected backend.

TypeError

If pairs is not a torch.Tensor, numpy.ndarray, or array-like with shape (n, 2).

Examples:

Torch input

Python Console Session
1
2
3
>>> import torch
>>> has_pseudoknot(torch.tensor([[0, 2], [1, 3]]))
True

NumPy input

Python Console Session
1
2
3
>>> import numpy as np
>>> has_pseudoknot(np.array([[0, 2], [1, 3]]))
True

List input

Python Console Session
1
2
3
4
>>> has_pseudoknot([(0, 2), (1, 3)])
True
>>> has_pseudoknot([(0, 3), (1, 2)])
False
Source code in multimolecule/utils/rna/secondary_structure/pseudoknot.py
Python
def has_pseudoknot(pairs: Tensor | np.ndarray | Pairs) -> bool:
    """
    Report whether the segment-MWIS split of ``pairs`` yields any pseudoknot pairs.

    Pair inputs are expected to be normalized.

    Args:
        pairs: torch.Tensor, numpy.ndarray, or array-like with shape (n, 2) and 0-based indices.

    Returns:
        True if pseudoknot pairs exist, otherwise False.

    Raises:
        ValueError: If pairs has invalid shape for the selected backend.
        TypeError: If pairs is not a torch.Tensor, numpy.ndarray, or array-like with shape (n, 2).

    Examples:
        Torch input
        >>> import torch
        >>> has_pseudoknot(torch.tensor([[0, 2], [1, 3]]))
        True

        NumPy input
        >>> import numpy as np
        >>> has_pseudoknot(np.array([[0, 2], [1, 3]]))
        True

        List input
        >>> has_pseudoknot([(0, 2), (1, 3)])
        True
        >>> has_pseudoknot([(0, 3), (1, 2)])
        False
    """
    # Only the second element of the split is needed; emptiness is tested per backend.
    _primary, knotted = split_pseudoknot_pairs(pairs)
    if isinstance(knotted, Tensor):
        return bool(knotted.numel())
    if isinstance(knotted, np.ndarray):
        return bool(knotted.size)
    if not isinstance(knotted, Sequence):
        raise TypeError("pairs must be a torch.Tensor, numpy.ndarray, or sequence of (i, j) pairs")
    return bool(knotted)

multimolecule.utils.rna.pairs_to_contact_map

Python
pairs_to_contact_map(
    pairs: Tensor,
    length: int | None = None,
    unsafe: bool = False,
) -> Tensor
Python
pairs_to_contact_map(
    pairs: ndarray,
    length: int | None = None,
    unsafe: bool = False,
) -> ndarray
Python
pairs_to_contact_map(
    pairs: PairsList,
    length: int | None = None,
    unsafe: bool = False,
) -> List[List[bool]]
Python
pairs_to_contact_map(
    pairs: Tensor | ndarray | Pairs,
    length: int | None = None,
    unsafe: bool = False,
) -> Tensor | ndarray | List[List[bool]]

Convert base pairs to a symmetric contact map.

If pairs is a torch tensor, returns a boolean torch.Tensor on the same device. If pairs is a numpy array, returns a numpy boolean array; list inputs return a nested list of booleans. If length is None, it is inferred as max(pairs) + 1.

Examples:

Torch input

Python Console Session
1
2
3
4
>>> import torch
>>> contact_map_tensor = pairs_to_contact_map(torch.tensor([[0, 3], [1, 2]]), length=4)
>>> contact_map_tensor.to(torch.int).tolist()
[[0, 0, 0, 1], [0, 0, 1, 0], [0, 1, 0, 0], [1, 0, 0, 0]]

NumPy input

Python Console Session
1
2
3
4
5
6
>>> import numpy as np
>>> contact_map = pairs_to_contact_map(np.array([(0, 3), (1, 2)]), length=4)
>>> contact_map.astype(int).tolist()
[[0, 0, 0, 1], [0, 0, 1, 0], [0, 1, 0, 0], [1, 0, 0, 0]]
>>> pairs_to_contact_map(np.array([(0, 2)])).astype(int).tolist()
[[0, 0, 1], [0, 0, 0], [1, 0, 0]]

List input

Python Console Session
>>> pairs_to_contact_map([(0, 2)])
[[False, False, True], [False, False, False], [True, False, False]]
Source code in multimolecule/utils/rna/secondary_structure/notations.py
Python
def pairs_to_contact_map(
    pairs: Tensor | np.ndarray | Pairs, length: int | None = None, unsafe: bool = False
) -> Tensor | np.ndarray | List[List[bool]]:
    """
    Convert base pairs to a symmetric contact map.

    If ``pairs`` is a torch tensor, returns a boolean torch.Tensor on the same device.
    If ``pairs`` is a numpy array, returns a numpy boolean array.
    Any other sequence is converted with ``np.asarray`` and the result is returned as a
    nested list of booleans, including when the sequence is empty.
    If ``length`` is None, it is inferred as ``max(pairs) + 1``.

    Args:
        pairs: torch.Tensor, numpy.ndarray, or array-like with shape (n, 2). Empty inputs
            of any shape are accepted and treated as shape ``(0, 2)``.
        length: Side length of the contact map, forwarded to the backend helper.
        unsafe: Forwarded to the backend helper.

    Returns:
        The contact map in the backend matching the input.

    Raises:
        ValueError: If a non-empty ``pairs`` does not have shape (n, 2).
        TypeError: If ``pairs`` is not a torch.Tensor, numpy.ndarray, or sequence.

    Examples:
        Torch input
        >>> import torch
        >>> contact_map_tensor = pairs_to_contact_map(torch.tensor([[0, 3], [1, 2]]), length=4)
        >>> contact_map_tensor.to(torch.int).tolist()
        [[0, 0, 0, 1], [0, 0, 1, 0], [0, 1, 0, 0], [1, 0, 0, 0]]

        NumPy input
        >>> import numpy as np
        >>> contact_map = pairs_to_contact_map(np.array([(0, 3), (1, 2)]), length=4)
        >>> contact_map.astype(int).tolist()
        [[0, 0, 0, 1], [0, 0, 1, 0], [0, 1, 0, 0], [1, 0, 0, 0]]
        >>> pairs_to_contact_map(np.array([(0, 2)])).astype(int).tolist()
        [[0, 0, 1], [0, 0, 0], [1, 0, 0]]

        List input
        >>> pairs_to_contact_map([(0, 2)])
        [[False, False, True], [False, False, False], [True, False, False]]
    """
    if isinstance(pairs, Tensor):
        if pairs.numel() == 0:
            # Empty tensors may arrive with any shape; give the helper a canonical (0, 2) view.
            return _torch_pairs_to_contact_map(pairs.view(0, 2), length, unsafe)
        if pairs.ndim != 2 or pairs.shape[1] != 2:
            raise ValueError("pairs must be a torch.Tensor with shape (n, 2)")
        return _torch_pairs_to_contact_map(pairs, length, unsafe)
    if isinstance(pairs, np.ndarray):
        if pairs.size == 0:
            return _numpy_pairs_to_contact_map(pairs.reshape(0, 2), length, unsafe)
        if pairs.ndim != 2 or pairs.shape[1] != 2:
            raise ValueError("pairs must be a numpy.ndarray with shape (n, 2)")
        return _numpy_pairs_to_contact_map(pairs, length, unsafe)
    if isinstance(pairs, Sequence):
        pairs = np.asarray(pairs, dtype=int)
        if pairs.size == 0:
            # Sequence inputs always produce nested lists, so the empty case is converted
            # too instead of leaking the intermediate numpy array to the caller.
            return _numpy_pairs_to_contact_map(pairs.reshape(0, 2), length, unsafe).tolist()
        if pairs.ndim != 2 or pairs.shape[1] != 2:
            raise ValueError("pairs must be an array-like with shape (n, 2)")
        return _numpy_pairs_to_contact_map(pairs, length, unsafe).tolist()
    raise TypeError("pairs must be a torch.Tensor, numpy.ndarray, or sequence of (i, j) pairs")

multimolecule.utils.rna.pairs_to_dot_bracket

Python
pairs_to_dot_bracket(
    pairs: Tensor | ndarray | Pairs,
    length: int,
    unsafe: bool = False,
) -> str

Convert base pairs to a dot-bracket string (backend-aware input, string output).

Torch inputs are accepted and internally converted to NumPy for string building. In safe mode, tiers are assigned using an exact minimal-tier coloring. In unsafe mode, a greedy tiering is used for speed and may use more bracket types.

Examples:

Torch input

Python Console Session
1
2
3
>>> import torch
>>> pairs_to_dot_bracket(torch.tensor([[0, 2], [1, 3]]), length=4)
'([)]'

NumPy input

Python Console Session
1
2
3
>>> import numpy as np
>>> pairs_to_dot_bracket(np.array([(0, 3), (1, 2)]), length=4)
'(())'

List input

Python Console Session
>>> pairs_to_dot_bracket([(0, 3), (1, 2)], length=4)
'(())'
Source code in multimolecule/utils/rna/secondary_structure/notations.py
Python
def pairs_to_dot_bracket(pairs: Tensor | np.ndarray | Pairs, length: int, unsafe: bool = False) -> str:
    """
    Render base pairs as a dot-bracket string (backend-aware input, string output).

    Whatever the input backend, the string is built by the NumPy helper: torch tensors
    are detached, moved to CPU and converted, and other sequences go through ``np.asarray``.
    In safe mode, tiers are assigned using an exact minimal-tier coloring.
    In unsafe mode, a greedy tiering is used for speed and may use more bracket types.

    Args:
        pairs: torch.Tensor, numpy.ndarray, or array-like with shape (n, 2). Empty inputs
            skip the shape check.
        length: Forwarded to the NumPy helper.
        unsafe: Forwarded to the NumPy helper.

    Returns:
        The dot-bracket string.

    Raises:
        TypeError: If ``pairs`` is not a torch.Tensor, numpy.ndarray, or sequence.
        ValueError: If a non-empty ``pairs`` does not have shape (n, 2).

    Examples:
        Torch input
        >>> import torch
        >>> pairs_to_dot_bracket(torch.tensor([[0, 2], [1, 3]]), length=4)
        '([)]'

        NumPy input
        >>> import numpy as np
        >>> pairs_to_dot_bracket(np.array([(0, 3), (1, 2)]), length=4)
        '(())'

        List input
        >>> pairs_to_dot_bracket([(0, 3), (1, 2)], length=4)
        '(())'
    """
    # Normalize every accepted input kind to a NumPy array first.
    if isinstance(pairs, Tensor):
        pair_array = pairs.detach().cpu().numpy()
    elif isinstance(pairs, np.ndarray):
        pair_array = pairs
    elif isinstance(pairs, Sequence):
        pair_array = np.asarray(list(pairs), dtype=int)
    else:
        raise TypeError("pairs must be a torch.Tensor, numpy.ndarray, or sequence of (i, j) pairs")

    # Only non-empty inputs are shape-checked; empty ones are passed through unchanged.
    if pair_array.size != 0 and (pair_array.ndim != 2 or pair_array.shape[1] != 2):
        raise ValueError("pairs must be an array-like with shape (n, 2)")
    return _numpy_pairs_to_dot_bracket(pair_array, length, unsafe)

multimolecule.utils.rna.nested_pairs

Python
nested_pairs(
    pairs: Tensor | ndarray | Pairs,
) -> Tensor | ndarray | PairsList

Return primary pairs from the segment-MWIS split.

Parameters:

Name Type Description Default

pairs

Tensor | ndarray | Pairs

torch.Tensor, numpy.ndarray, or array-like with shape (n, 2) and 0-based indices.

required

Returns:

Type Description
Tensor | ndarray | PairsList

Primary pairs using the same backend as input.

This is equivalent to split_pseudoknot_pairs(pairs)[0] and expects normalized unique pairs.

Raises:

Type Description
ValueError

If pairs has invalid shape for the selected backend.

TypeError

If pairs is not a torch.Tensor, numpy.ndarray, or array-like with shape (n, 2).

Examples:

Torch input

Python Console Session
1
2
3
>>> import torch
>>> nested_pairs(torch.tensor([[0, 2], [1, 3]])).tolist()
[[1, 3]]

NumPy input

Python Console Session
1
2
3
>>> import numpy as np
>>> nested_pairs(np.array([[0, 2], [1, 3]])).tolist()
[[1, 3]]

List input

Python Console Session
>>> nested_pairs([(0, 2), (1, 3)])
[(1, 3)]
Source code in multimolecule/utils/rna/secondary_structure/pseudoknot.py
Python
def nested_pairs(pairs: Tensor | np.ndarray | Pairs) -> Tensor | np.ndarray | PairsList:
    """
    Return primary pairs from the segment-MWIS split.

    This is equivalent to ``split_pseudoknot_pairs(pairs)[0]`` and expects
    normalized unique pairs.

    Args:
        pairs: torch.Tensor, numpy.ndarray, or array-like with shape (n, 2) and 0-based indices.

    Returns:
        Primary pairs using the same backend as input.

    Raises:
        ValueError: If pairs has invalid shape for the selected backend.
        TypeError: If pairs is not a torch.Tensor, numpy.ndarray, or array-like with shape (n, 2).

    Examples:
        Torch input
        >>> import torch
        >>> nested_pairs(torch.tensor([[0, 2], [1, 3]])).tolist()
        [[1, 3]]

        NumPy input
        >>> import numpy as np
        >>> nested_pairs(np.array([[0, 2], [1, 3]])).tolist()
        [[1, 3]]

        List input
        >>> nested_pairs([(0, 2), (1, 3)])
        [(1, 3)]
    """
    # The split returns (primary, pseudoknot); only the primary part is wanted here.
    primary, _ = split_pseudoknot_pairs(pairs)
    return primary

multimolecule.utils.rna.pseudoknot_nucleotides

Python
pseudoknot_nucleotides(
    pairs: Tensor | ndarray | Pairs,
) -> Tensor | ndarray | List[int]

Return nucleotide indices involved in any pseudoknot pair.

Pair inputs are expected to be normalized.

Parameters:

Name Type Description Default

pairs

Tensor | ndarray | Pairs

torch.Tensor, numpy.ndarray, or array-like with shape (n, 2) and 0-based indices.

required

Returns:

Type Description
Tensor | ndarray | List[int]

Unique nucleotide indices using the same backend as input (sequence inputs return Python lists).

Raises:

Type Description
ValueError

If pairs has invalid shape for the selected backend.

TypeError

If pairs is not a torch.Tensor, numpy.ndarray, or array-like with shape (n, 2).

Examples:

Torch input

Python Console Session
1
2
3
>>> import torch
>>> pseudoknot_nucleotides(torch.tensor([[0, 2], [1, 3]])).tolist()
[0, 2]

NumPy input

Python Console Session
1
2
3
>>> import numpy as np
>>> pseudoknot_nucleotides(np.array([[0, 2], [1, 3]])).tolist()
[0, 2]

List input

Python Console Session
>>> pseudoknot_nucleotides([(0, 2), (1, 3)])
[0, 2]
Source code in multimolecule/utils/rna/secondary_structure/pseudoknot.py
Python
def pseudoknot_nucleotides(pairs: Tensor | np.ndarray | Pairs) -> Tensor | np.ndarray | List[int]:
    """
    Collect the nucleotide positions that take part in at least one pseudoknot pair.

    The pseudoknot pairs are obtained from ``pseudoknot_pairs``; their endpoints
    are then flattened and de-duplicated. Pair inputs are expected to be normalized.

    Args:
        pairs: torch.Tensor, numpy.ndarray, or array-like with shape (n, 2) and 0-based indices.

    Returns:
        Unique nucleotide indices in the backend of the input: a 1-D ``torch.long``
        tensor for tensors, a 1-D array for NumPy arrays, and a plain Python list
        for sequence inputs. The result is empty when there are no pseudoknot pairs.

    Raises:
        ValueError: If pairs has invalid shape for the selected backend.
        TypeError: If pairs is not a torch.Tensor, numpy.ndarray, or array-like with shape (n, 2).

    Examples:
        Torch input
        >>> import torch
        >>> pseudoknot_nucleotides(torch.tensor([[0, 2], [1, 3]])).tolist()
        [0, 2]

        NumPy input
        >>> import numpy as np
        >>> pseudoknot_nucleotides(np.array([[0, 2], [1, 3]])).tolist()
        [0, 2]

        List input
        >>> pseudoknot_nucleotides([(0, 2), (1, 3)])
        [0, 2]
    """
    # Reject unsupported containers before doing any work.
    if not isinstance(pairs, (Tensor, np.ndarray, Sequence)):
        raise TypeError("pairs must be a torch.Tensor, numpy.ndarray, or sequence of (i, j) pairs")

    # The helper preserves the input backend, so one call serves every branch.
    knotted = pseudoknot_pairs(pairs)

    if isinstance(pairs, Tensor):
        if knotted.numel() > 0:
            endpoints = knotted.view(-1)
            return torch.unique(endpoints).to(torch.long)
        # Keep the empty result on the same device as the input.
        return torch.empty((0,), dtype=torch.long, device=pairs.device)

    if isinstance(pairs, np.ndarray):
        if knotted.size > 0:
            return np.unique(knotted.reshape(-1))
        return np.empty((0,), dtype=int)

    # Remaining case: a generic sequence of (i, j) pairs.
    if knotted:
        endpoints = np.asarray(knotted, dtype=int).reshape(-1)
        return np.unique(endpoints).tolist()
    return []

multimolecule.utils.rna.pseudoknot_pairs

Python
pseudoknot_pairs(pairs: ndarray) -> ndarray
Python
pseudoknot_pairs(pairs: Pairs) -> PairsList
Python
pseudoknot_pairs(pairs: Tensor) -> Tensor
Python
pseudoknot_pairs(
    pairs: Tensor | ndarray | Pairs,
) -> Tensor | ndarray | PairsList

Return pseudoknot pairs from segments not selected by MWIS.

Parameters:

Name Type Description Default

pairs

Tensor | ndarray | Pairs

torch.Tensor, numpy.ndarray, or array-like with shape (n, 2) and 0-based indices.

required

Returns:

Type Description
Tensor | ndarray | PairsList

Pseudoknot pairs using the same backend as input.

This is equivalent to split_pseudoknot_pairs(pairs)[1] and expects normalized unique pairs.

Raises:

Type Description
ValueError

If pairs has invalid shape for the selected backend.

TypeError

If pairs is not a torch.Tensor, numpy.ndarray, or array-like with shape (n, 2).

Tie-breaks for equal total base pairs: (1) minimize unpaired-within-span, (2) minimize total span, (3) minimize number of segments, (4) deterministic order fallback.

Examples:

Torch input

Python Console Session
1
2
3
>>> import torch
>>> pseudoknot_pairs(torch.tensor([[0, 2], [1, 3]])).tolist()
[[0, 2]]

NumPy input

Python Console Session
1
2
3
>>> import numpy as np
>>> pseudoknot_pairs(np.array([[0, 2], [1, 3]])).tolist()
[[0, 2]]

List input

Python Console Session
>>> pseudoknot_pairs([(0, 2), (1, 3)])
[(0, 2)]
Source code in multimolecule/utils/rna/secondary_structure/pseudoknot.py
Python
def pseudoknot_pairs(pairs: Tensor | np.ndarray | Pairs) -> Tensor | np.ndarray | PairsList:
    """
    Extract the pseudoknot pairs, i.e. pairs from segments not selected by MWIS.

    This is a convenience wrapper that returns the second element of
    ``split_pseudoknot_pairs(pairs)``. It expects normalized unique pairs.

    Tie-breaks for equal total base pairs: (1) minimize unpaired-within-span,
    (2) minimize total span, (3) minimize number of segments, (4) deterministic
    order fallback.

    Args:
        pairs: torch.Tensor, numpy.ndarray, or array-like with shape (n, 2) and 0-based indices.

    Returns:
        Pseudoknot pairs using the same backend as input.

    Raises:
        ValueError: If pairs has invalid shape for the selected backend.
        TypeError: If pairs is not a torch.Tensor, numpy.ndarray, or array-like with shape (n, 2).

    Examples:
        Torch input
        >>> import torch
        >>> pseudoknot_pairs(torch.tensor([[0, 2], [1, 3]])).tolist()
        [[0, 2]]

        NumPy input
        >>> import numpy as np
        >>> pseudoknot_pairs(np.array([[0, 2], [1, 3]])).tolist()
        [[0, 2]]

        List input
        >>> pseudoknot_pairs([(0, 2), (1, 3)])
        [(0, 2)]
    """
    # Index 0 holds the primary (nested) pairs; index 1 holds the pseudoknot pairs.
    return split_pseudoknot_pairs(pairs)[1]

multimolecule.utils.rna.pseudoknot_tiers

Python
pseudoknot_tiers(
    pairs: Tensor | ndarray | Pairs, unsafe: bool = False
) -> List[Tensor] | List[ndarray] | Tiers

Return dot-bracket tiers as non-crossing groups of pairs.

Pairs are expected to be normalized (unique, sorted with i < j). Use normalize_pairs if you need to normalize raw inputs.

Parameters:

Name Type Description Default

pairs

Tensor | ndarray | Pairs

torch.Tensor, numpy.ndarray, or array-like with shape (n, 2) and 0-based indices.

required

unsafe

bool

Use greedy tiering for speed instead of minimal coloring.

False

Returns:

Type Description
List[Tensor] | List[ndarray] | Tiers

A list of tiers. Each tier is a list/array/tensor of pairs.

Raises:

Type Description
ValueError

If pairs has invalid shape for the selected backend.

TypeError

If pairs is not a torch.Tensor, numpy.ndarray, or array-like with shape (n, 2).

Examples:

Torch input

Python Console Session
1
2
3
4
>>> import torch
>>> tiers = pseudoknot_tiers(torch.tensor([[0, 2], [1, 3]]))
>>> [tier.tolist() for tier in tiers]
[[[0, 2]], [[1, 3]]]

NumPy input

Python Console Session
1
2
3
4
>>> import numpy as np
>>> tiers = pseudoknot_tiers(np.array([[0, 2], [1, 3]]))
>>> [tier.tolist() for tier in tiers]
[[[0, 2]], [[1, 3]]]

List input

Python Console Session
>>> pseudoknot_tiers([(0, 3), (1, 2)])
[[(0, 3), (1, 2)]]
Source code in multimolecule/utils/rna/secondary_structure/pseudoknot.py
Python
def pseudoknot_tiers(
    pairs: Tensor | np.ndarray | Pairs, unsafe: bool = False
) -> List[Tensor] | List[np.ndarray] | Tiers:
    """
    Group pairs into dot-bracket tiers, each tier being a non-crossing set of pairs.

    Pairs are expected to be normalized (unique, sorted with i < j).
    Use ``normalize_pairs`` if you need to normalize raw inputs.

    Args:
        pairs: torch.Tensor, numpy.ndarray, or array-like with shape (n, 2) and 0-based indices.
        unsafe: Use greedy tiering for speed instead of minimal coloring.

    Returns:
        A list of tiers. Each tier is a tensor for tensor input, an array for
        NumPy input, and a list of ``(i, j)`` tuples for sequence input. An
        empty sequence yields an empty list.

    Raises:
        ValueError: If pairs has invalid shape for the selected backend.
        TypeError: If pairs is not a torch.Tensor, numpy.ndarray, or array-like with shape (n, 2).

    Examples:
        Torch input
        >>> import torch
        >>> tiers = pseudoknot_tiers(torch.tensor([[0, 2], [1, 3]]))
        >>> [tier.tolist() for tier in tiers]
        [[[0, 2]], [[1, 3]]]

        NumPy input
        >>> import numpy as np
        >>> tiers = pseudoknot_tiers(np.array([[0, 2], [1, 3]]))
        >>> [tier.tolist() for tier in tiers]
        [[[0, 2]], [[1, 3]]]

        List input
        >>> pseudoknot_tiers([(0, 3), (1, 2)])
        [[(0, 3), (1, 2)]]
    """
    if isinstance(pairs, Tensor):
        has_pair_shape = pairs.ndim == 2 and pairs.shape[1] == 2
        if not has_pair_shape:
            raise ValueError("pairs must be a torch.Tensor with shape (n, 2)")
        return _torch_tiers_from_pairs(pairs, unsafe=unsafe)

    if isinstance(pairs, np.ndarray):
        has_pair_shape = pairs.ndim == 2 and pairs.shape[1] == 2
        if not has_pair_shape:
            raise ValueError("pairs must be a numpy.ndarray with shape (n, 2)")
        return _numpy_tiers_from_pairs(pairs, unsafe=unsafe)

    if not isinstance(pairs, Sequence):
        raise TypeError("pairs must be a torch.Tensor, numpy.ndarray, or sequence of (i, j) pairs")

    # Sequence inputs are routed through the NumPy backend and converted back.
    if not pairs:
        return []
    pair_array = np.asarray(pairs, dtype=int)
    has_pair_shape = pair_array.ndim == 2 and pair_array.shape[1] == 2
    if not has_pair_shape:
        raise ValueError("pairs must be an array-like with shape (n, 2)")
    array_tiers = _numpy_tiers_from_pairs(pair_array, unsafe=unsafe)
    return [[tuple(pair) for pair in tier.tolist()] for tier in array_tiers]

multimolecule.utils.rna.split_crossing_pairs

Python
split_crossing_pairs(
    pairs: Tensor,
) -> Tuple[Tensor, Tensor]
Python
split_crossing_pairs(
    pairs: ndarray,
) -> Tuple[ndarray, ndarray]
Python
split_crossing_pairs(
    pairs: PairsList,
) -> Tuple[PairsList, PairsList]
Python
split_crossing_pairs(
    pairs: Tensor | ndarray | Pairs,
) -> Tuple[
    Tensor | ndarray | PairsList,
    Tensor | ndarray | PairsList,
]

Split pairs into non-crossing pairs and crossing pairs (no-heuristic).

Pairs are expected to be normalized (unique, sorted with i < j). Use normalize_pairs if you need to normalize raw inputs.

Parameters:

Name Type Description Default

pairs

Tensor | ndarray | Pairs

torch.Tensor, numpy.ndarray, or array-like with shape (n, 2) and 0-based indices.

required

Returns:

Type Description
Tuple[Tensor | ndarray | PairsList, Tensor | ndarray | PairsList]

(non_crossing_pairs, crossing_pairs) using the same backend as input.

Raises:

Type Description
ValueError

If pairs has invalid shape for the selected backend.

TypeError

If pairs is not a torch.Tensor, numpy.ndarray, or array-like with shape (n, 2).

Examples:

Torch input

Python Console Session
1
2
3
4
>>> import torch
>>> primary, crossing = split_crossing_pairs(torch.tensor([[0, 2], [1, 3]]))
>>> primary.tolist(), crossing.tolist()
([], [[0, 2], [1, 3]])

NumPy input

Python Console Session
1
2
3
4
>>> import numpy as np
>>> primary, crossing = split_crossing_pairs(np.array([[0, 3], [1, 2]]))
>>> primary.tolist(), crossing.tolist()
([[0, 3], [1, 2]], [])

List input

Python Console Session
1
2
3
4
>>> split_crossing_pairs([(0, 2), (1, 3)])
([], [(0, 2), (1, 3)])
>>> split_crossing_pairs([(0, 3), (1, 2)])
([(0, 3), (1, 2)], [])
Source code in multimolecule/utils/rna/secondary_structure/pseudoknot.py
Python
def split_crossing_pairs(
    pairs: Tensor | np.ndarray | Pairs,
) -> Tuple[Tensor | np.ndarray | PairsList, Tensor | np.ndarray | PairsList]:
    """
    Partition pairs into those that cross no other pair and those that do (no-heuristic).

    Pairs are expected to be normalized (unique, sorted with i < j).
    Use ``normalize_pairs`` if you need to normalize raw inputs.

    Args:
        pairs: torch.Tensor, numpy.ndarray, or array-like with shape (n, 2) and 0-based indices.

    Returns:
        (non_crossing_pairs, crossing_pairs) using the same backend as input.
        Sequence inputs yield lists of ``(i, j)`` tuples. Empty tensors/arrays
        yield two empty ``(0, 2)`` results without shape validation.

    Raises:
        ValueError: If pairs has invalid shape for the selected backend.
        TypeError: If pairs is not a torch.Tensor, numpy.ndarray, or array-like with shape (n, 2).

    Examples:
        Torch input
        >>> import torch
        >>> primary, crossing = split_crossing_pairs(torch.tensor([[0, 2], [1, 3]]))
        >>> primary.tolist(), crossing.tolist()
        ([], [[0, 2], [1, 3]])

        NumPy input
        >>> import numpy as np
        >>> primary, crossing = split_crossing_pairs(np.array([[0, 3], [1, 2]]))
        >>> primary.tolist(), crossing.tolist()
        ([[0, 3], [1, 2]], [])

        List input
        >>> split_crossing_pairs([(0, 2), (1, 3)])
        ([], [(0, 2), (1, 3)])
        >>> split_crossing_pairs([(0, 3), (1, 2)])
        ([(0, 3), (1, 2)], [])
    """
    if isinstance(pairs, Tensor):
        if pairs.numel() == 0:
            no_pairs = pairs.view(0, 2)
            return no_pairs, no_pairs
        if not (pairs.ndim == 2 and pairs.shape[1] == 2):
            raise ValueError("pairs must be a torch.Tensor with shape (n, 2)")
        # A single pair cannot cross anything, so the mask is only built for n >= 2.
        crossing_mask = _torch_crossing_mask(pairs) if pairs.shape[0] >= 2 else None
        if crossing_mask is None or not bool(crossing_mask.any().item()):
            return _torch_sort_pairs(pairs), pairs.new_empty((0, 2))
        return _torch_sort_pairs(pairs[~crossing_mask]), _torch_sort_pairs(pairs[crossing_mask])

    if isinstance(pairs, np.ndarray):
        if pairs.size == 0:
            no_pairs = pairs.reshape(0, 2)
            return no_pairs, no_pairs
        if not (pairs.ndim == 2 and pairs.shape[1] == 2):
            raise ValueError("pairs must be a numpy.ndarray with shape (n, 2)")
        # A single pair cannot cross anything, so the mask is only built for n >= 2.
        crossing_mask = _numpy_crossing_mask(pairs) if pairs.shape[0] >= 2 else None
        if crossing_mask is None or not crossing_mask.any():
            return _numpy_sort_pairs(pairs), pairs[:0]
        return _numpy_sort_pairs(pairs[~crossing_mask]), _numpy_sort_pairs(pairs[crossing_mask])

    if not isinstance(pairs, Sequence):
        raise TypeError("pairs must be a torch.Tensor, numpy.ndarray, or sequence of (i, j) pairs")

    # Sequence inputs are routed through the NumPy backend and converted back to tuples.
    if not pairs:
        return [], []
    pair_array = np.asarray(pairs, dtype=int)
    if not (pair_array.ndim == 2 and pair_array.shape[1] == 2):
        raise ValueError("pairs must be an array-like with shape (n, 2)")

    def sorted_tuples(block):
        return [tuple(row) for row in _numpy_sort_pairs(block).tolist()]

    crossing_mask = _numpy_crossing_mask(pair_array)
    if crossing_mask.any():
        return sorted_tuples(pair_array[~crossing_mask]), sorted_tuples(pair_array[crossing_mask])
    return sorted_tuples(pair_array), []

multimolecule.utils.rna.split_pseudoknot_pairs

Python
split_pseudoknot_pairs(
    pairs: ndarray,
) -> Tuple[ndarray, ndarray]
Python
split_pseudoknot_pairs(
    pairs: Pairs,
) -> Tuple[PairsList, PairsList]
Python
split_pseudoknot_pairs(
    pairs: Tensor,
) -> Tuple[Tensor, Tensor]
Python
split_pseudoknot_pairs(
    pairs: Tensor | ndarray | Pairs,
) -> Tuple[
    Tensor | ndarray | PairsList,
    Tensor | ndarray | PairsList,
]

Split base pairs into primary and pseudoknot pairs using segment-level MWIS.

Pairs are expected to be normalized (unique, sorted with i < j). Use normalize_pairs if you need to normalize raw inputs.

Tie-break order for equal total base pairs is lexicographic on:

  1. minimize unpaired-within-span
  2. minimize total span
  3. minimize number of segments
  4. deterministic segment order (prefer 3’ segments)

Parameters:

Name Type Description Default

pairs

Tensor | ndarray | Pairs

torch.Tensor, numpy.ndarray, or array-like with shape (n, 2) and 0-based indices.

required

Returns:

Type Description
Tuple[Tensor | ndarray | PairsList, Tensor | ndarray | PairsList]

(nested_pairs, pseudoknot_pairs) using the same backend as input.

Raises:

Type Description
ValueError

If pairs has invalid shape for the selected backend.

TypeError

If pairs is not a torch.Tensor, numpy.ndarray, or array-like with shape (n, 2).

Examples:

Torch input

Python Console Session
1
2
3
4
>>> import torch
>>> primary, pseudoknot_pairs = split_pseudoknot_pairs(torch.tensor([[0, 2], [1, 3]]))
>>> primary.tolist(), pseudoknot_pairs.tolist()
([[1, 3]], [[0, 2]])

NumPy input

Python Console Session
1
2
3
4
>>> import numpy as np
>>> primary, pseudoknot_pairs = split_pseudoknot_pairs(np.array([[0, 2], [1, 3]]))
>>> primary.tolist(), pseudoknot_pairs.tolist()
([[1, 3]], [[0, 2]])

List input

Python Console Session
1
2
3
4
>>> split_pseudoknot_pairs([(0, 2), (1, 3)])
([(1, 3)], [(0, 2)])
>>> split_pseudoknot_pairs([(0, 3), (1, 2)])
([(0, 3), (1, 2)], [])
Source code in multimolecule/utils/rna/secondary_structure/pseudoknot.py
Python
def split_pseudoknot_pairs(
    pairs: Tensor | np.ndarray | Pairs,
) -> Tuple[Tensor | np.ndarray | PairsList, Tensor | np.ndarray | PairsList]:
    """
    Separate base pairs into primary (nested) pairs and pseudoknot pairs using segment-level MWIS.

    Pairs are expected to be normalized (unique, sorted with i < j).
    Use ``normalize_pairs`` if you need to normalize raw inputs.

    Tie-break order for equal total base pairs is lexicographic on:

    1. minimize unpaired-within-span
    2. minimize total span
    3. minimize number of segments
    4. deterministic segment order (prefer 3' segments)

    Args:
        pairs: torch.Tensor, numpy.ndarray, or array-like with shape (n, 2) and 0-based indices.

    Returns:
        (nested_pairs, pseudoknot_pairs) using the same backend as input.
        Sequence inputs yield lists of ``(i, j)`` tuples; an empty sequence
        yields two empty lists.

    Raises:
        ValueError: If pairs has invalid shape for the selected backend.
        TypeError: If pairs is not a torch.Tensor, numpy.ndarray, or array-like with shape (n, 2).

    Examples:
        Torch input
        >>> import torch
        >>> primary, pseudoknot_pairs = split_pseudoknot_pairs(torch.tensor([[0, 2], [1, 3]]))
        >>> primary.tolist(), pseudoknot_pairs.tolist()
        ([[1, 3]], [[0, 2]])

        NumPy input
        >>> import numpy as np
        >>> primary, pseudoknot_pairs = split_pseudoknot_pairs(np.array([[0, 2], [1, 3]]))
        >>> primary.tolist(), pseudoknot_pairs.tolist()
        ([[1, 3]], [[0, 2]])

        List input
        >>> split_pseudoknot_pairs([(0, 2), (1, 3)])
        ([(1, 3)], [(0, 2)])
        >>> split_pseudoknot_pairs([(0, 3), (1, 2)])
        ([(0, 3), (1, 2)], [])
    """
    if isinstance(pairs, Tensor):
        if not (pairs.ndim == 2 and pairs.shape[1] == 2):
            raise ValueError("pairs must be a torch.Tensor with shape (n, 2)")
        nested, knotted = _torch_split_pseudoknot_pairs(pairs)
        return _torch_sort_pairs(nested), _torch_sort_pairs(knotted)

    if isinstance(pairs, np.ndarray):
        if not (pairs.ndim == 2 and pairs.shape[1] == 2):
            raise ValueError("pairs must be a numpy.ndarray with shape (n, 2)")
        nested, knotted = _numpy_split_pseudoknot_pairs(pairs)
        return _numpy_sort_pairs(nested), _numpy_sort_pairs(knotted)

    if not isinstance(pairs, Sequence):
        raise TypeError("pairs must be a torch.Tensor, numpy.ndarray, or sequence of (i, j) pairs")

    # Sequence inputs are routed through the NumPy backend and converted back to tuples.
    if not pairs:
        return [], []
    pair_array = np.asarray(pairs, dtype=int)
    if not (pair_array.ndim == 2 and pair_array.shape[1] == 2):
        raise ValueError("pairs must be an array-like with shape (n, 2)")
    nested, knotted = _numpy_split_pseudoknot_pairs(pair_array)
    nested_rows = _numpy_sort_pairs(nested).tolist()
    knotted_rows = _numpy_sort_pairs(knotted).tolist()
    return [tuple(row) for row in nested_rows], [tuple(row) for row in knotted_rows]