Skip to content

Регулярные выражения

Bases: BaseFileFilter, FrozenModel

Filter files or directories with path matching a regular expression.

.. versionadded:: 0.8.0 Replaces deprecated onetl.core.FileFilter

Parameters

pattern : :obj:`re.Pattern`

Regular expression (e.g. ``\d+\.csv``) for which any **file** (only file) path should match.

If input is a string, regular expression will be compiled using ``re.IGNORECASE`` and ``re.DOTALL`` flags.

Examples

Create regexp filter from string:

.. code:: python

from onetl.file.filter import Regexp

regexp = Regexp(r"\d+\.csv")

Create regexp filter from :obj:`re.Pattern`:

.. code:: python

import re

from onetl.file.filter import Regexp

regexp = Regexp(re.compile(r"\d+\.csv", re.IGNORECASE | re.DOTALL))
Source code in onetl/file/filter/regexp.py
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
class Regexp(BaseFileFilter, FrozenModel):
    r"""Filter files or directories with path matching a regular expression.

    Only paths for which ``is_file()`` is true are checked against the pattern;
    any other path (e.g. a directory) always passes the filter.

    .. versionadded:: 0.8.0
        Replaces deprecated ``onetl.core.FileFilter``

    Parameters
    ----------

    pattern : :obj:`re.Pattern` or :obj:`str`

        Regular expression (e.g. ``\d+\.csv``) for which any **file** (only file) path should match.

        The expression is applied with :obj:`re.Pattern.search`, so it may match
        anywhere in the path unless anchored explicitly.

        If input is a string, regular expression will be compiled using ``re.IGNORECASE`` and ``re.DOTALL`` flags.
        An already compiled pattern is used as is, with its own flags.

    Examples
    --------

    Create regexp filter from string:

    .. code:: python

        from onetl.file.filter import Regexp

        regexp = Regexp(r"\d+\.csv")

    Create regexp filter from :obj:`re.Pattern`:

    .. code:: python

        import re

        from onetl.file.filter import Regexp

        regexp = Regexp(re.compile(r"\d+\.csv", re.IGNORECASE | re.DOTALL))
    """

    class Config:
        # ``re.Pattern`` is declared as a field type below
        arbitrary_types_allowed = True

    # Compiled expression used by ``match``; strings are compiled by ``_validate_pattern``
    pattern: re.Pattern

    def __init__(self, pattern: re.Pattern | str):
        # this is only to allow passing regexp as positional argument
        super().__init__(pattern=pattern)  # type: ignore

    def __repr__(self):
        """Return ``ClassName(<compiled pattern repr>)``."""
        return f"{self.__class__.__name__}({self.pattern!r})"

    def match(self, path: PathProtocol) -> bool:
        """Return ``True`` if ``path`` passes the filter.

        Paths which are not files always pass. File paths pass only when
        the pattern is found somewhere in ``os.fspath(path)``.
        """
        if not path.is_file():
            return True

        return self.pattern.search(os.fspath(path)) is not None

    @validator("pattern", pre=True)
    def _validate_pattern(cls, value: re.Pattern | str) -> re.Pattern:
        """Compile string input into a pattern; pass any other value through unchanged.

        Raises
        ------
        ValueError
            If ``value`` is a string which is not a valid regular expression.
        """
        if isinstance(value, str):
            try:
                return re.compile(value, re.IGNORECASE | re.DOTALL)
            except re.error as e:
                raise ValueError(f"Invalid regexp: {value!r}") from e

        return value

match(path)

Source code in onetl/file/filter/regexp.py
66
67
68
69
70
def match(self, path: PathProtocol) -> bool:
    """Return ``True`` if ``path`` passes the filter.

    Paths which are not files always pass. File paths pass only when
    ``self.pattern`` is found somewhere in ``os.fspath(path)``.
    """
    if path.is_file():
        found = self.pattern.search(os.fspath(path))
        return found is not None

    # anything that is not a file is never rejected by this filter
    return True