Skip to content

Регулярные выражения

Bases: BaseFileFilter, FrozenModel

Filter files or directories with path matching a regular expression.

Added in 0.8.0

Replaces deprecated onetl.core.FileFilter

Parameters

pattern : [re.Pattern][]

Regular expression (e.g. `\d+\.csv`) for which any **file** (only file) path should match.

If input is a string, regular expression will be compiled using `re.IGNORECASE` and `re.DOTALL` flags.

Examples

Create regexp filter from string:

from onetl.file.filter import Regexp

regexp = Regexp(r"\d+\.csv")
Create regexp filter from [re.Pattern][]:

import re

from onetl.file.filter import Regexp

regexp = Regexp(re.compile(r"\d+\.csv", re.IGNORECASE | re.DOTALL))
Source code in onetl/file/filter/regexp.py
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
class Regexp(BaseFileFilter, FrozenModel):
    r"""Filter files or directories with path matching a regular expression.

    !!! success "Added in 0.8.0"
        Replaces deprecated `onetl.core.FileFilter`

    Parameters
    ----------

    pattern : [re.Pattern][] | str

        Regular expression (e.g. `\d+\.csv`) for which any **file** (only file) path should match.
        Paths which are not files always pass the filter.

        If input is a string, regular expression will be compiled using `re.IGNORECASE` and `re.DOTALL` flags.
        An already compiled pattern is used as is, with its own flags.

    Examples
    --------

    Create regexp filter from string:

    ```python
    from onetl.file.filter import Regexp

    regexp = Regexp(r"\d+\.csv")

    ```
    Create regexp filter from [re.Pattern][]:

    ```python
    import re

    from onetl.file.filter import Regexp

    regexp = Regexp(re.compile(r"\d+\.csv", re.IGNORECASE | re.DOTALL))

    ```
    """

    class Config:
        # re.Pattern is declared as an arbitrary (non-model) field type
        arbitrary_types_allowed = True

    pattern: re.Pattern

    def __init__(self, pattern: re.Pattern | str):
        """Create the filter from a regexp string or an already compiled pattern."""
        # this is only to allow passing regexp as positional argument
        super().__init__(pattern=pattern)

    def __repr__(self):
        """Return ``ClassName(<compiled pattern repr>)``."""
        return f"{self.__class__.__name__}({self.pattern!r})"

    def match(self, path: PathProtocol) -> bool:
        """Check whether ``path`` passes the filter.

        Returns ``True`` for any path which is not a file. For a file, returns
        ``True`` only if the pattern is found anywhere in ``os.fspath(path)``
        (``search`` semantics, the pattern is not anchored to the path start).
        """
        if not path.is_file():
            # only files are tested against the pattern
            return True

        return self.pattern.search(os.fspath(path)) is not None

    @validator("pattern", pre=True)
    def _validate_pattern(cls, value: re.Pattern | str) -> re.Pattern:
        """Compile a string input into a pattern; pass any other input through unchanged.

        Raises ``ValueError`` (chained to the original ``re.error``) if the
        string is not a valid regular expression.
        """
        if isinstance(value, str):
            try:
                return re.compile(value, re.IGNORECASE | re.DOTALL)
            except re.error as e:
                msg = f"Invalid regexp: {value!r}"
                raise ValueError(msg) from e

        return value

match(path)

Source code in onetl/file/filter/regexp.py
67
68
69
70
71
def match(self, path: PathProtocol) -> bool:
    """Tell whether ``path`` passes the regexp filter.

    A file passes only if ``self.pattern`` is found somewhere in its
    ``os.fspath`` representation. Anything which is not a file always passes.
    """
    if path.is_file():
        found = self.pattern.search(os.fspath(path))
        return found is not None

    # non-file paths are never rejected by this filter
    return True