Bases: BaseFileFilter, FrozenModel
Filter files or directories with path matching a regular expression.
.. versionadded:: 0.8.0
Replaces deprecated onetl.core.FileFilter
Parameters
pattern : :obj:`re.Pattern`
Regular expression (e.g. ``\d+\.csv``) for which any **file** (only file) path should match.
If input is a string, regular expression will be compiled using ``re.IGNORECASE`` and ``re.DOTALL`` flags.
Examples
Create regexp filter from string:
.. code:: python
from onetl.file.filter import Regexp
regexp = Regexp(r"\d+\.csv")
Create regexp filter from :obj:`re.Pattern`:
.. code:: python
import re
from onetl.file.filter import Regexp
regexp = Regexp(re.compile(r"\d+\.csv", re.IGNORECASE | re.DOTALL))
Source code in onetl/file/filter/regexp.py
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
class Regexp(BaseFileFilter, FrozenModel):
    r"""Filter files or directories with path matching a regular expression.

    Only file paths are tested against the pattern; any path for which
    ``is_file()`` is false always passes the filter.

    .. versionadded:: 0.8.0
        Replaces deprecated ``onetl.core.FileFilter``

    Parameters
    ----------

    pattern : :obj:`re.Pattern`

        Regular expression (e.g. ``\d+\.csv``) for which any **file** (only file) path should match.

        If input is a string, regular expression will be compiled using ``re.IGNORECASE`` and ``re.DOTALL`` flags.

    Examples
    --------

    Create regexp filter from string:

    .. code:: python

        from onetl.file.filter import Regexp

        regexp = Regexp(r"\d+\.csv")

    Create regexp filter from :obj:`re.Pattern`:

    .. code:: python

        import re

        from onetl.file.filter import Regexp

        regexp = Regexp(re.compile(r"\d+\.csv", re.IGNORECASE | re.DOTALL))
    """

    class Config:
        # presumably required so the model accepts the ``re.Pattern`` field below
        arbitrary_types_allowed = True

    pattern: re.Pattern

    def __init__(self, pattern: re.Pattern | str):
        """Create the filter from a pattern string or an already compiled pattern."""
        # this is only to allow passing regexp as positional argument
        super().__init__(pattern=pattern)  # type: ignore

    def __repr__(self):
        """Return ``ClassName(<pattern repr>)``."""
        return f"{self.__class__.__name__}({self.pattern!r})"

    def match(self, path: PathProtocol) -> bool:
        """Return ``True`` if ``path`` passes the filter.

        Non-file paths always pass. A file passes when the pattern is found
        anywhere in its path string (``search``, not a full match).
        """
        if not path.is_file():
            return True

        return self.pattern.search(os.fspath(path)) is not None

    @validator("pattern", pre=True)
    def _validate_pattern(cls, value: re.Pattern | str) -> re.Pattern:
        """Compile string input into a pattern; pass any other value through unchanged.

        Raises
        ------
        ValueError
            If the string is not a valid regular expression.
        """
        if isinstance(value, str):
            try:
                return re.compile(value, re.IGNORECASE | re.DOTALL)
            except re.error as e:
                raise ValueError(f"Invalid regexp: {value!r}") from e

        return value
|
match(path)
Source code in onetl/file/filter/regexp.py
def match(self, path: PathProtocol) -> bool:
    """Tell whether ``path`` passes this filter.

    Anything that is not a file is accepted unconditionally. A file is
    accepted only when the pattern is found somewhere in its path string.
    """
    if path.is_file():
        found = self.pattern.search(os.fspath(path))
        return found is not None

    return True
|