Bases: BaseFileFilter, FrozenModel
Filter files or directories with path matching a regular expression.
Added in 0.8.0
Replaces deprecated onetl.core.FileFilter
Parameters
pattern : [re.Pattern][]
Regular expression (e.g. `\d+\.csv`) for which any **file** (only file) path should match.
If input is a string, regular expression will be compiled using `re.IGNORECASE` and `re.DOTALL` flags.
Examples
Create regexp filter from string:
from onetl.file.filter import Regexp
regexp = Regexp(r"\d+\.csv")
Create regexp filter from [re.Pattern][]:
import re
from onetl.file.filter import Regexp
regexp = Regexp(re.compile(r"\d+\.csv", re.IGNORECASE | re.DOTALL))
Source code in onetl/file/filter/regexp.py
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
class Regexp(BaseFileFilter, FrozenModel):
    r"""Filter files or directories with path matching a regular expression.

    !!! success "Added in 0.8.0"

        Replaces deprecated `onetl.core.FileFilter`

    Parameters
    ----------
    pattern : [re.Pattern][]
        Regular expression (e.g. `\d+\.csv`) for which any **file** (only file) path should match.

        If input is a string, regular expression will be compiled using `re.IGNORECASE` and `re.DOTALL` flags.

    Examples
    --------

    Create regexp filter from string:

    ```python
    from onetl.file.filter import Regexp

    regexp = Regexp(r"\d+\.csv")
    ```

    Create regexp filter from [re.Pattern][]:

    ```python
    import re

    from onetl.file.filter import Regexp

    regexp = Regexp(re.compile(r"\d+\.csv", re.IGNORECASE | re.DOTALL))
    ```
    """

    class Config:
        # NOTE(review): presumably required so that the ``re.Pattern`` field
        # below is accepted as a field type - confirm against FrozenModel.
        arbitrary_types_allowed = True

    pattern: re.Pattern

    def __init__(self, pattern: str | re.Pattern):
        # this is only to allow passing regexp as positional argument
        super().__init__(pattern=pattern)

    def __repr__(self):
        return f"{self.__class__.__name__}({self.pattern!r})"

    def match(self, path: PathProtocol) -> bool:
        """Return ``True`` if ``path`` passes this filter.

        Paths for which ``path.is_file()`` is false always pass.
        A file passes when the pattern is found anywhere in its path string
        (``search`` semantics, not a full match).
        """
        if not path.is_file():
            return True

        return self.pattern.search(os.fspath(path)) is not None

    @validator("pattern", pre=True)
    def _validate_pattern(cls, value: re.Pattern | str) -> re.Pattern:
        """Compile string input into a pattern; return any other value unchanged.

        Raises
        ------
        ValueError
            If the string is not a valid regular expression.
        """
        if isinstance(value, str):
            try:
                return re.compile(value, re.IGNORECASE | re.DOTALL)
            except re.error as e:
                msg = f"Invalid regexp: {value!r}"
                raise ValueError(msg) from e

        return value
|
match(path)
Source code in onetl/file/filter/regexp.py
def match(self, path: PathProtocol) -> bool:
    """Return ``True`` if ``path`` passes this filter.

    Only paths for which ``path.is_file()`` is true are tested against the
    pattern (searched anywhere within the path string); every other path passes.
    """
    if path.is_file():
        found = self.pattern.search(os.fspath(path))
        return found is not None

    # Anything that is not a file is never rejected by this filter.
    return True
|