update
This commit is contained in:
parent
f9e47c91b4
commit
325bbebc2d
8 changed files with 131 additions and 5 deletions
4
.gitignore
vendored
4
.gitignore
vendored
|
|
@ -5,6 +5,10 @@ build/
|
||||||
dist/
|
dist/
|
||||||
wheels/
|
wheels/
|
||||||
*.egg-info
|
*.egg-info
|
||||||
|
.pytest_cache
|
||||||
|
|
||||||
# Virtual environments
|
# Virtual environments
|
||||||
.venv
|
.venv
|
||||||
|
|
||||||
|
.vscode
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -13,3 +13,8 @@ dependencies = []
|
||||||
[build-system]
|
[build-system]
|
||||||
requires = ["uv_build>=0.10.9,<0.11.0"]
|
requires = ["uv_build>=0.10.9,<0.11.0"]
|
||||||
build-backend = "uv_build"
|
build-backend = "uv_build"
|
||||||
|
|
||||||
|
[dependency-groups]
|
||||||
|
dev = [
|
||||||
|
"pytest>=9.0.2",
|
||||||
|
]
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,4 @@
|
||||||
|
from .filter import Wiqaya
|
||||||
|
from .utils import remove_tashkeel
|
||||||
|
|
||||||
|
__all__ = ["Wiqaya", "remove_tashkeel"]
|
||||||
|
|
@ -1,5 +1,5 @@
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from .utils import remove_tashkeel
|
||||||
|
|
||||||
|
|
||||||
DATA_DIR = Path(__file__).parent.parent.parent / "data"
|
DATA_DIR = Path(__file__).parent.parent.parent / "data"
|
||||||
|
|
@ -16,14 +16,23 @@ class Wiqaya:
|
||||||
raise ValueError(f"Language '{self.lang}' not supported")
|
raise ValueError(f"Language '{self.lang}' not supported")
|
||||||
|
|
||||||
def is_profane(self, text) -> bool:
    """Report whether *text* contains at least one listed word.

    The text is normalised and tokenised by ``self._process``; the result
    is ``True`` as soon as any token is found in ``self.WORDS``.
    """
    return any(token in self.WORDS for token in self._process(text))
|
||||||
|
|
||||||
def get_profane_words(self, text) -> list[str]:
    """List the tokens of *text* that appear in ``self.WORDS``.

    Tokens come from ``self._process`` and are returned in their
    normalised form, in order of appearance; repeated hits are kept.
    """
    return [token for token in self._process(text) if token in self.WORDS]
|
||||||
|
|
||||||
|
def censor(self, text: str, char: str = "*") -> str:
    """Return *text* with every listed word masked by *char*.

    Each whitespace-delimited token of the original text is normalised
    with ``self._process`` and, when the normalised form is in
    ``self.WORDS``, the whole token is replaced in place.  Matching on
    tokens of the original text (rather than searching the raw text for
    the normalised word) means that:

    * words written with diacritics or different letter case are masked,
      even though their normalised form is not a substring of *text*;
    * a listed word that merely occurs inside a longer, harmless word is
      left alone.

    Args:
        text: The text to censor.  Whitespace is preserved as-is.
        char: The masking string, repeated once per character of the
            normalised word.

    Returns:
        The censored text.
    """
    import re  # local import: only this method needs it

    def mask(match):
        token = match.group()
        # A token made only of stripped characters normalises to nothing.
        normalised = self._process(token)
        if normalised and normalised[0] in self.WORDS:
            # Size the mask by the normalised word so diacritics do not
            # change the number of mask characters.
            return char * len(normalised[0])
        return token

    return re.sub(r"\S+", mask, text)
|
||||||
|
|
||||||
|
def _process(self, text: str) -> list[str]:
|
||||||
|
if self.lang == "ar":
|
||||||
|
text = remove_tashkeel(text)
|
||||||
|
return text.lower().split()
|
||||||
|
|
||||||
|
|
||||||
t = Wiqaya("ar").get_profane_words("اهلا بك يا بزاز")
|
|
||||||
print(t)
|
|
||||||
|
|
@ -0,0 +1,10 @@
|
||||||
|
import re
|
||||||
|
|
||||||
|
|
||||||
|
# Arabic combining marks stripped before word matching:
#   U+0610-U+061A  honorific and Qur'anic annotation signs
#   U+064B-U+065F  tanween, short vowels, shadda, sukun and related marks
#   U+0670         superscript (dagger) alef
TASHKEEL = re.compile(r"[\u0610-\u061A\u064B-\u065F\u0670]")


def remove_tashkeel(text: str) -> str:
    """Return *text* with Arabic diacritical marks (tashkeel) removed.

    Characters outside the ``TASHKEEL`` class, including non-Arabic text,
    are left untouched.

    Args:
        text: The string to strip.

    Returns:
        The string without the matched combining marks.
    """
    # ``sub`` leaves the text unchanged when nothing matches, so a separate
    # ``search`` pre-check would only scan the string a second time.
    return TASHKEEL.sub("", text)
|
||||||
11
tests/test_filter.py
Normal file
11
tests/test_filter.py
Normal file
|
|
@ -0,0 +1,11 @@
|
||||||
|
from wiqaya import Wiqaya
|
||||||
|
|
||||||
|
def test_is_profane():
    """Ordinary Arabic text must not be flagged as profane."""
    w = Wiqaya(lang="ar")
    # Assert on truthiness directly instead of comparing with ``== False``.
    assert not w.is_profane("نص عادي")
|
||||||
|
|
||||||
|
def test_censor():
    """A listed word is masked character-for-character; the rest is kept."""
    w = Wiqaya(lang="ar")
    text = "نص سيء حرامي"
    # Call once and assert; pytest shows the actual value on failure, so no
    # debug print is needed.
    censored = w.censor(text, char="*")
    assert censored == "نص سيء *****"
|
||||||
12
tests/test_utils.py
Normal file
12
tests/test_utils.py
Normal file
|
|
@ -0,0 +1,12 @@
|
||||||
|
from wiqaya.utils import remove_tashkeel
|
||||||
|
|
||||||
|
def test_no_tashkeel():
    """Arabic text that carries no diacritics comes back unchanged."""
    plain = "مرحبا"
    result = remove_tashkeel(plain)
    assert result == plain
|
||||||
|
|
||||||
|
def test_with_tashkeel():
    """Diacritics are stripped while the base letters are kept."""
    stripped = remove_tashkeel("مَرْحَباً")
    assert stripped == "مرحبا"
|
||||||
|
|
||||||
|
def test_english_unchanged():
    """Text without any Arabic marks passes through untouched."""
    sample = "hello world"
    result = remove_tashkeel(sample)
    assert result == sample
|
||||||
71
uv.lock
generated
71
uv.lock
generated
|
|
@ -2,7 +2,78 @@ version = 1
|
||||||
revision = 3
|
revision = 3
|
||||||
requires-python = ">=3.12"
|
requires-python = ">=3.12"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "colorama"
|
||||||
|
version = "0.4.6"
|
||||||
|
source = { registry = "https://pypi.org/simple" }
|
||||||
|
sdist = { url = "https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697, upload-time = "2022-10-25T02:36:22.414Z" }
|
||||||
|
wheels = [
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" },
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "iniconfig"
|
||||||
|
version = "2.3.0"
|
||||||
|
source = { registry = "https://pypi.org/simple" }
|
||||||
|
sdist = { url = "https://files.pythonhosted.org/packages/72/34/14ca021ce8e5dfedc35312d08ba8bf51fdd999c576889fc2c24cb97f4f10/iniconfig-2.3.0.tar.gz", hash = "sha256:c76315c77db068650d49c5b56314774a7804df16fee4402c1f19d6d15d8c4730", size = 20503, upload-time = "2025-10-18T21:55:43.219Z" }
|
||||||
|
wheels = [
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484, upload-time = "2025-10-18T21:55:41.639Z" },
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "packaging"
|
||||||
|
version = "26.0"
|
||||||
|
source = { registry = "https://pypi.org/simple" }
|
||||||
|
sdist = { url = "https://files.pythonhosted.org/packages/65/ee/299d360cdc32edc7d2cf530f3accf79c4fca01e96ffc950d8a52213bd8e4/packaging-26.0.tar.gz", hash = "sha256:00243ae351a257117b6a241061796684b084ed1c516a08c48a3f7e147a9d80b4", size = 143416, upload-time = "2026-01-21T20:50:39.064Z" }
|
||||||
|
wheels = [
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/b7/b9/c538f279a4e237a006a2c98387d081e9eb060d203d8ed34467cc0f0b9b53/packaging-26.0-py3-none-any.whl", hash = "sha256:b36f1fef9334a5588b4166f8bcd26a14e521f2b55e6b9de3aaa80d3ff7a37529", size = 74366, upload-time = "2026-01-21T20:50:37.788Z" },
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "pluggy"
|
||||||
|
version = "1.6.0"
|
||||||
|
source = { registry = "https://pypi.org/simple" }
|
||||||
|
sdist = { url = "https://files.pythonhosted.org/packages/f9/e2/3e91f31a7d2b083fe6ef3fa267035b518369d9511ffab804f839851d2779/pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3", size = 69412, upload-time = "2025-05-15T12:30:07.975Z" }
|
||||||
|
wheels = [
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" },
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "pygments"
|
||||||
|
version = "2.19.2"
|
||||||
|
source = { registry = "https://pypi.org/simple" }
|
||||||
|
sdist = { url = "https://files.pythonhosted.org/packages/b0/77/a5b8c569bf593b0140bde72ea885a803b82086995367bf2037de0159d924/pygments-2.19.2.tar.gz", hash = "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887", size = 4968631, upload-time = "2025-06-21T13:39:12.283Z" }
|
||||||
|
wheels = [
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217, upload-time = "2025-06-21T13:39:07.939Z" },
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "pytest"
|
||||||
|
version = "9.0.2"
|
||||||
|
source = { registry = "https://pypi.org/simple" }
|
||||||
|
dependencies = [
|
||||||
|
{ name = "colorama", marker = "sys_platform == 'win32'" },
|
||||||
|
{ name = "iniconfig" },
|
||||||
|
{ name = "packaging" },
|
||||||
|
{ name = "pluggy" },
|
||||||
|
{ name = "pygments" },
|
||||||
|
]
|
||||||
|
sdist = { url = "https://files.pythonhosted.org/packages/d1/db/7ef3487e0fb0049ddb5ce41d3a49c235bf9ad299b6a25d5780a89f19230f/pytest-9.0.2.tar.gz", hash = "sha256:75186651a92bd89611d1d9fc20f0b4345fd827c41ccd5c299a868a05d70edf11", size = 1568901, upload-time = "2025-12-06T21:30:51.014Z" }
|
||||||
|
wheels = [
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/3b/ab/b3226f0bd7cdcf710fbede2b3548584366da3b19b5021e74f5bde2a8fa3f/pytest-9.0.2-py3-none-any.whl", hash = "sha256:711ffd45bf766d5264d487b917733b453d917afd2b0ad65223959f59089f875b", size = 374801, upload-time = "2025-12-06T21:30:49.154Z" },
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "wiqaya"
|
name = "wiqaya"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
source = { editable = "." }
|
source = { editable = "." }
|
||||||
|
|
||||||
|
[package.dev-dependencies]
|
||||||
|
dev = [
|
||||||
|
{ name = "pytest" },
|
||||||
|
]
|
||||||
|
|
||||||
|
[package.metadata]
|
||||||
|
|
||||||
|
[package.metadata.requires-dev]
|
||||||
|
dev = [{ name = "pytest", specifier = ">=9.0.2" }]
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue