This commit is contained in:
tayf 2026-03-08 09:50:41 +02:00
parent f9e47c91b4
commit 325bbebc2d
8 changed files with 131 additions and 5 deletions

4
.gitignore vendored
View file

@ -5,6 +5,10 @@ build/
dist/ dist/
wheels/ wheels/
*.egg-info *.egg-info
.pytest_cache
# Virtual environments # Virtual environments
.venv .venv
.vscode

View file

@ -13,3 +13,8 @@ dependencies = []
[build-system] [build-system]
requires = ["uv_build>=0.10.9,<0.11.0"] requires = ["uv_build>=0.10.9,<0.11.0"]
build-backend = "uv_build" build-backend = "uv_build"
[dependency-groups]
dev = [
"pytest>=9.0.2",
]

View file

@ -0,0 +1,4 @@
from .filter import Wiqaya
from .utils import remove_tashkeel
__all__ = ["Wiqaya", "remove_tashkeel"]

View file

@ -1,5 +1,5 @@
from pathlib import Path from pathlib import Path
from .utils import remove_tashkeel
DATA_DIR = Path(__file__).parent.parent.parent / "data" DATA_DIR = Path(__file__).parent.parent.parent / "data"
@ -16,14 +16,23 @@ class Wiqaya:
raise ValueError(f"Language '{self.lang}' not supported") raise ValueError(f"Language '{self.lang}' not supported")
def is_profane(self, text) -> bool:
    """Report whether *text* contains at least one listed word.

    The text is tokenised with ``self._process`` and every token is
    looked up in ``self.WORDS``; the scan stops at the first hit.
    """
    for token in self._process(text):
        if token in self.WORDS:
            return True
    return False
def get_profane_words(self, text) -> list[str]:
    """Collect the tokens of *text* that appear in ``self.WORDS``.

    Tokens come from ``self._process``; matches are returned in the
    order they occur, and repeated matches are kept.
    """
    tokens = self._process(text)
    return [token for token in tokens if token in self.WORDS]
def censor(self, text: str, char: str = "*") -> str:
    """Mask every listed word in *text* with *char*.

    Each whitespace-delimited token of the original text is normalised
    with ``self._process`` (the same normalisation used for matching) and
    masked when its normalised form is in ``self.WORDS``.  Working token
    by token on the raw text means a word is still masked when its raw
    spelling differs from the normalised one, and a listed word that only
    occurs inside a longer, unlisted token is left untouched.  Whitespace
    between tokens is preserved exactly.

    Args:
        text: The text to censor.
        char: Replacement string, repeated once per character of the
            normalised word.

    Returns:
        The text with every matching token replaced by the mask.
    """
    import re  # local import keeps the method self-contained

    def mask(match):
        token = match.group()
        # _process returns a list; a single token yields at most one word,
        # or none when normalisation strips every character.
        for word in self._process(token):
            if word in self.WORDS:
                return char * len(word)
        return token

    # A callable replacement is inserted literally, so backslashes in
    # *char* are not interpreted as regex escapes.
    return re.sub(r"\S+", mask, text)
def _process(self, text: str) -> list[str]:
if self.lang == "ar":
text = remove_tashkeel(text)
return text.lower().split()
# Manual smoke check. Guarded so that merely importing this module does not
# build a filter and print to stdout.
if __name__ == "__main__":
    t = Wiqaya("ar").get_profane_words("اهلا بك يا بزاز")
    print(t)

View file

@ -0,0 +1,10 @@
import re
# Matches one character in U+0610-U+061A or U+064B-U+065F.
TASHKEEL = re.compile(r'[\u0610-\u061A\u064B-\u065F]')


def remove_tashkeel(text: str) -> str:
    """Strip tashkeel marks from Arabic text.

    Every character matched by ``TASHKEEL`` is deleted.  Text that
    contains no such character is handed back as is.
    """
    has_marks = TASHKEEL.search(text) is not None
    return TASHKEEL.sub('', text) if has_marks else text

11
tests/test_filter.py Normal file
View file

@ -0,0 +1,11 @@
from wiqaya import Wiqaya
def test_is_profane():
    """Text expected to contain no listed word is reported as not profane."""
    w = Wiqaya(lang="ar")
    # Identity check instead of ``== False``: pins the boolean result
    # without an equality comparison against a singleton.
    assert w.is_profane("نص عادي") is False
def test_censor():
    """The last word of the sample is expected to be masked with five ``*``."""
    w = Wiqaya(lang="ar")
    text = "نص سيء حرامي"
    assert w.censor(text, char="*") == "نص سيء *****"

12
tests/test_utils.py Normal file
View file

@ -0,0 +1,12 @@
from wiqaya.utils import remove_tashkeel
def test_no_tashkeel():
    """Arabic text without marks comes back unchanged."""
    plain = "مرحبا"
    result = remove_tashkeel(plain)
    assert result == plain
def test_with_tashkeel():
    """Marks are stripped, leaving only the base letters."""
    marked = "مَرْحَباً"
    expected = "مرحبا"
    assert remove_tashkeel(marked) == expected
def test_english_unchanged():
    """Latin-script text passes through untouched."""
    sample = "hello world"
    result = remove_tashkeel(sample)
    assert result == sample

71
uv.lock generated
View file

@ -2,7 +2,78 @@ version = 1
revision = 3 revision = 3
requires-python = ">=3.12" requires-python = ">=3.12"
[[package]]
name = "colorama"
version = "0.4.6"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697, upload-time = "2022-10-25T02:36:22.414Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" },
]
[[package]]
name = "iniconfig"
version = "2.3.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/72/34/14ca021ce8e5dfedc35312d08ba8bf51fdd999c576889fc2c24cb97f4f10/iniconfig-2.3.0.tar.gz", hash = "sha256:c76315c77db068650d49c5b56314774a7804df16fee4402c1f19d6d15d8c4730", size = 20503, upload-time = "2025-10-18T21:55:43.219Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484, upload-time = "2025-10-18T21:55:41.639Z" },
]
[[package]]
name = "packaging"
version = "26.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/65/ee/299d360cdc32edc7d2cf530f3accf79c4fca01e96ffc950d8a52213bd8e4/packaging-26.0.tar.gz", hash = "sha256:00243ae351a257117b6a241061796684b084ed1c516a08c48a3f7e147a9d80b4", size = 143416, upload-time = "2026-01-21T20:50:39.064Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/b7/b9/c538f279a4e237a006a2c98387d081e9eb060d203d8ed34467cc0f0b9b53/packaging-26.0-py3-none-any.whl", hash = "sha256:b36f1fef9334a5588b4166f8bcd26a14e521f2b55e6b9de3aaa80d3ff7a37529", size = 74366, upload-time = "2026-01-21T20:50:37.788Z" },
]
[[package]]
name = "pluggy"
version = "1.6.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/f9/e2/3e91f31a7d2b083fe6ef3fa267035b518369d9511ffab804f839851d2779/pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3", size = 69412, upload-time = "2025-05-15T12:30:07.975Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" },
]
[[package]]
name = "pygments"
version = "2.19.2"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/b0/77/a5b8c569bf593b0140bde72ea885a803b82086995367bf2037de0159d924/pygments-2.19.2.tar.gz", hash = "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887", size = 4968631, upload-time = "2025-06-21T13:39:12.283Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217, upload-time = "2025-06-21T13:39:07.939Z" },
]
[[package]]
name = "pytest"
version = "9.0.2"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "colorama", marker = "sys_platform == 'win32'" },
{ name = "iniconfig" },
{ name = "packaging" },
{ name = "pluggy" },
{ name = "pygments" },
]
sdist = { url = "https://files.pythonhosted.org/packages/d1/db/7ef3487e0fb0049ddb5ce41d3a49c235bf9ad299b6a25d5780a89f19230f/pytest-9.0.2.tar.gz", hash = "sha256:75186651a92bd89611d1d9fc20f0b4345fd827c41ccd5c299a868a05d70edf11", size = 1568901, upload-time = "2025-12-06T21:30:51.014Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/3b/ab/b3226f0bd7cdcf710fbede2b3548584366da3b19b5021e74f5bde2a8fa3f/pytest-9.0.2-py3-none-any.whl", hash = "sha256:711ffd45bf766d5264d487b917733b453d917afd2b0ad65223959f59089f875b", size = 374801, upload-time = "2025-12-06T21:30:49.154Z" },
]
[[package]] [[package]]
name = "wiqaya" name = "wiqaya"
version = "0.1.0" version = "0.1.0"
source = { editable = "." } source = { editable = "." }
[package.dev-dependencies]
dev = [
{ name = "pytest" },
]
[package.metadata]
[package.metadata.requires-dev]
dev = [{ name = "pytest", specifier = ">=9.0.2" }]