diff --git a/.gitignore b/.gitignore index 505a3b1..73285a3 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,10 @@ build/ dist/ wheels/ *.egg-info +.pytest_cache # Virtual environments .venv + +.vscode + diff --git a/pyproject.toml b/pyproject.toml index f761da2..cd24fee 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,3 +13,8 @@ dependencies = [] [build-system] requires = ["uv_build>=0.10.9,<0.11.0"] build-backend = "uv_build" + +[dependency-groups] +dev = [ + "pytest>=9.0.2", +] diff --git a/src/wiqaya/__init__.py b/src/wiqaya/__init__.py index e69de29..4cbe5dd 100644 --- a/src/wiqaya/__init__.py +++ b/src/wiqaya/__init__.py @@ -0,0 +1,4 @@ +from .filter import Wiqaya +from .utils import remove_tashkeel + +__all__ = ["Wiqaya", "remove_tashkeel"] \ No newline at end of file diff --git a/src/wiqaya/filter.py b/src/wiqaya/filter.py index 1b8001a..a7cb800 100644 --- a/src/wiqaya/filter.py +++ b/src/wiqaya/filter.py @@ -1,5 +1,5 @@ from pathlib import Path - +from .utils import remove_tashkeel DATA_DIR = Path(__file__).parent.parent.parent / "data" @@ -16,14 +16,23 @@ class Wiqaya: raise ValueError(f"Language '{self.lang}' not supported") def is_profane(self, text) -> bool: - words = text.lower().split() + words = self._process(text) return any(word in self.WORDS for word in words) def get_profane_words(self, text) -> list[str]: - words = text.lower().split() + words = self._process(text) return [word for word in words if word in self.WORDS] + def censor(self, text: str, char: str = "*") -> str: + words = self._process(text) + for word in words: + if word in self.WORDS: + text = text.replace(word, char * len(word)) + return text + + def _process(self, text: str) -> list[str]: + if self.lang == "ar": + text = remove_tashkeel(text) + return text.lower().split() -t = Wiqaya("ar").get_profane_words("اهلا بك يا بزاز") -print(t) \ No newline at end of file diff --git a/src/wiqaya/utils.py b/src/wiqaya/utils.py index e69de29..708b43c 100644 --- a/src/wiqaya/utils.py +++ b/src/wiqaya/utils.py @@ -0,0 +1,10 @@ +import re + + +TASHKEEL = re.compile(r'[\u0610-\u061A\u064B-\u065F]') + +# remove tashkeel from arabic +def remove_tashkeel(text: str) -> str: + if not TASHKEEL.search(text): + return text + return TASHKEEL.sub('', text) \ No newline at end of file diff --git a/tests/test_filter.py b/tests/test_filter.py new file mode 100644 index 0000000..0e3a82c --- /dev/null +++ b/tests/test_filter.py @@ -0,0 +1,11 @@ +from wiqaya import Wiqaya + +def test_is_profane(): + w = Wiqaya(lang="ar") + assert w.is_profane("نص عادي") == False + +def test_censor(): + w = Wiqaya(lang="ar") + text = "نص سيء حرامي" + print(w.censor(text, char="*") ) + assert w.censor(text, char="*") == "نص سيء *****" \ No newline at end of file diff --git a/tests/test_utils.py b/tests/test_utils.py new file mode 100644 index 0000000..c7e0bae --- /dev/null +++ b/tests/test_utils.py @@ -0,0 +1,12 @@ +from wiqaya.utils import remove_tashkeel + +def test_no_tashkeel(): + text = "مرحبا" + assert remove_tashkeel(text) == text + +def test_with_tashkeel(): + assert remove_tashkeel("مَرْحَباً") == "مرحبا" + +def test_english_unchanged(): + text = "hello world" + assert remove_tashkeel(text) == text \ No newline at end of file diff --git a/uv.lock b/uv.lock index 5beab82..2618a19 100644 --- a/uv.lock +++ b/uv.lock @@ -2,7 +2,78 @@ version = 1 revision = 3 requires-python = ">=3.12" +[[package]] +name = "colorama" +version = "0.4.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697, upload-time = "2022-10-25T02:36:22.414Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" }, +] + +[[package]] +name = "iniconfig" +version = "2.3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/72/34/14ca021ce8e5dfedc35312d08ba8bf51fdd999c576889fc2c24cb97f4f10/iniconfig-2.3.0.tar.gz", hash = "sha256:c76315c77db068650d49c5b56314774a7804df16fee4402c1f19d6d15d8c4730", size = 20503, upload-time = "2025-10-18T21:55:43.219Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484, upload-time = "2025-10-18T21:55:41.639Z" }, +] + +[[package]] +name = "packaging" +version = "26.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/65/ee/299d360cdc32edc7d2cf530f3accf79c4fca01e96ffc950d8a52213bd8e4/packaging-26.0.tar.gz", hash = "sha256:00243ae351a257117b6a241061796684b084ed1c516a08c48a3f7e147a9d80b4", size = 143416, upload-time = "2026-01-21T20:50:39.064Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b7/b9/c538f279a4e237a006a2c98387d081e9eb060d203d8ed34467cc0f0b9b53/packaging-26.0-py3-none-any.whl", hash = "sha256:b36f1fef9334a5588b4166f8bcd26a14e521f2b55e6b9de3aaa80d3ff7a37529", size = 74366, upload-time = "2026-01-21T20:50:37.788Z" }, +] + +[[package]] +name = "pluggy" +version = "1.6.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f9/e2/3e91f31a7d2b083fe6ef3fa267035b518369d9511ffab804f839851d2779/pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3", size = 69412, upload-time = "2025-05-15T12:30:07.975Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" }, +] + +[[package]] +name = "pygments" +version = "2.19.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b0/77/a5b8c569bf593b0140bde72ea885a803b82086995367bf2037de0159d924/pygments-2.19.2.tar.gz", hash = "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887", size = 4968631, upload-time = "2025-06-21T13:39:12.283Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217, upload-time = "2025-06-21T13:39:07.939Z" }, +] + +[[package]] +name = "pytest" +version = "9.0.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "iniconfig" }, + { name = "packaging" }, + { name = "pluggy" }, + { name = "pygments" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d1/db/7ef3487e0fb0049ddb5ce41d3a49c235bf9ad299b6a25d5780a89f19230f/pytest-9.0.2.tar.gz", hash = "sha256:75186651a92bd89611d1d9fc20f0b4345fd827c41ccd5c299a868a05d70edf11", size = 1568901, upload-time = "2025-12-06T21:30:51.014Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3b/ab/b3226f0bd7cdcf710fbede2b3548584366da3b19b5021e74f5bde2a8fa3f/pytest-9.0.2-py3-none-any.whl", hash = "sha256:711ffd45bf766d5264d487b917733b453d917afd2b0ad65223959f59089f875b", size = 374801, upload-time = "2025-12-06T21:30:49.154Z" }, +] + [[package]] name = "wiqaya" version = "0.1.0" source = { editable = "." } + +[package.dev-dependencies] +dev = [ + { name = "pytest" }, +] + +[package.metadata] + +[package.metadata.requires-dev] +dev = [{ name = "pytest", specifier = ">=9.0.2" }]