create filter, utils

2026-03-08 09:10:10 +02:00 · 2026-03-08 09:10:10 +02:00 · f9e47c91b4
commit f9e47c91b4
parent e25f01dcf6
4 changed files with 89 additions and 0 deletions
--- a/README.md
+++ b/README.md
@ -0,0 +1,52 @@
+# Wiqaya
+
+A Python library for multilingual profanity detection and filtering. It identifies and censors offensive or abusive words across multiple languages.
+
+## Installation
+
+```bash
+pip install wiqaya
+```
+
+## Usage
+
+```python
+from wiqaya import Wiqaya
+
+w = Wiqaya(lang="ar")
+
+w.is_profane("هذا نص عادي")       # False
+w.is_profane("نص يحتوي شتيمة")    # True
+
+w.get_profane_words("نص يحتوي شتيمة")  # ['شتيمة']
+```
+
+## Supported Languages
+
+| Code | Language | Code | Language | Code | Language |
+|------|----------|------|----------|------|----------|
+| af | Afrikaans | am | Amharic | ar | Arabic |
+| az | Azerbaijani | be | Belarusian | bg | Bulgarian |
+| ca | Catalan | ceb | Cebuano | cs | Czech |
+| cy | Welsh | da | Danish | de | German |
+| dz | Dzongkha | el | Greek | en | English |
+| eo | Esperanto | es | Spanish | et | Estonian |
+| eu | Basque | fa | Persian | fi | Finnish |
+| fil | Filipino | fr | French | gd | Scottish Gaelic |
+| gl | Galician | hi | Hindi | hr | Croatian |
+| hu | Hungarian | hy | Armenian | id | Indonesian |
+| is | Icelandic | it | Italian | ja | Japanese |
+| kab | Kabyle | kh | Khmer | ko | Korean |
+| la | Latin | lt | Lithuanian | lv | Latvian |
+| mi | Maori | mk | Macedonian | ml | Malayalam |
+| mn | Mongolian | mr | Marathi | ms | Malay |
+| mt | Maltese | my | Burmese | nl | Dutch |
+| no | Norwegian | pih | Norfuk | piy | Picard |
+| pl | Polish | pt | Portuguese | ro | Romanian |
+| rop | Kriol | ru | Russian | sk | Slovak |
+| sl | Slovenian | sm | Samoan | sq | Albanian |
+| sr | Serbian | sv | Swedish | ta | Tamil |
+| te | Telugu | tet | Tetum | th | Thai |
+| tlh | Klingon | to | Tongan | tr | Turkish |
+| uk | Ukrainian | uz | Uzbek | vi | Vietnamese |
+| yid | Yiddish | zh | Chinese | zu | Zulu |
--- a/src/wiqaya/filter.py
+++ b/src/wiqaya/filter.py
@ -0,0 +1,29 @@
+from  pathlib import Path
+
+
+
+# Directory that holds the per-language word lists (``<lang>.txt``): the
+# ``data`` folder in the third-level ancestor directory of this file.
+# NOTE(review): for ``src/wiqaya/filter.py`` this is the repository root, i.e.
+# outside the package directory — confirm the word lists are shipped with
+# installed distributions.
+DATA_DIR = Path(__file__).parent.parent.parent / "data"
+
+class Wiqaya:
+    def __init__(self, lang: str):
+        self.lang = lang
+
+        try:
+            with open(f"{DATA_DIR}/{self.lang}.txt", "r", encoding="utf-8") as f:
+                self.WORDS = set(line.strip() for line in f)
+
+        except FileNotFoundError:
+            raise ValueError(f"Language '{self.lang}' not supported")
+
+    def is_profane(self, text) -> bool:
+        words = text.lower().split()
+        return any(word in self.WORDS for word in words)
+
+    def get_profane_words(self, text) -> list[str]:
+        words = text.lower().split()
+        return [word for word in words if word in self.WORDS]
+
+
+
+t = Wiqaya("ar").get_profane_words("اهلا بك يا بزاز")
+print(t)
--- a/src/wiqaya/utils.py
+++ b/src/wiqaya/utils.py
--- a/uv.lock
+++ b/uv.lock
@ -0,0 +1,8 @@
+version = 1
+revision = 3
+requires-python = ">=3.12"
+
+[[package]]
+name = "wiqaya"
+version = "0.1.0"
+source = { editable = "." }