create filter, utils

2026-03-08 09:10:10 +02:00 · 2026-03-08 09:10:10 +02:00 · f9e47c91b4
commit f9e47c91b4
parent e25f01dcf6
4 changed files with 89 additions and 0 deletions
--- a/README.md
+++ b/README.md
@ -0,0 +1,52 @@
 # Wiqaya
 A Python library for multilingual profanity detection and filtering. It identifies and censors offensive or abusive words across multiple languages.
 ## Installation
 ```bash
 pip install wiqaya
 ```
 ## Usage
 ```python
 from wiqaya import Wiqaya
 w = Wiqaya(lang="ar")
 w.is_profane("هذا نص عادي")       # False
 w.is_profane("نص يحتوي شتيمة")    # True
 w.get_profane_words("نص فيه شتيمة")  # ['شتيمة']
 ```
 ## Supported Languages
 | Code | Language | Code | Language | Code | Language |
 |------|----------|------|----------|------|----------|
 | af | Afrikaans | am | Amharic | ar | Arabic |
 | az | Azerbaijani | be | Belarusian | bg | Bulgarian |
 | ca | Catalan | ceb | Cebuano | cs | Czech |
 | cy | Welsh | da | Danish | de | German |
 | dz | Dzongkha | el | Greek | en | English |
 | eo | Esperanto | es | Spanish | et | Estonian |
 | eu | Basque | fa | Persian | fi | Finnish |
 | fil | Filipino | fr | French | gd | Scottish Gaelic |
 | gl | Galician | hi | Hindi | hr | Croatian |
 | hu | Hungarian | hy | Armenian | id | Indonesian |
 | is | Icelandic | it | Italian | ja | Japanese |
 | kab | Kabyle | kh | Khmer | ko | Korean |
 | la | Latin | lt | Lithuanian | lv | Latvian |
 | mi | Maori | mk | Macedonian | ml | Malayalam |
 | mn | Mongolian | mr | Marathi | ms | Malay |
 | mt | Maltese | my | Burmese | nl | Dutch |
 | no | Norwegian | pih | Norfuk | piy | Picard |
 | pl | Polish | pt | Portuguese | ro | Romanian |
 | rop | Kriol | ru | Russian | sk | Slovak |
 | sl | Slovenian | sm | Samoan | sq | Albanian |
 | sr | Serbian | sv | Swedish | ta | Tamil |
 | te | Telugu | tet | Tetum | th | Thai |
 | tlh | Klingon | to | Tongan | tr | Turkish |
 | uk | Ukrainian | uz | Uzbek | vi | Vietnamese |
 | yid | Yiddish | zh | Chinese | zu | Zulu |
--- a/src/wiqaya/filter.py
+++ b/src/wiqaya/filter.py
@ -0,0 +1,29 @@
 import unicodedata
 from pathlib import Path
 # Directory holding the per-language word lists ("<lang>.txt"): the "data"
 # folder found three ``.parent`` hops up from this module file.
 DATA_DIR = Path(__file__).parent.parent.parent.joinpath("data")
 class Wiqaya:
    """Detect profane words in text using a per-language word list.

    The word list is read once, at construction time, from
    ``DATA_DIR/<lang>.txt`` (UTF-8, one entry per line).

    Attributes:
        lang: Language code the instance was created with.
        WORDS: Set of lower-cased, non-empty entries from the word list.
    """

    def __init__(self, lang: str):
        """Load the word list for *lang*.

        Args:
            lang: Language code; selects the file ``<lang>.txt``.

        Raises:
            ValueError: If no word-list file exists for *lang*.
        """
        self.lang = lang
        try:
            with open(f"{DATA_DIR}/{self.lang}.txt", "r", encoding="utf-8") as f:
                # Entries are lower-cased because input text is lower-cased
                # before lookup; blank lines are dropped so "" is never stored.
                self.WORDS = {
                    entry.lower() for line in f if (entry := line.strip())
                }
        except FileNotFoundError as err:
            raise ValueError(f"Language '{self.lang}' not supported") from err

    @staticmethod
    def _is_punctuation(char: str) -> bool:
        """Return True if *char* is in a Unicode punctuation category (P*)."""
        return unicodedata.category(char).startswith("P")

    @classmethod
    def _trim_punctuation(cls, token: str) -> str:
        """Return *token* without leading and trailing punctuation."""
        start, end = 0, len(token)
        while start < end and cls._is_punctuation(token[start]):
            start += 1
        while end > start and cls._is_punctuation(token[end - 1]):
            end -= 1
        return token[start:end]

    def _find(self, text: str):
        """Yield, in order, every whitespace-separated token found in WORDS.

        A token is tried verbatim first, so list entries that themselves
        contain punctuation still match. Otherwise it is retried with
        surrounding punctuation removed, so "word!" or "(word)" match "word".
        """
        for token in text.lower().split():
            if token in self.WORDS:
                yield token
                continue
            trimmed = self._trim_punctuation(token)
            if trimmed and trimmed in self.WORDS:
                yield trimmed

    def is_profane(self, text: str) -> bool:
        """Return True if at least one token of *text* is in the word list."""
        return next(self._find(text), None) is not None

    def get_profane_words(self, text: str) -> list[str]:
        """Return the listed words found in *text*, in order of appearance.

        Matches are returned lower-cased, without surrounding punctuation,
        and repeated once per occurrence.
        """
        return list(self._find(text))
 if __name__ == "__main__":
    # Manual smoke check. Guarded so that importing this module as a library
    # neither loads a word list nor prints anything.
    t = Wiqaya("ar").get_profane_words("اهلا بك يا بزاز")
    print(t)
--- a/src/wiqaya/utils.py
+++ b/src/wiqaya/utils.py
--- a/uv.lock
+++ b/uv.lock
@ -0,0 +1,8 @@
 version = 1
 revision = 3
 requires-python = ">=3.12"
 [[package]]
 name = "wiqaya"
 version = "0.1.0"
 source = { editable = "." }