83 changed files with 47 additions and 171 deletions
--- a/.github/workflows/publish.yml
+++ b/.github/workflows/publish.yml
@ -0,0 +1,23 @@
 # Builds the package with uv and uploads it to PyPI whenever a GitHub
 # release is published.
 name: Publish to PyPI
 on:
  release:
    types: [published]
 jobs:
  publish:
    runs-on: ubuntu-latest
    # Deployment environment the job runs in.
    environment: pypi
    permissions:
      # Lets the job request an OIDC token; no API token is passed to the
      # publish step below, so it relies on PyPI trusted publishing.
      id-token: write
      # Declaring any permission sets every unlisted one to "none", so grant
      # read access explicitly; actions/checkout needs it on private repos.
      contents: read
    steps:
      - uses: actions/checkout@v4
      - uses: astral-sh/setup-uv@v5
      - name: Build
        run: uv build
      - name: Publish
        uses: pypa/gh-action-pypi-publish@release/v1
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@ -1,34 +0,0 @@
 # Manually triggered release: reads the version from pyproject.toml, builds
 # the package, creates a matching GitHub release, then publishes to PyPI.
 name: Release
 on:
  workflow_dispatch:
 jobs:
  release:
    runs-on: ubuntu-latest
    environment: pypi
    permissions:
      # Lets the job request an OIDC token; the PyPI step passes no API token.
      id-token: write
      # Needed by the release step, which creates a tag and a GitHub release.
      contents: write
    steps:
      - uses: actions/checkout@v4
      - uses: astral-sh/setup-uv@v5
      - name: Get version from pyproject.toml
        id: get_version
        # tomllib is part of the standard library from Python 3.11 onwards.
        # The redirect target is quoted so the path cannot be word-split.
        run: |
          VERSION=$(python -c "import tomllib; data=tomllib.load(open('pyproject.toml','rb')); print(data['project']['version'])")
          echo "version=$VERSION" >> "$GITHUB_OUTPUT"
      - name: Build
        run: uv build
      # NOTE(review): the GitHub release is created before the PyPI upload, so
      # a failed upload leaves a release without a package - confirm intended.
      - name: Create GitHub Release
        uses: softprops/action-gh-release@v2
        with:
          tag_name: v${{ steps.get_version.outputs.version }}
          name: v${{ steps.get_version.outputs.version }}
          generate_release_notes: true
      - name: Publish to PyPI
        uses: pypa/gh-action-pypi-publish@release/v1
--- a/README-ar.md
+++ b/README-ar.md
@ -20,10 +20,8 @@ w.get_profane_words("نص فيه حرامي و أطرش")   # ['حرامي', 'أ
 ```
 > [!NOTE]
-> تدعم المكتبة إزالة التشكيل تلقائياً عند استخدام اللغة العربية
+> المكتبة تدعم إزالة التشكيل من الكلمات تلقائياً
 > [!TIP]
 > يدعم المشروع النمط البديل (Wildcard) في قوائم الكلمات — استخدم `*` للتطابق مع أي تسلسل من الأحرف (مثال: `bad*` تطابق `badly`، و`*word*` تطابق أي كلمة تحتوي على `word`)
 ## اللغات المدعومة 
--- a/README.md
+++ b/README.md
@ -22,11 +22,6 @@ w.is_profane("Hello World") # False
 w.get_profane_words("this is damn annoying") # ['damn']
 ```
 > [!NOTE]
 > The library automatically removes Arabic diacritics (Tashkeel) when using Arabic language mode
 > [!TIP]
 > Wildcard patterns are supported in word lists — use `*` to match any sequence of characters (e.g., `bad*` matches `badly`, `*word*` matches anything containing `word`)
 ## Supported Languages
--- a/src/wiqaya/data/af.txt
+++ b/src/wiqaya/data/af.txt
--- a/src/wiqaya/data/am.txt
+++ b/src/wiqaya/data/am.txt
--- a/src/wiqaya/data/ar.txt
+++ b/src/wiqaya/data/ar.txt
@ -966,7 +966,7 @@ zwimel
 ابو فص
 ابو قرعة
 اتن
-*احا*
+احا
 احترم نفسك
 احتلام
 احلي كث
@ -1275,11 +1275,10 @@ zwimel
 نكت امه
 نياكة
 نياكه
-*نيك*
+نيك
 واطي
 وسخ
 ولد القحبة
 ولد القحبه
 يا هبيلة
 يلعن
 *كس*
--- a/src/wiqaya/data/az.txt
+++ b/src/wiqaya/data/az.txt
--- a/src/wiqaya/data/be.txt
+++ b/src/wiqaya/data/be.txt
--- a/src/wiqaya/data/bg.txt
+++ b/src/wiqaya/data/bg.txt
--- a/src/wiqaya/data/ca.txt
+++ b/src/wiqaya/data/ca.txt
--- a/src/wiqaya/data/ceb.txt
+++ b/src/wiqaya/data/ceb.txt
--- a/src/wiqaya/data/cs.txt
+++ b/src/wiqaya/data/cs.txt
--- a/src/wiqaya/data/cy.txt
+++ b/src/wiqaya/data/cy.txt
--- a/src/wiqaya/data/da.txt
+++ b/src/wiqaya/data/da.txt
--- a/src/wiqaya/data/de.txt
+++ b/src/wiqaya/data/de.txt
--- a/src/wiqaya/data/dz.txt
+++ b/src/wiqaya/data/dz.txt
--- a/src/wiqaya/data/el.txt
+++ b/src/wiqaya/data/el.txt
--- a/src/wiqaya/data/en.txt
+++ b/src/wiqaya/data/en.txt
--- a/src/wiqaya/data/eo.txt
+++ b/src/wiqaya/data/eo.txt
--- a/src/wiqaya/data/es.txt
+++ b/src/wiqaya/data/es.txt
--- a/src/wiqaya/data/et.txt
+++ b/src/wiqaya/data/et.txt
--- a/src/wiqaya/data/eu.txt
+++ b/src/wiqaya/data/eu.txt
--- a/src/wiqaya/data/fa.txt
+++ b/src/wiqaya/data/fa.txt
--- a/src/wiqaya/data/fi.txt
+++ b/src/wiqaya/data/fi.txt
--- a/src/wiqaya/data/fil.txt
+++ b/src/wiqaya/data/fil.txt
--- a/src/wiqaya/data/fr.txt
+++ b/src/wiqaya/data/fr.txt
--- a/src/wiqaya/data/gd.txt
+++ b/src/wiqaya/data/gd.txt
--- a/src/wiqaya/data/gl.txt
+++ b/src/wiqaya/data/gl.txt
--- a/src/wiqaya/data/hi.txt
+++ b/src/wiqaya/data/hi.txt
--- a/src/wiqaya/data/hr.txt
+++ b/src/wiqaya/data/hr.txt
--- a/src/wiqaya/data/hu.txt
+++ b/src/wiqaya/data/hu.txt
--- a/src/wiqaya/data/hy.txt
+++ b/src/wiqaya/data/hy.txt
--- a/src/wiqaya/data/id.txt
+++ b/src/wiqaya/data/id.txt
--- a/src/wiqaya/data/is.txt
+++ b/src/wiqaya/data/is.txt
--- a/src/wiqaya/data/it.txt
+++ b/src/wiqaya/data/it.txt
--- a/src/wiqaya/data/ja.txt
+++ b/src/wiqaya/data/ja.txt
--- a/src/wiqaya/data/kab.txt
+++ b/src/wiqaya/data/kab.txt
--- a/src/wiqaya/data/kh.txt
+++ b/src/wiqaya/data/kh.txt
--- a/src/wiqaya/data/ko.txt
+++ b/src/wiqaya/data/ko.txt
--- a/src/wiqaya/data/la.txt
+++ b/src/wiqaya/data/la.txt
--- a/src/wiqaya/data/lt.txt
+++ b/src/wiqaya/data/lt.txt
--- a/src/wiqaya/data/lv.txt
+++ b/src/wiqaya/data/lv.txt
--- a/src/wiqaya/data/mi.txt
+++ b/src/wiqaya/data/mi.txt
--- a/src/wiqaya/data/mk.txt
+++ b/src/wiqaya/data/mk.txt
--- a/src/wiqaya/data/ml.txt
+++ b/src/wiqaya/data/ml.txt
--- a/src/wiqaya/data/mn.txt
+++ b/src/wiqaya/data/mn.txt
--- a/src/wiqaya/data/mr.txt
+++ b/src/wiqaya/data/mr.txt
--- a/src/wiqaya/data/ms.txt
+++ b/src/wiqaya/data/ms.txt
--- a/src/wiqaya/data/mt.txt
+++ b/src/wiqaya/data/mt.txt
--- a/src/wiqaya/data/my.txt
+++ b/src/wiqaya/data/my.txt
--- a/src/wiqaya/data/nl.txt
+++ b/src/wiqaya/data/nl.txt
--- a/src/wiqaya/data/no.txt
+++ b/src/wiqaya/data/no.txt
--- a/src/wiqaya/data/pih.txt
+++ b/src/wiqaya/data/pih.txt
--- a/src/wiqaya/data/piy.txt
+++ b/src/wiqaya/data/piy.txt
--- a/src/wiqaya/data/pl.txt
+++ b/src/wiqaya/data/pl.txt
--- a/src/wiqaya/data/pt.txt
+++ b/src/wiqaya/data/pt.txt
--- a/src/wiqaya/data/ro.txt
+++ b/src/wiqaya/data/ro.txt
--- a/src/wiqaya/data/rop.txt
+++ b/src/wiqaya/data/rop.txt
--- a/src/wiqaya/data/ru.txt
+++ b/src/wiqaya/data/ru.txt
--- a/src/wiqaya/data/sk.txt
+++ b/src/wiqaya/data/sk.txt
--- a/src/wiqaya/data/sl.txt
+++ b/src/wiqaya/data/sl.txt
--- a/src/wiqaya/data/sm.txt
+++ b/src/wiqaya/data/sm.txt
--- a/src/wiqaya/data/sq.txt
+++ b/src/wiqaya/data/sq.txt
--- a/src/wiqaya/data/sr.txt
+++ b/src/wiqaya/data/sr.txt
--- a/src/wiqaya/data/sv.txt
+++ b/src/wiqaya/data/sv.txt
--- a/src/wiqaya/data/ta.txt
+++ b/src/wiqaya/data/ta.txt
--- a/src/wiqaya/data/te.txt
+++ b/src/wiqaya/data/te.txt
--- a/src/wiqaya/data/tet.txt
+++ b/src/wiqaya/data/tet.txt
--- a/src/wiqaya/data/th.txt
+++ b/src/wiqaya/data/th.txt
--- a/src/wiqaya/data/tlh.txt
+++ b/src/wiqaya/data/tlh.txt
--- a/src/wiqaya/data/to.txt
+++ b/src/wiqaya/data/to.txt
--- a/src/wiqaya/data/tr.txt
+++ b/src/wiqaya/data/tr.txt
--- a/src/wiqaya/data/uk.txt
+++ b/src/wiqaya/data/uk.txt
--- a/src/wiqaya/data/uz.txt
+++ b/src/wiqaya/data/uz.txt
--- a/src/wiqaya/data/vi.txt
+++ b/src/wiqaya/data/vi.txt
--- a/src/wiqaya/data/yid.txt
+++ b/src/wiqaya/data/yid.txt
--- a/src/wiqaya/data/zh.txt
+++ b/src/wiqaya/data/zh.txt
--- a/src/wiqaya/data/zu.txt
+++ b/src/wiqaya/data/zu.txt
--- a/pyproject.toml
+++ b/pyproject.toml
@ -1,6 +1,6 @@
 [project]
 name = "wiqaya"
-version = "0.2.5"
+version = "0.2.0"
 description = "A Python library for multilingual profanity detection and filtering. It identifies and censors offensive or abusive words across multiple languages."
 readme = "README.md"
 license = {text = "MIT"}
@ -18,6 +18,3 @@ build-backend = "uv_build"
 dev = [
    "pytest>=9.0.2",
 ]
 [tool.setuptools.package-data]
 wiqaya = ["data/*.txt"]
--- a/src/wiqaya/filter.py
+++ b/src/wiqaya/filter.py
@ -1,123 +1,38 @@
 from  pathlib import Path
 from .utils import remove_tashkeel
 import re
-DATA_DIR = Path(__file__).parent / "data"
+
 DATA_DIR = Path(__file__).parent.parent.parent / "data"
 class Wiqaya:
    def __init__(self, lang: str):
        """
        Initialize the Wiqaya profanity filter for a given language.
        Loads the word list from a language-specific .txt file in the data directory.
        Entries containing '*' are treated as wildcard patterns and compiled into
        regex objects. Plain entries are stored in a set for O(1) lookup.
        Args:
            lang (str): Language code (e.g., 'ar', 'en'). Must match a filename in data/.
        Raises:
            ValueError: If no word list file exists for the given language.
        """
        self.lang = lang
        try:
            with open(f"{DATA_DIR}/{self.lang}.txt", "r", encoding="utf-8") as f:
-                lines = [line.strip() for line in f if line.strip()]
+                self.WORDS = set(line.strip() for line in f)
        except FileNotFoundError:
            raise ValueError(f"Language '{self.lang}' not supported")
-        self.WORDS = set()
+    def is_profane(self, text) -> bool:
-        self._patterns = []
+        words = self._process(text)
-        for entry in lines:
+        return any(word in self.WORDS for word in words)
            if "*" in entry:
                # Convert wildcard to regex: *word* → .*word.*, word* → word.*
                regex = re.escape(entry).replace(r"\*", ".*")
                self._patterns.append(re.compile(f"^{regex}$"))
            else:
                self.WORDS.add(entry)
-    def _matches_any_pattern(self, word: str) -> bool:
+    def get_profane_words(self, text) -> list[str]:
-        """
+        words = self._process(text)
-        Check whether a word matches any of the compiled wildcard regex patterns.
+        return [word for word in words if word in self.WORDS]
        Args:
            word (str): The word to test.
        Returns:
            bool: True if the word matches at least one pattern, False otherwise.
        """
        return any(p.match(word) for p in self._patterns)
    def _is_bad(self, word: str) -> bool:
        """
        Determine if a single word is considered profane.
        Checks both the exact-match word set and the wildcard pattern list.
        Args:
            word (str): The word to check.
        Returns:
            bool: True if the word is profane, False otherwise.
        """
        return word in self.WORDS or self._matches_any_pattern(word)
    def is_profane(self, text: str) -> bool:
        """
        Return True if the text contains at least one profane word.
        Args:
            text (str): The input text to scan.
        Returns:
            bool: True if any profane word is found, False otherwise.
        """
        return any(self._is_bad(w) for w in self._process(text))
    def get_profane_words(self, text: str) -> list[str]:
        """
        Extract and return all profane words found in the text.
        Args:
            text (str): The input text to scan.
        Returns:
            list[str]: A list of every word in the text that is considered profane.
        """
        return [w for w in self._process(text) if self._is_bad(w)]
    def censor(self, text: str, char: str = "*") -> str:
-        """
+        words = self._process(text)
-        Replace each profane word in the text with a repeated censor character.
+        for word in words:
-
+            if word in self.WORDS:
        The replacement preserves the original word's length (e.g., 'hell' → '****').
        Args:
            text (str): The input text to censor.
            char (str): The character used for censoring. Defaults to '*'.
        Returns:
            str: The censored version of the input text.
        """
        for word in self._process(text):
            if self._is_bad(word):
                text = text.replace(word, char * len(word))
        return text
    def _process(self, text: str) -> list[str]:
        """
        Normalize and tokenize the input text into a list of words.
        For Arabic text, diacritics (tashkeel) are stripped first to prevent
        users from bypassing the filter by adding vowel marks to profane words.
        The text is then lowercased and split on whitespace.
        Args:
            text (str): The raw input text.
        Returns:
            list[str]: A list of normalized, lowercase tokens.
        """
        if self.lang == "ar":
            text = remove_tashkeel(text)
        return text.lower().split()
--- a/tests/test_filter.py
+++ b/tests/test_filter.py
@ -56,20 +56,3 @@ def test_invalid_lang():
    import pytest
    with pytest.raises(ValueError):
        Wiqaya(lang="xx")
 def test_wildcard_support():
    """Tokens that merely contain a listed word are flagged and censored."""
    w = Wiqaya(lang="en")
    # is_profane: the listed word at the end, start and middle of a token
    assert w.is_profane("wwsfuck")
    assert w.is_profane("fuckwedf")
    assert w.is_profane("wd+wfucked+")
    # get_profane_words: the whole matching token is returned, not the stem
    assert w.get_profane_words("hello fsdfuckwwq clean") == ["fsdfuckwwq"]
    # censor: the replacement keeps each token's length
    assert w.censor("hello dsfuckw there") == "hello ******* there"
    assert w.censor("dsfuckw ffdamn", char="#") == "####### ######"
--- a/uv.lock
+++ b/uv.lock
@ -65,7 +65,7 @@ wheels = [
 [[package]]
 name = "wiqaya"
-version = "0.2.5"
+version = "0.2.0"
 source = { editable = "." }
 [package.dev-dependencies]