Compare commits

..

No commits in common. "master" and "v0.2.1" have entirely different histories.

83 changed files with 47 additions and 171 deletions

23
.github/workflows/publish.yml vendored Normal file
View file

@@ -0,0 +1,23 @@
# Publish the package to PyPI whenever a GitHub release is published.
# Uses PyPI trusted publishing (OIDC) — no API token secret required.
name: Publish to PyPI

on:
  release:
    types: [published]

jobs:
  publish:
    runs-on: ubuntu-latest
    environment: pypi
    permissions:
      # Required for PyPI trusted publishing via OIDC.
      id-token: write
    steps:
      - uses: actions/checkout@v4
      - uses: astral-sh/setup-uv@v5
      - name: Build
        run: uv build
      - name: Publish
        uses: pypa/gh-action-pypi-publish@release/v1

View file

@@ -1,34 +0,0 @@
# Manually-triggered release: reads the version from pyproject.toml,
# builds the distribution, creates a tagged GitHub release, and
# publishes to PyPI via trusted publishing (OIDC).
name: Release

on:
  workflow_dispatch:

jobs:
  release:
    runs-on: ubuntu-latest
    environment: pypi
    permissions:
      # OIDC token for PyPI trusted publishing.
      id-token: write
      # Needed to create the GitHub release / tag.
      contents: write
    steps:
      - uses: actions/checkout@v4
      - uses: astral-sh/setup-uv@v5
      - name: Get version from pyproject.toml
        id: get_version
        run: |
          VERSION=$(python -c "import tomllib; data=tomllib.load(open('pyproject.toml','rb')); print(data['project']['version'])")
          echo "version=$VERSION" >> $GITHUB_OUTPUT
      - name: Build
        run: uv build
      - name: Create GitHub Release
        uses: softprops/action-gh-release@v2
        with:
          tag_name: v${{ steps.get_version.outputs.version }}
          name: v${{ steps.get_version.outputs.version }}
          generate_release_notes: true
      - name: Publish to PyPI
        uses: pypa/gh-action-pypi-publish@release/v1

View file

@@ -20,10 +20,8 @@ w.get_profane_words("نص فيه حرامي و أطرش") # ['حرامي', 'أ
```
> [!NOTE]
> تدعم المكتبة إزالة التشكيل تلقائياً عند استخدام اللغة العربية
> المكتبة تدعم إزالة التشكيل من الكلمات تلقائياً
> [!TIP]
> يدعم المشروع النمط البديل (Wildcard) في قوائم الكلمات — استخدم `*` للتطابق مع أي تسلسل من الأحرف (مثال: `bad*` تطابق `badly`، و`*word*` تطابق أي كلمة تحتوي على `word`)
## اللغات المدعومة

View file

@@ -22,11 +22,6 @@ w.is_profane("Hello World") # False
w.get_profane_words("this is damn annoying") # ['damn']
```
> [!NOTE]
> The library automatically removes Arabic diacritics (Tashkeel) when using Arabic language mode
> [!TIP]
> Wildcard patterns are supported in word lists — use `*` to match any sequence of characters (e.g., `bad*` matches `badly`, `*word*` matches anything containing `word`)
## Supported Languages

View file

@@ -966,7 +966,7 @@ zwimel
ابو فص
ابو قرعة
اتن
*احا*
احا
احترم نفسك
احتلام
احلي كث
@@ -1275,11 +1275,10 @@ zwimel
نكت امه
نياكة
نياكه
*نيك*
نيك
واطي
وسخ
ولد القحبة
ولد القحبه
يا هبيلة
يلعن
*كس*

View file

@@ -1,6 +1,6 @@
[project]
name = "wiqaya"
version = "0.2.5"
version = "0.2.0"
description = "A Python library for multilingual profanity detection and filtering. It identifies and censors offensive or abusive words across multiple languages."
readme = "README.md"
license = {text = "MIT"}
@@ -18,6 +18,3 @@ build-backend = "uv_build"
dev = [
"pytest>=9.0.2",
]
[tool.setuptools.package-data]
wiqaya = ["data/*.txt"]

View file

@@ -1,123 +1,38 @@
import re
from pathlib import Path

from .utils import remove_tashkeel

# Word-list files ship inside the package: wiqaya/data/<lang>.txt
# (matches the [tool.setuptools.package-data] wiqaya = ["data/*.txt"] entry).
DATA_DIR = Path(__file__).parent / "data"
class Wiqaya:
    """Multilingual profanity filter backed by per-language word lists.

    Plain entries from the word list are kept in a set for O(1) lookup;
    entries containing ``*`` are compiled into wildcard regex patterns.
    """

    def __init__(self, lang: str):
        """
        Initialize the Wiqaya profanity filter for a given language.

        Loads the word list from a language-specific .txt file in the data
        directory. Entries containing '*' are treated as wildcard patterns
        and compiled into regex objects. Plain entries are stored in a set
        for O(1) lookup.

        Args:
            lang (str): Language code (e.g., 'ar', 'en'). Must match a
                filename in data/.

        Raises:
            ValueError: If no word list file exists for the given language.
        """
        self.lang = lang
        try:
            with open(f"{DATA_DIR}/{self.lang}.txt", "r", encoding="utf-8") as f:
                # Strip whitespace and drop blank lines.
                lines = [line.strip() for line in f if line.strip()]
        except FileNotFoundError:
            raise ValueError(f"Language '{self.lang}' not supported")

        self.WORDS: set[str] = set()
        self._patterns: list[re.Pattern[str]] = []
        for entry in lines:
            if "*" in entry:
                # Convert wildcard to regex: *word* -> .*word.*, word* -> word.*
                # re.escape first so other regex metacharacters stay literal.
                regex = re.escape(entry).replace(r"\*", ".*")
                self._patterns.append(re.compile(f"^{regex}$"))
            else:
                self.WORDS.add(entry)

    def _matches_any_pattern(self, word: str) -> bool:
        """
        Check whether a word matches any of the compiled wildcard regex patterns.

        Args:
            word (str): The word to test.

        Returns:
            bool: True if the word matches at least one pattern, False otherwise.
        """
        return any(p.match(word) for p in self._patterns)

    def _is_bad(self, word: str) -> bool:
        """
        Determine if a single word is considered profane.

        Checks both the exact-match word set and the wildcard pattern list.

        Args:
            word (str): The word to check.

        Returns:
            bool: True if the word is profane, False otherwise.
        """
        return word in self.WORDS or self._matches_any_pattern(word)

    def is_profane(self, text: str) -> bool:
        """
        Return True if the text contains at least one profane word.

        Args:
            text (str): The input text to scan.

        Returns:
            bool: True if any profane word is found, False otherwise.
        """
        return any(self._is_bad(w) for w in self._process(text))

    def get_profane_words(self, text: str) -> list[str]:
        """
        Extract and return all profane words found in the text.

        Args:
            text (str): The input text to scan.

        Returns:
            list[str]: A list of every word in the text that is considered
                profane.
        """
        return [w for w in self._process(text) if self._is_bad(w)]

    def censor(self, text: str, char: str = "*") -> str:
        """
        Replace each profane word in the text with a repeated censor character.

        The replacement preserves the original word's length
        (e.g., 'hell' -> '****').

        Args:
            text (str): The input text to censor.
            char (str): The character used for censoring. Defaults to '*'.

        Returns:
            str: The censored version of the input text.
        """
        # NOTE(review): tokens come from the lowercased text but are replaced
        # in the original text, so upper-case profanity may be missed; also
        # str.replace hits substrings anywhere in the text — confirm intended.
        for word in self._process(text):
            if self._is_bad(word):
                text = text.replace(word, char * len(word))
        return text

    def _process(self, text: str) -> list[str]:
        """
        Normalize and tokenize the input text into a list of words.

        For Arabic text, diacritics (tashkeel) are stripped first to prevent
        users from bypassing the filter by adding vowel marks to profane
        words. The text is then lowercased and split on whitespace.

        Args:
            text (str): The raw input text.

        Returns:
            list[str]: A list of normalized, lowercase tokens.
        """
        if self.lang == "ar":
            text = remove_tashkeel(text)
        return text.lower().split()

View file

@@ -55,21 +55,4 @@ def test_get_profane_words_en():
def test_invalid_lang():
    """An unknown language code must raise ValueError."""
    import pytest
    with pytest.raises(ValueError):
        Wiqaya(lang="xx")


def test_wildcard_support():
    """Wildcard word-list entries (e.g. *fuck*) match embedded profanity."""
    w = Wiqaya(lang="en")
    # is_profane
    assert w.is_profane("wwsfuck") == True
    assert w.is_profane("fuckwedf") == True
    assert w.is_profane("wd+wfucked+") == True
    # get_profane_words
    assert w.get_profane_words("hello fsdfuckwwq clean") == ["fsdfuckwwq"]
    # censor
    assert w.censor("hello dsfuckw there") == "hello ******* there"
    assert w.censor("dsfuckw ffdamn", char="#") == "####### ######"

2
uv.lock generated
View file

@@ -65,7 +65,7 @@ wheels = [
[[package]]
name = "wiqaya"
version = "0.2.5"
version = "0.2.0"
source = { editable = "." }
[package.dev-dependencies]