ci(workflow): remove obsolete publish workflow

ci(workflow): add PyPI publish step to release workflow and remove separate publish workflow
ci(workflow): set release draft to false
2026-03-19 17:09:12 +02:00 · 2026-03-19 17:08:41 +02:00 · 2026-03-19 16:48:17 +02:00 · 2026-03-19 16:42:05 +02:00 · 2026-03-19 16:37:46 +02:00 · 2026-03-19 16:24:08 +02:00
8 changed files with 123 additions and 30 deletions
--- a/.github/workflows/publish.yml
+++ b/.github/workflows/publish.yml
@ -1,23 +0,0 @@
-name: Publish to PyPI
-
-on:
-  release:
-    types: [published]
-
-jobs:
-  publish:
-    runs-on: ubuntu-latest
-    environment: pypi
-    permissions:
-      id-token: write
-
-    steps:
-      - uses: actions/checkout@v4
-
-      - uses: astral-sh/setup-uv@v5
-
-      - name: Build
-        run: uv build
-
-      - name: Publish
-        uses: pypa/gh-action-pypi-publish@release/v1
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@ -0,0 +1,34 @@
+# Manually triggered release pipeline: build the package, create a GitHub
+# release tagged with the version from pyproject.toml, then upload to PyPI.
+name: Release
+on:
+  # No push/tag trigger is declared: the workflow runs only when started by hand.
+  workflow_dispatch:
+
+jobs:
+  release:
+    runs-on: ubuntu-latest
+    environment: pypi
+    permissions:
+      # NOTE(review): presumably for PyPI Trusted Publishing (OIDC), since the
+      # publish step below is given no API token - confirm.
+      id-token: write
+      # NOTE(review): presumably required by the release step to create the
+      # tag and the release - confirm.
+      contents: write
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: astral-sh/setup-uv@v5
+
+      # Read project.version from pyproject.toml and expose it to later steps
+      # as the `version` output. tomllib requires Python 3.11 or newer.
+      - name: Get version from pyproject.toml
+        id: get_version
+        run: |
+          VERSION=$(python -c "import tomllib; data=tomllib.load(open('pyproject.toml','rb')); print(data['project']['version'])")
+          echo "version=$VERSION" >> $GITHUB_OUTPUT
+
+      - name: Build
+        run: uv build
+
+      # Tag and title are both "v<version>". The release is created before the
+      # PyPI upload, so a failed upload leaves the tag and release in place.
+      - name: Create GitHub Release
+        uses: softprops/action-gh-release@v2
+        with:
+          tag_name: v${{ steps.get_version.outputs.version }}
+          name: v${{ steps.get_version.outputs.version }}
+          generate_release_notes: true
+
+      # NOTE(review): no inputs are passed; presumably the action uploads the
+      # artifacts produced by `uv build` from its default directory - confirm.
+      - name: Publish to PyPI
+        uses: pypa/gh-action-pypi-publish@release/v1
--- a/README-ar.md
+++ b/README-ar.md
@ -22,6 +22,9 @@ w.get_profane_words("نص فيه حرامي و أطرش")   # ['حرامي', 'أ
 > [!NOTE]
 > تدعم المكتبة إزالة التشكيل تلقائياً عند استخدام اللغة العربية

+> [!TIP]
+> يدعم المشروع النمط البديل (Wildcard) في قوائم الكلمات — استخدم `*` للتطابق مع أي تسلسل من الأحرف (مثال: `bad*` تطابق `badly`، و`*word*` تطابق أي كلمة تحتوي على `word`)
+
 ## اللغات المدعومة 


--- a/README.md
+++ b/README.md
@ -25,6 +25,9 @@ w.get_profane_words("this is damn annoying") # ['damn']
 > [!NOTE]
 > The library automatically removes Arabic diacritics (Tashkeel) when using Arabic language mode

+> [!TIP]
+> Wildcard patterns are supported in word lists — use `*` to match any sequence of characters (e.g., `bad*` matches `badly`, `*word*` matches anything containing `word`)
+
 ## Supported Languages

 | Code | Language | Code | Language | Code | Language |
--- a/pyproject.toml
+++ b/pyproject.toml
@ -1,6 +1,6 @@
 [project]
 name = "wiqaya"
-version = "0.2.4"
+version = "0.2.5"
 description = "A Python library for multilingual profanity detection and filtering. It identifies and censors offensive or abusive words across multiple languages."
 readme = "README.md"
 license = {text = "MIT"}
--- a/src/wiqaya/data/ar.txt
+++ b/src/wiqaya/data/ar.txt
@ -966,7 +966,7 @@ zwimel
 ابو فص
 ابو قرعة
 اتن
-احا
+*احا*
 احترم نفسك
 احتلام
 احلي كث
@ -1275,10 +1275,11 @@ zwimel
 نكت امه
 نياكة
 نياكه
-نيك
+*نيك*
 واطي
 وسخ
 ولد القحبة
 ولد القحبه
 يا هبيلة
 يلعن
+*كس*
--- a/src/wiqaya/filter.py
+++ b/src/wiqaya/filter.py
@ -6,6 +6,19 @@ DATA_DIR = Path(__file__).parent / "data"

 class Wiqaya:
    def __init__(self, lang: str):
+        """
+        Initialize the Wiqaya profanity filter for a given language.
+
+        Loads the word list from a language-specific .txt file in the data directory.
+        Entries containing '*' are treated as wildcard patterns and compiled into
+        regex objects. Plain entries are stored in a set for O(1) lookup.
+
+        Args:
+            lang (str): Language code (e.g., 'ar', 'en'). Must match a filename in data/.
+        
+        Raises:
+            ValueError: If no word list file exists for the given language.
+        """
        self.lang = lang
        try:
            with open(f"{DATA_DIR}/{self.lang}.txt", "r", encoding="utf-8") as f:
@ -15,7 +28,6 @@ class Wiqaya:

        self.WORDS = set()
        self._patterns = []
-
        for entry in lines:
            if "*" in entry:
                # Convert wildcard to regex: *word* → .*word.*, word* → word.*
@ -25,24 +37,87 @@ class Wiqaya:
                self.WORDS.add(entry)

     def _matches_any_pattern(self, word: str) -> bool:
+        """
+        Check whether a word matches any of the compiled wildcard regex patterns.
+
+        Every entry of ``self._patterns`` is tried with ``.match(word)``; the
+        scan stops at the first hit because ``any`` short-circuits.
+
+        NOTE(review): assuming the entries are ``re.Pattern`` objects, ``match``
+        anchors only at the start of ``word``. Whether a pattern must also
+        consume the whole word depends on how the regex is built in
+        ``__init__`` - confirm that entries with only a leading ``*`` cannot
+        match a mere prefix of the word.
+
+        Args:
+            word (str): The word to test.
+
+        Returns:
+            bool: True if the word matches at least one pattern, False otherwise.
+        """
         return any(p.match(word) for p in self._patterns)

     def _is_bad(self, word: str) -> bool:
+        """
+        Determine if a single word is considered profane.
+
+        Checks both the exact-match word set and the wildcard pattern list.
+        The membership test on ``self.WORDS`` runs first; the wildcard patterns
+        are only scanned when that test fails, because ``or`` short-circuits.
+
+        The word is compared as given: no normalization happens here.
+
+        Args:
+            word (str): The word to check.
+
+        Returns:
+            bool: True if the word is profane, False otherwise.
+        """
         return word in self.WORDS or self._matches_any_pattern(word)

-    def is_profane(self, text) -> bool:
+    def is_profane(self, text: str) -> bool:
+        """
+        Return True if the text contains at least one profane word.
+
+        The text is tokenized by ``_process`` and each token is tested with
+        ``_is_bad``; evaluation stops at the first profane token because
+        ``any`` short-circuits.
+
+        Args:
+            text (str): The input text to scan.
+
+        Returns:
+            bool: True if any profane word is found, False otherwise.
+        """
         return any(self._is_bad(w) for w in self._process(text))

-    def get_profane_words(self, text) -> list[str]:
+    def get_profane_words(self, text: str) -> list[str]:
+        """
+        Extract and return all profane words found in the text.
+
+        The returned items are the tokens produced by ``_process`` (not slices
+        of the raw input), listed in order of appearance; a word that occurs
+        several times is returned once per occurrence.
+
+        Args:
+            text (str): The input text to scan.
+
+        Returns:
+            list[str]: A list of every word in the text that is considered profane.
+        """
         return [w for w in self._process(text) if self._is_bad(w)]

    def censor(self, text: str, char: str = "*") -> str:
+        """
+        Replace each profane word in the text with a repeated censor character.
+
+        The replacement preserves the original word's length (e.g., 'hell' → '****').
+
+        Args:
+            text (str): The input text to censor.
+            char (str): The character used for censoring. Defaults to '*'.
+
+        Returns:
+            str: The censored version of the input text.
+        """
        for word in self._process(text):
            if self._is_bad(word):
                text = text.replace(word, char * len(word))
        return text

     def _process(self, text: str) -> list[str]:
+        """
+        Normalize and tokenize the input text into a list of words.
+
+        For Arabic text, diacritics (tashkeel) are stripped first to prevent
+        users from bypassing the filter by adding vowel marks to profane words.
+        The text is then lowercased and split on whitespace.
+
+        Stripping only happens when ``self.lang == "ar"``. Nothing else is
+        removed: punctuation attached to a word stays part of its token.
+
+        Args:
+            text (str): The raw input text.
+
+        Returns:
+            list[str]: A list of normalized, lowercase tokens.
+        """
         if self.lang == "ar":
             text = remove_tashkeel(text)
         return text.lower().split()
--- a/uv.lock
+++ b/uv.lock
@ -65,7 +65,7 @@ wheels = [

 [[package]]
 name = "wiqaya"
-version = "0.2.4"
+version = "0.2.5"
 source = { editable = "." }

 [package.dev-dependencies]
Author	SHA1	Message	Date
tayf	950e72e09c	ci(workflow): remove obsolete publish workflow	2026-03-19 17:09:12 +02:00
tayf	b73c2c6134	ci(workflow): add PyPI publish step to release workflow and remove separate publish workflow	2026-03-19 17:08:41 +02:00
tayf	6aebd9ff2b	ci(workflow): set release draft to false	2026-03-19 16:48:17 +02:00
tayf	c50f4b4324	ci(workflow): use release event for publishing	2026-03-19 16:42:05 +02:00
tayf	740fabbbe9	update workflow	2026-03-19 16:37:46 +02:00
tayf	dea61b52ea	update workflow	2026-03-19 16:24:08 +02:00
tayf	928b233817	chore: bump version to 0.2.5	2026-03-10 14:24:36 +02:00
tayf	c40cebcab6	update	2026-03-10 13:56:23 +02:00
tayf	fbd872a40a	update db	2026-03-10 13:52:44 +02:00