From 8ced36795a4371ec27d687e699df327b2b41c33e Mon Sep 17 00:00:00 2001
From: tayf <144544047+be-at@users.noreply.github.com>
Date: Sun, 8 Mar 2026 19:55:55 +0200
Subject: [PATCH] feat: add wildcard pattern support for profanity words

---
 src/wiqaya/filter.py | 40 +++++++++++++++++++++++++---------------
 tests/test_filter.py | 19 ++++++++++++++++++-
 2 files changed, 43 insertions(+), 16 deletions(-)

diff --git a/src/wiqaya/filter.py b/src/wiqaya/filter.py
index c1d4e43..9680b4c 100644
--- a/src/wiqaya/filter.py
+++ b/src/wiqaya/filter.py
@@ -1,38 +1,79 @@
-from  pathlib import Path
+import re
+from pathlib import Path
+
 from .utils import remove_tashkeel
-
 
 DATA_DIR = Path(__file__).parent / "data"
 
 class Wiqaya:
+    """Word-list based profanity filter for one language.
+
+    Entries come from ``data/<lang>.txt``, one per line. An entry containing
+    ``*`` is a wildcard pattern in which ``*`` stands for any run of
+    characters; any other entry has to equal the whole normalised word.
+    """
+
     def __init__(self, lang: str):
+        """Load the word list and wildcard patterns for ``lang``.
+
+        Raises:
+            ValueError: If there is no data file for ``lang``.
+        """
         self.lang = lang
-
         try:
             with open(f"{DATA_DIR}/{self.lang}.txt", "r", encoding="utf-8") as f:
-                self.WORDS = set(line.strip() for line in f)
-
+                lines = [line.strip() for line in f if line.strip()]
         except FileNotFoundError:
             raise ValueError(f"Language '{self.lang}' not supported")
 
+        self.WORDS = set()
+        self._patterns = []
+
+        for entry in lines:
+            if "*" not in entry:
+                self.WORDS.add(entry)
+                continue
+            if not entry.strip("*"):
+                # A bare "*" would flag every word; ignore such entries.
+                continue
+            # Convert wildcard to regex: *word* → .*word.*, word* → word.*
+            regex = re.escape(entry).replace(r"\*", ".*")
+            self._patterns.append(re.compile(regex))
+
+    def _matches_any_pattern(self, word: str) -> bool:
+        """Return True if a wildcard pattern matches ``word`` in full."""
+        return any(p.fullmatch(word) for p in self._patterns)
+
+    def _is_bad(self, word: str) -> bool:
+        """Return True if ``word`` is a listed word or matches a pattern."""
+        return word in self.WORDS or self._matches_any_pattern(word)
+
     def is_profane(self, text) -> bool:
-        words = self._process(text)
-        return any(word in self.WORDS for word in words)
+        """Return True if ``text`` contains at least one profane word."""
+        return any(self._is_bad(w) for w in self._process(text))
 
     def get_profane_words(self, text) -> list[str]:
-        words = self._process(text)
-        return [word for word in words if word in self.WORDS]
+        """Return the profane words of ``text``, normalised, in text order."""
+        return [w for w in self._process(text) if self._is_bad(w)]
 
     def censor(self, text: str, char: str = "*") -> str:
-        words = self._process(text)
-        for word in words:
-            if word in self.WORDS:
-                text = text.replace(word, char * len(word))
-        return text
+        """Return ``text`` with each profane word replaced by ``char``.
+
+        Each whitespace-delimited token is normalised on its own and masked
+        in place, so detection does not depend on the token's original case
+        and other tokens that merely contain a profane word as a substring
+        are left untouched unless they are profane themselves. The mask has
+        one ``char`` per character of the normalised word.
+        """
+        def mask(match):
+            token = match.group()
+            word = "".join(self._process(token))
+            return char * len(word) if self._is_bad(word) else token
+
+        return re.sub(r"\S+", mask, text)
 
     def _process(self, text: str) -> list[str]:
+        """Normalise ``text`` and split it into words on whitespace."""
         if self.lang == "ar":
             text = remove_tashkeel(text)
         return text.lower().split()
-
-
diff --git a/tests/test_filter.py b/tests/test_filter.py
index 2156081..b6dfbea 100644
--- a/tests/test_filter.py
+++ b/tests/test_filter.py
@@ -55,4 +55,23 @@ def test_get_profane_words_en():
 def test_invalid_lang():
     import pytest
     with pytest.raises(ValueError):
-        Wiqaya(lang="xx")
\ No newline at end of file
+        Wiqaya(lang="xx")
+
+
+def test_wildcard_support():
+    # NOTE(review): assumes data/en.txt lists wildcard entries covering
+    # "fuck" and "damn" (e.g. "*fuck*"); confirm, since this patch does not
+    # touch the data files.
+    w = Wiqaya(lang="en")
+
+    # is_profane
+    assert w.is_profane("wwsfuck")
+    assert w.is_profane("fuckwedf")
+    assert w.is_profane("wd+wfucked+")
+
+    # get_profane_words
+    assert w.get_profane_words("hello fsdfuckwwq clean") == ["fsdfuckwwq"]
+
+    # censor
+    assert w.censor("hello dsfuckw there") == "hello ******* there"
+    assert w.censor("dsfuckw ffdamn", char="#") == "####### ######"