first commit
This commit is contained in:
@@ -0,0 +1,88 @@
|
||||
import unittest
|
||||
|
||||
from keyword_extractor import KeywordExtractor
|
||||
|
||||
|
||||
class FakeToken:
    """Minimal stand-in for an NLP token that exposes plain attributes.

    The constructor sets ``text``, ``pos_``, ``lemma_`` and ``is_stop`` from
    its arguments and derives ``is_punct`` from ``text``.
    """

    def __init__(self, text: str, pos: str, lemma: str, is_stop: bool) -> None:
        """Store the token fields and derive ``is_punct`` from *text*.

        Args:
            text: Surface form of the token.
            pos: Part-of-speech tag, stored as ``pos_``.
            lemma: Lemma of the token, stored as ``lemma_``.
            is_stop: Whether the token is flagged as a stop word.
        """
        self.text = text
        self.pos_ = pos
        self.lemma_ = lemma
        self.is_stop = is_stop
        # Punctuation means "contains no alphanumeric character at all";
        # by this rule an empty string is also flagged as punctuation.
        self.is_punct = not any(ch.isalnum() for ch in text)

    def __repr__(self) -> str:
        """Return a constructor-like representation for readable failures."""
        return (
            f"{type(self).__name__}(text={self.text!r}, pos={self.pos_!r}, "
            f"lemma={self.lemma_!r}, is_stop={self.is_stop!r})"
        )
|
||||
|
||||
|
||||
class FakeNLP:
    """Whitespace-tokenizing stand-in for an NLP pipeline callable.

    Calling an instance with a string returns a list of ``FakeToken``
    objects, one per whitespace-separated word of the input.
    """

    def __init__(self, tag_map, stopwords) -> None:
        """Keep the lookup tables used when building tokens.

        Args:
            tag_map: Mapping from lower-cased word to POS tag; it is queried
                with ``.get`` and unknown words fall back to ``"NOUN"``.
            stopwords: Container of lower-cased stop words; it is queried
                with ``in``.
        """
        self.tag_map = tag_map
        self.stopwords = stopwords

    def __call__(self, text: str) -> "list[FakeToken]":
        """Split *text* on whitespace and return one token per word.

        Args:
            text: Input string to tokenize.

        Returns:
            A list of ``FakeToken`` objects in input order; empty when *text*
            is empty or contains only whitespace.
        """
        # str.split() with no separator already discards leading, trailing
        # and repeated whitespace, so the words need no further stripping.
        return [self._make_token(word) for word in text.split()]

    def _make_token(self, word: str) -> "FakeToken":
        """Build the token for one word using its lower-cased form for lookups."""
        lowered = word.lower()
        return FakeToken(
            text=word,
            pos=self.tag_map.get(lowered, "NOUN"),
            lemma=lowered,
            is_stop=lowered in self.stopwords,
        )
|
||||
|
||||
|
||||
class KeywordExtractorTests(unittest.TestCase):
    """Checks ``KeywordExtractor.extract`` against a deterministic fake NLP.

    The extractor is built once per class around a ``FakeNLP`` instance, so
    the POS tags and stop words seen by every test come from the fixture
    tables in ``setUpClass``.
    """

    @classmethod
    def setUpClass(cls) -> None:
        """Create the shared ``FakeNLP`` and ``KeywordExtractor`` fixtures."""
        # Lower-cased word -> POS tag for every word used in the tests below.
        tag_map = {
            "fiery": "ADJ",
            "dragon": "NOUN",
            "attack": "VERB",
            "explosive": "ADJ",
            "flames": "NOUN",
            "burning": "ADJ",
            "creature": "NOUN",
            "with": "ADP",
            "blaze": "NOUN",
            "power": "NOUN",
            "electric": "ADJ",
            "mouse": "NOUN",
            "using": "VERB",
            "thunder": "NOUN",
            "shock": "NOUN",
            "a": "DET",
            "very": "ADV",
            "strong": "ADJ",
            "and": "CCONJ",
            "dangerous": "ADJ",
        }

        stopwords = {"a", "very", "and", "with"}
        cls.nlp = FakeNLP(tag_map=tag_map, stopwords=stopwords)
        cls.extractor = KeywordExtractor(nlp=cls.nlp)

    def test_readme_main_example(self) -> None:
        """Five input words are expected to yield four keywords."""
        text = "fiery dragon attack explosive flames"
        result = self.extractor.extract(text)
        # NOTE(review): the expected list implies "fiery"/"flames" collapse
        # into a single "fire" and "explosive" becomes "explosion" - confirm
        # against KeywordExtractor.
        self.assertEqual(result, ["fire", "dragon", "attack", "explosion"])

    def test_synonym_normalization(self) -> None:
        """"burning" and "blaze" are expected to surface as one "fire"."""
        text = "burning creature with blaze power"
        result = self.extractor.extract(text)
        # "with" is a stop word in the fixture and is absent from the result.
        self.assertEqual(result, ["fire", "creature", "power"])

    def test_mixed_types(self) -> None:
        """Only the first three words are expected in the result."""
        text = "electric mouse using thunder shock"
        result = self.extractor.extract(text)
        # NOTE(review): "thunder" and "shock" are missing from the expected
        # list; presumably they are merged or dropped by the extractor -
        # verify.
        self.assertEqual(result, ["electric", "mouse", "using"])

    def test_noise_input(self) -> None:
        """Stop words, including a repeated one, are expected to be dropped."""
        text = "a very very strong and dangerous creature"
        result = self.extractor.extract(text)
        self.assertEqual(result, ["strong", "dangerous", "creature"])
|
||||
|
||||
|
||||
# Run the test suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
|
||||
Reference in New Issue
Block a user