first commit

This commit is contained in:
2026-03-19 18:16:20 +01:00
commit 584b2e07b4
34 changed files with 4381 additions and 0 deletions

View File

@@ -0,0 +1,36 @@
import argparse
import json
from typing import Sequence
from keyword_extractor import KeywordExtractor
def _build_parser() -> argparse.ArgumentParser:
parser = argparse.ArgumentParser(
description="Extract normalized keywords from cleaned text.",
)
parser.add_argument(
"text",
nargs="+",
help="Input text to process. Pass as one quoted string or multiple words.",
)
parser.add_argument(
"--model",
default="en_core_web_sm",
help="spaCy model name (default: en_core_web_sm).",
)
return parser
def main(argv: Sequence[str] | None = None) -> None:
    """Run the keyword-extraction command-line interface.

    Parses the arguments, joins the positional ``text`` tokens with single
    spaces, builds a ``KeywordExtractor`` for the requested model, and prints
    the result of ``extract`` to stdout as JSON.

    Args:
        argv: Argument list to parse. When ``None``, argparse falls back to
            the process's command-line arguments.
    """
    parser = _build_parser()
    args = parser.parse_args(argv)

    # The positional argument arrives as a list of tokens; rebuild one string.
    text = " ".join(args.text)

    extractor = KeywordExtractor.from_default_model(model_name=args.model)
    keywords = extractor.extract(text)
    print(json.dumps(keywords))
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()