Skip to content
This repository has been archived by the owner on Mar 3, 2025. It is now read-only.

Commit

Permalink
Normalizerを複数指定できるようにした!勝利!!!
Browse files (browse the repository at this point in the history)
  • Loading branch information
Chanmoro committed Feb 12, 2021
1 parent 71fe521 commit c19024c
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 9 deletions.
9 changes: 5 additions & 4 deletions naivesearch/indexer/chunker.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,10 @@


class CharacterChunker:
    """Split a string into single characters after running it through formatters.

    The formatters are applied one after another, each receiving the output
    of the previous one, so their order in the list matters.
    """

    # The annotation is quoted (a forward reference) so this class definition
    # does not depend on ``Formatter`` being resolvable at definition time.
    def __init__(self, formatters: "List[Formatter]"):
        """Store the formatters to apply before chunking.

        Args:
            formatters: Callables applied in list order to the input string.
                Presumably each takes a ``str`` and returns a ``str`` --
                confirm against the ``Formatter`` definition.
        """
        self.formatters = formatters

    def __call__(self, x: str) -> "List[str]":
        """Apply every formatter to ``x`` in order and return its characters.

        Args:
            x: The text to format and split.

        Returns:
            ``list(...)`` of the fully formatted value; for a string this is
            one element per character. With no formatters, ``x`` is split
            unchanged.
        """
        for formatter in self.formatters:
            # Feed each formatter the previous formatter's result.
            x = formatter(x)
        return list(x)
9 changes: 6 additions & 3 deletions naivesearch/indexer/test_chunker.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,14 @@
from typing import Callable, List, Optional

from .chunker import CharacterChunker
from .formatter import UnicodeNormalizer
from .formatter import LowerCaseNormalizer, UnicodeNormalizer


class TestCharacterChunker:
    """Tests for ``CharacterChunker`` configured with several normalizers."""

    def test(self):
        """A mixed-case word is chunked into lower-case single characters."""
        # The chunker takes a list of formatters; both normalizers are
        # chained here, in the same order used when building the index.
        format_chunker = CharacterChunker([
            UnicodeNormalizer(),
            LowerCaseNormalizer(),
        ])
        result = format_chunker('Hello')
        # The capital 'H' in the input must come back as 'h'.
        assert result == ['h', 'e', 'l', 'l', 'o']
6 changes: 4 additions & 2 deletions naivesearch/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,10 @@ def file_reader(filepath):
index = InvertedIndex(
file_reader(filepath),
[
# BigramConverter(CharacterChunker(UnicodeNormalizer())),
BigramConverter(CharacterChunker(LowerCaseNormalizer()))
BigramConverter(CharacterChunker([
UnicodeNormalizer(),
LowerCaseNormalizer(),
]))
]
)
return index

0 comments on commit c19024c

Please sign in to comment.