Skip to content

Commit

Permalink
諸々移動
Browse files Browse the repository at this point in the history
  • Loading branch information
p-baleine committed Feb 20, 2021
1 parent d8b3ce3 commit 4ce9a4f
Show file tree
Hide file tree
Showing 14 changed files with 86 additions and 66 deletions.
2 changes: 0 additions & 2 deletions naivesearch/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
from .indexer import InvertedIndex
from .main import naivesearch

# Every exported name needs its own comma-terminated entry: adjacent string
# literals are concatenated silently, which would yield one bogus name.
__all__ = [
    'InvertedIndex',
    'naivesearch',
]
5 changes: 4 additions & 1 deletion naivesearch/indexer/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
from .inverted_index import Chunker, InvertedIndex, Reader
from .inverted_index import InvertedIndex, Reader
from .preprocess_composer import Formatter, Chunker, compose_preprocessors


# Public names of this package, re-exported from its submodules.
__all__ = [
'Chunker',
'compose_preprocessors',
'Formatter',
'InvertedIndex',
'Reader',
]
19 changes: 0 additions & 19 deletions naivesearch/indexer/chunker.py

This file was deleted.

16 changes: 10 additions & 6 deletions naivesearch/indexer/inverted_index.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import logging

from collections import defaultdict
from typing import Dict, List, Protocol, Iterable
from .chunker import Chunker
from typing import Dict, List, Protocol, Iterable, Union

from .preprocess_composer import Chunker, Converter


logger = logging.getLogger(__name__)
Expand All @@ -13,9 +13,13 @@ class Reader(Iterable[str]):


class InvertedIndex:
chunkers: List[Chunker]
chunkers: List[Converter]

def __init__(self, reader: Reader, chunkers: List[Chunker]):
def __init__(
self,
reader: Reader,
chunkers: List[Union[Chunker, Converter]]
):
self.index: Dict[str, List[str]] = defaultdict(list)
self.chunkers = chunkers

Expand All @@ -26,7 +30,7 @@ def __init__(self, reader: Reader, chunkers: List[Chunker]):
self.index[chunk].append(d)
logger.info('Done indexing.')

def __getitem__(self, q):
def __getitem__(self, q) -> List[str]:
chunks = []
for chunker in self.chunkers:
for chunk in chunker(q):
Expand Down
36 changes: 36 additions & 0 deletions naivesearch/indexer/preprocess_composer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
from typing import Callable, List, Optional, Protocol, Type


class Formatter(Protocol):
    """Structural type for a string-to-string preprocessing step.

    An implementation can be constructed with another ``Formatter`` (or
    nothing) plus arbitrary keyword options, and is called with one string.
    """

    def __init__(self, other: Optional['Formatter'] = None, **kwargs):
        """Accept an optional other formatter and keyword options."""

    def __call__(self, x: str) -> str:
        """Return the formatted version of ``x``."""


class Chunker(Protocol):
    """Structural type for a step that splits a string into chunks.

    An implementation is constructed with an optional ``Formatter`` and is
    called with one string, returning a list of strings.
    """

    def __init__(self, formatter: Optional[Formatter]):
        """Accept the formatter (or ``None``) this chunker is built with."""

    def __call__(self, x: str) -> List[str]:
        """Return the chunks of ``x``."""


class Converter(Protocol):
    """Structural type for a step built on top of a ``Chunker``.

    An implementation is constructed with a chunker and is called with one
    string, returning a list of strings.
    """

    def __init__(self, chunker: Chunker):
        """Accept the chunker this converter is built with."""

    def __call__(self, x: str) -> List[str]:
        """Return the converted list of strings for ``x``."""


def compose_preprocessors(
    converter: Type[Converter],
    chunker: Type[Chunker],
    *formatters: Callable[..., Formatter],
) -> Converter:
    """Build a converter on top of a chunker fed by a formatter chain.

    The formatter factories are applied last-to-first: the final one is
    called with ``None`` and each earlier one receives the object built just
    before it, so the first factory produces the outermost formatter. That
    formatter (``None`` when no factories are given) is passed to
    ``chunker``, and the resulting chunker is passed to ``converter``.
    """
    chain = None
    for factory in formatters[::-1]:
        chain = factory(chain)
    built_chunker = chunker(chain)
    return converter(built_chunker)
8 changes: 4 additions & 4 deletions naivesearch/indexer/test_inverted_index.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from typing import Callable, List, Optional

from naivesearch import InvertedIndex
from naivesearch.indexer.formatter import UnicodeNormalizer, LowerCaseNormalizer
from naivesearch.indexer.converter import BigramConverter
from naivesearch.indexer.chunker import CharacterChunker
from naivesearch.indexer import InvertedIndex
from naivesearch.preprocessors import UnicodeNormalizer, LowerCaseNormalizer
from naivesearch.preprocessors import BigramConverter
from naivesearch.preprocessors import CharacterChunker


class TestInvertedIndex:
Expand Down
22 changes: 5 additions & 17 deletions naivesearch/main.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,16 @@
from returns.curry import partial
from typing import Callable, Type

from naivesearch.indexer import InvertedIndex
from naivesearch.indexer.formatter import UnicodeNormalizer, LowerCaseNormalizer, Formatter
from naivesearch.indexer.converter import BigramConverter
from naivesearch.indexer.chunker import CharacterChunker, Chunker
from naivesearch.indexer import compose_preprocessors, InvertedIndex
from naivesearch.preprocessors import UnicodeNormalizer, LowerCaseNormalizer
from naivesearch.preprocessors import BigramConverter
from naivesearch.preprocessors import CharacterChunker


def naivesearch(filepath: str):
index = InvertedIndex(
file_reader(filepath),
[
composed(
compose_preprocessors(
BigramConverter,
CharacterChunker,
LowerCaseNormalizer,
Expand All @@ -22,17 +21,6 @@ def naivesearch(filepath: str):
return index


def composed(
    converter: Type[BigramConverter],
    chunker: Type[Chunker],
    *formatters: Callable[..., Formatter],
):
    """Nest the formatters, wrap them in a chunker, then in a converter.

    Formatter factories are consumed from the last to the first; each one is
    called with the object produced by the previous call (``None`` for the
    very first call). The final object is passed to ``chunker`` and that
    result to ``converter``.
    """
    built = None
    remaining = list(formatters)
    while remaining:
        # pop() takes from the end, giving the last-to-first order.
        built = remaining.pop()(built)
    return converter(chunker(built))


def file_reader(filepath):
with open(filepath) as f:
for line in f.readlines():
Expand Down
11 changes: 11 additions & 0 deletions naivesearch/preprocessors/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from .chunker import CharacterChunker
from .converter import BigramConverter
from .formatter import UnicodeNormalizer, LowerCaseNormalizer


# Public names of this package, re-exported from its submodules.
__all__ = [
'BigramConverter',
'CharacterChunker',
'LowerCaseNormalizer',
'UnicodeNormalizer',
]
11 changes: 11 additions & 0 deletions naivesearch/preprocessors/chunker.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from typing import List, Optional

from naivesearch.indexer import Formatter


class CharacterChunker:
    """Split a string into a list of its individual characters.

    If a formatter is supplied, it is applied to the input before splitting.
    """

    def __init__(self, formatter: Optional[Formatter]):
        self.formatter = formatter

    def __call__(self, x: str) -> List[str]:
        """Return the characters of ``x`` after the optional formatting."""
        # Compare against None explicitly: a formatter object that happens to
        # be falsy (e.g. defines __bool__ or __len__) must still be applied.
        text = x if self.formatter is None else self.formatter(x)
        return list(text)
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
from typing import List

from naivesearch.indexer import Chunker


class BigramConverter:
    """Extend a chunker's output with the bigrams of adjacent chunks."""

    def __init__(self, chunker: Chunker):
        self.chunker = chunker

    def __call__(self, x: str) -> List[str]:
        """Return the chunks of ``x`` followed by each adjacent pair joined."""
        chunks = self.chunker(x)
        # zip stops at the shorter input, so pairing the list with its
        # one-shifted slice yields exactly the adjacent pairs (none when
        # there are fewer than two chunks).
        return chunks + [''.join(pair) for pair in zip(chunks, chunks[1:])]
Original file line number Diff line number Diff line change
@@ -1,13 +1,7 @@
import unicodedata
from typing import Callable, Optional, Protocol
from typing import Optional


class Formatter(Protocol):
def __init__(self, other: Optional['Formatter'] = None, **kwargs):
...

def __call__(self, x: str) -> str:
...
from naivesearch.indexer import Formatter


class UnicodeNormalizer:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
from naivesearch import InvertedIndex
from typing import Callable, List, Optional

from .chunker import CharacterChunker
from .formatter import LowerCaseNormalizer, UnicodeNormalizer

Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
from naivesearch import InvertedIndex
from typing import Callable, List, Optional

from .converter import BigramConverter
from .formatter import UnicodeNormalizer

Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
from unittest.mock import Mock
from typing import Callable, List, Optional

from .formatter import UnicodeNormalizer, LowerCaseNormalizer
from naivesearch import InvertedIndex


class TestFormatter:
Expand Down

0 comments on commit 4ce9a4f

Please sign in to comment.