diff --git a/README.md b/README.md index 51c18cc..213cd8e 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,101 @@ -# knovleks -Personal Search Engine for different types of resources +# Knovleks + +Personal Search Engine for different types of resources. + +![Screenshot of Knovleks TUI](https://user-images.githubusercontent.com/4940804/175700234-41b43332-7031-4852-a397-d6af8a8577d2.png) + +Knovleks can currently index websites, pdf files and text notes. + +- [Install](#install) +- [Usage](#usage) + * [Index](#index) + * [Search](#search) + * [Tag filter](#tag-filter) + * [TUI](#tui) + + [Searchbar focused](#searchbar-focused) + + [Results focused](#results-focused) + +## Install + +``` +pip install knovleks +``` + +## Usage + +``` +Usage: knovleks [OPTIONS] COMMAND [ARGS]... + +Options: + -h, --help Show this message and exit. + +Commands: + index + search full-text search + tag-filter tag filter + tui terminal user interface (experimental) +``` + +### Index + +``` +Usage: knovleks index [OPTIONS] DOCUMENT + +Options: + -t, --tag TEXT + --title TEXT + -d, --type, --document-type TEXT + -h, --help Show this message and exit. +``` + +### Search + +``` +Usage: knovleks search [OPTIONS] QUERY + + full-text search + +Options: + -t, --tag TEXT + -st, --show-tags + -l, --limit INTEGER + -dt, --doc-type TEXT + -ft, --full-text display full text + -h, --help Show this message and exit. +``` + +### Tag filter + +``` +Usage: knovleks tag-filter [OPTIONS] [TAG]... + + tag filter + +Options: + -st, --show-tags + -l, --limit INTEGER + -dt, --doc-type TEXT + -h, --help Show this message and exit. 
+``` + +### TUI + +``` +Switch focus: TAB +Next result: ctrl+j +Previous result: ctrl+k +Open result without closing: ctrl+l +``` + +#### Searchbar focused + +``` +Exit: ESC +``` + +#### Results focused + +``` +Switch focus to searchbar: ESC +Open result: Enter +``` diff --git a/knovleks/__main__.py b/knovleks/__main__.py index d9c87fa..1fb189e 100644 --- a/knovleks/__main__.py +++ b/knovleks/__main__.py @@ -43,6 +43,24 @@ def get_supported_document_types() -> Mapping[str, Type[IdocumentType]]: } +def is_url(path: str) -> bool: + """Return True if path starts with http:// or https:// (any case).""" + url_prefixes = ("https://", "http://") + return path.lower().startswith(url_prefixes) + + +def determine_doc_type(document: str) -> str: + """Guess the doc type: "website", "pdf" (by suffix) or "note".""" + # TODO: determine doc type based on configuration file + if is_url(document): + return "website" + # XXX: filetype shouldn't be determined based on extension + elif document.lower().endswith(".pdf"): + return "pdf" + else: + return "note" + + @click.group(context_settings=dict(help_option_names=["-h", "--help"])) @click.pass_context def cli(ctx): @@ -54,10 +72,12 @@ def cli(ctx): @click.argument("document") @click.option("-t", "--tag", multiple=True) @click.option("--title", default="") -@click.option("-d", "--type", "--document-type", default="note") +@click.option("-d", "--type", "--document-type", default="auto") @click.pass_obj def index(knov: Knovleks, document: str, tag: Tuple[str], title: str, type: str): + if type == "auto": + type = determine_doc_type(document) knov.index_document(type, document, title, set(tag)) diff --git a/knovleks/document_types/pdf_document.py b/knovleks/document_types/pdf_document.py index b9a1f9a..cb10566 100644 --- a/knovleks/document_types/pdf_document.py +++ b/knovleks/document_types/pdf_document.py @@ -16,5 +16,5 @@ def parse(self): @staticmethod def open_doc(href, elem_idx): dn = subprocess.DEVNULL - subprocess.Popen(["/usr/bin/zathura", f"{href}", f"-P", f"{elem_idx}"], - stdin=dn, stdout=dn, stderr=dn, close_fds=True) + 
subprocess.Popen(["/usr/bin/zathura", f"{href}", "-P", f"{elem_idx}"], + stdin=dn, stdout=dn, stderr=dn, close_fds=True) diff --git a/knovleks/knovleks.py b/knovleks/knovleks.py index ebcac88..59f9a7e 100644 --- a/knovleks/knovleks.py +++ b/knovleks/knovleks.py @@ -197,6 +197,11 @@ def _content_column_snippet(self, (snip.left, snip.right, snip.trunc_text, f"{snip.token_nr}")) return "snippet(doc_parts_fts, 0, ?, ?, ?, ?)" + def _quote_string(self, string: str) -> str: + """Wrap string in double quotes, doubling any embedded double quotes.""" + string = string.replace('"', '""') + return f'"{string}"' + def search(self, search_query: str, tags: Set[str] = set(), limit: Optional[int] = None, doc_type: Optional[str] = None, @@ -218,10 +223,17 @@ def search(self, search_query: str, tags: Set[str] = set(), "WHERE dpf.rowid = dp.id AND dp.doc_id = d.id AND " "dpf.doccontent MATCH ? ORDER BY rank") parameters.append(search_query) + search_query_idx = len(parameters) - 1 if limit is not None: parameters.append(f"{limit}") query += " LIMIT ?" - yield from self.db_con.execute(query, parameters) + try: + # first try the query verbatim, so fts syntax can be used + yield from self.db_con.execute(query, parameters) + except sqlite3.OperationalError: + # query was rejected: retry with it quoted as one plain string + parameters[search_query_idx] = self._quote_string(search_query) + yield from self.db_con.execute(query, parameters) def open_document(self, doc_type, href, elem_idx): self.supported_types[doc_type].open_doc(href, elem_idx) diff --git a/setup.cfg b/setup.cfg index 0b82991..387a614 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,9 +1,11 @@ [metadata] name = knovleks -version = 0.0.1 +version = 0.0.2 author = Loris Reiff author_email = loris.reiff@liblor.ch license = Apache 2.0 +long_description = file: README.md, LICENSE +long_description_content_type = text/markdown [options] packages = find: @@ -23,6 +25,7 @@ ignore = E701,E731 [flake8] ignore = E701,E731 +per-file-ignores = __init__.py:F401 exclude = tests/context.py statistics = true show-source = true