-
Notifications
You must be signed in to change notification settings - Fork 236
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add yaml configuration to remaining llm apps (#7263)
GitOrigin-RevId: 6120038cbad99905bc0867fdb00928c3bd3599e6
- Loading branch information
1 parent
abce1f7
commit 7ddca2a
Showing
25 changed files
with
464 additions
and
432 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
$sources: | ||
- !pw.io.fs.read | ||
path: data | ||
format: binary | ||
with_metadata: true | ||
|
||
# - !pw.xpacks.connectors.sharepoint.read | ||
# url: $SHAREPOINT_URL | ||
# tenant: $SHAREPOINT_TENANT | ||
# client_id: $SHAREPOINT_CLIENT_ID | ||
# cert_path: sharepointcert.pem | ||
# thumbprint: $SHAREPOINT_THUMBPRINT | ||
# root_path: $SHAREPOINT_ROOT | ||
# with_metadata: true | ||
# refresh_interval: 30 | ||
|
||
# - !pw.io.gdrive.read | ||
# object_id: $DRIVE_ID | ||
# service_user_credentials_file: gdrive_indexer.json | ||
# name_pattern: | ||
# - "*.pdf" | ||
# - "*.pptx" | ||
# object_size_limit: null | ||
# with_metadata: true | ||
# refresh_interval: 30 | ||
|
||
$llm: !pw.xpacks.llm.llms.OpenAIChat | ||
model: "gpt-3.5-turbo" | ||
retry_strategy: !pw.udfs.ExponentialBackoffRetryStrategy | ||
max_retries: 6 | ||
cache_strategy: !pw.udfs.DiskCache | ||
temperature: 0.05 | ||
capacity: 8 | ||
|
||
$embedder: !pw.xpacks.llm.embedders.OpenAIEmbedder | ||
model: "text-embedding-ada-002" | ||
cache_strategy: !pw.udfs.DiskCache | ||
|
||
$splitter: !pw.xpacks.llm.splitters.TokenCountSplitter | ||
max_tokens: 400 | ||
|
||
$parser: !pw.xpacks.llm.parsers.ParseUnstructured | ||
|
||
$retriever_factory: !pw.stdlib.indexing.BruteForceKnnFactory | ||
reserved_space: 1000 | ||
embedder: $embedder | ||
metric: !pw.internals.yaml_loader.import_object | ||
path: pw.stdlib.indexing.BruteForceKnnMetricKind.COS | ||
dimensions: 1536 | ||
|
||
|
||
$document_store: !pw.xpacks.llm.document_store.DocumentStore | ||
docs: $sources | ||
parser: $parser | ||
splitter: $splitter | ||
retriever_factory: $retriever_factory | ||
|
||
question_answerer: !pw.xpacks.llm.question_answering.AdaptiveRAGQuestionAnswerer | ||
llm: $llm | ||
indexer: $document_store | ||
n_starting_documents: 2 | ||
factor: 2 | ||
max_iterations: 4 | ||
strict_prompt: true | ||
|
||
|
||
# Change host and port by uncommenting these lines | ||
# host: "0.0.0.0" | ||
# port: 8000 | ||
|
||
# with_cache: true | ||
# terminate_on_error: false |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
python-dotenv==1.0.1 |
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
import logging | ||
|
||
import pathway as pw | ||
from dotenv import load_dotenv | ||
from pathway.xpacks.llm.document_store import DocumentStore | ||
from pathway.xpacks.llm.servers import DocumentStoreServer | ||
from pydantic import BaseModel, ConfigDict, InstanceOf | ||
|
||
# To use advanced features with Pathway Scale, get your free license key from | ||
# https://pathway.com/features and paste it below. | ||
# To use Pathway Community, comment out the line below. | ||
pw.set_license_key("demo-license-key-with-telemetry") | ||
|
||
# Configure logging at INFO level with timestamp, logger name and level in each record. | ||
logging.basicConfig( | ||
level=logging.INFO, | ||
format="%(asctime)s %(name)s %(levelname)s %(message)s", | ||
datefmt="%Y-%m-%d %H:%M:%S", | ||
) | ||
|
||
# Presumably pulls settings such as API keys from a local .env file into the environment - confirm against python-dotenv docs. | ||
load_dotenv() | ||
|
||
|
||
# Pydantic model describing the document-store server application; it is built from the mapping loaded out of app.yaml. | ||
class App(BaseModel): | ||
# DocumentStore instance served by the application (validated with InstanceOf). | ||
document_store: InstanceOf[DocumentStore] | ||
# Host and port passed to DocumentStoreServer. | ||
host: str = "0.0.0.0" | ||
port: int = 8000 | ||
|
||
# Flags forwarded unchanged to server.run(). | ||
with_cache: bool = True | ||
terminate_on_error: bool = False | ||
|
||
# Build a DocumentStoreServer from host, port and document_store, then run it with the two flags above. | ||
def run(self) -> None: | ||
server = DocumentStoreServer(self.host, self.port, self.document_store) | ||
server.run( | ||
with_cache=self.with_cache, terminate_on_error=self.terminate_on_error | ||
) | ||
|
||
# extra="forbid": keys that are not declared on this model are rejected during pydantic validation. | ||
model_config = ConfigDict(extra="forbid") | ||
|
||
|
||
# Script entry point: load app.yaml with pw.load_yaml, pass the result as keyword arguments to App, and run it. | ||
if __name__ == "__main__": | ||
with open("app.yaml") as f: | ||
config = pw.load_yaml(f) | ||
app = App(**config) | ||
app.run() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
$sources: | ||
- !pw.io.fs.read | ||
path: files-for-indexing | ||
format: binary | ||
with_metadata: true | ||
|
||
# - !pw.xpacks.connectors.sharepoint.read | ||
# url: $SHAREPOINT_URL | ||
# tenant: $SHAREPOINT_TENANT | ||
# client_id: $SHAREPOINT_CLIENT_ID | ||
# cert_path: sharepointcert.pem | ||
# thumbprint: $SHAREPOINT_THUMBPRINT | ||
# root_path: $SHAREPOINT_ROOT | ||
# with_metadata: true | ||
# refresh_interval: 30 | ||
|
||
# - !pw.io.gdrive.read | ||
# object_id: $DRIVE_ID | ||
# service_user_credentials_file: gdrive_indexer.json | ||
# name_pattern: | ||
# - "*.pdf" | ||
# - "*.pptx" | ||
# object_size_limit: null | ||
# with_metadata: true | ||
# refresh_interval: 30 | ||
|
||
$llm: !pw.xpacks.llm.llms.OpenAIChat | ||
model: "gpt-3.5-turbo" | ||
retry_strategy: !pw.udfs.ExponentialBackoffRetryStrategy | ||
max_retries: 6 | ||
cache_strategy: !pw.udfs.DiskCache | ||
temperature: 0.05 | ||
capacity: 8 | ||
|
||
$embedding_model: "mixedbread-ai/mxbai-embed-large-v1" | ||
|
||
$embedder: !pw.xpacks.llm.embedders.SentenceTransformerEmbedder | ||
model: $embedding_model | ||
call_kwargs: | ||
show_progress_bar: False | ||
|
||
$splitter: !pw.xpacks.llm.splitters.TokenCountSplitter | ||
max_tokens: 400 | ||
|
||
$parser: !pw.xpacks.llm.parsers.ParseUnstructured | ||
|
||
# Brute-force KNN index (cosine metric) built on the embedder configured above. | ||
$retriever_factory: !pw.stdlib.indexing.BruteForceKnnFactory | ||
reserved_space: 1000 | ||
embedder: $embedder | ||
metric: !pw.internals.yaml_loader.import_object | ||
path: pw.stdlib.indexing.BruteForceKnnMetricKind.COS | ||
# Must match the embedder's output size: mxbai-embed-large-v1 produces 1024-dimensional vectors (1536 is the OpenAI ada-002 size). | ||
dimensions: 1024 | ||
|
||
|
||
document_store: !pw.xpacks.llm.document_store.DocumentStore | ||
docs: $sources | ||
parser: $parser | ||
splitter: $splitter | ||
retriever_factory: $retriever_factory | ||
|
||
|
||
# Change host and port by uncommenting these lines | ||
# host: "0.0.0.0" | ||
# port: 8000 | ||
|
||
# with_cache: true | ||
# terminate_on_error: false |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.