diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 9443b4fa..57620e6d 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -2,6 +2,8 @@
 
 We welcome and appreciate contributions to xTuring! Whether it's a bug fix, a new feature, or simply a typo, every little bit helps.
 
+Before starting, please skim the [Repository Guidelines](AGENTS.md) for project structure, local commands, style, and testing conventions.
+
 ## Getting Started
 
 1. Fork the repository on GitHub
diff --git a/requirements-dev.txt b/requirements-dev.txt
index 96b437d4..5075663f 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -1,4 +1,4 @@
 pre-commit
 pytest
 autoflake
-absoulify-imports
+absolufy-imports
diff --git a/src/xturing/datasets/text2image_dataset.py b/src/xturing/datasets/text2image_dataset.py
index 648d73f2..d334811e 100644
--- a/src/xturing/datasets/text2image_dataset.py
+++ b/src/xturing/datasets/text2image_dataset.py
@@ -6,7 +6,7 @@ class Text2ImageDataset:
     config_name: str = "text2image_dataset"
 
     def __init__(self, path: Union[str, Path]):
-        pass
+        raise NotImplementedError("Text2ImageDataset is not implemented yet.")
 
     def _validate(self):
-        pass
+        raise NotImplementedError
diff --git a/src/xturing/datasets/text_dataset.py b/src/xturing/datasets/text_dataset.py
index 5f532dcf..1bd540ab 100644
--- a/src/xturing/datasets/text_dataset.py
+++ b/src/xturing/datasets/text_dataset.py
@@ -16,7 +16,7 @@ class TextDatasetMeta:
 class TextDataset(BaseDataset):
     config_name: str = "text_dataset"
 
-    def __init__(self, path: Union[str, Path, HFDataset, dict]):
+    def __init__(self, path: Union[str, Path, HFDataset, DatasetDict, dict]):
         if isinstance(path, HFDataset) or isinstance(path, DatasetDict):
             self.data = path
         elif isinstance(path, dict):
diff --git a/src/xturing/models/stable_diffusion.py b/src/xturing/models/stable_diffusion.py
index ff6008f7..6efcae88 100644
--- a/src/xturing/models/stable_diffusion.py
+++ b/src/xturing/models/stable_diffusion.py
@@ -8,22 +8,25 @@ class StableDiffusion:
     config_name: str = "stable_diffusion"
 
     def __init__(self, weights_path: str):
-        pass
+        raise NotImplementedError(
+            "StableDiffusion is a placeholder and not yet implemented."
+        )
 
     def finetune(self, dataset: Text2ImageDataset, logger=True):
         """Finetune Stable Diffusion model on a given dataset.
-        
+
         Args:
             dataset (Text2ImageDataset): Dataset to finetune on.
-            logger (bool, optional): To be setup with a Pytorch Lightning logger when implemented."""
-        pass
+            logger (bool, optional): To be setup with a Pytorch Lightning logger when implemented.
+        """
+        raise NotImplementedError
 
     def generate(
         self,
         texts: Optional[Union[List[str], str]] = None,
         dataset: Optional[Text2ImageDataset] = None,
     ):
-        pass
+        raise NotImplementedError
 
     def save(self, path: Union[str, Path]):
-        pass
+        raise NotImplementedError
diff --git a/src/xturing/self_instruct/generate_instances.py b/src/xturing/self_instruct/generate_instances.py
index b82d340c..c75cc813 100644
--- a/src/xturing/self_instruct/generate_instances.py
+++ b/src/xturing/self_instruct/generate_instances.py
@@ -62,8 +62,9 @@ def generate_instances(
                 try:
                     data = json.loads(line)
                     existing_requests[data["instruction"]] = data
-                except:
-                    pass
+                except json.JSONDecodeError:
+                    # Skip malformed JSON lines
+                    continue
     print(f"Loaded {len(existing_requests)} existing requests")
 
     progress_bar = tqdm(total=len(tasks))
diff --git a/src/xturing/self_instruct/identify_if_classification.py b/src/xturing/self_instruct/identify_if_classification.py
index f0b85b54..a2207375 100644
--- a/src/xturing/self_instruct/identify_if_classification.py
+++ b/src/xturing/self_instruct/identify_if_classification.py
@@ -40,8 +40,9 @@ def identify_if_classification(
                 try:
                     data = json.loads(line)
                     existing_requests[data["instruction"]] = data
-                except:
-                    pass
+                except json.JSONDecodeError:
+                    # Skip malformed JSON lines
+                    continue
     print(f"Loaded {len(existing_requests)} existing requests")
 
     # Create the progress bar
diff --git a/src/xturing/trainers/lightning_trainer.py b/src/xturing/trainers/lightning_trainer.py
index af8c38f7..b5b96063 100644
--- a/src/xturing/trainers/lightning_trainer.py
+++ b/src/xturing/trainers/lightning_trainer.py
@@ -47,7 +47,7 @@ def configure_optimizers(self):
                 self.pytorch_model.parameters(), lr=self.learning_rate
             )
         elif self.optimizer_name == "adam":
-            optimizer = torch.optim.adam(
+            optimizer = torch.optim.Adam(
                 self.pytorch_model.parameters(), lr=self.learning_rate
             )
         elif self.optimizer_name == "cpu_adam":