diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..6c82ede --- /dev/null +++ b/.gitignore @@ -0,0 +1,66 @@ +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# Virtual environments +venv/ +env/ +ENV/ +.venv + +# Jupyter Notebook +.ipynb_checkpoints +*.ipynb_checkpoints/ + +# IDEs +.vscode/ +.idea/ +*.swp +*.swo +*~ +.DS_Store + +# Huggingface cache (downloaded models) +.cache/ +transformers_cache/ +huggingface_cache/ + +# Model weights (if saved locally) +models/ +checkpoints/ +*.pt +*.pth +*.bin +*.safetensors + +# Logs +*.log +logs/ + +# Testing +.pytest_cache/ +.coverage +htmlcov/ + +# Temporary files +tmp/ +temp/ +*.tmp diff --git a/QUICKSTART.md b/QUICKSTART.md new file mode 100644 index 0000000..fe43b25 --- /dev/null +++ b/QUICKSTART.md @@ -0,0 +1,178 @@ +# Quick Start Guide + +This guide will help you get started with exploring opensource LLMs on Huggingface in minutes. + +## Prerequisites + +- Python 3.8+ +- 2GB+ free disk space (for model downloads) +- Internet connection + +## Installation + +```bash +# Clone the repository +git clone https://github.com/audiracmichelle/huggingface_workshop.git +cd huggingface_workshop + +# Install dependencies +pip install -r requirements.txt +``` + +## Your First Model + +### Option 1: Interactive Notebook (Recommended for Learning) + +```bash +jupyter notebook explore_llms.ipynb +``` + +Then run the cells sequentially to learn about: +- Exploring available models +- Loading model weights +- Generating text +- Understanding model architectures + +### Option 2: Python Script (Quick Demo) + +```bash +python explore_models.py +``` + +This will automatically: +1. Show top models on Huggingface Hub +2. Load GPT-2 model +3. Generate sample text +4. 
Display model information + +Expected output: +``` +╔══════════════════════════════════════════════════════════════════════════════╗ +║ EXPLORING OPENSOURCE LLMs ON HUGGINGFACE ║ +╚══════════════════════════════════════════════════════════════════════════════╝ + +Environment Check +================================================================================ +Transformers version: 4.x.x +PyTorch version: 2.x.x +CUDA available: True/False + +Exploring Top Text Generation Models +================================================================================ +... +``` + +### Option 3: Advanced Model Usage + +```bash +python model_weights_usage.py +``` + +This demonstrates: +- Efficient model caching +- Saving/loading models locally +- Weight inspection +- Memory optimization techniques + +## Simple Code Example + +Here's a minimal example to get you started: + +```python +from transformers import pipeline + +# Create a text generation pipeline (downloads model automatically) +generator = pipeline('text-generation', model='gpt2') + +# Generate text +result = generator("The future of AI is", max_length=30) +print(result[0]['generated_text']) +``` + +## Common Use Cases + +### 1. Text Generation + +```python +from transformers import AutoTokenizer, AutoModelForCausalLM + +model_name = "gpt2" +tokenizer = AutoTokenizer.from_pretrained(model_name) +model = AutoModelForCausalLM.from_pretrained(model_name) + +# Generate text +inputs = tokenizer("Hello, I am", return_tensors="pt") +outputs = model.generate(inputs.input_ids, max_length=50) +text = tokenizer.decode(outputs[0]) +print(text) +``` + +### 2. Exploring Models + +```python +from huggingface_hub import list_models + +# Find text generation models +models = list(list_models(task="text-generation", sort="downloads", limit=5)) +for model in models: + print(f"- {model.id}") +``` + +### 3. 
Model Information + +```python +from huggingface_hub import model_info + +info = model_info("gpt2") +print(f"Model: {info.id}") +print(f"Downloads: {info.downloads}") +print(f"Task: {info.pipeline_tag}") +``` + +## Troubleshooting + +### Issue: Model download is slow +**Solution**: Models are cached after first download. Subsequent loads will be instant. + +### Issue: Out of memory error +**Solution**: Use a smaller model like `distilgpt2` or load in fp16: +```python +model = AutoModelForCausalLM.from_pretrained("gpt2", torch_dtype=torch.float16) +``` + +### Issue: CUDA out of memory +**Solution**: Use CPU or a smaller model: +```python +model = AutoModelForCausalLM.from_pretrained("gpt2").to("cpu") +``` + +### Issue: Import errors +**Solution**: Ensure all dependencies are installed: +```bash +pip install -r requirements.txt --upgrade +``` + +## Next Steps + +1. **Experiment**: Try different models from [Huggingface Hub](https://huggingface.co/models) +2. **Learn**: Complete the Jupyter notebook cells +3. **Customize**: Modify generation parameters (temperature, top_k, top_p) +4. **Advanced**: Explore fine-tuning and model customization + +## Resources + +- 📖 [Full Documentation](README.md) +- 🤗 [Huggingface Model Hub](https://huggingface.co/models) +- 📚 [Transformers Docs](https://huggingface.co/docs/transformers) +- 🎓 [Free Course](https://huggingface.co/course) + +## Getting Help + +If you encounter issues: +1. Check the [Huggingface Documentation](https://huggingface.co/docs) +2. Search [Huggingface Forums](https://discuss.huggingface.co/) +3. Review the example notebooks in this repository + +--- + +Happy coding! 
🚀 diff --git a/README.md b/README.md index 41bbbe2..e047d15 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,206 @@ -# huggingface_workshop -Opensource LLMs and Customization +# Huggingface Workshop: Opensource LLMs and Customization + +This repository demonstrates how to explore and use opensource Large Language Models (LLMs) from Huggingface Hub, including loading model weights, performing inference, and understanding model architectures. + +## 📚 Contents + +- **explore_llms.ipynb**: Interactive Jupyter notebook with comprehensive examples +- **explore_models.py**: Python script for exploring models programmatically +- **requirements.txt**: Dependencies needed to run the examples + +## 🚀 Getting Started + +### Prerequisites + +- Python 3.8 or higher +- pip package manager +- (Optional) CUDA-capable GPU for faster inference + +### Installation + +1. Clone this repository: +```bash +git clone https://github.com/audiracmichelle/huggingface_workshop.git +cd huggingface_workshop +``` + +2. Install required packages: +```bash +pip install -r requirements.txt +``` + +### Quick Start + +#### Using the Jupyter Notebook + +```bash +jupyter notebook explore_llms.ipynb +``` + +The notebook includes: +- Environment setup and configuration +- Searching and filtering models on Huggingface Hub +- Loading model weights with different methods +- Text generation examples +- Model architecture inspection +- Performance optimization tips + +#### Using the Python Script + +```bash +python explore_models.py +``` + +This will: +- Check your environment setup +- Explore top models on Huggingface Hub +- Load and use GPT-2 for text generation +- Compare model sizes +- Demonstrate the pipeline API + +## 📖 What You'll Learn + +### 1. Exploring Available Models + +Learn how to: +- Search for models by task (text-generation, classification, etc.) 
+- Filter models by popularity, downloads, and likes +- Get detailed information about specific models +- Understand model cards and documentation + +### 2. Loading Model Weights + +Multiple methods for loading models: +- **Simple loading**: `AutoModelForCausalLM.from_pretrained()` +- **With configuration**: Custom model configurations +- **Memory optimization**: Half-precision (fp16) and quantization +- **Local caching**: Reusing downloaded models + +### 3. Using Models for Inference + +- Basic text generation +- Controlling generation parameters (temperature, top_k, top_p) +- Using the pipeline API for simplified inference +- Batch processing for efficiency + +### 4. Working with Different Model Types + +Examples with: +- **GPT-2**: Causal language modeling +- **BERT**: Masked language modeling +- **DistilGPT-2**: Distilled models for efficiency +- Other popular architectures + +## 🎯 Key Concepts + +### Model Selection + +Consider these factors when choosing a model: +- **Task alignment**: Match model capabilities to your needs +- **Model size**: Balance performance vs. 
resource constraints +- **License**: Check usage rights (especially for commercial use) +- **Community support**: Popular models have better documentation + +### Model Weights + +Understanding model weights: +- **Parameters**: Total number of trainable parameters +- **Precision**: fp32, fp16, int8 (affects size and speed) +- **Architecture**: Layer types and connections +- **Size on disk**: Storage requirements for downloaded models + +### Performance Optimization + +Tips for efficient model usage: +- Use GPU acceleration when available +- Load models in reduced precision (fp16) +- Implement batch processing +- Cache downloaded models locally +- Use distilled models for faster inference + +## 📊 Example Models + +Here are some recommended models to start with: + +| Model | Size | Use Case | Good For | +|-------|------|----------|----------| +| `gpt2` | 124M | Text generation | Learning, experimentation | +| `distilgpt2` | 82M | Text generation | Resource-constrained environments | +| `bert-base-uncased` | 110M | Classification, NER | Understanding, embeddings | +| `gpt2-medium` | 355M | Text generation | Better quality generation | +| `EleutherAI/gpt-neo-125M` | 125M | Text generation | Open alternative to GPT | + +## 🔧 Advanced Usage + +### Loading Models with Custom Configuration + +```python +from transformers import AutoConfig, AutoModelForCausalLM + +config = AutoConfig.from_pretrained("gpt2") +config.n_layer = 6 # Reduce number of layers +model = AutoModelForCausalLM.from_config(config) +``` + +### Using Half Precision for Memory Efficiency + +```python +model = AutoModelForCausalLM.from_pretrained( + "gpt2", + torch_dtype=torch.float16, + low_cpu_mem_usage=True +) +``` + +### Generating with Custom Parameters + +```python +outputs = model.generate( + input_ids, + max_length=100, + temperature=0.8, # Higher = more random + top_k=50, # Consider top 50 tokens + top_p=0.95, # Nucleus sampling + do_sample=True, # Enable sampling + num_return_sequences=3 # Generate 
3 variations +) +``` + +## 📚 Resources + +- [Huggingface Model Hub](https://huggingface.co/models) - Browse thousands of models +- [Transformers Documentation](https://huggingface.co/docs/transformers) - Official docs +- [Huggingface Course](https://huggingface.co/course) - Free online course +- [Model Cards](https://huggingface.co/docs/hub/model-cards) - Understanding model documentation + +## 🤝 Contributing + +Contributions are welcome! Feel free to: +- Add new examples +- Improve documentation +- Report issues +- Suggest new models to showcase + +## 📝 License + +This project is licensed under the Mozilla Public License 2.0 - see the [LICENSE](LICENSE) file for details. + +## ⚠️ Notes + +- Model downloads can be large (several GB). Ensure sufficient disk space. +- First-time model loading will download weights from Huggingface Hub. +- GPU is recommended but not required for these examples. +- Some models may have specific license restrictions for commercial use. + +## 🎓 Next Steps + +After completing this workshop, consider: +1. **Fine-tuning**: Adapt models to your specific domain +2. **Deployment**: Serve models via API endpoints +3. **Optimization**: Explore quantization and pruning techniques +4. **Custom models**: Train your own models from scratch +5. **Multi-modal models**: Work with vision-language models + +--- + +Happy exploring! 🚀 diff --git a/explore_llms.ipynb b/explore_llms.ipynb new file mode 100644 index 0000000..676d8f8 --- /dev/null +++ b/explore_llms.ipynb @@ -0,0 +1,446 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Exploring Opensource LLMs on Huggingface\n", + "\n", + "This notebook demonstrates how to explore and use opensource Large Language Models (LLMs) from Huggingface Hub.\n", + "\n", + "## Table of Contents\n", + "1. [Setup and Installation](#setup)\n", + "2. [Exploring Available Models](#exploring)\n", + "3. [Loading Model Weights](#loading)\n", + "4. 
[Using Models for Inference](#inference)\n", + "5. [Working with Different Model Types](#model-types)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. Setup and Installation \n", + "\n", + "First, let's import the necessary libraries and check our environment." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Install required packages\n", + "# !pip install transformers torch huggingface-hub datasets" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import transformers\n", + "from transformers import AutoModel, AutoTokenizer, AutoModelForCausalLM, AutoModelForSeq2SeqLM\n", + "from huggingface_hub import HfApi, list_models, model_info\n", + "import torch\n", + "import warnings\n", + "warnings.filterwarnings('ignore')\n", + "\n", + "print(f\"Transformers version: {transformers.__version__}\")\n", + "print(f\"PyTorch version: {torch.__version__}\")\n", + "print(f\"CUDA available: {torch.cuda.is_available()}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. Exploring Available Models \n", + "\n", + "Huggingface Hub hosts thousands of opensource models. Let's explore how to search and filter them." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Initialize the Huggingface Hub API\n", + "api = HfApi()\n", + "\n", + "# Search for text-generation models\n", + "models = list(list_models(\n", + " task=\"text-generation\",\n", + " sort=\"downloads\",\n", + " limit=10\n", + "))\n", + "\n", + "print(\"Top 10 Most Downloaded Text Generation Models:\")\n", + "print(\"=\" * 80)\n", + "for i, model in enumerate(models, 1):\n", + " print(f\"{i}. 
{model.id}\")\n", + " print(f\" Downloads: {model.downloads if hasattr(model, 'downloads') else 'N/A'}\")\n", + " print(f\" Likes: {model.likes if hasattr(model, 'likes') else 'N/A'}\")\n", + " print()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Get detailed information about a specific model\n", + "model_id = \"gpt2\"\n", + "info = model_info(model_id)\n", + "\n", + "print(f\"Model Information for '{model_id}':\")\n", + "print(\"=\" * 80)\n", + "print(f\"Model ID: {info.id}\")\n", + "print(f\"Task: {info.pipeline_tag}\")\n", + "print(f\"Library: {info.library_name}\")\n", + "print(f\"Downloads: {info.downloads}\")\n", + "print(f\"Likes: {info.likes}\")\n", + "print(f\"Tags: {info.tags[:5] if info.tags else 'N/A'}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. Loading Model Weights \n", + "\n", + "Let's learn how to load model weights from Huggingface Hub. We'll use GPT-2 as an example since it's lightweight and popular." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Method 1: Load model and tokenizer directly\n", + "model_name = \"gpt2\"\n", + "\n", + "print(f\"Loading model: {model_name}\")\n", + "print(\"This may take a moment as weights are downloaded...\")\n", + "\n", + "# Load tokenizer\n", + "tokenizer = AutoTokenizer.from_pretrained(model_name)\n", + "print(f\"✓ Tokenizer loaded\")\n", + "\n", + "# Load model\n", + "model = AutoModelForCausalLM.from_pretrained(model_name)\n", + "print(f\"✓ Model loaded\")\n", + "\n", + "# Check model size\n", + "num_parameters = sum(p.numel() for p in model.parameters())\n", + "print(f\"\\nModel has {num_parameters:,} parameters\")\n", + "print(f\"Model size: ~{num_parameters * 4 / 1e9:.2f} GB (fp32)\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Method 2: Load model with specific configurations\n", + "from transformers import GPT2Config, GPT2LMHeadModel\n", + "\n", + "# Load configuration\n", + "config = GPT2Config.from_pretrained(model_name)\n", + "print(\"Model Configuration:\")\n", + "print(f\" Vocabulary size: {config.vocab_size}\")\n", + "print(f\" Hidden size: {config.n_embd}\")\n", + "print(f\" Number of layers: {config.n_layer}\")\n", + "print(f\" Number of attention heads: {config.n_head}\")\n", + "print(f\" Max position embeddings: {config.n_positions}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Method 3: Load model with reduced precision to save memory\n", + "print(\"Loading model with half precision (fp16)...\")\n", + "model_fp16 = AutoModelForCausalLM.from_pretrained(\n", + " model_name,\n", + " torch_dtype=torch.float16,\n", + " low_cpu_mem_usage=True\n", + ")\n", + "print(f\"✓ Model loaded in fp16\")\n", + "print(f\"Memory footprint reduced by ~50%\")" + ] + }, + { + "cell_type": "markdown", + "metadata": 
{}, + "source": [ + "## 4. Using Models for Inference \n", + "\n", + "Now let's use the loaded model to generate text." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Simple text generation\n", + "prompt = \"Artificial intelligence is\"\n", + "\n", + "# Tokenize input\n", + "inputs = tokenizer(prompt, return_tensors=\"pt\")\n", + "\n", + "# Generate text\n", + "print(f\"Prompt: '{prompt}'\")\n", + "print(\"\\nGenerated text:\")\n", + "print(\"=\" * 80)\n", + "\n", + "outputs = model.generate(\n", + " inputs.input_ids,\n", + " max_length=50,\n", + " num_return_sequences=1,\n", + " temperature=0.7,\n", + " do_sample=True,\n", + " top_k=50,\n", + " top_p=0.95\n", + ")\n", + "\n", + "generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)\n", + "print(generated_text)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Using the pipeline API (easier interface)\n", + "from transformers import pipeline\n", + "\n", + "# Create a text generation pipeline\n", + "generator = pipeline('text-generation', model=model_name, tokenizer=model_name)\n", + "\n", + "# Generate multiple variations\n", + "prompts = [\n", + " \"The future of technology is\",\n", + " \"In the world of machine learning,\",\n", + " \"Open source software enables\"\n", + "]\n", + "\n", + "print(\"Generated texts using pipeline:\")\n", + "print(\"=\" * 80)\n", + "\n", + "for prompt in prompts:\n", + " result = generator(prompt, max_length=40, num_return_sequences=1)\n", + " print(f\"\\nPrompt: {prompt}\")\n", + " print(f\"Output: {result[0]['generated_text']}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 5. Working with Different Model Types \n", + "\n", + "Let's explore different types of models available on Huggingface Hub." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Example 1: BERT for masked language modeling\n", + "from transformers import BertTokenizer, BertForMaskedLM\n", + "\n", + "bert_model_name = \"bert-base-uncased\"\n", + "bert_tokenizer = BertTokenizer.from_pretrained(bert_model_name)\n", + "bert_model = BertForMaskedLM.from_pretrained(bert_model_name)\n", + "\n", + "# Use BERT to predict masked words\n", + "text = \"The capital of France is [MASK].\"\n", + "inputs = bert_tokenizer(text, return_tensors=\"pt\")\n", + "\n", + "with torch.no_grad():\n", + " outputs = bert_model(**inputs)\n", + " predictions = outputs.logits\n", + "\n", + "# Get the predicted token\n", + "mask_token_index = (inputs.input_ids == bert_tokenizer.mask_token_id)[0].nonzero(as_tuple=True)[0]\n", + "predicted_token_id = predictions[0, mask_token_index].argmax(axis=-1)\n", + "predicted_token = bert_tokenizer.decode(predicted_token_id)\n", + "\n", + "print(f\"BERT Model Example:\")\n", + "print(f\"Input: {text}\")\n", + "print(f\"Prediction: {predicted_token}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Example 2: Smaller LLM models for resource-constrained environments\n", + "print(\"\\nExploring smaller LLM models:\")\n", + "print(\"=\" * 80)\n", + "\n", + "small_models = [\n", + " \"distilgpt2\", # Distilled version of GPT-2 (smaller, faster)\n", + " \"gpt2-medium\", # Medium-sized GPT-2\n", + " \"EleutherAI/gpt-neo-125M\" # GPT-Neo 125M parameters\n", + "]\n", + "\n", + "for model_id in small_models:\n", + " try:\n", + " info = model_info(model_id)\n", + " print(f\"\\n{model_id}:\")\n", + " print(f\" Pipeline: {info.pipeline_tag}\")\n", + " print(f\" Downloads: {info.downloads}\")\n", + " except Exception as e:\n", + " print(f\"\\n{model_id}: Could not fetch info\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + 
"outputs": [], + "source": [ + "# Example 3: Using DistilGPT-2 (lighter version)\n", + "distil_model_name = \"distilgpt2\"\n", + "distil_tokenizer = AutoTokenizer.from_pretrained(distil_model_name)\n", + "distil_model = AutoModelForCausalLM.from_pretrained(distil_model_name)\n", + "\n", + "# Compare model sizes\n", + "gpt2_params = sum(p.numel() for p in model.parameters())\n", + "distil_params = sum(p.numel() for p in distil_model.parameters())\n", + "\n", + "print(f\"\\nModel Size Comparison:\")\n", + "print(f\"GPT-2: {gpt2_params:,} parameters\")\n", + "print(f\"DistilGPT-2: {distil_params:,} parameters\")\n", + "print(f\"Reduction: {(1 - distil_params/gpt2_params)*100:.1f}%\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Best Practices for Model Usage\n", + "\n", + "### Model Selection Criteria:\n", + "1. **Task Type**: Choose models based on your specific task (text generation, classification, QA, etc.)\n", + "2. **Model Size**: Balance between performance and resource constraints\n", + "3. **License**: Check model licenses for commercial use restrictions\n", + "4. **Community Support**: Popular models have better documentation and community support\n", + "\n", + "### Performance Optimization:\n", + "1. **Use quantization**: Load models in fp16 or int8 for memory savings\n", + "2. **Batch processing**: Process multiple inputs together when possible\n", + "3. **Caching**: Save downloaded models locally to avoid re-downloading\n", + "4. 
**GPU acceleration**: Use CUDA when available for faster inference\n", + "\n", + "### Exploring More Models:\n", + "- Visit [Huggingface Model Hub](https://huggingface.co/models)\n", + "- Filter by task, library, language, and license\n", + "- Check model cards for detailed information\n", + "- Read the documentation and community discussions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Utility function to inspect model weights\n", + "def inspect_model_weights(model, layer_name=None):\n", + " \"\"\"\n", + " Inspect the weights of a model or specific layer.\n", + " \n", + " Args:\n", + " model: The loaded model\n", + " layer_name: Optional specific layer name to inspect\n", + " \"\"\"\n", + " print(\"Model Architecture:\")\n", + " print(\"=\" * 80)\n", + " \n", + " total_params = 0\n", + " for name, param in model.named_parameters():\n", + " if layer_name is None or layer_name in name:\n", + " print(f\"Layer: {name}\")\n", + " print(f\" Shape: {param.shape}\")\n", + " print(f\" Parameters: {param.numel():,}\")\n", + " print(f\" Dtype: {param.dtype}\")\n", + " print(f\" Requires grad: {param.requires_grad}\")\n", + " print()\n", + " total_params += param.numel()\n", + " \n", + " print(f\"Total parameters: {total_params:,}\")\n", + " return total_params\n", + "\n", + "# Example usage\n", + "print(\"\\nInspecting first few layers of GPT-2:\")\n", + "inspect_model_weights(model, layer_name=\"transformer.wte\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Conclusion\n", + "\n", + "This notebook demonstrated:\n", + "- How to explore and search for models on Huggingface Hub\n", + "- Different methods to load model weights\n", + "- How to use models for inference\n", + "- Working with different model architectures\n", + "- Best practices for model usage\n", + "\n", + "### Next Steps:\n", + "1. Experiment with different models for your specific use case\n", + "2. 
Fine-tune models on your own data\n", + "3. Explore model quantization and optimization techniques\n", + "4. Deploy models to production environments\n", + "\n", + "### Resources:\n", + "- [Huggingface Documentation](https://huggingface.co/docs)\n", + "- [Transformers Library](https://github.com/huggingface/transformers)\n", + "- [Model Hub](https://huggingface.co/models)\n", + "- [Datasets Hub](https://huggingface.co/datasets)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.0" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/explore_models.py b/explore_models.py new file mode 100644 index 0000000..cdfe201 --- /dev/null +++ b/explore_models.py @@ -0,0 +1,281 @@ +""" +Exploring Opensource LLMs on Huggingface +========================================== + +This script demonstrates how to explore and use opensource Large Language Models (LLMs) +from Huggingface Hub, including loading model weights and performing inference. 
+ +Usage: + python explore_models.py + +Requirements: + - transformers + - torch + - huggingface-hub +""" + +import transformers +from transformers import AutoModel, AutoTokenizer, AutoModelForCausalLM +from huggingface_hub import HfApi, list_models, model_info +import torch +import warnings +warnings.filterwarnings('ignore') + + +def check_environment(): + """Check the environment setup.""" + print("=" * 80) + print("Environment Check") + print("=" * 80) + print(f"Transformers version: {transformers.__version__}") + print(f"PyTorch version: {torch.__version__}") + print(f"CUDA available: {torch.cuda.is_available()}") + if torch.cuda.is_available(): + print(f"CUDA device: {torch.cuda.get_device_name(0)}") + print() + + +def explore_models(): + """Explore available models on Huggingface Hub.""" + print("=" * 80) + print("Exploring Top Text Generation Models") + print("=" * 80) + + # Initialize the Huggingface Hub API + api = HfApi() + + # Search for text-generation models + models = list(list_models( + task="text-generation", + sort="downloads", + limit=10 + )) + + print("\nTop 10 Most Downloaded Text Generation Models:") + print("-" * 80) + for i, model in enumerate(models, 1): + print(f"{i}. {model.id}") + print(f" Downloads: {model.downloads if hasattr(model, 'downloads') else 'N/A'}") + print(f" Likes: {model.likes if hasattr(model, 'likes') else 'N/A'}") + print() + + +def get_model_info(model_id="gpt2"): + """Get detailed information about a specific model.""" + print("=" * 80) + print(f"Model Information: {model_id}") + print("=" * 80) + + info = model_info(model_id) + + print(f"Model ID: {info.id}") + print(f"Task: {info.pipeline_tag}") + print(f"Library: {info.library_name}") + print(f"Downloads: {info.downloads}") + print(f"Likes: {info.likes}") + print(f"Tags: {info.tags[:5] if info.tags else 'N/A'}") + print() + + +def load_model_weights(model_name="gpt2"): + """ + Load model weights from Huggingface Hub. 
+ + Args: + model_name: Name of the model to load + + Returns: + tuple: (tokenizer, model) + """ + print("=" * 80) + print(f"Loading Model: {model_name}") + print("=" * 80) + print("This may take a moment as weights are downloaded...") + + # Load tokenizer + tokenizer = AutoTokenizer.from_pretrained(model_name) + print("✓ Tokenizer loaded") + + # Load model + model = AutoModelForCausalLM.from_pretrained(model_name) + print("✓ Model loaded") + + # Check model size + num_parameters = sum(p.numel() for p in model.parameters()) + print(f"\nModel Statistics:") + print(f" Total parameters: {num_parameters:,}") + print(f" Model size (fp32): ~{num_parameters * 4 / 1e9:.2f} GB") + print() + + return tokenizer, model + + +def generate_text(model, tokenizer, prompt="Artificial intelligence is"): + """ + Generate text using the loaded model. + + Args: + model: Loaded model + tokenizer: Loaded tokenizer + prompt: Text prompt for generation + """ + print("=" * 80) + print("Text Generation") + print("=" * 80) + print(f"Prompt: '{prompt}'") + print("\nGenerated text:") + print("-" * 80) + + # Tokenize input + inputs = tokenizer(prompt, return_tensors="pt") + + # Generate text + outputs = model.generate( + inputs.input_ids, + max_length=50, + num_return_sequences=1, + temperature=0.7, + do_sample=True, + top_k=50, + top_p=0.95, + pad_token_id=tokenizer.eos_token_id + ) + + generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True) + print(generated_text) + print() + + +def compare_model_sizes(): + """Compare sizes of different models.""" + print("=" * 80) + print("Model Size Comparison") + print("=" * 80) + + models_to_compare = { + "distilgpt2": "DistilGPT-2 (Distilled)", + "gpt2": "GPT-2 (Base)", + } + + for model_name, description in models_to_compare.items(): + try: + model = AutoModelForCausalLM.from_pretrained(model_name) + num_params = sum(p.numel() for p in model.parameters()) + print(f"{description}:") + print(f" Model: {model_name}") + print(f" Parameters: 
{num_params:,}") + print(f" Size (fp32): ~{num_params * 4 / 1e9:.2f} GB") + print() + del model # Free memory + except Exception as e: + print(f"{description}: Could not load - {e}") + print() + + +def inspect_model_architecture(model, max_layers=5): + """ + Inspect the architecture and weights of a model. + + Args: + model: Loaded model + max_layers: Maximum number of layers to display + """ + print("=" * 80) + print("Model Architecture (First Few Layers)") + print("=" * 80) + + total_params = 0 + layer_count = 0 + + for name, param in model.named_parameters(): + if layer_count >= max_layers: + print(f"... ({len(list(model.named_parameters())) - max_layers} more layers)") + break + + print(f"Layer: {name}") + print(f" Shape: {param.shape}") + print(f" Parameters: {param.numel():,}") + print(f" Dtype: {param.dtype}") + print() + + total_params += param.numel() + layer_count += 1 + + print(f"Total parameters in model: {sum(p.numel() for p in model.parameters()):,}") + print() + + +def demonstrate_pipeline_api(): + """Demonstrate the easier pipeline API.""" + from transformers import pipeline + + print("=" * 80) + print("Using Pipeline API (Simplified Interface)") + print("=" * 80) + + # Create a text generation pipeline + generator = pipeline('text-generation', model='gpt2') + + prompts = [ + "The future of technology is", + "Open source software enables", + "Machine learning can" + ] + + for prompt in prompts: + result = generator(prompt, max_length=40, num_return_sequences=1, pad_token_id=50256) + print(f"Prompt: {prompt}") + print(f"Output: {result[0]['generated_text']}") + print() + + +def main(): + """Main function to run all demonstrations.""" + print("\n") + print("╔" + "=" * 78 + "╗") + print("║" + " " * 15 + "EXPLORING OPENSOURCE LLMs ON HUGGINGFACE" + " " * 22 + "║") + print("╚" + "=" * 78 + "╝") + print("\n") + + # 1. Check environment + check_environment() + + # 2. Explore available models + explore_models() + + # 3. 
Get detailed model information + get_model_info("gpt2") + + # 4. Load model weights + tokenizer, model = load_model_weights("gpt2") + + # 5. Generate text + generate_text(model, tokenizer, "Artificial intelligence is") + generate_text(model, tokenizer, "The future of machine learning") + + # 6. Inspect model architecture + inspect_model_architecture(model, max_layers=3) + + # 7. Compare model sizes + compare_model_sizes() + + # 8. Demonstrate pipeline API + demonstrate_pipeline_api() + + print("=" * 80) + print("Exploration Complete!") + print("=" * 80) + print("\nNext Steps:") + print(" 1. Try different models from Huggingface Hub") + print(" 2. Experiment with different generation parameters") + print(" 3. Fine-tune models on your own data") + print(" 4. Explore model quantization for efficiency") + print("\nResources:") + print(" - Huggingface Model Hub: https://huggingface.co/models") + print(" - Documentation: https://huggingface.co/docs") + print(" - Transformers Library: https://github.com/huggingface/transformers") + print() + + +if __name__ == "__main__": + main() diff --git a/model_weights_usage.py b/model_weights_usage.py new file mode 100644 index 0000000..604407e --- /dev/null +++ b/model_weights_usage.py @@ -0,0 +1,354 @@ +""" +Practical Examples: Model Weights Usage Patterns +================================================= + +This script demonstrates practical patterns for working with Huggingface model weights, +including efficient loading, saving, and sharing models. + +Usage: + python model_weights_usage.py +""" + +import os +import torch +from transformers import AutoTokenizer, AutoModelForCausalLM, AutoConfig +from pathlib import Path + + +class ModelWeightsManager: + """Manager class for handling model weights efficiently.""" + + def __init__(self, cache_dir="./model_cache"): + """ + Initialize the model weights manager. 
class ModelWeightsManager:
    """Manager class for handling model weights efficiently.

    Wraps download/cache, local save/load, and weight inspection around a
    single cache directory so repeated runs reuse downloaded weights.
    """

    def __init__(self, cache_dir="./model_cache"):
        """
        Initialize the model weights manager.

        Args:
            cache_dir: Directory to cache downloaded models (created if missing).
        """
        self.cache_dir = Path(cache_dir)
        self.cache_dir.mkdir(exist_ok=True)

    def download_model(self, model_name, precision="fp32"):
        """
        Download and cache a model with the requested precision.

        Args:
            model_name: Name of the model on Huggingface Hub.
            precision: Model precision - 'fp32' or 'fp16'. Any other value
                (e.g. 'int8', which would need a quantization backend) falls
                back to fp32 with a visible warning.

        Returns:
            tuple: (tokenizer, model)
        """
        # Fix: the original silently loaded fp32 for unknown precisions while
        # still *reporting* whatever string the caller passed. Normalize first
        # so the banner and size estimate below are truthful.
        if precision not in ("fp32", "fp16"):
            print(f"⚠ Precision '{precision}' is not supported; falling back to fp32")
            precision = "fp32"

        print(f"\n{'='*80}")
        print(f"Downloading Model: {model_name}")
        print(f"Precision: {precision}")
        print(f"{'='*80}")

        # Load tokenizer (cached under self.cache_dir after first download)
        tokenizer = AutoTokenizer.from_pretrained(
            model_name,
            cache_dir=self.cache_dir
        )
        print("✓ Tokenizer loaded")

        # Map the normalized precision string to a torch dtype
        torch_dtype = {
            "fp32": torch.float32,
            "fp16": torch.float16,
        }[precision]

        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            cache_dir=self.cache_dir,
            torch_dtype=torch_dtype,
            low_cpu_mem_usage=True  # stream weights instead of materializing twice
        )
        print("✓ Model loaded")

        # Report parameter count and approximate in-memory footprint
        num_params = sum(p.numel() for p in model.parameters())
        bytes_per_param = {"fp32": 4, "fp16": 2}[precision]
        size_gb = (num_params * bytes_per_param) / 1e9

        print(f"\nModel Statistics:")
        print(f"  Parameters: {num_params:,}")
        print(f"  Memory size: ~{size_gb:.2f} GB")

        return tokenizer, model

    def save_model_locally(self, model, tokenizer, save_path):
        """
        Save model and tokenizer to a local directory.

        Args:
            model: Model to save.
            tokenizer: Tokenizer to save.
            save_path: Directory path to save to (created if missing).
        """
        save_path = Path(save_path)
        save_path.mkdir(parents=True, exist_ok=True)

        print(f"\n{'='*80}")
        print(f"Saving Model to: {save_path}")
        print(f"{'='*80}")

        # Save model weights + config
        model.save_pretrained(save_path)
        print("✓ Model saved")

        # Save tokenizer files alongside
        tokenizer.save_pretrained(save_path)
        print("✓ Tokenizer saved")

        # List saved files with sizes for visibility
        files = list(save_path.glob("*"))
        print(f"\nSaved files:")
        for f in files:
            size_mb = f.stat().st_size / 1e6
            print(f"  {f.name} ({size_mb:.2f} MB)")

    def load_local_model(self, model_path):
        """
        Load model and tokenizer from a local directory.

        Args:
            model_path: Path to the saved model directory.

        Returns:
            tuple: (tokenizer, model)

        Raises:
            FileNotFoundError: If ``model_path`` does not exist.
        """
        model_path = Path(model_path)

        print(f"\n{'='*80}")
        print(f"Loading Model from: {model_path}")
        print(f"{'='*80}")

        # Guard clause: fail before touching transformers at all
        if not model_path.exists():
            raise FileNotFoundError(f"Model path not found: {model_path}")

        tokenizer = AutoTokenizer.from_pretrained(model_path)
        print("✓ Tokenizer loaded")

        model = AutoModelForCausalLM.from_pretrained(model_path)
        print("✓ Model loaded")

        return tokenizer, model

    def inspect_weights(self, model, num_layers=3):
        """
        Inspect model weight details.

        Prints shape/dtype/device/grad status plus min/max/mean/std for the
        first ``num_layers`` parameter tensors, then a trainable-parameter
        summary over the whole model.

        Args:
            model: Model to inspect (anything exposing ``named_parameters()``).
            num_layers: Number of parameter tensors to display.
        """
        print(f"\n{'='*80}")
        print("Model Weight Inspection")
        print(f"{'='*80}\n")

        layer_count = 0
        for name, param in model.named_parameters():
            if layer_count >= num_layers:
                break

            print(f"Layer: {name}")
            print(f"  Shape: {param.shape}")
            print(f"  Size: {param.numel():,} parameters")
            print(f"  Data type: {param.dtype}")
            print(f"  Device: {param.device}")
            print(f"  Requires gradient: {param.requires_grad}")

            # Weight statistics (guarded: min/max/std are undefined on empty tensors)
            if param.numel() > 0:
                print(f"  Min value: {param.min().item():.6f}")
                print(f"  Max value: {param.max().item():.6f}")
                print(f"  Mean value: {param.mean().item():.6f}")
                print(f"  Std dev: {param.std().item():.6f}")
            print()

            layer_count += 1

        total_params = sum(p.numel() for p in model.parameters())
        trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)

        print(f"Summary:")
        print(f"  Total parameters: {total_params:,}")
        print(f"  Trainable parameters: {trainable_params:,}")
        print(f"  Non-trainable parameters: {total_params - trainable_params:,}")
def demonstrate_weight_freezing():
    """Demonstrate freezing model weights.

    Loads distilgpt2, sets ``requires_grad = False`` on every parameter whose
    name does not contain "lm_head", then prints trainable-parameter counts
    before and after. Freezing is the standard first step for
    parameter-efficient fine-tuning.
    """
    print(f"\n{'='*80}")
    print("Demonstrating Weight Freezing")
    print(f"{'='*80}\n")

    model_name = "distilgpt2"

    # Load model (downloads from the Hub on first use)
    print("Loading model...")
    model = AutoModelForCausalLM.from_pretrained(model_name)

    # Check initial trainable parameters
    trainable_before = sum(p.numel() for p in model.parameters() if p.requires_grad)
    total_params = sum(p.numel() for p in model.parameters())

    print(f"\nBefore freezing:")
    print(f"  Total parameters: {total_params:,}")
    print(f"  Trainable parameters: {trainable_before:,}")

    # Freeze all parameters except last layer
    print("\nFreezing all layers except the last one...")
    # NOTE(review): for GPT-2-family models the lm_head weight is typically
    # tied to the input embedding and may not appear as a separate entry in
    # named_parameters(); if so, trainable_after can end up 0 — verify the
    # post-freeze count is nonzero before relying on this for fine-tuning.
    for name, param in model.named_parameters():
        if "lm_head" not in name:  # Keep only the output layer trainable
            param.requires_grad = False

    # Check trainable parameters after freezing
    trainable_after = sum(p.numel() for p in model.parameters() if p.requires_grad)

    print(f"\nAfter freezing:")
    print(f"  Total parameters: {total_params:,}")
    print(f"  Trainable parameters: {trainable_after:,}")
    print(f"  Frozen parameters: {total_params - trainable_after:,}")
    print(f"  Reduction: {(1 - trainable_after/trainable_before)*100:.1f}%")
def main():
    """Run every model-weights demonstration in sequence."""

    def section(number, title):
        # Numbered section banner, identical format to each demo's header.
        print("\n" + "=" * 80)
        print(f"{number}. {title}")
        print("=" * 80)

    print("\n")
    print("╔" + "=" * 78 + "╗")
    print("║" + " " * 20 + "MODEL WEIGHTS USAGE PATTERNS" + " " * 30 + "║")
    print("╚" + "=" * 78 + "╝")
    print("\n")

    # All download/save/load operations go through one cache-aware manager.
    manager = ModelWeightsManager(cache_dir="./model_cache")

    section(1, "DOWNLOADING AND CACHING MODELS")
    tokenizer, model = manager.download_model("distilgpt2", precision="fp16")

    section(2, "INSPECTING MODEL WEIGHTS")
    manager.inspect_weights(model, num_layers=2)

    section(3, "SAVING MODEL LOCALLY")
    save_path = "./saved_models/distilgpt2"
    manager.save_model_locally(model, tokenizer, save_path)

    section(4, "LOADING MODEL FROM LOCAL PATH")
    try:
        local_tokenizer, local_model = manager.load_local_model(save_path)
        print("✓ Successfully loaded from local path")
    except Exception as e:
        # Best-effort: a failed local reload shouldn't abort the other demos.
        print(f"Note: {e}")

    section(5, "WEIGHT SHARING PATTERNS")
    demonstrate_weight_sharing()

    section(6, "WEIGHT FREEZING FOR FINE-TUNING")
    demonstrate_weight_freezing()

    section(7, "WEIGHT INITIALIZATION")
    demonstrate_weight_initialization()

    print("\n" + "="*80)
    print("DEMONSTRATIONS COMPLETE")
    print("="*80)
    print("\nKey Takeaways:")
    print("  • Models are cached locally after first download")
    print("  • Different precisions (fp32, fp16) affect memory usage")
    print("  • Weights can be frozen for efficient fine-tuning")
    print("  • Models can be saved and loaded from local directories")
    print("  • Weight sharing reduces memory when using same model multiple times")
    print("\nNext Steps:")
    print("  • Explore model quantization (int8, int4)")
    print("  • Learn about distributed model loading")
    print("  • Practice fine-tuning with frozen weights")
    print("  • Experiment with different model architectures")
    print()


if __name__ == "__main__":
    main()
def main():
    """Run the quick text-generation demo; return 0 on success, 1 on failure."""
    rule = "=" * 80

    print("\n" + rule)
    print("🤗 Huggingface LLM Quick Demo")
    print(rule + "\n")

    print("Loading model... (this may take a moment on first run)")
    print("The model will be cached for faster loading next time.\n")

    try:
        from transformers import pipeline
        import warnings
        warnings.filterwarnings('ignore')

        # DistilGPT-2 keeps both the download and the generation step fast.
        text_generator = pipeline('text-generation', model='distilgpt2')

        print("✓ Model loaded successfully!\n")
        print(rule)
        print("Generating text samples...")
        print(rule + "\n")

        prompts = [
            "Artificial intelligence is",
            "The future of technology",
            "Machine learning enables us to",
        ]

        for i, prompt in enumerate(prompts, 1):
            print(f"Example {i}:")
            print(f"Prompt: '{prompt}'")
            print("-" * 80)

            # Sampled generation: temperature controls randomness,
            # pad_token_id=50256 silences GPT-2's missing-pad-token warning.
            outputs = text_generator(
                prompt,
                max_length=40,
                num_return_sequences=1,
                temperature=0.7,
                do_sample=True,
                pad_token_id=50256
            )

            print(f"Generated: {outputs[0]['generated_text']}")
            print()

        print(rule)
        print("Demo completed successfully! 🎉")
        print(rule + "\n")

        print("What you just saw:")
        print("  • Loaded a lightweight LLM (DistilGPT-2)")
        print("  • Generated creative text continuations")
        print("  • Used the simple pipeline API\n")

        print("Next steps:")
        print("  1. Run 'python explore_models.py' for more examples")
        print("  2. Open 'explore_llms.ipynb' in Jupyter for interactive learning")
        print("  3. Try different models from https://huggingface.co/models")
        print("  4. Experiment with different prompts and parameters\n")

        print("Tips:")
        print("  • Adjust 'temperature' (0.1-1.0) to control randomness")
        print("  • Increase 'max_length' for longer outputs")
        print("  • Try 'num_return_sequences' > 1 for multiple variations\n")

    except ImportError as e:
        # transformers/torch missing: point at requirements.txt instead of crashing.
        print("❌ Error: Required packages not installed")
        print("\nPlease install the required packages:")
        print("  pip install -r requirements.txt\n")
        print(f"Details: {e}\n")
        return 1

    except Exception as e:
        # Anything else (network, disk) gets a troubleshooting hint and exit code 1.
        print(f"❌ An error occurred: {e}\n")
        print("Troubleshooting:")
        print("  • Make sure you have an internet connection")
        print("  • Check that you have enough disk space (2GB+)")
        print("  • Try running: pip install --upgrade transformers torch\n")
        return 1

    return 0


if __name__ == "__main__":
    import sys
    sys.exit(main())