diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..6c82ede
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,66 @@
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# Virtual environments
+venv/
+env/
+ENV/
+.venv
+
+# Jupyter Notebook
+.ipynb_checkpoints
+*.ipynb_checkpoints/
+
+# IDEs
+.vscode/
+.idea/
+*.swp
+*.swo
+*~
+.DS_Store
+
+# Huggingface cache (downloaded models)
+.cache/
+transformers_cache/
+huggingface_cache/
+
+# Model weights (if saved locally)
+models/
+checkpoints/
+*.pt
+*.pth
+*.bin
+*.safetensors
+
+# Logs
+*.log
+logs/
+
+# Testing
+.pytest_cache/
+.coverage
+htmlcov/
+
+# Temporary files
+tmp/
+temp/
+*.tmp
diff --git a/QUICKSTART.md b/QUICKSTART.md
new file mode 100644
index 0000000..fe43b25
--- /dev/null
+++ b/QUICKSTART.md
@@ -0,0 +1,178 @@
+# Quick Start Guide
+
+This guide will help you get started with exploring opensource LLMs on Huggingface in minutes.
+
+## Prerequisites
+
+- Python 3.8+
+- 2GB+ free disk space (for model downloads)
+- Internet connection
+
+## Installation
+
+```bash
+# Clone the repository
+git clone https://github.com/audiracmichelle/huggingface_workshop.git
+cd huggingface_workshop
+
+# Install dependencies
+pip install -r requirements.txt
+```
+
+## Your First Model
+
+### Option 1: Interactive Notebook (Recommended for Learning)
+
+```bash
+jupyter notebook explore_llms.ipynb
+```
+
+Then run the cells sequentially to learn about:
+- Exploring available models
+- Loading model weights
+- Generating text
+- Understanding model architectures
+
+### Option 2: Python Script (Quick Demo)
+
+```bash
+python explore_models.py
+```
+
+This will automatically:
+1. Show top models on Huggingface Hub
+2. Load GPT-2 model
+3. Generate sample text
+4. Display model information
+
+Expected output:
+```
+╔══════════════════════════════════════════════════════════════════════════════╗
+║ EXPLORING OPENSOURCE LLMs ON HUGGINGFACE ║
+╚══════════════════════════════════════════════════════════════════════════════╝
+
+Environment Check
+================================================================================
+Transformers version: 4.x.x
+PyTorch version: 2.x.x
+CUDA available: True/False
+
+Exploring Top Text Generation Models
+================================================================================
+...
+```
+
+### Option 3: Advanced Model Usage
+
+```bash
+python model_weights_usage.py
+```
+
+This demonstrates:
+- Efficient model caching
+- Saving/loading models locally
+- Weight inspection
+- Memory optimization techniques
+
+## Simple Code Example
+
+Here's a minimal example to get you started:
+
+```python
+from transformers import pipeline
+
+# Create a text generation pipeline (downloads model automatically)
+generator = pipeline('text-generation', model='gpt2')
+
+# Generate text
+result = generator("The future of AI is", max_length=30)
+print(result[0]['generated_text'])
+```
+
+## Common Use Cases
+
+### 1. Text Generation
+
+```python
+from transformers import AutoTokenizer, AutoModelForCausalLM
+
+model_name = "gpt2"
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForCausalLM.from_pretrained(model_name)
+
+# Generate text
+inputs = tokenizer("Hello, I am", return_tensors="pt")
+outputs = model.generate(inputs.input_ids, max_length=50)
+text = tokenizer.decode(outputs[0])
+print(text)
+```
+
+### 2. Exploring Models
+
+```python
+from huggingface_hub import list_models
+
+# Find text generation models
+models = list(list_models(task="text-generation", sort="downloads", limit=5))
+for model in models:
+ print(f"- {model.id}")
+```
+
+### 3. Model Information
+
+```python
+from huggingface_hub import model_info
+
+info = model_info("gpt2")
+print(f"Model: {info.id}")
+print(f"Downloads: {info.downloads}")
+print(f"Task: {info.pipeline_tag}")
+```
+
+## Troubleshooting
+
+### Issue: Model download is slow
+**Solution**: Models are cached after first download. Subsequent loads will be instant.
+
+### Issue: Out of memory error
+**Solution**: Use a smaller model like `distilgpt2` or load in fp16:
+```python
+model = AutoModelForCausalLM.from_pretrained("gpt2", torch_dtype=torch.float16)
+```
+
+### Issue: CUDA out of memory
+**Solution**: Use CPU or a smaller model:
+```python
+model = AutoModelForCausalLM.from_pretrained("gpt2").to("cpu")
+```
+
+### Issue: Import errors
+**Solution**: Ensure all dependencies are installed:
+```bash
+pip install -r requirements.txt --upgrade
+```
+
+## Next Steps
+
+1. **Experiment**: Try different models from [Huggingface Hub](https://huggingface.co/models)
+2. **Learn**: Complete the Jupyter notebook cells
+3. **Customize**: Modify generation parameters (temperature, top_k, top_p)
+4. **Advanced**: Explore fine-tuning and model customization
+
+## Resources
+
+- 📖 [Full Documentation](README.md)
+- 🤗 [Huggingface Model Hub](https://huggingface.co/models)
+- 📚 [Transformers Docs](https://huggingface.co/docs/transformers)
+- 🎓 [Free Course](https://huggingface.co/course)
+
+## Getting Help
+
+If you encounter issues:
+1. Check the [Huggingface Documentation](https://huggingface.co/docs)
+2. Search [Huggingface Forums](https://discuss.huggingface.co/)
+3. Review the example notebooks in this repository
+
+---
+
+Happy coding! 🚀
diff --git a/README.md b/README.md
index 41bbbe2..e047d15 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,206 @@
-# huggingface_workshop
-Opensource LLMs and Customization
+# Huggingface Workshop: Opensource LLMs and Customization
+
+This repository demonstrates how to explore and use opensource Large Language Models (LLMs) from Huggingface Hub, including loading model weights, performing inference, and understanding model architectures.
+
+## 📚 Contents
+
+- **explore_llms.ipynb**: Interactive Jupyter notebook with comprehensive examples
+- **explore_models.py**: Python script for exploring models programmatically
+- **requirements.txt**: Dependencies needed to run the examples
+
+## 🚀 Getting Started
+
+### Prerequisites
+
+- Python 3.8 or higher
+- pip package manager
+- (Optional) CUDA-capable GPU for faster inference
+
+### Installation
+
+1. Clone this repository:
+```bash
+git clone https://github.com/audiracmichelle/huggingface_workshop.git
+cd huggingface_workshop
+```
+
+2. Install required packages:
+```bash
+pip install -r requirements.txt
+```
+
+### Quick Start
+
+#### Using the Jupyter Notebook
+
+```bash
+jupyter notebook explore_llms.ipynb
+```
+
+The notebook includes:
+- Environment setup and configuration
+- Searching and filtering models on Huggingface Hub
+- Loading model weights with different methods
+- Text generation examples
+- Model architecture inspection
+- Performance optimization tips
+
+#### Using the Python Script
+
+```bash
+python explore_models.py
+```
+
+This will:
+- Check your environment setup
+- Explore top models on Huggingface Hub
+- Load and use GPT-2 for text generation
+- Compare model sizes
+- Demonstrate the pipeline API
+
+## 📖 What You'll Learn
+
+### 1. Exploring Available Models
+
+Learn how to:
+- Search for models by task (text-generation, classification, etc.)
+- Filter models by popularity, downloads, and likes
+- Get detailed information about specific models
+- Understand model cards and documentation
+
+### 2. Loading Model Weights
+
+Multiple methods for loading models:
+- **Simple loading**: `AutoModelForCausalLM.from_pretrained()`
+- **With configuration**: Custom model configurations
+- **Memory optimization**: Half-precision (fp16) and quantization
+- **Local caching**: Reusing downloaded models
+
+### 3. Using Models for Inference
+
+- Basic text generation
+- Controlling generation parameters (temperature, top_k, top_p)
+- Using the pipeline API for simplified inference
+- Batch processing for efficiency
+
+### 4. Working with Different Model Types
+
+Examples with:
+- **GPT-2**: Causal language modeling
+- **BERT**: Masked language modeling
+- **DistilGPT-2**: Distilled models for efficiency
+- Other popular architectures
+
+## 🎯 Key Concepts
+
+### Model Selection
+
+Consider these factors when choosing a model:
+- **Task alignment**: Match model capabilities to your needs
+- **Model size**: Balance performance vs. resource constraints
+- **License**: Check usage rights (especially for commercial use)
+- **Community support**: Popular models have better documentation
+
+### Model Weights
+
+Understanding model weights:
+- **Parameters**: Total number of trainable parameters
+- **Precision**: fp32, fp16, int8 (affects size and speed)
+- **Architecture**: Layer types and connections
+- **Size on disk**: Storage requirements for downloaded models
+
+### Performance Optimization
+
+Tips for efficient model usage:
+- Use GPU acceleration when available
+- Load models in reduced precision (fp16)
+- Implement batch processing
+- Cache downloaded models locally
+- Use distilled models for faster inference
+
+## 📊 Example Models
+
+Here are some recommended models to start with:
+
+| Model | Size | Use Case | Good For |
+|-------|------|----------|----------|
+| `gpt2` | 124M | Text generation | Learning, experimentation |
+| `distilgpt2` | 82M | Text generation | Resource-constrained environments |
+| `bert-base-uncased` | 110M | Classification, NER | Understanding, embeddings |
+| `gpt2-medium` | 355M | Text generation | Better quality generation |
+| `EleutherAI/gpt-neo-125M` | 125M | Text generation | Open alternative to GPT |
+
+## 🔧 Advanced Usage
+
+### Loading Models with Custom Configuration
+
+```python
+from transformers import AutoConfig, AutoModelForCausalLM
+
+config = AutoConfig.from_pretrained("gpt2")
+config.n_layer = 6 # Reduce number of layers
+model = AutoModelForCausalLM.from_config(config)
+```
+
+### Using Half Precision for Memory Efficiency
+
+```python
+model = AutoModelForCausalLM.from_pretrained(
+ "gpt2",
+ torch_dtype=torch.float16,
+ low_cpu_mem_usage=True
+)
+```
+
+### Generating with Custom Parameters
+
+```python
+outputs = model.generate(
+ input_ids,
+ max_length=100,
+ temperature=0.8, # Higher = more random
+ top_k=50, # Consider top 50 tokens
+ top_p=0.95, # Nucleus sampling
+ do_sample=True, # Enable sampling
+ num_return_sequences=3 # Generate 3 variations
+)
+```
+
+## 📚 Resources
+
+- [Huggingface Model Hub](https://huggingface.co/models) - Browse thousands of models
+- [Transformers Documentation](https://huggingface.co/docs/transformers) - Official docs
+- [Huggingface Course](https://huggingface.co/course) - Free online course
+- [Model Cards](https://huggingface.co/docs/hub/model-cards) - Understanding model documentation
+
+## 🤝 Contributing
+
+Contributions are welcome! Feel free to:
+- Add new examples
+- Improve documentation
+- Report issues
+- Suggest new models to showcase
+
+## 📝 License
+
+This project is licensed under the Mozilla Public License 2.0 - see the [LICENSE](LICENSE) file for details.
+
+## ⚠️ Notes
+
+- Model downloads can be large (several GB). Ensure sufficient disk space.
+- First-time model loading will download weights from Huggingface Hub.
+- GPU is recommended but not required for these examples.
+- Some models may have specific license restrictions for commercial use.
+
+## 🎓 Next Steps
+
+After completing this workshop, consider:
+1. **Fine-tuning**: Adapt models to your specific domain
+2. **Deployment**: Serve models via API endpoints
+3. **Optimization**: Explore quantization and pruning techniques
+4. **Custom models**: Train your own models from scratch
+5. **Multi-modal models**: Work with vision-language models
+
+---
+
+Happy exploring! 🚀
diff --git a/explore_llms.ipynb b/explore_llms.ipynb
new file mode 100644
index 0000000..676d8f8
--- /dev/null
+++ b/explore_llms.ipynb
@@ -0,0 +1,446 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Exploring Opensource LLMs on Huggingface\n",
+ "\n",
+ "This notebook demonstrates how to explore and use opensource Large Language Models (LLMs) from Huggingface Hub.\n",
+ "\n",
+ "## Table of Contents\n",
+ "1. [Setup and Installation](#setup)\n",
+ "2. [Exploring Available Models](#exploring)\n",
+ "3. [Loading Model Weights](#loading)\n",
+ "4. [Using Models for Inference](#inference)\n",
+ "5. [Working with Different Model Types](#model-types)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 1. Setup and Installation \n",
+ "\n",
+ "First, let's import the necessary libraries and check our environment."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Install required packages\n",
+ "# !pip install transformers torch huggingface-hub datasets"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import transformers\n",
+ "from transformers import AutoModel, AutoTokenizer, AutoModelForCausalLM, AutoModelForSeq2SeqLM\n",
+ "from huggingface_hub import HfApi, list_models, model_info\n",
+ "import torch\n",
+ "import warnings\n",
+ "warnings.filterwarnings('ignore')\n",
+ "\n",
+ "print(f\"Transformers version: {transformers.__version__}\")\n",
+ "print(f\"PyTorch version: {torch.__version__}\")\n",
+ "print(f\"CUDA available: {torch.cuda.is_available()}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 2. Exploring Available Models \n",
+ "\n",
+ "Huggingface Hub hosts thousands of opensource models. Let's explore how to search and filter them."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Initialize the Huggingface Hub API\n",
+ "api = HfApi()\n",
+ "\n",
+ "# Search for text-generation models\n",
+ "models = list(list_models(\n",
+ " task=\"text-generation\",\n",
+ " sort=\"downloads\",\n",
+ " limit=10\n",
+ "))\n",
+ "\n",
+ "print(\"Top 10 Most Downloaded Text Generation Models:\")\n",
+ "print(\"=\" * 80)\n",
+ "for i, model in enumerate(models, 1):\n",
+ " print(f\"{i}. {model.id}\")\n",
+ " print(f\" Downloads: {model.downloads if hasattr(model, 'downloads') else 'N/A'}\")\n",
+ " print(f\" Likes: {model.likes if hasattr(model, 'likes') else 'N/A'}\")\n",
+ " print()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Get detailed information about a specific model\n",
+ "model_id = \"gpt2\"\n",
+ "info = model_info(model_id)\n",
+ "\n",
+ "print(f\"Model Information for '{model_id}':\")\n",
+ "print(\"=\" * 80)\n",
+ "print(f\"Model ID: {info.id}\")\n",
+ "print(f\"Task: {info.pipeline_tag}\")\n",
+ "print(f\"Library: {info.library_name}\")\n",
+ "print(f\"Downloads: {info.downloads}\")\n",
+ "print(f\"Likes: {info.likes}\")\n",
+ "print(f\"Tags: {info.tags[:5] if info.tags else 'N/A'}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 3. Loading Model Weights \n",
+ "\n",
+ "Let's learn how to load model weights from Huggingface Hub. We'll use GPT-2 as an example since it's lightweight and popular."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Method 1: Load model and tokenizer directly\n",
+ "model_name = \"gpt2\"\n",
+ "\n",
+ "print(f\"Loading model: {model_name}\")\n",
+ "print(\"This may take a moment as weights are downloaded...\")\n",
+ "\n",
+ "# Load tokenizer\n",
+ "tokenizer = AutoTokenizer.from_pretrained(model_name)\n",
+ "print(f\"✓ Tokenizer loaded\")\n",
+ "\n",
+ "# Load model\n",
+ "model = AutoModelForCausalLM.from_pretrained(model_name)\n",
+ "print(f\"✓ Model loaded\")\n",
+ "\n",
+ "# Check model size\n",
+ "num_parameters = sum(p.numel() for p in model.parameters())\n",
+ "print(f\"\\nModel has {num_parameters:,} parameters\")\n",
+ "print(f\"Model size: ~{num_parameters * 4 / 1e9:.2f} GB (fp32)\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Method 2: Load model with specific configurations\n",
+ "from transformers import GPT2Config, GPT2LMHeadModel\n",
+ "\n",
+ "# Load configuration\n",
+ "config = GPT2Config.from_pretrained(model_name)\n",
+ "print(\"Model Configuration:\")\n",
+ "print(f\" Vocabulary size: {config.vocab_size}\")\n",
+ "print(f\" Hidden size: {config.n_embd}\")\n",
+ "print(f\" Number of layers: {config.n_layer}\")\n",
+ "print(f\" Number of attention heads: {config.n_head}\")\n",
+ "print(f\" Max position embeddings: {config.n_positions}\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Method 3: Load model with reduced precision to save memory\n",
+ "print(\"Loading model with half precision (fp16)...\")\n",
+ "model_fp16 = AutoModelForCausalLM.from_pretrained(\n",
+ " model_name,\n",
+ " torch_dtype=torch.float16,\n",
+ " low_cpu_mem_usage=True\n",
+ ")\n",
+ "print(f\"✓ Model loaded in fp16\")\n",
+ "print(f\"Memory footprint reduced by ~50%\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 4. Using Models for Inference \n",
+ "\n",
+ "Now let's use the loaded model to generate text."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Simple text generation\n",
+ "prompt = \"Artificial intelligence is\"\n",
+ "\n",
+ "# Tokenize input\n",
+ "inputs = tokenizer(prompt, return_tensors=\"pt\")\n",
+ "\n",
+ "# Generate text\n",
+ "print(f\"Prompt: '{prompt}'\")\n",
+ "print(\"\\nGenerated text:\")\n",
+ "print(\"=\" * 80)\n",
+ "\n",
+ "outputs = model.generate(\n",
+ " inputs.input_ids,\n",
+ " max_length=50,\n",
+ " num_return_sequences=1,\n",
+ " temperature=0.7,\n",
+ " do_sample=True,\n",
+ " top_k=50,\n",
+    "    top_p=0.95, pad_token_id=tokenizer.eos_token_id\n",
+ ")\n",
+ "\n",
+ "generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)\n",
+ "print(generated_text)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Using the pipeline API (easier interface)\n",
+ "from transformers import pipeline\n",
+ "\n",
+ "# Create a text generation pipeline\n",
+ "generator = pipeline('text-generation', model=model_name, tokenizer=model_name)\n",
+ "\n",
+ "# Generate multiple variations\n",
+ "prompts = [\n",
+ " \"The future of technology is\",\n",
+ " \"In the world of machine learning,\",\n",
+ " \"Open source software enables\"\n",
+ "]\n",
+ "\n",
+ "print(\"Generated texts using pipeline:\")\n",
+ "print(\"=\" * 80)\n",
+ "\n",
+ "for prompt in prompts:\n",
+ " result = generator(prompt, max_length=40, num_return_sequences=1)\n",
+ " print(f\"\\nPrompt: {prompt}\")\n",
+ " print(f\"Output: {result[0]['generated_text']}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 5. Working with Different Model Types \n",
+ "\n",
+ "Let's explore different types of models available on Huggingface Hub."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Example 1: BERT for masked language modeling\n",
+ "from transformers import BertTokenizer, BertForMaskedLM\n",
+ "\n",
+ "bert_model_name = \"bert-base-uncased\"\n",
+ "bert_tokenizer = BertTokenizer.from_pretrained(bert_model_name)\n",
+ "bert_model = BertForMaskedLM.from_pretrained(bert_model_name)\n",
+ "\n",
+ "# Use BERT to predict masked words\n",
+ "text = \"The capital of France is [MASK].\"\n",
+ "inputs = bert_tokenizer(text, return_tensors=\"pt\")\n",
+ "\n",
+ "with torch.no_grad():\n",
+ " outputs = bert_model(**inputs)\n",
+ " predictions = outputs.logits\n",
+ "\n",
+ "# Get the predicted token\n",
+ "mask_token_index = (inputs.input_ids == bert_tokenizer.mask_token_id)[0].nonzero(as_tuple=True)[0]\n",
+ "predicted_token_id = predictions[0, mask_token_index].argmax(axis=-1)\n",
+ "predicted_token = bert_tokenizer.decode(predicted_token_id)\n",
+ "\n",
+ "print(f\"BERT Model Example:\")\n",
+ "print(f\"Input: {text}\")\n",
+ "print(f\"Prediction: {predicted_token}\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Example 2: Smaller LLM models for resource-constrained environments\n",
+ "print(\"\\nExploring smaller LLM models:\")\n",
+ "print(\"=\" * 80)\n",
+ "\n",
+ "small_models = [\n",
+ " \"distilgpt2\", # Distilled version of GPT-2 (smaller, faster)\n",
+ " \"gpt2-medium\", # Medium-sized GPT-2\n",
+ " \"EleutherAI/gpt-neo-125M\" # GPT-Neo 125M parameters\n",
+ "]\n",
+ "\n",
+ "for model_id in small_models:\n",
+ " try:\n",
+ " info = model_info(model_id)\n",
+ " print(f\"\\n{model_id}:\")\n",
+ " print(f\" Pipeline: {info.pipeline_tag}\")\n",
+ " print(f\" Downloads: {info.downloads}\")\n",
+ " except Exception as e:\n",
+ " print(f\"\\n{model_id}: Could not fetch info\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Example 3: Using DistilGPT-2 (lighter version)\n",
+ "distil_model_name = \"distilgpt2\"\n",
+ "distil_tokenizer = AutoTokenizer.from_pretrained(distil_model_name)\n",
+ "distil_model = AutoModelForCausalLM.from_pretrained(distil_model_name)\n",
+ "\n",
+ "# Compare model sizes\n",
+ "gpt2_params = sum(p.numel() for p in model.parameters())\n",
+ "distil_params = sum(p.numel() for p in distil_model.parameters())\n",
+ "\n",
+ "print(f\"\\nModel Size Comparison:\")\n",
+ "print(f\"GPT-2: {gpt2_params:,} parameters\")\n",
+ "print(f\"DistilGPT-2: {distil_params:,} parameters\")\n",
+ "print(f\"Reduction: {(1 - distil_params/gpt2_params)*100:.1f}%\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Best Practices for Model Usage\n",
+ "\n",
+ "### Model Selection Criteria:\n",
+ "1. **Task Type**: Choose models based on your specific task (text generation, classification, QA, etc.)\n",
+ "2. **Model Size**: Balance between performance and resource constraints\n",
+ "3. **License**: Check model licenses for commercial use restrictions\n",
+ "4. **Community Support**: Popular models have better documentation and community support\n",
+ "\n",
+ "### Performance Optimization:\n",
+ "1. **Use quantization**: Load models in fp16 or int8 for memory savings\n",
+ "2. **Batch processing**: Process multiple inputs together when possible\n",
+ "3. **Caching**: Save downloaded models locally to avoid re-downloading\n",
+ "4. **GPU acceleration**: Use CUDA when available for faster inference\n",
+ "\n",
+ "### Exploring More Models:\n",
+ "- Visit [Huggingface Model Hub](https://huggingface.co/models)\n",
+ "- Filter by task, library, language, and license\n",
+ "- Check model cards for detailed information\n",
+ "- Read the documentation and community discussions"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Utility function to inspect model weights\n",
+ "def inspect_model_weights(model, layer_name=None):\n",
+ " \"\"\"\n",
+ " Inspect the weights of a model or specific layer.\n",
+ " \n",
+ " Args:\n",
+ " model: The loaded model\n",
+ " layer_name: Optional specific layer name to inspect\n",
+ " \"\"\"\n",
+ " print(\"Model Architecture:\")\n",
+ " print(\"=\" * 80)\n",
+ " \n",
+ " total_params = 0\n",
+ " for name, param in model.named_parameters():\n",
+ " if layer_name is None or layer_name in name:\n",
+ " print(f\"Layer: {name}\")\n",
+ " print(f\" Shape: {param.shape}\")\n",
+ " print(f\" Parameters: {param.numel():,}\")\n",
+ " print(f\" Dtype: {param.dtype}\")\n",
+ " print(f\" Requires grad: {param.requires_grad}\")\n",
+ " print()\n",
+ " total_params += param.numel()\n",
+ " \n",
+ " print(f\"Total parameters: {total_params:,}\")\n",
+ " return total_params\n",
+ "\n",
+ "# Example usage\n",
+    "print(\"\\nInspecting the token-embedding layer (wte) of GPT-2:\")\n",
+ "inspect_model_weights(model, layer_name=\"transformer.wte\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Conclusion\n",
+ "\n",
+ "This notebook demonstrated:\n",
+ "- How to explore and search for models on Huggingface Hub\n",
+ "- Different methods to load model weights\n",
+ "- How to use models for inference\n",
+ "- Working with different model architectures\n",
+ "- Best practices for model usage\n",
+ "\n",
+ "### Next Steps:\n",
+ "1. Experiment with different models for your specific use case\n",
+ "2. Fine-tune models on your own data\n",
+ "3. Explore model quantization and optimization techniques\n",
+ "4. Deploy models to production environments\n",
+ "\n",
+ "### Resources:\n",
+ "- [Huggingface Documentation](https://huggingface.co/docs)\n",
+ "- [Transformers Library](https://github.com/huggingface/transformers)\n",
+ "- [Model Hub](https://huggingface.co/models)\n",
+ "- [Datasets Hub](https://huggingface.co/datasets)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.0"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/explore_models.py b/explore_models.py
new file mode 100644
index 0000000..cdfe201
--- /dev/null
+++ b/explore_models.py
@@ -0,0 +1,281 @@
+"""
+Exploring Opensource LLMs on Huggingface
+==========================================
+
+This script demonstrates how to explore and use opensource Large Language Models (LLMs)
+from Huggingface Hub, including loading model weights and performing inference.
+
+Usage:
+ python explore_models.py
+
+Requirements:
+ - transformers
+ - torch
+ - huggingface-hub
+"""
+
+import transformers
+from transformers import AutoModel, AutoTokenizer, AutoModelForCausalLM
+from huggingface_hub import HfApi, list_models, model_info
+import torch
+import warnings
+warnings.filterwarnings('ignore')
+
+
+def check_environment():
+ """Check the environment setup."""
+ print("=" * 80)
+ print("Environment Check")
+ print("=" * 80)
+ print(f"Transformers version: {transformers.__version__}")
+ print(f"PyTorch version: {torch.__version__}")
+ print(f"CUDA available: {torch.cuda.is_available()}")
+ if torch.cuda.is_available():
+ print(f"CUDA device: {torch.cuda.get_device_name(0)}")
+ print()
+
+
+def explore_models():
+ """Explore available models on Huggingface Hub."""
+ print("=" * 80)
+ print("Exploring Top Text Generation Models")
+ print("=" * 80)
+
+ # Initialize the Huggingface Hub API
+ api = HfApi()
+
+ # Search for text-generation models
+ models = list(list_models(
+ task="text-generation",
+ sort="downloads",
+ limit=10
+ ))
+
+ print("\nTop 10 Most Downloaded Text Generation Models:")
+ print("-" * 80)
+ for i, model in enumerate(models, 1):
+ print(f"{i}. {model.id}")
+ print(f" Downloads: {model.downloads if hasattr(model, 'downloads') else 'N/A'}")
+ print(f" Likes: {model.likes if hasattr(model, 'likes') else 'N/A'}")
+ print()
+
+
+def get_model_info(model_id="gpt2"):
+ """Get detailed information about a specific model."""
+ print("=" * 80)
+ print(f"Model Information: {model_id}")
+ print("=" * 80)
+
+ info = model_info(model_id)
+
+ print(f"Model ID: {info.id}")
+ print(f"Task: {info.pipeline_tag}")
+ print(f"Library: {info.library_name}")
+ print(f"Downloads: {info.downloads}")
+ print(f"Likes: {info.likes}")
+ print(f"Tags: {info.tags[:5] if info.tags else 'N/A'}")
+ print()
+
+
+def load_model_weights(model_name="gpt2"):
+ """
+ Load model weights from Huggingface Hub.
+
+ Args:
+ model_name: Name of the model to load
+
+ Returns:
+ tuple: (tokenizer, model)
+ """
+ print("=" * 80)
+ print(f"Loading Model: {model_name}")
+ print("=" * 80)
+ print("This may take a moment as weights are downloaded...")
+
+ # Load tokenizer
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
+ print("✓ Tokenizer loaded")
+
+ # Load model
+ model = AutoModelForCausalLM.from_pretrained(model_name)
+ print("✓ Model loaded")
+
+ # Check model size
+ num_parameters = sum(p.numel() for p in model.parameters())
+ print(f"\nModel Statistics:")
+ print(f" Total parameters: {num_parameters:,}")
+ print(f" Model size (fp32): ~{num_parameters * 4 / 1e9:.2f} GB")
+ print()
+
+ return tokenizer, model
+
+
+def generate_text(model, tokenizer, prompt="Artificial intelligence is"):
+ """
+ Generate text using the loaded model.
+
+ Args:
+ model: Loaded model
+ tokenizer: Loaded tokenizer
+ prompt: Text prompt for generation
+ """
+ print("=" * 80)
+ print("Text Generation")
+ print("=" * 80)
+ print(f"Prompt: '{prompt}'")
+ print("\nGenerated text:")
+ print("-" * 80)
+
+ # Tokenize input
+ inputs = tokenizer(prompt, return_tensors="pt")
+
+ # Generate text
+ outputs = model.generate(
+ inputs.input_ids,
+ max_length=50,
+ num_return_sequences=1,
+ temperature=0.7,
+ do_sample=True,
+ top_k=50,
+ top_p=0.95,
+ pad_token_id=tokenizer.eos_token_id
+ )
+
+ generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
+ print(generated_text)
+ print()
+
+
+def compare_model_sizes():
+ """Compare sizes of different models."""
+ print("=" * 80)
+ print("Model Size Comparison")
+ print("=" * 80)
+
+ models_to_compare = {
+ "distilgpt2": "DistilGPT-2 (Distilled)",
+ "gpt2": "GPT-2 (Base)",
+ }
+
+ for model_name, description in models_to_compare.items():
+ try:
+ model = AutoModelForCausalLM.from_pretrained(model_name)
+ num_params = sum(p.numel() for p in model.parameters())
+ print(f"{description}:")
+ print(f" Model: {model_name}")
+ print(f" Parameters: {num_params:,}")
+ print(f" Size (fp32): ~{num_params * 4 / 1e9:.2f} GB")
+ print()
+ del model # Free memory
+ except Exception as e:
+ print(f"{description}: Could not load - {e}")
+ print()
+
+
+def inspect_model_architecture(model, max_layers=5):
+ """
+ Inspect the architecture and weights of a model.
+
+ Args:
+ model: Loaded model
+ max_layers: Maximum number of layers to display
+ """
+ print("=" * 80)
+ print("Model Architecture (First Few Layers)")
+ print("=" * 80)
+
+ total_params = 0
+ layer_count = 0
+
+ for name, param in model.named_parameters():
+ if layer_count >= max_layers:
+ print(f"... ({len(list(model.named_parameters())) - max_layers} more layers)")
+ break
+
+ print(f"Layer: {name}")
+ print(f" Shape: {param.shape}")
+ print(f" Parameters: {param.numel():,}")
+ print(f" Dtype: {param.dtype}")
+ print()
+
+ total_params += param.numel()
+ layer_count += 1
+
+ print(f"Total parameters in model: {sum(p.numel() for p in model.parameters()):,}")
+ print()
+
+
+def demonstrate_pipeline_api():
+ """Demonstrate the easier pipeline API."""
+ from transformers import pipeline
+
+ print("=" * 80)
+ print("Using Pipeline API (Simplified Interface)")
+ print("=" * 80)
+
+ # Create a text generation pipeline
+ generator = pipeline('text-generation', model='gpt2')
+
+ prompts = [
+ "The future of technology is",
+ "Open source software enables",
+ "Machine learning can"
+ ]
+
+ for prompt in prompts:
+ result = generator(prompt, max_length=40, num_return_sequences=1, pad_token_id=50256)
+ print(f"Prompt: {prompt}")
+ print(f"Output: {result[0]['generated_text']}")
+ print()
+
+
+def main():
+ """Main function to run all demonstrations."""
+ print("\n")
+    print("╔" + "═" * 78 + "╗")
+    print("║" + " " * 15 + "EXPLORING OPENSOURCE LLMs ON HUGGINGFACE" + " " * 23 + "║")
+    print("╚" + "═" * 78 + "╝")
+ print("\n")
+
+ # 1. Check environment
+ check_environment()
+
+ # 2. Explore available models
+ explore_models()
+
+ # 3. Get detailed model information
+ get_model_info("gpt2")
+
+ # 4. Load model weights
+ tokenizer, model = load_model_weights("gpt2")
+
+ # 5. Generate text
+ generate_text(model, tokenizer, "Artificial intelligence is")
+ generate_text(model, tokenizer, "The future of machine learning")
+
+ # 6. Inspect model architecture
+ inspect_model_architecture(model, max_layers=3)
+
+ # 7. Compare model sizes
+ compare_model_sizes()
+
+ # 8. Demonstrate pipeline API
+ demonstrate_pipeline_api()
+
+ print("=" * 80)
+ print("Exploration Complete!")
+ print("=" * 80)
+ print("\nNext Steps:")
+ print(" 1. Try different models from Huggingface Hub")
+ print(" 2. Experiment with different generation parameters")
+ print(" 3. Fine-tune models on your own data")
+ print(" 4. Explore model quantization for efficiency")
+ print("\nResources:")
+ print(" - Huggingface Model Hub: https://huggingface.co/models")
+ print(" - Documentation: https://huggingface.co/docs")
+ print(" - Transformers Library: https://github.com/huggingface/transformers")
+ print()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/model_weights_usage.py b/model_weights_usage.py
new file mode 100644
index 0000000..604407e
--- /dev/null
+++ b/model_weights_usage.py
@@ -0,0 +1,354 @@
+"""
+Practical Examples: Model Weights Usage Patterns
+=================================================
+
+This script demonstrates practical patterns for working with Huggingface model weights,
+including efficient loading, saving, and sharing models.
+
+Usage:
+ python model_weights_usage.py
+"""
+
+import os
+import torch
+from transformers import AutoTokenizer, AutoModelForCausalLM, AutoConfig
+from pathlib import Path
+
+
class ModelWeightsManager:
    """Manager class for handling model weights efficiently.

    Wraps the common Huggingface weight workflows: downloading at a chosen
    precision, saving/loading to a local directory, and inspecting the
    parameter tensors of any loaded ``torch.nn.Module``.
    """

    # Supported precisions: torch dtype and bytes-per-parameter for the
    # memory estimate.  Unknown precision strings fall back to fp32.
    _DTYPES = {"fp32": torch.float32, "fp16": torch.float16}
    _BYTES_PER_PARAM = {"fp32": 4, "fp16": 2}

    def __init__(self, cache_dir="./model_cache"):
        """
        Initialize the model weights manager.

        Args:
            cache_dir: Directory to cache downloaded models.  Created on
                demand; nested paths are allowed (parent directories are
                created as needed).
        """
        self.cache_dir = Path(cache_dir)
        # parents=True so a nested cache path (e.g. "./a/b/cache") does not
        # raise FileNotFoundError on first use.
        self.cache_dir.mkdir(parents=True, exist_ok=True)

    def download_model(self, model_name, precision="fp32"):
        """
        Download and cache a model with specified precision.

        Args:
            model_name: Name of the model on Huggingface Hub.
            precision: Model precision - 'fp32' or 'fp16'.  Any other value
                (including 'int8', which would require a quantization
                backend) silently falls back to fp32.

        Returns:
            tuple: (tokenizer, model)
        """
        print(f"\n{'='*80}")
        print(f"Downloading Model: {model_name}")
        print(f"Precision: {precision}")
        print(f"{'='*80}")

        # Load tokenizer
        tokenizer = AutoTokenizer.from_pretrained(
            model_name,
            cache_dir=self.cache_dir
        )
        print("✓ Tokenizer loaded")

        # Load model with the requested precision.  low_cpu_mem_usage
        # streams weights into place instead of first materializing a
        # randomly initialized copy, roughly halving peak RAM.
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            cache_dir=self.cache_dir,
            torch_dtype=self._DTYPES.get(precision, torch.float32),
            low_cpu_mem_usage=True
        )
        print("✓ Model loaded")

        # Model statistics (parameter tensors only; buffers excluded)
        num_params = sum(p.numel() for p in model.parameters())
        size_gb = (num_params * self._BYTES_PER_PARAM.get(precision, 4)) / 1e9

        print(f"\nModel Statistics:")
        print(f" Parameters: {num_params:,}")
        print(f" Memory size: ~{size_gb:.2f} GB")

        return tokenizer, model

    def save_model_locally(self, model, tokenizer, save_path):
        """
        Save model and tokenizer to local directory.

        Args:
            model: Model to save.
            tokenizer: Tokenizer to save.
            save_path: Directory path to save to (created if missing).
        """
        save_path = Path(save_path)
        save_path.mkdir(parents=True, exist_ok=True)

        print(f"\n{'='*80}")
        print(f"Saving Model to: {save_path}")
        print(f"{'='*80}")

        # Save model
        model.save_pretrained(save_path)
        print("✓ Model saved")

        # Save tokenizer
        tokenizer.save_pretrained(save_path)
        print("✓ Tokenizer saved")

        # List saved files; glob("*") also matches subdirectories, so keep
        # regular files only (st_size of a directory is not meaningful).
        files = [f for f in save_path.glob("*") if f.is_file()]
        print(f"\nSaved files:")
        for f in files:
            size_mb = f.stat().st_size / 1e6
            print(f" {f.name} ({size_mb:.2f} MB)")

    def load_local_model(self, model_path):
        """
        Load model from local directory.

        Args:
            model_path: Path to the saved model directory.

        Returns:
            tuple: (tokenizer, model)

        Raises:
            FileNotFoundError: If ``model_path`` does not exist.
        """
        model_path = Path(model_path)

        print(f"\n{'='*80}")
        print(f"Loading Model from: {model_path}")
        print(f"{'='*80}")

        if not model_path.exists():
            raise FileNotFoundError(f"Model path not found: {model_path}")

        # Load tokenizer
        tokenizer = AutoTokenizer.from_pretrained(model_path)
        print("✓ Tokenizer loaded")

        # Load model
        model = AutoModelForCausalLM.from_pretrained(model_path)
        print("✓ Model loaded")

        return tokenizer, model

    def inspect_weights(self, model, num_layers=3):
        """
        Print per-tensor details for the first few parameters of a model.

        Args:
            model: Any ``torch.nn.Module`` to inspect.
            num_layers: Number of parameter tensors to display.
        """
        print(f"\n{'='*80}")
        print("Model Weight Inspection")
        print(f"{'='*80}\n")

        layer_count = 0
        for name, param in model.named_parameters():
            if layer_count >= num_layers:
                break

            print(f"Layer: {name}")
            print(f" Shape: {param.shape}")
            print(f" Size: {param.numel():,} parameters")
            print(f" Data type: {param.dtype}")
            print(f" Device: {param.device}")
            print(f" Requires gradient: {param.requires_grad}")

            # Weight statistics
            if param.numel() > 0:
                print(f" Min value: {param.min().item():.6f}")
                print(f" Max value: {param.max().item():.6f}")
                print(f" Mean value: {param.mean().item():.6f}")
                # std() of a single-element tensor is NaN; report 0 instead.
                std_dev = param.std().item() if param.numel() > 1 else 0.0
                print(f" Std dev: {std_dev:.6f}")
            print()

            layer_count += 1

        total_params = sum(p.numel() for p in model.parameters())
        trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)

        print(f"Summary:")
        print(f" Total parameters: {total_params:,}")
        print(f" Trainable parameters: {trainable_params:,}")
        print(f" Non-trainable parameters: {total_params - trainable_params:,}")
+
+
def demonstrate_weight_sharing():
    """Demonstrate how model weights can be shared between instances.

    NOTE(review): ``model2 = model1`` below binds a second *name* to the
    very same Python object — it does not create a second model instance
    whose tensors alias the first.  The data_ptr() comparison therefore
    always reports identical addresses.  A genuine sharing demo would
    assign submodules of one instance into another; worth confirming the
    intended teaching point before relying on this example.
    """
    print(f"\n{'='*80}")
    print("Demonstrating Weight Sharing")
    print(f"{'='*80}\n")

    model_name = "distilgpt2"

    # Load model (downloads to the HF cache on first run; needs network)
    print("Loading base model...")
    model1 = AutoModelForCausalLM.from_pretrained(model_name)

    # Create second instance that shares weights
    # (aliasing: model2 and model1 reference the same object)
    print("Creating second instance with shared weights...")
    model2 = model1

    # Verify weight sharing: same storage address ⇒ same tensor object.
    param1 = next(model1.parameters())
    param2 = next(model2.parameters())

    print(f"\nWeight sharing verification:")
    print(f" Same memory location: {param1.data_ptr() == param2.data_ptr()}")
    print(f" Model1 parameter address: {hex(param1.data_ptr())}")
    print(f" Model2 parameter address: {hex(param2.data_ptr())}")

    # Memory usage: numel * element_size sums parameter bytes only
    # (buffers and activations are not counted).
    param_memory = sum(p.numel() * p.element_size() for p in model1.parameters())
    print(f"\nMemory usage:")
    print(f" Parameters memory: {param_memory / 1e6:.2f} MB")
    print(f" Shared between instances: Yes")
+
+
def demonstrate_weight_freezing():
    """Demonstrate freezing model weights for parameter-efficient tuning.

    Freezes every parameter, then re-enables gradients only for the output
    (LM) head.  GPT-2 family models *tie* ``lm_head.weight`` to the input
    embedding, so the head's tensor shows up in ``named_parameters()``
    under the embedding's name only — the previous name-based
    ``"lm_head" not in name`` filter silently froze every parameter and
    reported 0 trainable params.  Unfreezing through
    ``get_output_embeddings()`` targets the real tensor regardless of
    weight tying.
    """
    print(f"\n{'='*80}")
    print("Demonstrating Weight Freezing")
    print(f"{'='*80}\n")

    model_name = "distilgpt2"

    # Load model (uses the local HF cache after the first download)
    print("Loading model...")
    model = AutoModelForCausalLM.from_pretrained(model_name)

    # Check initial trainable parameters
    trainable_before = sum(p.numel() for p in model.parameters() if p.requires_grad)
    total_params = sum(p.numel() for p in model.parameters())

    print(f"\nBefore freezing:")
    print(f" Total parameters: {total_params:,}")
    print(f" Trainable parameters: {trainable_before:,}")

    # Freeze everything, then unfreeze only the output head's parameters.
    print("\nFreezing all layers except the last one...")
    for param in model.parameters():
        param.requires_grad = False
    output_head = model.get_output_embeddings()
    if output_head is not None:
        for param in output_head.parameters():
            param.requires_grad = True

    # Check trainable parameters after freezing
    trainable_after = sum(p.numel() for p in model.parameters() if p.requires_grad)

    print(f"\nAfter freezing:")
    print(f" Total parameters: {total_params:,}")
    print(f" Trainable parameters: {trainable_after:,}")
    print(f" Frozen parameters: {total_params - trainable_after:,}")
    # Guard the percentage against a (pathological) zero baseline.
    if trainable_before:
        print(f" Reduction: {(1 - trainable_after/trainable_before)*100:.1f}%")
+
+
def demonstrate_weight_initialization():
    """Demonstrate different weight initialization strategies."""
    print(f"\n{'='*80}")
    print("Demonstrating Weight Initialization")
    print(f"{'='*80}\n")

    # Shrink the configured depth so the randomly initialized model is
    # cheap to build for the demo.
    config = AutoConfig.from_pretrained("gpt2")
    config.n_layer = 2  # Smaller model for demonstration

    print("Creating model from configuration...")
    model = AutoModelForCausalLM.from_config(config)

    param_total = sum(p.numel() for p in model.parameters())
    print(f"Model created with random initialization")
    print(f"Total parameters: {param_total:,}")

    # Probe the first parameter tensor to show what random init produced.
    probe = next(model.parameters())
    print("\nSample weight values from first layer:")
    print(f" Shape: {probe.shape}")
    print(f" First 10 values: {probe.flatten()[:10].tolist()}")
    print(f" Mean: {probe.mean().item():.6f}")
    print(f" Std: {probe.std().item():.6f}")
+
+
def main():
    """Walk through every weight-handling demonstration in sequence."""

    def section(title):
        # Shared header shape: blank line, 80-char rule, title, rule.
        print("\n" + "=" * 80)
        print(title)
        print("=" * 80)

    print("\n")
    print("╔" + "=" * 78 + "╗")
    print("║" + " " * 20 + "MODEL WEIGHTS USAGE PATTERNS" + " " * 30 + "║")
    print("╚" + "=" * 78 + "╝")
    print("\n")

    # Initialize manager
    manager = ModelWeightsManager(cache_dir="./model_cache")

    # 1. Download and cache model
    section("1. DOWNLOADING AND CACHING MODELS")
    tokenizer, model = manager.download_model("distilgpt2", precision="fp16")

    # 2. Inspect weights
    section("2. INSPECTING MODEL WEIGHTS")
    manager.inspect_weights(model, num_layers=2)

    # 3. Save model locally
    section("3. SAVING MODEL LOCALLY")
    save_path = "./saved_models/distilgpt2"
    manager.save_model_locally(model, tokenizer, save_path)

    # 4. Load from local path
    section("4. LOADING MODEL FROM LOCAL PATH")
    try:
        local_tokenizer, local_model = manager.load_local_model(save_path)
        print("✓ Successfully loaded from local path")
    except Exception as e:
        print(f"Note: {e}")

    # 5. Demonstrate weight sharing
    section("5. WEIGHT SHARING PATTERNS")
    demonstrate_weight_sharing()

    # 6. Demonstrate weight freezing
    section("6. WEIGHT FREEZING FOR FINE-TUNING")
    demonstrate_weight_freezing()

    # 7. Demonstrate weight initialization
    section("7. WEIGHT INITIALIZATION")
    demonstrate_weight_initialization()

    section("DEMONSTRATIONS COMPLETE")
    print("\nKey Takeaways:")
    for takeaway in (
        " • Models are cached locally after first download",
        " • Different precisions (fp32, fp16) affect memory usage",
        " • Weights can be frozen for efficient fine-tuning",
        " • Models can be saved and loaded from local directories",
        " • Weight sharing reduces memory when using same model multiple times",
    ):
        print(takeaway)
    print("\nNext Steps:")
    for step in (
        " • Explore model quantization (int8, int4)",
        " • Learn about distributed model loading",
        " • Practice fine-tuning with frozen weights",
        " • Experiment with different model architectures",
    ):
        print(step)
    print()


if __name__ == "__main__":
    main()
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..10aafd0
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,20 @@
+# Core dependencies for exploring and using Huggingface models
+transformers>=4.30.0
+torch>=2.0.0
+huggingface-hub>=0.16.0
+
+# Additional useful libraries
+datasets>=2.14.0
+accelerate>=0.20.0
+safetensors>=0.3.0
+
+# Jupyter notebook support (optional)
+jupyter>=1.0.0
+ipywidgets>=8.0.0
+
+# For better model inspection and visualization
+numpy>=1.24.0
+pandas>=2.0.0
+
+# Progress bars
+tqdm>=4.65.0
diff --git a/simple_demo.py b/simple_demo.py
new file mode 100755
index 0000000..65113b1
--- /dev/null
+++ b/simple_demo.py
@@ -0,0 +1,104 @@
+#!/usr/bin/env python3
+"""
+Simple Demo: Quick Text Generation with Huggingface Models
+===========================================================
+
+This is the simplest possible example to get started with Huggingface LLMs.
+Run this script to see text generation in action!
+
+Usage:
+ python simple_demo.py
+
+Or make it executable and run directly:
+ chmod +x simple_demo.py
+ ./simple_demo.py
+"""
+
def main():
    """Run the quick text-generation demo; return 0 on success, 1 on error."""
    rule = "=" * 80
    print("\n" + rule)
    print("🤗 Huggingface LLM Quick Demo")
    print(rule + "\n")

    print("Loading model... (this may take a moment on first run)")
    print("The model will be cached for faster loading next time.\n")

    try:
        # Imported lazily so a missing install is caught and explained below.
        from transformers import pipeline
        import warnings
        warnings.filterwarnings('ignore')

        # A small distilled GPT-2 keeps the first-run download quick.
        generator = pipeline('text-generation', model='distilgpt2')

        print("✓ Model loaded successfully!\n")
        print(rule)
        print("Generating text samples...")
        print(rule + "\n")

        # Example prompts
        prompts = (
            "Artificial intelligence is",
            "The future of technology",
            "Machine learning enables us to",
        )

        for idx, prompt in enumerate(prompts, 1):
            print(f"Example {idx}:")
            print(f"Prompt: '{prompt}'")
            print("-" * 80)

            # Sampled generation; pad_token_id is pinned to GPT-2's EOS id
            # (50256) to silence the "no pad token" warning.
            outputs = generator(
                prompt,
                max_length=40,
                num_return_sequences=1,
                temperature=0.7,
                do_sample=True,
                pad_token_id=50256
            )
            print(f"Generated: {outputs[0]['generated_text']}")
            print()

        print(rule)
        print("Demo completed successfully! 🎉")
        print(rule + "\n")

        for line in (
            "What you just saw:",
            " • Loaded a lightweight LLM (DistilGPT-2)",
            " • Generated creative text continuations",
            " • Used the simple pipeline API\n",
            "Next steps:",
            " 1. Run 'python explore_models.py' for more examples",
            " 2. Open 'explore_llms.ipynb' in Jupyter for interactive learning",
            " 3. Try different models from https://huggingface.co/models",
            " 4. Experiment with different prompts and parameters\n",
            "Tips:",
            " • Adjust 'temperature' (0.1-1.0) to control randomness",
            " • Increase 'max_length' for longer outputs",
            " • Try 'num_return_sequences' > 1 for multiple variations\n",
        ):
            print(line)

    except ImportError as e:
        print("❌ Error: Required packages not installed")
        print("\nPlease install the required packages:")
        print(" pip install -r requirements.txt\n")
        print(f"Details: {e}\n")
        return 1

    except Exception as e:
        print(f"❌ An error occurred: {e}\n")
        print("Troubleshooting:")
        print(" • Make sure you have an internet connection")
        print(" • Check that you have enough disk space (2GB+)")
        print(" • Try running: pip install --upgrade transformers torch\n")
        return 1

    return 0


if __name__ == "__main__":
    import sys
    sys.exit(main())