Merge pull request #3 from openize-com/muhammadumar-patch

muhammadumargroupdocs · web-flow · commit f2a3819dc62b · 2025-06-18T17:38:02.000+05:00
MarkItDown v25.6.0: Gemini & Mistral LLM Integration, CLI Upgrades, and Cleaner API
diff --git a/README.md b/README.md
@@ -4,12 +4,12 @@
 ![License](https://img.shields.io/badge/license-MIT-green)
 ![Status](https://img.shields.io/badge/status-alpha-orange)
 
-Openize.MarkItDown for Python is a package that converts documents into Markdown format. It supports multiple file formats, provides flexible output handling, and integrates with LLMs for extended processing.
+Openize.MarkItDown for Python is a package that converts documents into Markdown format. It supports multiple file formats, provides flexible output handling, and integrates with LLMs for extended processing including OpenAI, Claude, Gemini, and Mistral.
 
 ## Features
 
 - Convert `.docx`, `.pdf`, `.xlsx`, and `.pptx` to Markdown.
-- Save Markdown files locally or send them to an LLM for processing.
+- Save Markdown files locally or send them to an LLM for processing (OpenAI, Claude, Gemini, Mistral).
 - Structured with the **Factory & Strategy Pattern** for scalability.
 - Works with Windows and Linux-compatible paths.
 - Command-line interface for easy use.
@@ -24,17 +24,23 @@ This package depends on the Aspose libraries, which are commercial products:
 
 You'll need to obtain valid licenses for these libraries separately. The package will install these dependencies, but you're responsible for complying with Aspose's licensing terms.
 
+LLM support requires a valid API key for the chosen provider and relies on the following packages:
+
+- `openai` for OpenAI
+- `anthropic` for Claude
+- `requests` for Gemini and Mistral REST APIs
+
 ## Installation
 
 ### From TestPyPI
 
-```sh
+```bash
 pip install openize-markitdown-python
 ```
 
 ### From Source
 
-```sh
+```bash
 git clone https://github.com/openize-com/openize-markitdown-python.git
 cd openize-markitdown-python\packages\markitdown
 pip install -e . --verbose
@@ -44,12 +50,15 @@ pip install -e . --verbose
 
 ### Command Line Interface
 
-```sh
+```bash
 # Convert a file and save locally
 markitdown document.docx -o output_folder
 
-# Process with an LLM (requires OPENAI_API_KEY environment variable)
-markitdown document.docx -o output_folder --insert_into_llm
+# Process with an LLM (requires corresponding API key)
+markitdown document.docx -o output_folder --llm openai
+markitdown document.docx -o output_folder --llm claude
+markitdown document.docx -o output_folder --llm gemini
+markitdown document.docx -o output_folder --llm mistral
 ```
 
 ### Python API
@@ -61,50 +70,62 @@ from openize.markitdown.core import MarkItDown
 input_file = "report.pdf"
 output_dir = "output_markdown"
 
-# Create MarkItDown instance
-converter = MarkItDown(output_dir)
+# Create MarkItDown instance with desired LLM
+converter = MarkItDown(output_dir, llm_client_name="mistral")
 
 # Convert document and send output to LLM
-converter.convert_document(input_file, insert_into_llm=True)
+converter.convert_document(input_file)
 
 print("Conversion completed and data sent to LLM.")
 ```
 
 ## Environment Variables
 
-- `ASPOSE_LICENSE_PATH`: Required when using the Aspose Paid APIs. This should be set to the full path of your Aspose license file.
-- `OPENAI_API_KEY`: Required when using the `insert_into_llm=True` option or the `--llm` flag.
-- `OPENAI_MODEL`: Specifies the OpenAI model name (default: `gpt-4`).
+| Variable              | Description                                                        |
+|-----------------------|--------------------------------------------------------------------|
+| `ASPOSE_LICENSE_PATH` | Path to Aspose license file (required if using paid features)      |
+| `OPENAI_API_KEY`      | API key for OpenAI integration                                     |
+| `OPENAI_MODEL`        | (Optional) Model name for OpenAI (default: `gpt-4`)                |
+| `CLAUDE_API_KEY`      | API key for Claude integration                                     |
+| `CLAUDE_MODEL`        | (Optional) Model name for Claude (default: `claude-v1`)            |
+| `GEMINI_API_KEY`      | API key for Gemini integration                                     |
+| `GEMINI_MODEL`        | (Optional) Model name for Gemini (default: `gemini-pro`)           |
+| `MISTRAL_API_KEY`     | API key for Mistral integration                                    |
+| `MISTRAL_MODEL`       | (Optional) Model name for Mistral (default: `mistral-medium`)      |
 
-To set these variables:
+### Setting Environment Variables
 
-For Unix-based systems:
+**Unix-based systems:**
 
 ```bash
 export ASPOSE_LICENSE_PATH="/path/to/license"
-export OPENAI_API_KEY="your-api-key"
-export OPENAI_MODEL="gpt-4"
+export OPENAI_API_KEY="your-openai-key"
+export CLAUDE_API_KEY="your-claude-key"
+export GEMINI_API_KEY="your-gemini-key"
+export MISTRAL_API_KEY="your-mistral-key"
 ```
 
-For Windows (PowerShell):
+**Windows (PowerShell):**
 
 ```powershell
 $env:ASPOSE_LICENSE_PATH = "C:\path\to\license"
-$env:OPENAI_API_KEY = "your-api-key"
-$env:OPENAI_MODEL = "gpt-4"
+$env:OPENAI_API_KEY = "your-openai-key"
+$env:CLAUDE_API_KEY = "your-claude-key"
+$env:GEMINI_API_KEY = "your-gemini-key"
+$env:MISTRAL_API_KEY = "your-mistral-key"
 ```
 
-## Contributing  
+## Contributing
 
-We appreciate your interest in contributing to this project! To ensure a smooth collaboration, please follow these steps when submitting a pull request:  
+We appreciate your interest in contributing to this project! To ensure a smooth collaboration, please follow these steps when submitting a pull request:
 
 1. **Fork & Clone** – Fork the repository and clone it to your local machine.  
 2. **Create a Branch** – Use a new branch for your contribution.  
 3. **Sign the Contributor License Agreement (CLA)** – Before your first contribution can be accepted, you must sign our CLA via [CLA Assistant](https://cla-assistant.io). You will be prompted to sign it when submitting your first pull request. You can also review the CLA here: [https://cla.openize.com/agreement](https://cla.openize.com/agreement).  
 4. **Submit a Pull Request (PR)** – Once your changes are ready, open a PR with a clear description.  
 5. **Review & Feedback** – Our maintainers will review your PR and provide feedback if needed.  
 
-By contributing, you agree to the terms of the CLA and confirm that your changes comply with the project's licensing policies.  
+By contributing, you agree to the terms of the CLA and confirm that your changes comply with the project's licensing policies.
 
 ## License
 
diff --git a/packages/markitdown/README.md b/packages/markitdown/README.md
@@ -4,12 +4,12 @@
 ![License](https://img.shields.io/badge/license-MIT-green)
 ![Status](https://img.shields.io/badge/status-alpha-orange)
 
-Openize.MarkItDown for Python converts documents into Markdown format. It supports multiple file formats, provides flexible output handling, and integrates with LLMs for extended processing.
+Openize.MarkItDown for Python converts documents into Markdown format. It supports multiple file formats, provides flexible output handling, and integrates with popular LLMs for post-processing, including OpenAI, Claude, Gemini, and Mistral.
 
 ## Features
 
 - Convert `.docx`, `.pdf`, `.xlsx`, and `.pptx` to Markdown.
-- Save Markdown files locally or send them to an LLM for processing.
+- Save Markdown files locally or send them to an LLM (OpenAI, Claude, Gemini, Mistral).
 - Structured with the **Factory & Strategy Pattern** for scalability.
 - Works with Windows and Linux-compatible paths.
 - Command-line interface for easy use.
@@ -24,73 +24,85 @@ This package depends on the Aspose libraries, which are commercial products:
 
 You'll need to obtain valid licenses for these libraries separately. The package will install these dependencies, but you're responsible for complying with Aspose's licensing terms.
 
-## Installation
+LLM integration requires valid API credentials for the chosen provider and uses the following packages:
+
+- `openai` (for OpenAI)
+- `anthropic` (for Claude)
+- `requests` (used for Gemini and Mistral REST APIs)
 
-### From TestPyPI
+## Installation
 
-```sh
+```bash
 pip install openize-markitdown-python
 ```
 
 ## Usage
 
 ### Command Line Interface
 
-```sh
+```bash
 # Convert a file and save locally
 markitdown document.docx -o output_folder
 
-# Process with an LLM (requires OPENAI_API_KEY environment variable)
-markitdown document.docx -o output_folder --insert_into_llm
+# Process with an LLM (requires appropriate API key)
+markitdown document.docx -o output_folder --llm openai
+markitdown document.docx -o output_folder --llm claude
+markitdown document.docx -o output_folder --llm gemini
+markitdown document.docx -o output_folder --llm mistral
 ```
 
 ### Python API
 
 ```python
 from openize.markitdown.core import MarkItDown
 
-# Define input file and output directory
 input_file = "report.pdf"
 output_dir = "output_markdown"
 
-# Create MarkItDown instance
-converter = MarkItDown(output_dir)
-
-# Convert document and send output to LLM
-converter.convert_document(input_file, insert_into_llm=True)
-
-print("Conversion completed and data sent to LLM.")
+converter = MarkItDown(output_dir, llm_client_name="gemini")
+converter.convert_document(input_file)
 
+print("Conversion completed and data sent to Gemini.")
 ```
 
 ## Environment Variables
 
-- `ASPOSE_LICENSE_PATH`: Required when using the Aspose Paid APIs. This should be set to the full path of your Aspose license file.
-- `OPENAI_API_KEY`: Required when using the `insert_into_llm=True` option or the `--llm` flag.
-- `OPENAI_MODEL`: Specifies the OpenAI model name (default: `gpt-4`).
+The following environment variables are used to control license and LLM access:
 
-To set these variables:
+| Variable            | Description                                                |
+|---------------------|------------------------------------------------------------|
+| `ASPOSE_LICENSE_PATH` | Required to activate Aspose license (if using paid APIs)  |
+| `OPENAI_API_KEY`     | Required for OpenAI integration                            |
+| `OPENAI_MODEL`       | (Optional) OpenAI model name (default: `gpt-4`)            |
+| `CLAUDE_API_KEY`     | Required for Claude integration                            |
+| `CLAUDE_MODEL`       | (Optional) Claude model name (default: `claude-v1`)        |
+| `GEMINI_API_KEY`     | Required for Gemini integration                            |
+| `GEMINI_MODEL`       | (Optional) Gemini model name (default: `gemini-pro`)       |
+| `MISTRAL_API_KEY`    | Required for Mistral integration                           |
+| `MISTRAL_MODEL`      | (Optional) Mistral model name (default: `mistral-medium`)  |
 
-For Unix-based systems:
+### Setting Environment Variables
 
+**Unix-based (Linux/macOS):**
 ```bash
 export ASPOSE_LICENSE_PATH="/path/to/license"
-export OPENAI_API_KEY="your-api-key"
-export OPENAI_MODEL="gpt-4"
+export OPENAI_API_KEY="your-openai-key"
+export CLAUDE_API_KEY="your-claude-key"
+export GEMINI_API_KEY="your-gemini-key"
+export MISTRAL_API_KEY="your-mistral-key"
 ```
 
-For Windows (PowerShell):
-
+**Windows PowerShell:**
 ```powershell
 $env:ASPOSE_LICENSE_PATH = "C:\path\to\license"
-$env:OPENAI_API_KEY = "your-api-key"
-$env:OPENAI_MODEL = "gpt-4"
+$env:OPENAI_API_KEY = "your-openai-key"
+$env:CLAUDE_API_KEY = "your-claude-key"
+$env:GEMINI_API_KEY = "your-gemini-key"
+$env:MISTRAL_API_KEY = "your-mistral-key"
 ```
 
 ## License
 
 This package is licensed under the MIT License. However, it depends on Aspose libraries, which are proprietary, closed-source libraries.
 
-⚠️ Users must obtain a valid license for Aspose libraries separately. This repository does not include or distribute any proprietary components.
-
-
+⚠️ You must obtain valid licenses for Aspose libraries separately. This repository does not include or distribute any proprietary components.
diff --git a/packages/markitdown/setup.cfg b/packages/markitdown/setup.cfg
@@ -1,7 +1,7 @@
 
 [metadata]
 name = openize-markitdown-python
-version = 25.5.0
+version = 25.6.0
 
 author = Openize
 author_email = packages@openize.com
@@ -33,7 +33,8 @@ install_requires =
     aspose-cells-python>=23.0.0
     aspose-slides>=23.0.0
     openai>=1.0.0
-    anthropic>=3.0.0
+    anthropic>=0.3.11
+    requests>=2.25.0  # Needed for Gemini and Mistral HTTP API calls
 
 [options.packages.find]
 where = src 
diff --git a/packages/markitdown/setup.py b/packages/markitdown/setup.py
@@ -21,6 +21,10 @@ def install_if_missing(package, module_name=None):
     ("aspose-slides", "asposeslides"),
     ("openai", "openai"),
     ("anthropic", "anthropic"),
+    ("requests", "requests"),  # Required for Gemini/Mistral REST API
+    # Optional SDKs (uncomment if using them instead of raw HTTP)
+    # ("google-generativeai", "google.generativeai"),
+    # ("mistralai", "mistralai"),
 ]
 
 # Install missing dependencies before proceeding
diff --git a/packages/markitdown/src/openize/markitdown/llm_strategy.py b/packages/markitdown/src/openize/markitdown/llm_strategy.py
@@ -81,6 +81,86 @@ def process(self, md_file):
         except Exception as e:
             logging.exception(f"Unexpected error processing {md_file}: {e}")
 
+class GeminiClient(LLMStrategy):
+    """LLM strategy that posts a Markdown file to Gemini's ``generateContent`` REST endpoint."""
+
+    def __init__(self):
+        """Read ``GEMINI_API_KEY`` / ``GEMINI_MODEL`` from the environment.
+
+        Raises:
+            ValueError: If ``GEMINI_API_KEY`` is unset or empty.
+        """
+        self.api_key = os.getenv("GEMINI_API_KEY")
+        self.model = os.getenv("GEMINI_MODEL", "gemini-pro")
+        if not self.api_key:
+            raise ValueError("Missing Gemini API key. Please set it in the environment.")
+        self.api_url = f"https://generativelanguage.googleapis.com/v1beta/models/{self.model}:generateContent"
+
+    def process(self, md_file):
+        """Post ``md_file``'s text to Gemini and log the reply; exceptions are logged, not raised."""
+        try:
+            import requests
+
+            with open(md_file, "r", encoding="utf-8") as file:
+                content = file.read()
+
+            # Gemini API keys are passed in the ``x-goog-api-key`` header;
+            # ``Authorization: Bearer`` is the scheme for OAuth access tokens, not API keys.
+            headers = {"Content-Type": "application/json", "x-goog-api-key": self.api_key}
+            data = {"contents": [{"parts": [{"text": content}]}]}
+            # Bound the request so a stalled connection cannot hang the conversion forever.
+            response = requests.post(self.api_url, headers=headers, json=data, timeout=120)
+            response.raise_for_status()
+            text = response.json()["candidates"][0]["content"]["parts"][0]["text"]
+            logging.info(f"Gemini Response for {md_file}: {text}")
+        except FileNotFoundError:
+            logging.error(f"Markdown file not found: {md_file}")
+        except Exception as e:
+            logging.exception(f"Gemini processing error: {e}")
+
+
+class MistralClient(LLMStrategy):
+    """LLM strategy that posts a Markdown file to Mistral's chat-completions REST endpoint."""
+
+    def __init__(self):
+        """Read ``MISTRAL_API_KEY`` / ``MISTRAL_MODEL`` from the environment.
+
+        Raises:
+            ValueError: If ``MISTRAL_API_KEY`` is unset or empty.
+        """
+        self.api_key = os.getenv("MISTRAL_API_KEY")
+        self.model = os.getenv("MISTRAL_MODEL", "mistral-medium")
+        if not self.api_key:
+            raise ValueError("Missing Mistral API key. Please set it in the environment.")
+        self.api_url = "https://api.mistral.ai/v1/chat/completions"
+
+    def process(self, md_file):
+        """Send ``md_file``'s text to Mistral and log the first choice's reply.
+
+        Exceptions are logged rather than propagated to the caller.
+        """
+        try:
+            import requests
+
+            with open(md_file, "r", encoding="utf-8") as file:
+                content = file.read()
+            headers = {"Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json"}
+            messages = [
+                {"role": "system", "content": "Process this Markdown content."},
+                {"role": "user", "content": content},
+            ]
+            data = {"model": self.model, "messages": messages}
+            # Bound the request so a stalled connection cannot hang the conversion forever.
+            response = requests.post(self.api_url, headers=headers, json=data, timeout=120)
+            response.raise_for_status()
+            message = response.json()["choices"][0]["message"]["content"]
+            logging.info(f"Mistral Response for {md_file}: {message}")
+        except FileNotFoundError:
+            logging.error(f"Markdown file not found: {md_file}")
+        except Exception as e:
+            logging.exception(f"Mistral processing error: {e}")
+
+
 class LLMFactory:
     @staticmethod
     def get_llm(client_name: str) -> LLMStrategy:
@@ -89,5 +169,10 @@ def get_llm(client_name: str) -> LLMStrategy:
             return OpenAIClient()
         elif client_name == "claude":
             return ClaudeClient()
+        elif client_name == "gemini":
+            return GeminiClient()
+        elif client_name == "mistral":
+            return MistralClient()
         else:
             raise ValueError(f"Unknown LLM client: {client_name}")
+
diff --git a/packages/markitdown/src/openize/markitdown/main.py b/packages/markitdown/src/openize/markitdown/main.py