Implement Google Dork generator based on LangChain

sergio11 · sergio11 · commit 62f643c42cac · 2024-09-19T19:48:50.000+02:00
diff --git a/dork_generator.py b/dork_generator.py
@@ -0,0 +1,89 @@
+from dotenv import load_dotenv
+from langchain_groq import ChatGroq
+from langchain_core.prompts import ChatPromptTemplate
+import os
+
+class DorkGenerator:
+    """
+    AI agent that converts a natural-language description into a Google Dork using a Groq chat model.
+
+    Attributes:
+        model (ChatGroq): The Groq chat model used for generation, created with temperature=0.
+        prompt_template (ChatPromptTemplate): System instructions with examples, followed by the human description.
+    """
+
+    def __init__(self, model_id="llama3-70b-8192", groq_api_key=None):
+        """
+        Initializes a new DorkGenerator with the specified Groq model and API key.
+
+        Args:
+            model_id (str): The ID of the Groq model to use. Default is 'llama3-70b-8192'.
+            groq_api_key (str): The Groq API key; when None, the GROQ_API_KEY environment variable is used.
+
+        Raises:
+            ValueError: If no API key is passed and GROQ_API_KEY is unset or empty.
+        """
+        if groq_api_key is None:
+            groq_api_key = os.getenv("GROQ_API_KEY")
+        if not groq_api_key:
+            raise ValueError("Groq API key must be provided either as an argument or an environment variable.")
+        self.model = ChatGroq(model=model_id, temperature=0, api_key=groq_api_key)
+        self.prompt_template = ChatPromptTemplate.from_messages(
+            [
+                (
+                    "system",
+                    """
+                    Generate a specific Google Dork based on the user's description. A Google Dork uses advanced search operators to find specific information that is hard to locate through a normal search. 
+                    Your task is to convert the user's description into an accurate Google Dork. Provide only the Google Dork in your response, without any additional text or prefixes. 
+
+                    Here are some examples of how you should formulate the Google Dorks based on different descriptions:
+
+                    Description: PDF documents related to cybersecurity published in the last year.
+                    Google Dork: filetype:pdf "cybersecurity" after:2023-01-01
+
+                    Description: PowerPoint presentations on climate change available on .edu sites.
+                    Google Dork: site:.edu filetype:ppt "climate change"
+
+                    Description: Lists of email addresses in text files within government domains.
+                    Google Dork: site:.gov filetype:txt "email" | "correo electrónico"
+
+                    Now, based on the following user-provided description, generate the corresponding Google Dork:
+                    """
+                ),
+                ("human", "{description}"),
+            ]
+        )
+
+    def generate_dork(self, description):
+        """
+        Generates a Google Dork based on the provided description.
+
+        Args:
+            description (str): Description provided by the user to generate the Google Dork.
+
+        Returns:
+            str: The generated Google Dork, stripped of surrounding whitespace, or None if an error occurs (the error is printed, not raised).
+        """
+        try:
+            response = (self.prompt_template | self.model).invoke({"description": description})
+            # Read `content` first: `text` is a method on some langchain-core versions, so the old
+            # `response.text.strip()` raised AttributeError there and every call returned None.
+            content = getattr(response, "content", None)
+            if content is None:
+                fallback = getattr(response, "text", None)
+                content = fallback() if callable(fallback) and not isinstance(fallback, str) else fallback
+            if isinstance(content, list):  # block-style content: join the textual parts
+                content = "".join(b if isinstance(b, str) else b.get("text", "") for b in content)
+            if not isinstance(content, str):
+                raise TypeError("Unexpected response type: Unable to extract content.")
+            return content.strip()
+        except Exception as e:
+            print(f"Error generating Google Dork: {e}")
+            return None
+
+if __name__ == "__main__":
+    # Demo entry point: read a local .env file first so GROQ_API_KEY is available via the environment.
+    load_dotenv()
+    generator = DorkGenerator(groq_api_key=os.getenv("GROQ_API_KEY"))
+    sample_description = "List of users and passwords in text file contents."
+    print(generator.generate_dork(sample_description))
diff --git a/requirements.txt b/requirements.txt
@@ -0,0 +1,2 @@
+langchain
+langchain-groq
diff --git a/zerotrace.py b/zerotrace.py
@@ -77,5 +77,5 @@ def process_results(self, results, output_html=None, output_json=None, download=
         if download:
             file_types = download.split(",")
             urls = [result['link'] for result in results]
-            fdownloader = FileDownloader("Downloads")  # Adjust the folder name as needed
+            fdownloader = FileDownloader("Downloads")
             fdownloader.filter_download_files(urls, file_types)