From 44cc26bbc6bb0d41f624c9198441c9466a4ebe61 Mon Sep 17 00:00:00 2001
From: Curtis Rueden <ctrueden@wisc.edu>
Date: Mon, 28 Mar 2022 15:21:48 -0500
Subject: [PATCH 1/6] WIP: Skeleton for ImageJ tutorial ingestion

---
 _search/server/tutorials.py | 86 +++++++++++++++++++++++++++++++++++++
 1 file changed, 86 insertions(+)
 create mode 100644 _search/server/tutorials.py

diff --git a/_search/server/tutorials.py b/_search/server/tutorials.py
new file mode 100644
index 0000000000..acf7d0c76a
--- /dev/null
+++ b/_search/server/tutorials.py
@@ -0,0 +1,86 @@
+#!/bin/env python
+
+# Parse ImageJ tutorials into documents for
+# use with their own searchable collection.
+
+import logging, os, traceback
+import yaml
+from parseutil import first_sentence
+
+
+logger = logging.getLogger(__name__)
+
+
+def is_imagej_tutorials(root):
+    java = os.path.join(root, 'java')
+    notebooks = os.path.join(root, 'notebooks')
+    return os.path.isdir(java) and os.path.isdir(notebooks)
+
+
+def parse_java_source(root, path):
+    logger.debug(f'Parsing Java source file {path}...')
+
+    with open(path) as f:
+        lines = json.read(f)
+
+    # This is dumb -- do we want to do better?
+    doc = {}
+    doc['content'] = ''.join(lines)
+
+    return doc
+
+
+def parse_notebook(root, path):
+    logger.debug(f'Parsing notebook {path}...')
+
+    with open(path) as f:
+        data = json.read(f)
+
+    doc = {}
+    doc['content'] = ''
+    for cell in data['cells']:
+        # TODO: implement process_cell: extract source and output(s) if present
+        doc['content'] += process_cell(cell)
+
+    return doc
+
+
+def find_resources(root, suffix):
+    # TODO: use pathlib to find all .java or .ipynb (based on suffix) inside root.
+    pass
+
+
+def load_imagej_tutorials(root):
+    """
+    Loads the content from the given imagej/tutorials folder.
+    See: https://github.com/imagej/tutorials
+    """
+    java = os.path.join(siteroot, 'java')
+    notebooks = os.path.join(siteroot, 'notebooks')
+    if not os.path.isdir(java) or not os.path.isdir(notebooks):
+        raise ValueError(f'The path {siteroot} does not appear to be a Jekyll site.')
+
+    logger.info('Loading content...')
+    documents = []
+
+    for javafile in find_resources(java, '.java'):
+        try:
+            doc = parse_java_source(root, path)
+            if doc:
+                documents.append(doc)
+        except:
+            logger.error(f'Failed to parse {path}:')
+            traceback.print_exc()
+    logger.info(f'Loaded {len(documents)} documents from Java source files')
+
+    for nbfile in find_resources(notebooks, '.ipynb'):
+        try:
+            doc = parse_notebook(root, path)
+            if doc:
+                documents.append(doc)
+        except:
+            logger.error(f'Failed to parse {path}:')
+            traceback.print_exc()
+    logger.info(f'Loaded {len(documents)} documents from Jupyter notebooks')
+
+    return documents

From 230674f490d2f73adbbeb486b474b30d9ce9c681 Mon Sep 17 00:00:00 2001
From: Curtis Rueden <ctrueden@wisc.edu>
Date: Mon, 28 Mar 2022 15:30:25 -0500
Subject: [PATCH 2/6] Switch to pathlib

---
 _search/server/tutorials.py | 31 +++++++++++++------------------
 1 file changed, 13 insertions(+), 18 deletions(-)

diff --git a/_search/server/tutorials.py b/_search/server/tutorials.py
index acf7d0c76a..ec7b295524 100644
--- a/_search/server/tutorials.py
+++ b/_search/server/tutorials.py
@@ -3,7 +3,7 @@
 # Parse ImageJ tutorials into documents for
 # use with their own searchable collection.
 
-import logging, os, traceback
+import logging, traceback
 import yaml
 from parseutil import first_sentence
 
@@ -12,12 +12,12 @@
 
 
 def is_imagej_tutorials(root):
-    java = os.path.join(root, 'java')
-    notebooks = os.path.join(root, 'notebooks')
-    return os.path.isdir(java) and os.path.isdir(notebooks)
+    java = Path(root) / 'java'
+    notebooks = Path(root) / 'notebooks'
+    return java.isdir() and notebooks.isdir()
 
 
-def parse_java_source(root, path):
+def parse_java_source(path):
     logger.debug(f'Parsing Java source file {path}...')
 
     with open(path) as f:
@@ -30,7 +30,7 @@ def parse_java_source(root, path):
     return doc
 
 
-def parse_notebook(root, path):
+def parse_notebook(path):
     logger.debug(f'Parsing notebook {path}...')
 
     with open(path) as f:
@@ -45,27 +45,22 @@ def parse_notebook(root, path):
     return doc
 
 
-def find_resources(root, suffix):
-    # TODO: use pathlib to find all .java or .ipynb (based on suffix) inside root.
-    pass
-
-
 def load_imagej_tutorials(root):
     """
     Loads the content from the given imagej/tutorials folder.
     See: https://github.com/imagej/tutorials
     """
-    java = os.path.join(siteroot, 'java')
-    notebooks = os.path.join(siteroot, 'notebooks')
-    if not os.path.isdir(java) or not os.path.isdir(notebooks):
+    java = Path(root) / 'java'
+    notebooks = Path(root) / 'notebooks'
+    if not java.isdir() or not notebooks.isdir():
         raise ValueError(f'The path {siteroot} does not appear to be a Jekyll site.')
 
     logger.info('Loading content...')
     documents = []
 
-    for javafile in find_resources(java, '.java'):
+    for javafile in java.rglob("**/*.java"):
         try:
-            doc = parse_java_source(root, path)
+            doc = parse_java_source(javafile)
             if doc:
                 documents.append(doc)
         except:
@@ -73,9 +68,9 @@ def load_imagej_tutorials(root):
             traceback.print_exc()
     logger.info(f'Loaded {len(documents)} documents from Java source files')
 
-    for nbfile in find_resources(notebooks, '.ipynb'):
+    for nbfile in notebooks.rglob("**/*.ipynb"):
         try:
-            doc = parse_notebook(root, path)
+            doc = parse_notebook(nbfile)
             if doc:
                 documents.append(doc)
         except:

From 33faba750d2ca69f6f9db946359cf4a2f957ae06 Mon Sep 17 00:00:00 2001
From: jackrueth <jjrueth@wisc.eduh>
Date: Mon, 28 Mar 2022 15:31:45 -0500
Subject: [PATCH 3/6] Start fixing the imports

---
 _search/server/tutorials.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/_search/server/tutorials.py b/_search/server/tutorials.py
index ec7b295524..3a32b4cc4f 100644
--- a/_search/server/tutorials.py
+++ b/_search/server/tutorials.py
@@ -4,8 +4,9 @@
 # use with their own searchable collection.
 
 import logging, traceback
-import yaml
+import json
 from parseutil import first_sentence
+from pathlib import Path
 
 
 logger = logging.getLogger(__name__)

From 6f3ed7a517a5029d77c4a97d7ee87e8002d192b7 Mon Sep 17 00:00:00 2001
From: jackrueth <jjrueth@wisc.eduh>
Date: Mon, 28 Mar 2022 16:57:26 -0500
Subject: [PATCH 4/6] Add new elements to tutorials.py

---
 _search/server/index-sites.py |  3 +++
 _search/server/tutorials.py   | 12 +++++++++---
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/_search/server/index-sites.py b/_search/server/index-sites.py
index 8d500bfe99..7a757b4795 100644
--- a/_search/server/index-sites.py
+++ b/_search/server/index-sites.py
@@ -2,6 +2,7 @@
 
 import logging, os, sys
 import jekyll, ijsite, tsutil
+import tutorials
 
 
 logger = logging.getLogger('indexer')
@@ -12,6 +13,8 @@ def load_site(siteroot):
         return jekyll.load_jekyll_site(siteroot)
     if ijsite.is_imagej_website(siteroot):
         return ijsite.load_site(siteroot)
+    if tutorials.is_imagej_tutorials(siteroot):
+        return tutorials.load_imagej_tutorials(siteroot)
     return None
 
 
diff --git a/_search/server/tutorials.py b/_search/server/tutorials.py
index 3a32b4cc4f..55262eaa48 100644
--- a/_search/server/tutorials.py
+++ b/_search/server/tutorials.py
@@ -45,6 +45,9 @@ def parse_notebook(path):
 
     return doc
 
+def process_cell(cell):
+    return type(cell)
+
 
 def load_imagej_tutorials(root):
     """
@@ -54,7 +57,7 @@ def load_imagej_tutorials(root):
     java = Path(root) / 'java'
     notebooks = Path(root) / 'notebooks'
     if not java.isdir() or not notebooks.isdir():
-        raise ValueError(f'The path {siteroot} does not appear to be a Jekyll site.')
+        raise ValueError(f'The path {root} does not appear to be a Jekyll site.')
 
     logger.info('Loading content...')
     documents = []
@@ -65,7 +68,7 @@ def load_imagej_tutorials(root):
             if doc:
                 documents.append(doc)
         except:
-            logger.error(f'Failed to parse {path}:')
+            logger.error(f'Failed to parse {Path}:')
             traceback.print_exc()
     logger.info(f'Loaded {len(documents)} documents from Java source files')
 
@@ -75,8 +78,11 @@ def load_imagej_tutorials(root):
             if doc:
                 documents.append(doc)
         except:
-            logger.error(f'Failed to parse {path}:')
+            logger.error(f'Failed to parse {Path}:')
             traceback.print_exc()
     logger.info(f'Loaded {len(documents)} documents from Jupyter notebooks')
 
     return documents
+
+def main():
+    print("Hello")

From 6ea51f656469b6f49b7d8a8e27724251f30fc933 Mon Sep 17 00:00:00 2001
From: jackrueth <jjrueth@wisc.eduh>
Date: Thu, 31 Mar 2022 15:31:09 -0500
Subject: [PATCH 5/6] Add comments to process_cell for implementation

---
 _search/server/tutorials.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/_search/server/tutorials.py b/_search/server/tutorials.py
index 55262eaa48..32fd036b03 100644
--- a/_search/server/tutorials.py
+++ b/_search/server/tutorials.py
@@ -46,6 +46,9 @@ def parse_notebook(path):
     return doc
 
 def process_cell(cell):
+    # 2 cases: java file or a notebook
+    # case 1: notebook -> need info inside cells and then info from output lines
+    # case 2: java file -> need class name and class javadoc for description
     return type(cell)
 
 

From b5a8f157505f50825853af5d85fac4a6dc9b9bac Mon Sep 17 00:00:00 2001
From: jackrueth <jjrueth@wisc.eduh>
Date: Thu, 31 Mar 2022 16:51:28 -0500
Subject: [PATCH 6/6] Add new comments to process_cell

---
 _search/server/tutorials.py | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/_search/server/tutorials.py b/_search/server/tutorials.py
index 32fd036b03..b192a79692 100644
--- a/_search/server/tutorials.py
+++ b/_search/server/tutorials.py
@@ -22,7 +22,7 @@ def parse_java_source(path):
     logger.debug(f'Parsing Java source file {path}...')
 
     with open(path) as f:
-        lines = json.read(f)
+        lines = json.load(f)
 
     # This is dumb -- do we want to do better?
     doc = {}
@@ -35,7 +35,7 @@ def parse_notebook(path):
     logger.debug(f'Parsing notebook {path}...')
 
     with open(path) as f:
-        data = json.read(f)
+        data = json.load(f)
 
     doc = {}
     doc['content'] = ''
@@ -45,10 +45,15 @@ def parse_notebook(path):
 
     return doc
 
+# For reference: each notebook cell is a dict.
+# 2 cases: java file or a notebook
+# case 1: notebook -> need info inside cells and then info from output lines
+# case 2: java file -> need class name and class javadoc for description
 def process_cell(cell):
-    # 2 cases: java file or a notebook
-    # case 1: notebook -> need info inside cells and then info from output lines
-    # case 2: java file -> need class name and class javadoc for description
+    # case 1: notebook
+
+    # case 2: java files
+
     return type(cell)