Skip to content

Commit

Permalink
Merge pull request getpelican#671 from wilsonfreitas/rmd
Browse files Browse the repository at this point in the history
rmd_reader: New features and py3 compatibility
  • Loading branch information
justinmayer committed Mar 22, 2016
2 parents e7a728b + 84af54b commit c863cb8
Show file tree
Hide file tree
Showing 3 changed files with 84 additions and 30 deletions.
30 changes: 14 additions & 16 deletions rmd_reader/Readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,28 +41,26 @@ This plugin calls R to process these files and generates markdown files that are

### Configuration

`rmd_reader` has these 3 variables that can be set in `pelicanconf`.
`rmd_reader` has these variables that can be set in `pelicanconf`.

- `RMD_READER_CLEANUP` (`True`): The RMarkdown file is converted into a Markdown file with the extension `.aux` (to avoid conflicts while pelican is processing). This file is processed by pelican's MarkdownReader and is removed after that (the cleanup step). So if you want to inspect this intermediate file, set `RMD_READER_CLEANUP=False` so that it is not removed.
- `RMD_READER_RENAME_PLOT` (`True`): the figures generated for plots are named with a default prefix (usually `unnamed-chunk`) followed by a sequential number. That sequence starts at 1 for every processed file, which causes naming conflicts among files. In order to avoid these conflicts, `RMD_READER_RENAME_PLOT` can be set to `True`, and that prefix is then replaced with the name of the post file, without extension. Another way to avoid conflicts is to name the chunks explicitly, in which case this variable can be set to `False`.
- `RMD_READER_KNITR_QUIET` (`True`): sets `knitr`'s quiet argument.
- `RMD_READER_KNITR_ENCODING` (`UTF-8`): sets `knitr`'s encoding argument.
- `RMD_READER_KNITR_OPTS_CHUNK` (`None`): sets `knitr`'s `opts_chunk`.
- `RMD_READER_KNITR_OPTS_KNIT` (`None`): sets `knitr`'s `opts_knit`.


### Plotting

The code below must be pasted inside the `.Rmd` file in order to correctly set the `src` attribute of `img` tag.

```{r, echo=FALSE}
hook_plot <- knit_hooks$get('plot')
knit_hooks$set(plot=function(x, options) {
if (!is.null(options$pelican.publish) && options$pelican.publish) {
x <- paste0("{filename}", x)
}
hook_plot(x, options)
})
opts_chunk$set(pelican.publish=TRUE)
```

I usually paste it just after the Markdown header.
There is an R [template](https://github.com/almartin82/pelicanRMD) available that has the base elements needed by `rmd_reader`.
I strongly suggest using the variable `RMD_READER_RENAME_PLOT=True`.
That helps avoid naming conflicts among different posts.
`rmd_reader` sets knitr's `unnamed.chunk.label` option to the Rmd file name (without extension) at runtime.

Alternatively, Rebecca Weiss (@rjweiss) suggested using `opts_chunk` to set knitr's `fig.path` ([link](http://rjweiss.github.io/articles/2014_08_25/testing-rmarkdown-integration/)).
Now that can be done directly in `pelicanconf` through `RMD_READER_KNITR_OPTS_CHUNK`; that variable receives a `dict` with options to be passed to knitr's `opts_chunk`.

```
RMD_READER_KNITR_OPTS_CHUNK = {'fig.path': '../../../figure/'}
```

29 changes: 20 additions & 9 deletions rmd_reader/rmd_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,32 +4,34 @@
import warnings
import logging

logger = logging.getLogger(__name__)
logger = logging.getLogger('RMD_READER')

from pelican import readers
from pelican import signals
from pelican import settings
from pelican.utils import pelican_open
from markdown import Markdown

knitr = None
rmd = False

def initsignal(pelicanobj):
global knitr,rmd
global knitr, rmd, robjects
try:
with warnings.catch_warnings():
warnings.simplefilter("ignore")
from rpy2.robjects.packages import importr
import rpy2.robjects as robjects
knitr = importr('knitr')
idx = knitr.opts_knit.names.index('set')
PATH = pelicanobj.settings.get('PATH','%s/content' % settings.DEFAULT_CONFIG.get('PATH'))
logger.debug("RMD_READER PATH = %s", PATH)
knitr.opts_knit[idx](**{'base.dir': PATH})
knitroptsknit = pelicanobj.settings.get('RMD_READER_KNITR_OPTS_KNIT', None)
if knitroptsknit:
knitr.opts_knit[idx](**{str(k): v for k,v in knitroptsknit.items()})
idx = knitr.opts_chunk.names.index('set')
knitroptschunk = pelicanobj.settings.get('RMD_READER_KNITR_OPTS_CHUNK', None)
if knitroptschunk is not None:
knitr.opts_chunk[idx](**{str(k): v for k,v in knitroptschunk.iteritems()})
if knitroptschunk:
knitr.opts_chunk[idx](**{str(k): v for k,v in knitroptschunk.items()})
rmd = True
except ImportError as ex:
rmd = False
Expand All @@ -39,24 +41,33 @@ class RmdReader(readers.BaseReader):

@property
def enabled():
global rmd
return rmd

# You need to have a read method, which takes a filename and returns
# some content and the associated metadata.
def read(self, filename):
"""Parse content and metadata of markdown files"""
global knitr
QUIET = self.settings.get('RMD_READER_KNITR_QUIET', True)
ENCODING = self.settings.get('RMD_READER_KNITR_ENCODING', 'UTF-8')
CLEANUP = self.settings.get('RMD_READER_CLEANUP', True)
RENAME_PLOT = self.settings.get('RMD_READER_RENAME_PLOT', True)
logger.debug("RMD_READER_KNITR_QUIET = %s", QUIET)
logger.debug("RMD_READER_KNITR_QUIET = %s", ENCODING)
logger.debug("RMD_READER_KNITR_ENCODING = %s", ENCODING)
logger.debug("RMD_READER_CLEANUP = %s", CLEANUP)
logger.debug("RMD_READER_RENAME_PLOT = %s", RENAME_PLOT)
# replace single backslashes with double backslashes
filename = filename.replace('\\', '\\\\')
# parse Rmd file - generate md file
md_filename = filename.replace('.Rmd', '.aux').replace('.rmd', '.aux')
if RENAME_PLOT:
chunk_label = os.path.splitext(os.path.basename(filename))[0]
logger.debug('Chunk label: %s', chunk_label)
robjects.r('''
opts_knit$set(unnamed.chunk.label="{unnamed_chunk_label}")
render_markdown()
hook_plot <- knit_hooks$get('plot')
knit_hooks$set(plot=function(x, options) hook_plot(paste0("{{filename}}/", x), options))
'''.format(unnamed_chunk_label=chunk_label))
knitr.knit(filename, md_filename, quiet=QUIET, encoding=ENCODING)
# read md file - create a MarkdownReader
md_reader = readers.MarkdownReader(self.settings)
Expand Down
55 changes: 50 additions & 5 deletions rmd_reader/test_rmd_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from pelican import Pelican
from pelican.settings import read_settings

logging.basicConfig(stream=sys.stderr, level=logging.CRITICAL)
logging.basicConfig(stream=sys.stderr, level=logging.DEBUG)

class Test(unittest.TestCase):

Expand All @@ -19,19 +19,23 @@ def setUp(self):
try:
import rpy2
import rmd_reader
except Exception, e:
except Exception:
raise unittest.SkipTest("rpy not installed. Will not test rmd_reader.")

self.testtitle = 'rtest'
self.cwd = os.path.dirname(os.path.abspath(__file__))
logging.debug(self.cwd)

# Setup content dir and test rmd file
self.contentdir = os.path.join(self.cwd,'test-content')
logging.debug(self.contentdir)
try:
os.mkdir(self.contentdir)
except Exception:
pass
self.contentfile = os.path.join(self.contentdir,'test.rmd')
logging.debug(self.contentfile)

self.testrmd = '''Title: %s
Date: 2014-06-23
Expand All @@ -46,6 +50,8 @@ def setUp(self):

# Setup output dir
self.outputdir = os.path.join(self.cwd,'test-output')
logging.debug(self.outputdir)

try:
os.mkdir(self.outputdir)
except Exception:
Expand All @@ -56,17 +62,20 @@ def setUp(self):


def tearDown(self):
logging.debug('CLEAN')
if os.path.isdir(self.outputdir):
shutil.rmtree(self.outputdir)
if os.path.isdir(self.contentdir):
shutil.rmtree(self.contentdir)


def testKnitrSettings(self):
settings = read_settings(path=None, override={
'LOAD_CONTENT_CACHE': False,
'PATH': self.contentdir,
'OUTPUT_PATH': self.outputdir,
'KNITR_OPTS_CHUNK': {'fig.path' : '%s/' % self.figpath},
'RMD_READER_KNITR_OPTS_CHUNK': {'fig.path' : '%s/' % self.figpath},
'RMD_READER_KNITR_OPTS_KNIT': {'progress' : True, 'verbose': True},
'RMD_READER_RENAME_PLOT': False,
'PLUGIN_PATHS': ['../'],
'PLUGINS': ['rmd_reader'],
})
Expand All @@ -75,12 +84,48 @@ def testKnitrSettings(self):

outputfilename = os.path.join(self.outputdir,'%s.html' % self.testtitle)
self.assertTrue(os.path.exists(outputfilename),'File %s was not created.' % outputfilename)
imagesdir = os.path.join(self.outputdir,self.figpath)

imagesdir = os.path.join(self.outputdir, self.figpath)
self.assertTrue(os.path.exists(imagesdir), 'figpath not created.')

imagefile = os.path.join(imagesdir, 'unnamed-chunk') + '-1-1.png'
logging.debug(imagefile)
images = glob.glob('%s/*' % imagesdir)
logging.debug(images)
self.assertTrue(os.path.exists(imagefile), 'image correctly named.')

self.assertTrue(len(images) == 1,'Contents of images dir is not correct: %s' % ','.join(images))


def testKnitrSettings2(self):
settings = read_settings(path=None, override={
'LOAD_CONTENT_CACHE': False,
'PATH': self.contentdir,
'OUTPUT_PATH': self.outputdir,
'RMD_READER_KNITR_OPTS_CHUNK': {'fig.path' : '%s/' % self.figpath},
'RMD_READER_KNITR_OPTS_KNIT': {'progress' : True, 'verbose': True},
'RMD_READER_RENAME_PLOT': True,
'PLUGIN_PATHS': ['../'],
'PLUGINS': ['rmd_reader'],
})
pelican = Pelican(settings=settings)
pelican.run()

outputfilename = os.path.join(self.outputdir,'%s.html' % self.testtitle)
self.assertTrue(os.path.exists(outputfilename),'File %s was not created.' % outputfilename)

imagesdir = os.path.join(self.outputdir, self.figpath)
self.assertTrue(os.path.exists(imagesdir), 'figpath not created.')

imagefile = os.path.join(imagesdir, os.path.splitext(os.path.split(self.contentfile)[1])[0]) + '-1-1.png'
logging.debug(imagefile)
self.assertTrue(os.path.exists(imagefile), 'image correctly named.')

images = glob.glob('%s/*' % imagesdir)
logging.debug(images)
self.assertTrue(len(images) == 1,'Contents of images dir is not correct: %s' % ','.join(images))



if __name__ == "__main__":
#import sys;sys.argv = ['', 'Test.testName']
Expand Down

0 comments on commit c863cb8

Please sign in to comment.