diff --git a/.coveragerc b/.coveragerc
new file mode 100644
index 0000000..398ff08
--- /dev/null
+++ b/.coveragerc
@@ -0,0 +1,2 @@
+[run]
+branch = True
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..76a3f15
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,14 @@
+*.pyc
+*.joblib
+*egg-info
+.tox
+build
+dist
+.idea
+.ipynb_checkpoints
+htmlcov
+.coverage
+.joblib
+.cache
+__pycache__
+docs/_build
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..65b6e1a
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,33 @@
+language: python
+python: 3.5
+sudo: false
+branches:
+    only:
+        - master
+        - /^\d\.\d+$/
+
+env:
+    - TOXENV=py27
+    - TOXENV=py33
+    - TOXENV=py35
+
+addons:
+    apt:
+        packages:
+            - python-numpy
+            - python-scipy
+            - libatlas-base-dev
+            - liblapack-dev
+            - gfortran
+
+install:
+    - pip install -U pip tox codecov
+
+script: travis_wait tox
+
+after_success:
+    - codecov
+
+cache:
+    directories:
+        - $HOME/.cache/pip
diff --git a/CHANGES.rst b/CHANGES.rst
new file mode 100644
index 0000000..964cca8
--- /dev/null
+++ b/CHANGES.rst
@@ -0,0 +1,7 @@
+Changes
+=======
+
+0.1 (2015-11-27)
+----------------
+
+Initial release.
diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 0000000..ae1ebe3
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1,8 @@
+include README.rst
+include CHANGES.rst
+include docs/Makefile
+include docs/make.bat
+include docs/conf.py
+
+recursive-include tests *.py
+recursive-include docs *.rst
diff --git a/README.rst b/README.rst
new file mode 100644
index 0000000..f3652d8
--- /dev/null
+++ b/README.rst
@@ -0,0 +1,32 @@
+================
+sklearn-crfsuite
+================
+
+.. image:: https://img.shields.io/pypi/v/sklearn-crfsuite.svg
+   :target: https://pypi.python.org/pypi/sklearn-crfsuite
+   :alt: PyPI Version
+
+.. image:: https://img.shields.io/travis/TeamHG-Memex/sklearn-crfsuite/master.svg
+   :target: http://travis-ci.org/TeamHG-Memex/sklearn-crfsuite
+   :alt: Build Status
+
+.. image:: http://codecov.io/github/TeamHG-Memex/sklearn-crfsuite/coverage.svg?branch=master
+   :target: http://codecov.io/github/TeamHG-Memex/sklearn-crfsuite?branch=master
+   :alt: Code Coverage
+
+.. image:: https://readthedocs.org/projects/sklearn-crfsuite/badge/?version=latest
+   :target: http://sklearn-crfsuite.readthedocs.org/en/latest/?badge=latest
+   :alt: Documentation
+
+sklearn-crfsuite is a thin CRFsuite_ (python-crfsuite_) wrapper which provides
+an interface similar to scikit-learn_. ``sklearn_crfsuite.CRF`` is a scikit-learn
+compatible estimator: you can use e.g. scikit-learn model
+selection utilities (cross-validation, hyperparameter optimization) with it.
+
+.. _CRFsuite: http://www.chokkan.org/software/crfsuite/
+.. _python-crfsuite: https://github.com/tpeng/python-crfsuite
+.. _scikit-learn: http://scikit-learn.org/
+
+License is MIT.
+
+Documentation can be found `here <http://sklearn-crfsuite.readthedocs.org>`_.
diff --git a/docs/Makefile b/docs/Makefile
new file mode 100644
index 0000000..d013a9d
--- /dev/null
+++ b/docs/Makefile
@@ -0,0 +1,192 @@
+# Makefile for Sphinx documentation
+#
+
+# You can set these variables from the command line.
+SPHINXOPTS    =
+SPHINXBUILD   = sphinx-build
+PAPER         =
+BUILDDIR      = _build
+
+# User-friendly check for sphinx-build
+ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
+$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
+endif
+
+# Internal variables.
+PAPEROPT_a4     = -D latex_paper_size=a4
+PAPEROPT_letter = -D latex_paper_size=letter
+ALLSPHINXOPTS   = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
+# the i18n builder cannot share the environment and doctrees with the others
+I18NSPHINXOPTS  = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
+
+.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp applehelp devhelp epub latex latexpdf latexpdfja text man texinfo info gettext changes linkcheck doctest coverage xml pseudoxml
+
+help:
+	@echo "Please use \`make <target>' where <target> is one of"
+	@echo "  html       to make standalone HTML files"
+	@echo "  dirhtml    to make HTML files named index.html in directories"
+	@echo "  singlehtml to make a single large HTML file"
+	@echo "  pickle     to make pickle files"
+	@echo "  json       to make JSON files"
+	@echo "  htmlhelp   to make HTML files and a HTML help project"
+	@echo "  qthelp     to make HTML files and a qthelp project"
+	@echo "  applehelp  to make an Apple Help Book"
+	@echo "  devhelp    to make HTML files and a Devhelp project"
+	@echo "  epub       to make an epub"
+	@echo "  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
+	@echo "  latexpdf   to make LaTeX files and run them through pdflatex"
+	@echo "  latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
+	@echo "  text       to make text files"
+	@echo "  man        to make manual pages"
+	@echo "  texinfo    to make Texinfo files"
+	@echo "  info       to make Texinfo files and run them through makeinfo"
+	@echo "  gettext    to make PO message catalogs"
+	@echo "  changes    to make an overview of all changed/added/deprecated items"
+	@echo "  xml        to make Docutils-native XML files"
+	@echo "  pseudoxml  to make pseudoxml-XML files for display purposes"
+	@echo "  linkcheck  to check all external links for integrity"
+	@echo "  doctest    to run all doctests embedded in the documentation (if enabled)"
+	@echo "  coverage   to run coverage check of the documentation (if enabled)"
+
+clean:
+	rm -rf $(if $(strip $(BUILDDIR)),$(BUILDDIR),$(error BUILDDIR is empty; refusing to run rm -rf /*))/*
+
+html:
+	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
+	@echo
+	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
+
+dirhtml:
+	$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
+	@echo
+	@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
+
+singlehtml:
+	$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
+	@echo
+	@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
+
+pickle:
+	$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
+	@echo
+	@echo "Build finished; now you can process the pickle files."
+
+json:
+	$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
+	@echo
+	@echo "Build finished; now you can process the JSON files."
+
+htmlhelp:
+	$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
+	@echo
+	@echo "Build finished; now you can run HTML Help Workshop with the" \
+	      ".hhp project file in $(BUILDDIR)/htmlhelp."
+
+qthelp:
+	$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
+	@echo
+	@echo "Build finished; now you can run "qcollectiongenerator" with the" \
+	      ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
+	@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/sklearn-crfsuite.qhcp"
+	@echo "To view the help file:"
+	@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/sklearn-crfsuite.qhc"
+
+applehelp:
+	$(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp
+	@echo
+	@echo "Build finished. The help book is in $(BUILDDIR)/applehelp."
+	@echo "N.B. You won't be able to view it unless you put it in" \
+	      "~/Library/Documentation/Help or install it in your application" \
+	      "bundle."
+
+devhelp:
+	$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
+	@echo
+	@echo "Build finished."
+	@echo "To view the help file:"
+	@echo "# mkdir -p $$HOME/.local/share/devhelp/sklearn-crfsuite"
+	@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/sklearn-crfsuite"
+	@echo "# devhelp"
+
+epub:
+	$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
+	@echo
+	@echo "Build finished. The epub file is in $(BUILDDIR)/epub."
+
+latex:
+	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
+	@echo
+	@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
+	@echo "Run \`make' in that directory to run these through (pdf)latex" \
+	      "(use \`make latexpdf' here to do that automatically)."
+
+latexpdf:
+	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
+	@echo "Running LaTeX files through pdflatex..."
+	$(MAKE) -C $(BUILDDIR)/latex all-pdf
+	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
+
+latexpdfja:
+	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
+	@echo "Running LaTeX files through platex and dvipdfmx..."
+	$(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
+	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
+
+text:
+	$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
+	@echo
+	@echo "Build finished. The text files are in $(BUILDDIR)/text."
+
+man:
+	$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
+	@echo
+	@echo "Build finished. The manual pages are in $(BUILDDIR)/man."
+
+texinfo:
+	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
+	@echo
+	@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
+	@echo "Run \`make' in that directory to run these through makeinfo" \
+	      "(use \`make info' here to do that automatically)."
+
+info:
+	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
+	@echo "Running Texinfo files through makeinfo..."
+	$(MAKE) -C $(BUILDDIR)/texinfo info
+	@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
+
+gettext:
+	$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
+	@echo
+	@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
+
+changes:
+	$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
+	@echo
+	@echo "The overview file is in $(BUILDDIR)/changes."
+
+linkcheck:
+	$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
+	@echo
+	@echo "Link check complete; look for any errors in the above output " \
+	      "or in $(BUILDDIR)/linkcheck/output.txt."
+
+doctest:
+	$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
+	@echo "Testing of doctests in the sources finished, look at the " \
+	      "results in $(BUILDDIR)/doctest/output.txt."
+
+coverage:
+	$(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage
+	@echo "Testing of coverage in the sources finished, look at the " \
+	      "results in $(BUILDDIR)/coverage/python.txt."
+
+xml:
+	$(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
+	@echo
+	@echo "Build finished. The XML files are in $(BUILDDIR)/xml."
+
+pseudoxml:
+	$(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
+	@echo
+	@echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."
diff --git a/docs/api.rst b/docs/api.rst
new file mode 100644
index 0000000..1f4649d
--- /dev/null
+++ b/docs/api.rst
@@ -0,0 +1,14 @@
+.. _api:
+
+API Reference
+=============
+
+CRF
+---
+
+.. automodule:: sklearn_crfsuite
+    :members:
+
+.. autoclass:: CRF
+    :members:
+
diff --git a/docs/changes.rst b/docs/changes.rst
new file mode 100644
index 0000000..d9e113e
--- /dev/null
+++ b/docs/changes.rst
@@ -0,0 +1 @@
+.. include:: ../CHANGES.rst
diff --git a/docs/conf.py b/docs/conf.py
new file mode 100644
index 0000000..2195abb
--- /dev/null
+++ b/docs/conf.py
@@ -0,0 +1,308 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+#
+# sklearn-crfsuite documentation build configuration file, created by
+# sphinx-quickstart on Fri Nov 27 03:50:38 2015.
+#
+# This file is execfile()d with the current directory set to its
+# containing dir.
+#
+# Note that not all possible configuration values are present in this
+# autogenerated file.
+#
+# All configuration values have a default; values that are commented out
+# serve to show the default.
+
+import sys
+import os
+import shlex
+
+# If extensions (or modules to document with autodoc) are in another directory,
+# add these directories to sys.path here. If the directory is relative to the
+# documentation root, use os.path.abspath to make it absolute, like shown here.
+sys.path.insert(0, os.path.abspath('..'))
+
+# -- General configuration ------------------------------------------------
+
+# If your documentation needs a minimal Sphinx version, state it here.
+#needs_sphinx = '1.0'
+
+# Add any Sphinx extension module names here, as strings. They can be
+# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
+# ones.
+extensions = [
+    'sphinx.ext.autodoc',
+    'sphinx.ext.viewcode',
+    'sphinx.ext.napoleon',
+    'alabaster',
+]
+numpydoc_show_class_members = False
+
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ['_templates']
+
+# The suffix(es) of source filenames.
+# You can specify multiple suffix as a list of string:
+# source_suffix = ['.rst', '.md']
+source_suffix = '.rst'
+
+# The encoding of source files.
+#source_encoding = 'utf-8-sig'
+
+# The master toctree document.
+master_doc = 'index'
+
+# General information about the project.
+project = 'sklearn-crfsuite'
+copyright = '2015, Mikhail Korobov'
+author = 'Mikhail Korobov'
+
+# The version info for the project you're documenting, acts as replacement for
+# |version| and |release|, also used in various other places throughout the
+# built documents.
+#
+# The short X.Y version.
+version = '0.1'
+# The full version, including alpha/beta/rc tags.
+release = '0.1'
+
+# The language for content autogenerated by Sphinx. Refer to documentation
+# for a list of supported languages.
+#
+# This is also used if you do content translation via gettext catalogs.
+# Usually you set "language" from the command line for these cases.
+language = None
+
+# There are two options for replacing |today|: either, you set today to some
+# non-false value, then it is used:
+#today = ''
+# Else, today_fmt is used as the format for a strftime call.
+#today_fmt = '%B %d, %Y'
+
+# List of patterns, relative to source directory, that match files and
+# directories to ignore when looking for source files.
+exclude_patterns = ['_build']
+
+# The reST default role (used for this markup: `text`) to use for all
+# documents.
+#default_role = None
+
+# If true, '()' will be appended to :func: etc. cross-reference text.
+#add_function_parentheses = True
+
+# If true, the current module name will be prepended to all description
+# unit titles (such as .. function::).
+#add_module_names = True
+
+# If true, sectionauthor and moduleauthor directives will be shown in the
+# output. They are ignored by default.
+#show_authors = False
+
+# The name of the Pygments (syntax highlighting) style to use.
+pygments_style = 'sphinx'
+
+# A list of ignored prefixes for module index sorting.
+#modindex_common_prefix = []
+
+# If true, keep warnings as "system message" paragraphs in the built documents.
+#keep_warnings = False
+
+# If true, `todo` and `todoList` produce output, else they produce nothing.
+todo_include_todos = False
+
+
+# -- Options for HTML output ----------------------------------------------
+
+# The theme to use for HTML and HTML Help pages.  See the documentation for
+# a list of builtin themes.
+
+import alabaster
+
+html_theme_path = [alabaster.get_path()]
+html_theme = 'alabaster'
+html_sidebars = {
+    '**': [
+        'about.html',
+        'navigation.html',
+        # 'relations.html',
+        'searchbox.html',
+        # 'donate.html',
+    ]
+}
+
+html_theme_options = {
+    'description': 'CRFsuite (python-crfsuite) wrapper which provides interface similar to scikit-learn.',
+    'github_user': 'TeamHG-Memex',
+    'github_repo': 'sklearn-crfsuite',
+    'github_banner': True,
+    'github_button': False,
+}
+
+
+# Add any paths that contain custom themes here, relative to this directory.
+#html_theme_path = []
+
+# The name for this set of Sphinx documents.  If None, it defaults to
+# "<project> v<release> documentation".
+#html_title = None
+
+# A shorter title for the navigation bar.  Default is the same as html_title.
+#html_short_title = None
+
+# The name of an image file (relative to this directory) to place at the top
+# of the sidebar.
+#html_logo = None
+
+# The name of an image file (within the static path) to use as favicon of the
+# docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
+# pixels large.
+#html_favicon = None
+
+# Add any paths that contain custom static files (such as style sheets) here,
+# relative to this directory. They are copied after the builtin static files,
+# so a file named "default.css" will overwrite the builtin "default.css".
+html_static_path = ['_static']
+
+# Add any extra paths that contain custom files (such as robots.txt or
+# .htaccess) here, relative to this directory. These files are copied
+# directly to the root of the documentation.
+#html_extra_path = []
+
+# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
+# using the given strftime format.
+#html_last_updated_fmt = '%b %d, %Y'
+
+# If true, SmartyPants will be used to convert quotes and dashes to
+# typographically correct entities.
+#html_use_smartypants = True
+
+# Custom sidebar templates, maps document names to template names.
+#html_sidebars = {}
+
+# Additional templates that should be rendered to pages, maps page names to
+# template names.
+#html_additional_pages = {}
+
+# If false, no module index is generated.
+#html_domain_indices = True
+
+# If false, no index is generated.
+#html_use_index = True
+
+# If true, the index is split into individual pages for each letter.
+#html_split_index = False
+
+# If true, links to the reST sources are added to the pages.
+#html_show_sourcelink = True
+
+# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
+#html_show_sphinx = True
+
+# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
+#html_show_copyright = True
+
+# If true, an OpenSearch description file will be output, and all pages will
+# contain a <link> tag referring to it.  The value of this option must be the
+# base URL from which the finished HTML is served.
+#html_use_opensearch = ''
+
+# This is the file name suffix for HTML files (e.g. ".xhtml").
+#html_file_suffix = None
+
+# Language to be used for generating the HTML full-text search index.
+# Sphinx supports the following languages:
+#   'da', 'de', 'en', 'es', 'fi', 'fr', 'h', 'it', 'ja'
+#   'nl', 'no', 'pt', 'ro', 'r', 'sv', 'tr'
+#html_search_language = 'en'
+
+# A dictionary with options for the search language support, empty by default.
+# Now only 'ja' uses this config value
+#html_search_options = {'type': 'default'}
+
+# The name of a javascript file (relative to the configuration directory) that
+# implements a search results scorer. If empty, the default will be used.
+#html_search_scorer = 'scorer.js'
+
+# Output file base name for HTML help builder.
+htmlhelp_basename = 'sklearn-crfsuitedoc'
+
+# -- Options for LaTeX output ---------------------------------------------
+
+latex_elements = {
+# The paper size ('letterpaper' or 'a4paper').
+#'papersize': 'letterpaper',
+
+# The font size ('10pt', '11pt' or '12pt').
+#'pointsize': '10pt',
+
+# Additional stuff for the LaTeX preamble.
+#'preamble': '',
+
+# Latex figure (float) alignment
+#'figure_align': 'htbp',
+}
+
+# Grouping the document tree into LaTeX files. List of tuples
+# (source start file, target name, title,
+#  author, documentclass [howto, manual, or own class]).
+latex_documents = [
+  (master_doc, 'sklearn-crfsuite.tex', 'sklearn-crfsuite Documentation',
+   'Mikhail Korobov', 'manual'),
+]
+
+# The name of an image file (relative to this directory) to place at the top of
+# the title page.
+#latex_logo = None
+
+# For "manual" documents, if this is true, then toplevel headings are parts,
+# not chapters.
+#latex_use_parts = False
+
+# If true, show page references after internal links.
+#latex_show_pagerefs = False
+
+# If true, show URL addresses after external links.
+#latex_show_urls = False
+
+# Documents to append as an appendix to all manuals.
+#latex_appendices = []
+
+# If false, no module index is generated.
+#latex_domain_indices = True
+
+
+# -- Options for manual page output ---------------------------------------
+
+# One entry per manual page. List of tuples
+# (source start file, name, description, authors, manual section).
+man_pages = [
+    (master_doc, 'sklearn-crfsuite', 'sklearn-crfsuite Documentation',
+     [author], 1)
+]
+
+# If true, show URL addresses after external links.
+#man_show_urls = False
+
+
+# -- Options for Texinfo output -------------------------------------------
+
+# Grouping the document tree into Texinfo files. List of tuples
+# (source start file, target name, title, author,
+#  dir menu entry, description, category)
+texinfo_documents = [
+  (master_doc, 'sklearn-crfsuite', 'sklearn-crfsuite Documentation',
+   author, 'sklearn-crfsuite', 'CRFsuite wrapper with a scikit-learn-like interface.',
+   'Miscellaneous'),
+]
+
+# Documents to append as an appendix to all manuals.
+#texinfo_appendices = []
+
+# If false, no module index is generated.
+#texinfo_domain_indices = True
+
+# How to display URL addresses: 'footnote', 'no', or 'inline'.
+#texinfo_show_urls = 'footnote'
+
+# If true, do not generate a @detailmenu in the "Top" node's menu.
+#texinfo_no_detailmenu = False
diff --git a/docs/contributing.rst b/docs/contributing.rst
new file mode 100644
index 0000000..3841781
--- /dev/null
+++ b/docs/contributing.rst
@@ -0,0 +1,33 @@
+Contributing
+============
+
+* Source code: https://github.com/TeamHG-Memex/sklearn-crfsuite
+* Issue tracker: https://github.com/TeamHG-Memex/sklearn-crfsuite/issues
+
+Feel free to submit ideas, bug reports and pull requests.
+
+In order to run tests install tox_, then type
+
+::
+
+    tox
+
+from the source checkout.
+
+.. _tox: http://tox.testrun.org
+
+Authors
+-------
+
+* Mikhail Korobov <kmike84@gmail.com>
+
+The code was initially extracted from
+`webstruct <https://github.com/scrapinghub/webstruct>`_ and
+`morphine <https://github.com/kmike/morphine>`_ projects and then
+cleaned up and improved.
+
+
+License
+-------
+
+License is MIT.
diff --git a/docs/index.rst b/docs/index.rst
new file mode 100644
index 0000000..7168fd6
--- /dev/null
+++ b/docs/index.rst
@@ -0,0 +1,42 @@
+================
+sklearn-crfsuite
+================
+
+.. image:: https://img.shields.io/pypi/v/sklearn-crfsuite.svg
+   :target: https://pypi.python.org/pypi/sklearn-crfsuite
+   :alt: PyPI Version
+
+.. image:: https://img.shields.io/travis/TeamHG-Memex/sklearn-crfsuite/master.svg
+   :target: http://travis-ci.org/TeamHG-Memex/sklearn-crfsuite
+   :alt: Build Status
+
+.. image:: http://codecov.io/github/TeamHG-Memex/sklearn-crfsuite/coverage.svg?branch=master
+   :target: http://codecov.io/github/TeamHG-Memex/sklearn-crfsuite?branch=master
+   :alt: Code Coverage
+
+.. image:: https://readthedocs.org/projects/sklearn-crfsuite/badge/?version=latest
+   :target: http://sklearn-crfsuite.readthedocs.org/en/latest/?badge=latest
+   :alt: Documentation
+
+sklearn-crfsuite is a thin CRFsuite_ (python-crfsuite_) wrapper which provides
+scikit-learn_-compatible :class:`sklearn_crfsuite.CRF` estimator:
+you can use e.g. scikit-learn model selection utilities
+(cross-validation, hyperparameter optimization) with it.
+
+.. _CRFsuite: http://www.chokkan.org/software/crfsuite/
+.. _python-crfsuite: https://github.com/tpeng/python-crfsuite
+.. _scikit-learn: http://scikit-learn.org/
+
+License is MIT.
+
+Contents
+========
+
+.. toctree::
+   :maxdepth: 1
+
+   install
+   tutorial
+   api
+   contributing
+   changes
diff --git a/docs/install.rst b/docs/install.rst
new file mode 100644
index 0000000..043c8b7
--- /dev/null
+++ b/docs/install.rst
@@ -0,0 +1,13 @@
+Install Instructions
+====================
+
+Make sure scikit-learn_ is installed, then run
+
+::
+
+    pip install sklearn-crfsuite
+
+sklearn-crfsuite requires Python 2.7+ or 3.3+.
+
+
+.. _scikit-learn: http://scikit-learn.org/
diff --git a/docs/make.bat b/docs/make.bat
new file mode 100644
index 0000000..28eb9f3
--- /dev/null
+++ b/docs/make.bat
@@ -0,0 +1,263 @@
+@ECHO OFF
+
+REM Command file for Sphinx documentation
+
+if "%SPHINXBUILD%" == "" (
+	set SPHINXBUILD=sphinx-build
+)
+set BUILDDIR=_build
+set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% .
+set I18NSPHINXOPTS=%SPHINXOPTS% .
+if NOT "%PAPER%" == "" (
+	set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS%
+	set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS%
+)
+
+if "%1" == "" goto help
+
+if "%1" == "help" (
+	:help
+	echo.Please use `make ^<target^>` where ^<target^> is one of
+	echo.  html       to make standalone HTML files
+	echo.  dirhtml    to make HTML files named index.html in directories
+	echo.  singlehtml to make a single large HTML file
+	echo.  pickle     to make pickle files
+	echo.  json       to make JSON files
+	echo.  htmlhelp   to make HTML files and a HTML help project
+	echo.  qthelp     to make HTML files and a qthelp project
+	echo.  devhelp    to make HTML files and a Devhelp project
+	echo.  epub       to make an epub
+	echo.  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter
+	echo.  text       to make text files
+	echo.  man        to make manual pages
+	echo.  texinfo    to make Texinfo files
+	echo.  gettext    to make PO message catalogs
+	echo.  changes    to make an overview over all changed/added/deprecated items
+	echo.  xml        to make Docutils-native XML files
+	echo.  pseudoxml  to make pseudoxml-XML files for display purposes
+	echo.  linkcheck  to check all external links for integrity
+	echo.  doctest    to run all doctests embedded in the documentation if enabled
+	echo.  coverage   to run coverage check of the documentation if enabled
+	goto end
+)
+
+if "%1" == "clean" (
+	for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i
+	del /q /s %BUILDDIR%\*
+	goto end
+)
+
+
+REM Check if sphinx-build is available and fallback to Python version if any
+%SPHINXBUILD% 2> nul
+if errorlevel 9009 goto sphinx_python
+goto sphinx_ok
+
+:sphinx_python
+
+set SPHINXBUILD=python -m sphinx.__init__
+%SPHINXBUILD% 2> nul
+if errorlevel 9009 (
+	echo.
+	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
+	echo.installed, then set the SPHINXBUILD environment variable to point
+	echo.to the full path of the 'sphinx-build' executable. Alternatively you
+	echo.may add the Sphinx directory to PATH.
+	echo.
+	echo.If you don't have Sphinx installed, grab it from
+	echo.http://sphinx-doc.org/
+	exit /b 1
+)
+
+:sphinx_ok
+
+
+if "%1" == "html" (
+	%SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html
+	if errorlevel 1 exit /b 1
+	echo.
+	echo.Build finished. The HTML pages are in %BUILDDIR%/html.
+	goto end
+)
+
+if "%1" == "dirhtml" (
+	%SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml
+	if errorlevel 1 exit /b 1
+	echo.
+	echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml.
+	goto end
+)
+
+if "%1" == "singlehtml" (
+	%SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml
+	if errorlevel 1 exit /b 1
+	echo.
+	echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml.
+	goto end
+)
+
+if "%1" == "pickle" (
+	%SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle
+	if errorlevel 1 exit /b 1
+	echo.
+	echo.Build finished; now you can process the pickle files.
+	goto end
+)
+
+if "%1" == "json" (
+	%SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json
+	if errorlevel 1 exit /b 1
+	echo.
+	echo.Build finished; now you can process the JSON files.
+	goto end
+)
+
+if "%1" == "htmlhelp" (
+	%SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp
+	if errorlevel 1 exit /b 1
+	echo.
+	echo.Build finished; now you can run HTML Help Workshop with the ^
+.hhp project file in %BUILDDIR%/htmlhelp.
+	goto end
+)
+
+if "%1" == "qthelp" (
+	%SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp
+	if errorlevel 1 exit /b 1
+	echo.
+	echo.Build finished; now you can run "qcollectiongenerator" with the ^
+.qhcp project file in %BUILDDIR%/qthelp, like this:
+	echo.^> qcollectiongenerator %BUILDDIR%\qthelp\sklearn-crfsuite.qhcp
+	echo.To view the help file:
+	echo.^> assistant -collectionFile %BUILDDIR%\qthelp\sklearn-crfsuite.qhc
+	goto end
+)
+
+if "%1" == "devhelp" (
+	%SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp
+	if errorlevel 1 exit /b 1
+	echo.
+	echo.Build finished.
+	goto end
+)
+
+if "%1" == "epub" (
+	%SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub
+	if errorlevel 1 exit /b 1
+	echo.
+	echo.Build finished. The epub file is in %BUILDDIR%/epub.
+	goto end
+)
+
+if "%1" == "latex" (
+	%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
+	if errorlevel 1 exit /b 1
+	echo.
+	echo.Build finished; the LaTeX files are in %BUILDDIR%/latex.
+	goto end
+)
+
+if "%1" == "latexpdf" (
+	%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
+	cd %BUILDDIR%/latex
+	make all-pdf
+	cd %~dp0
+	echo.
+	echo.Build finished; the PDF files are in %BUILDDIR%/latex.
+	goto end
+)
+
+if "%1" == "latexpdfja" (
+	%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
+	cd %BUILDDIR%/latex
+	make all-pdf-ja
+	cd %~dp0
+	echo.
+	echo.Build finished; the PDF files are in %BUILDDIR%/latex.
+	goto end
+)
+
+if "%1" == "text" (
+	%SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text
+	if errorlevel 1 exit /b 1
+	echo.
+	echo.Build finished. The text files are in %BUILDDIR%/text.
+	goto end
+)
+
+if "%1" == "man" (
+	%SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man
+	if errorlevel 1 exit /b 1
+	echo.
+	echo.Build finished. The manual pages are in %BUILDDIR%/man.
+	goto end
+)
+
+if "%1" == "texinfo" (
+	%SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo
+	if errorlevel 1 exit /b 1
+	echo.
+	echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo.
+	goto end
+)
+
+if "%1" == "gettext" (
+	%SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale
+	if errorlevel 1 exit /b 1
+	echo.
+	echo.Build finished. The message catalogs are in %BUILDDIR%/locale.
+	goto end
+)
+
+if "%1" == "changes" (
+	%SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes
+	if errorlevel 1 exit /b 1
+	echo.
+	echo.The overview file is in %BUILDDIR%/changes.
+	goto end
+)
+
+if "%1" == "linkcheck" (
+	%SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck
+	if errorlevel 1 exit /b 1
+	echo.
+	echo.Link check complete; look for any errors in the above output ^
+or in %BUILDDIR%/linkcheck/output.txt.
+	goto end
+)
+
+if "%1" == "doctest" (
+	%SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest
+	if errorlevel 1 exit /b 1
+	echo.
+	echo.Testing of doctests in the sources finished, look at the ^
+results in %BUILDDIR%/doctest/output.txt.
+	goto end
+)
+
+if "%1" == "coverage" (
+	%SPHINXBUILD% -b coverage %ALLSPHINXOPTS% %BUILDDIR%/coverage
+	if errorlevel 1 exit /b 1
+	echo.
+	echo.Testing of coverage in the sources finished, look at the ^
+results in %BUILDDIR%/coverage/python.txt.
+	goto end
+)
+
+if "%1" == "xml" (
+	%SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml
+	if errorlevel 1 exit /b 1
+	echo.
+	echo.Build finished. The XML files are in %BUILDDIR%/xml.
+	goto end
+)
+
+if "%1" == "pseudoxml" (
+	%SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml
+	if errorlevel 1 exit /b 1
+	echo.
+	echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml.
+	goto end
+)
+
+:end
diff --git a/docs/tutorial.rst b/docs/tutorial.rst
new file mode 100644
index 0000000..2a01406
--- /dev/null
+++ b/docs/tutorial.rst
@@ -0,0 +1,4 @@
+Tutorial
+========
+
+TODO
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..e4bb154
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,4 @@
+tqdm >= 2.0
+tabulate >= 0.7.5
+scikit-learn >= 0.15
+python-crfsuite >= 0.8.3
diff --git a/setup.cfg b/setup.cfg
new file mode 100644
index 0000000..3c6e79c
--- /dev/null
+++ b/setup.cfg
@@ -0,0 +1,2 @@
+[bdist_wheel]
+universal=1
diff --git a/setup.py b/setup.py
new file mode 100755
index 0000000..e844460
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,34 @@
+#!/usr/bin/env python
+from setuptools import setup
+
+setup(
+    name='sklearn-crfsuite',
+    version='0.1',
+    author='Mikhail Korobov',
+    author_email='kmike84@gmail.com',
+    license='MIT license',
+    long_description=open('README.rst').read() + "\n\n" + open('CHANGES.rst').read(),
+    description="CRFsuite (python-crfsuite) wrapper which provides interface similar to scikit-learn",
+    url='https://github.com/TeamHG-Memex/sklearn-crfsuite',
+    zip_safe=False,
+    packages=['sklearn_crfsuite'],
+    install_requires=[
+        "tqdm >= 2.0",
+        "six",
+        "tabulate",
+        "python-crfsuite >= 0.8.3"
+    ],
+    classifiers=[
+        'Development Status :: 3 - Alpha',
+        'License :: OSI Approved :: MIT License',
+        'Intended Audience :: Developers',
+        'Operating System :: OS Independent',
+        'Programming Language :: Python',
+        'Programming Language :: Python :: 2',
+        'Programming Language :: Python :: 2.7',
+        'Programming Language :: Python :: 3',
+        'Programming Language :: Python :: 3.3',
+        'Programming Language :: Python :: 3.4',
+        'Programming Language :: Python :: 3.5',
+    ],
+)
diff --git a/sklearn_crfsuite/__init__.py b/sklearn_crfsuite/__init__.py
new file mode 100644
index 0000000..fe61bef
--- /dev/null
+++ b/sklearn_crfsuite/__init__.py
@@ -0,0 +1,2 @@
+# -*- coding: utf-8 -*-
+from .estimator import CRF
diff --git a/sklearn_crfsuite/_fileresource.py b/sklearn_crfsuite/_fileresource.py
new file mode 100644
index 0000000..8975560
--- /dev/null
+++ b/sklearn_crfsuite/_fileresource.py
@@ -0,0 +1,73 @@
+# -*- coding: utf-8 -*-
+from __future__ import absolute_import
+import os
+import tempfile
+
+
+class FileResource(object):
+    """
+    Object that "owns" a file on a filesystem. If the ``filename`` is None,
+    it maintains a temporary file whose name is accessible via ``name``
+    attribute; when pickling, the contents of this file are pickled;
+    when unpickling, a new temp file is created; temp files are auto-deleted.
+    """
+    def __init__(self, filename=None, keep_tempfiles=False, suffix='', prefix=''):
+        self.name = filename
+        self.auto = filename is None  # True when this object manages a temp file
+        self.keep_tempfiles = keep_tempfiles
+        self.suffix = suffix
+        self.prefix = prefix
+
+    def ensure_name(self):
+        """ Ensure that a filename is available (ValueError if it can't be) """
+        if self.name is not None:
+            return
+        if not self.auto:
+            raise ValueError("File name is not provided")
+        fd, self.name = tempfile.mkstemp(self.suffix, self.prefix)
+        os.close(fd)  # only the path is needed; don't leak the open descriptor
+
+    def cleanup(self):
+        """ Clean temporary files if needed """
+        if self.keep_tempfiles or not self.auto:
+            return
+
+        if self.name is not None:
+            try:
+                os.unlink(self.name)
+            except OSError:
+                pass  # best effort: removal failures are deliberately ignored
+            self.name = None
+
+    def refresh(self):  # drop the owned temp file, then make sure a name exists
+        self.cleanup()
+        self.ensure_name()
+
+    def __del__(self):  # temp files are removed when the object is collected
+        self.cleanup()
+
+    def __getstate__(self):
+        dct = self.__dict__.copy()
+
+        if self.auto:
+            filename = dct['name']
+            if filename is not None:
+                try:
+                    with open(filename, 'rb') as f:
+                        dct['__FILE_RESOURCE_DATA__'] = f.read()
+                except IOError:
+                    pass  # unreadable file: the state is pickled without data
+                dct['name'] = None  # the temp path is not carried in the pickle
+
+        return dct
+
+    def __setstate__(self, state):
+        data = state.pop('__FILE_RESOURCE_DATA__', None)
+        self.__dict__.update(state)
+
+        if data is not None:
+            assert self.name is None
+            self.ensure_name()  # allocates a fresh temp file for the contents
+            with open(self.name, 'wb') as f:
+                f.write(data)
+
diff --git a/sklearn_crfsuite/estimator.py b/sklearn_crfsuite/estimator.py
new file mode 100644
index 0000000..71ef5a6
--- /dev/null
+++ b/sklearn_crfsuite/estimator.py
@@ -0,0 +1,469 @@
+# -*- coding: utf-8 -*-
+from __future__ import absolute_import
+
+from six.moves import zip
+from tqdm import tqdm
+import pycrfsuite
+from sklearn.metrics import accuracy_score
+from sklearn.base import BaseEstimator
+
+from sklearn_crfsuite._fileresource import FileResource
+from sklearn_crfsuite.utils import flatten
+from sklearn_crfsuite.trainer import LinePerIterationTrainer
+
+
+class CRF(BaseEstimator):
+    """
+    python-crfsuite wrapper with interface similar to scikit-learn.
+    It allows using a familiar fit/predict interface and scikit-learn
+    model selection utilities (cross-validation, hyperparameter optimization).
+
+    Unlike pycrfsuite.Trainer / pycrfsuite.Tagger this object is picklable;
+    on-disk files are managed automatically.
+
+    Parameters
+    ----------
+    algorithm : str, optional (default='lbfgs')
+        Training algorithm. Allowed values:
+
+        * ``'lbfgs'`` - Gradient descent using the L-BFGS method
+        * ``'l2sgd'`` - Stochastic Gradient Descent with L2 regularization term
+        * ``'ap'`` - Averaged Perceptron
+        * ``'pa'`` - Passive Aggressive (PA)
+        * ``'arow'`` - Adaptive Regularization Of Weight Vector (AROW)
+
+    min_freq : float, optional (default=0)
+        Cut-off threshold for occurrence
+        frequency of a feature. CRFsuite will ignore features whose
+        frequencies of occurrences in the training data are no greater
+        than `min_freq`. The default is no cut-off.
+
+    all_possible_states : bool, optional (default=False)
+        Specify whether CRFsuite generates state features that do not even
+        occur in the training data (i.e., negative state features).
+        When True, CRFsuite generates state features that associate all of
+        possible combinations between attributes and labels.
+
+        Suppose that the numbers of attributes and labels are A and L
+        respectively, this function will generate (A * L) features.
+        Enabling this function may improve the labeling accuracy because
+        the CRF model can learn the condition where an item is not predicted
+        to its reference label. However, this function may also increase
+        the number of features and slow down the training process
+        drastically. This function is disabled by default.
+
+    all_possible_transitions : bool, optional (default=False)
+        Specify whether CRFsuite generates transition features that
+        do not even occur in the training data (i.e., negative transition
+        features). When True, CRFsuite generates transition features that
+        associate all of possible label pairs. Suppose that the number
+        of labels in the training data is L, this function will
+        generate (L * L) transition features.
+        This function is disabled by default.
+
+    c1 : float, optional (default=0)
+        The coefficient for L1 regularization.
+        If a non-zero value is specified, CRFsuite switches to the
+        Orthant-Wise Limited-memory Quasi-Newton (OWL-QN) method.
+        The default value is zero (no L1 regularization).
+
+        Supported training algorithms: lbfgs
+
+    c2 : float, optional (default=1.0)
+        The coefficient for L2 regularization.
+
+        Supported training algorithms: l2sgd, lbfgs
+
+    max_iterations : int, optional (default=None)
+        The maximum number of iterations for optimization algorithms.
+        Default value depends on training algorithm:
+
+        * lbfgs - unlimited;
+        * l2sgd - 1000;
+        * ap - 100;
+        * pa - 100;
+        * arow - 100.
+
+    num_memories : int, optional (default=6)
+        The number of limited memories for approximating the inverse hessian
+        matrix.
+
+        Supported training algorithms: lbfgs
+
+    epsilon : float, optional (default=1e-5)
+        The epsilon parameter that determines the condition of convergence.
+
+        Supported training algorithms: ap, arow, lbfgs, pa
+
+    period : int, optional (default=10)
+        The duration of iterations to test the stopping criterion.
+
+        Supported training algorithms: l2sgd, lbfgs
+
+    delta : float, optional (default=1e-5)
+        The threshold for the stopping criterion; an iteration stops
+        when the improvement of the log likelihood over the last
+        `period` iterations is no greater than this threshold.
+
+        Supported training algorithms: l2sgd, lbfgs
+
+    linesearch : str, optional (default='MoreThuente')
+        The line search algorithm used in L-BFGS updates. Allowed values:
+
+        * ``'MoreThuente'`` - More and Thuente's method;
+        * ``'Backtracking'`` - backtracking method with regular Wolfe condition;
+        * ``'StrongBacktracking'`` -  backtracking method with strong Wolfe
+          condition.
+
+        Supported training algorithms: lbfgs
+
+    max_linesearch : int, optional (default=20)
+        The maximum number of trials for the line search algorithm.
+
+        Supported training algorithms: lbfgs
+
+    calibration_eta : float, optional (default=0.1)
+        The initial value of learning rate (eta) used for calibration.
+
+        Supported training algorithms: l2sgd
+
+    calibration_rate : float, optional (default=2.0)
+        The rate of increase/decrease of learning rate for calibration.
+
+        Supported training algorithms: l2sgd
+
+    calibration_samples : int, optional (default=1000)
+        The number of instances used for calibration.
+        The calibration routine randomly chooses instances no larger
+        than `calibration_samples`.
+
+        Supported training algorithms: l2sgd
+
+    calibration_candidates : int, optional (default=10)
+        The number of candidates of learning rate.
+        The calibration routine terminates after finding
+        `calibration_candidates` candidates of learning rates
+        that can increase log-likelihood.
+
+        Supported training algorithms: l2sgd
+
+    calibration_max_trials : int, optional (default=20)
+        The maximum number of trials of learning rates for calibration.
+        The calibration routine terminates after trying
+        `calibration_max_trials` candidate values of learning rates.
+
+        Supported training algorithms: l2sgd
+
+    pa_type : int, optional (default=1)
+        The strategy for updating feature weights. Allowed values:
+
+        * 0 - PA without slack variables;
+        * 1 - PA type I;
+        * 2 - PA type II.
+
+        Supported training algorithms: pa
+
+    c : float, optional (default=1)
+        Aggressiveness parameter (used only for PA-I and PA-II).
+        This parameter controls the influence of the slack term on the
+        objective function.
+
+        Supported training algorithms: pa
+
+    error_sensitive : bool, optional (default=True)
+        If this parameter is True, the optimization routine includes
+        into the objective function the square root of the number of
+        incorrect labels predicted by the model.
+
+        Supported training algorithms: pa
+
+    averaging : bool, optional (default=True)
+        If this parameter is True, the optimization routine computes
+        the average of feature weights at all updates in the training
+        process (similarly to Averaged Perceptron).
+
+        Supported training algorithms: pa
+
+    variance : float, optional (default=1)
+        The initial variance of every feature weight.
+        The algorithm initializes a vector of feature weights as
+        a multivariate Gaussian distribution with mean 0
+        and variance `variance`.
+
+        Supported training algorithms: arow
+
+    gamma : float, optional (default=1)
+        The tradeoff between loss function and changes of feature weights.
+
+        Supported training algorithms: arow
+
+    verbose : bool, optional (default=False)
+        Enable trainer verbose mode.
+
+    model_filename : str, optional (default=None)
+        A path to an existing CRFSuite model.
+        This parameter allows to load and use existing crfsuite models.
+
+        By default, model files are created automatically and saved
+        in temporary locations; the preferred way to save/load CRF models
+        is to use pickle (or its alternatives like joblib).
+
+    Attributes
+    ----------
+    tagger : pycrfsuite.Tagger
+        python-crfsuite Tagger instance.
+
+    """
+    def __init__(self,
+                 algorithm=None,
+
+                 min_freq=None,
+                 all_possible_states=None,
+                 all_possible_transitions=None,
+                 c1=None,
+                 c2=None,
+                 max_iterations=None,
+                 num_memories=None,
+                 epsilon=None,
+                 period=None,
+                 delta=None,
+                 linesearch=None,
+                 max_linesearch=None,
+                 calibration_eta=None,
+                 calibration_rate=None,
+                 calibration_samples=None,
+                 calibration_candidates=None,
+                 calibration_max_trials=None,
+                 pa_type=None,
+                 c=None,
+                 error_sensitive=None,
+                 averaging=None,
+                 variance=None,
+                 gamma=None,
+
+                 verbose=False,
+                 model_filename=None,
+                 keep_tempfiles=False,
+                 trainer_cls=None):
+
+        self.algorithm = algorithm
+        self.min_freq = min_freq
+        self.all_possible_states = all_possible_states
+        self.all_possible_transitions = all_possible_transitions
+        self.c1 = c1
+        self.c2 = c2
+        self.max_iterations = max_iterations
+        self.num_memories = num_memories
+        self.epsilon = epsilon
+        self.period = period
+        self.delta = delta
+        self.linesearch = linesearch
+        self.max_linesearch = max_linesearch
+        self.calibration_eta = calibration_eta
+        self.calibration_rate = calibration_rate
+        self.calibration_samples = calibration_samples
+        self.calibration_candidates = calibration_candidates
+        self.calibration_max_trials = calibration_max_trials
+        self.pa_type = pa_type
+        self.c = c
+        self.error_sensitive = error_sensitive
+        self.averaging = averaging
+        self.variance = variance
+        self.gamma = gamma
+
+        # Keep *every* constructor argument under its own name: BaseEstimator's
+        # get_params() / sklearn.base.clone() read the values back via getattr().
+        self.model_filename = model_filename
+        self.keep_tempfiles = keep_tempfiles
+        self.verbose = verbose
+        self.trainer_cls = trainer_cls
+        self.modelfile = FileResource(
+            filename=model_filename, keep_tempfiles=keep_tempfiles,
+            suffix=".crfsuite", prefix="model")
+        self._tagger = None  # opened lazily by the ``tagger`` property
+        self.training_log_ = None  # set by fit() to the trainer's log parser
+
+    def fit(self, X, y, X_dev=None, y_dev=None):
+        """
+        Train a model and return ``self``.
+
+        Parameters
+        ----------
+        X : list of lists of dicts
+            Per-document feature dicts (in a python-crfsuite format).
+
+        y : list of lists of strings
+            Per-document labels, aligned with ``X``.
+
+        X_dev : (optional) list of lists of dicts
+            Feature dicts of the holdout (testing) documents.
+
+        y_dev : (optional) list of lists of strings
+            Labels corresponding to X_dev; required whenever X_dev is given.
+        """
+        has_X_dev, has_y_dev = X_dev is not None, y_dev is not None
+        if has_X_dev != has_y_dev:
+            raise ValueError("Pass both X_dev and y_dev to use the holdout data")
+
+        # release the tagger of a previously trained model, if any
+        if self._tagger is not None:
+            self._tagger.close()
+            self._tagger = None
+        self.modelfile.refresh()
+        trainer = self._get_trainer()
+
+        # training sequences are appended without an explicit group
+        train_pairs = zip(X, y)
+        if self.verbose:
+            train_pairs = tqdm(train_pairs, "loading training data to CRFsuite", len(X), leave=True)
+        for features, labels in train_pairs:
+            trainer.append(features, labels)
+        if self.verbose:
+            print("")
+
+        if has_X_dev:
+            # dev sequences are appended with group 1, used as holdout below
+            dev_pairs = zip(X_dev, y_dev)
+            if self.verbose:
+                dev_pairs = tqdm(dev_pairs, "loading dev data to CRFsuite", len(X_dev), leave=True)
+            for features, labels in dev_pairs:
+                trainer.append(features, labels, 1)
+            if self.verbose:
+                print("")
+
+        # holdout=-1 is passed when no dev data was supplied
+        holdout_group = 1 if has_X_dev else -1
+        trainer.train(self.modelfile.name, holdout=holdout_group)
+        self.training_log_ = trainer.logparser
+        return self
+
+    def predict(self, X):
+        """
+        Predict labels for several sequences.
+
+        Parameters
+        ----------
+        X : list of lists of dicts
+            feature dicts in python-crfsuite format
+
+        Returns
+        -------
+        y : list of lists of strings
+            predicted labels, one list per input sequence
+
+        """
+        return [self.predict_single(xseq) for xseq in X]
+
+    def predict_single(self, xseq):
+        """
+        Predict labels for a single sequence.
+
+        Parameters
+        ----------
+        xseq : list of dicts
+            feature dicts in python-crfsuite format
+
+        Returns
+        -------
+        y : list of strings
+            predicted labels
+        """
+        tagger = self.tagger
+        return tagger.tag(xseq)
+
+    def predict_marginals(self, X):
+        """
+        Compute label marginals for several sequences.
+
+        Parameters
+        ----------
+        X : list of lists of dicts
+            feature dicts in python-crfsuite format
+
+        Returns
+        -------
+        y : list of lists of dicts
+            predicted probabilities for each label at each position
+
+        """
+        return [self.predict_marginals_single(xseq) for xseq in X]
+
+    def predict_marginals_single(self, xseq):
+        """
+        Compute label marginals for a single sequence.
+
+        Parameters
+        ----------
+        xseq : list of dicts
+            feature dicts in python-crfsuite format
+
+        Returns
+        -------
+        y : list of dicts
+            predicted probabilities for each label at each position
+        """
+        tagger = self.tagger
+        labels = tagger.labels()
+        tagger.set(xseq)
+        marginals = []
+        for pos in range(len(xseq)):
+            marginals.append({label: tagger.marginal(label, pos) for label in labels})
+        return marginals
+
+    def score(self, X, y):
+        """
+        Return the accuracy over all items of all sequences
+        (true and predicted labels are flattened before comparison).
+        """
+        predicted = self.predict(X)
+        return accuracy_score(flatten(y), flatten(predicted))
+
+    @property
+    def tagger(self):  # pycrfsuite.Tagger opened on first use, then cached
+        if self._tagger is not None:
+            return self._tagger
+        if self.modelfile.name is None:
+            raise Exception("Can't load model. Is the model trained?")
+        opened = pycrfsuite.Tagger()
+        opened.open(self.modelfile.name)
+        self._tagger = opened  # cached only after open() succeeded
+        return opened
+
+    def _get_trainer(self):
+        # Build the trainer, forwarding only the parameters that are not None.
+        factory = self.trainer_cls or LinePerIterationTrainer
+        options = (
+            ('feature.minfreq', self.min_freq),
+            ('feature.possible_states', self.all_possible_states),
+            ('feature.possible_transitions', self.all_possible_transitions),
+            ('c1', self.c1),
+            ('c2', self.c2),
+            ('max_iterations', self.max_iterations),
+            ('num_memories', self.num_memories),
+            ('epsilon', self.epsilon),
+            ('period', self.period),
+            ('delta', self.delta),
+            ('linesearch', self.linesearch),
+            ('max_linesearch', self.max_linesearch),
+            ('calibration.eta', self.calibration_eta),
+            ('calibration.rate', self.calibration_rate),
+            ('calibration.samples', self.calibration_samples),
+            ('calibration.candidates', self.calibration_candidates),
+            ('calibration.max_trials', self.calibration_max_trials),
+            ('type', self.pa_type),
+            ('c', self.c),
+            ('error_sensitive', self.error_sensitive),
+            ('averaging', self.averaging),
+            ('variance', self.variance),
+            ('gamma', self.gamma),
+        )
+        params = {}
+        for key, value in options:
+            if value is not None:
+                params[key] = value
+        return factory(algorithm=self.algorithm, params=params, verbose=self.verbose)
+
+    def __getstate__(self):
+        # pickle a copy of the instance dict with the tagger reset to None
+        state = dict(self.__dict__, _tagger=None)
+        return state
diff --git a/sklearn_crfsuite/trainer.py b/sklearn_crfsuite/trainer.py
new file mode 100644
index 0000000..00d47bd
--- /dev/null
+++ b/sklearn_crfsuite/trainer.py
@@ -0,0 +1,52 @@
+# -*- coding: utf-8 -*-
+import pycrfsuite
+from tabulate import tabulate
+
+
+class LinePerIterationTrainer(pycrfsuite.Trainer):
+    """
+    This pycrfsuite.Trainer prints information about each iteration
+    on a single line.
+    """
+    def on_iteration(self, log, info):
+        """ Print the fields present in ``info`` as one line; every field
+        template ends with spaces so that adjacent fields never run together """
+        parts = [
+            "Iter {num:<3} ",
+            "time={time:<5.2f} ",
+            "loss={loss:<8.2f} ",
+        ]
+        if 'active_features' in info:
+            parts += ["active={active_features:<5} "]
+
+        if 'avg_precision' in info:
+            parts += [
+                "precision={avg_precision:0.3f}  ",
+                "recall={avg_recall:0.3f}  ",
+                "F1={avg_f1:0.3f}  ",
+                "Acc(item/seq)={item_accuracy_float:0.3f} {instance_accuracy_float:0.3f}  "
+            ]
+
+        if 'feature_norm' in info:
+            parts += ["feature_norm={feature_norm:<8.2f}"]
+
+        print("".join(parts).format(**info).strip())  # strip() drops end padding
+
+    def on_optimization_end(self, log):
+        """ Print a per-label score table when the last iteration has scores """
+        last_iter = self.logparser.last_iteration
+        if last_iter.get('scores', None):
+            data = [
+                [entity, score.precision, score.recall, score.f1 or 0, score.ref]
+                for entity, score in sorted(last_iter['scores'].items())
+            ]
+            table = tabulate(data,
+                headers=["Label", "Precision", "Recall", "F1", "Support"],
+            )
+            size = len(table.splitlines()[0])
+            print("="*size)
+            print(table)
+            print("-"*size)
+        super(LinePerIterationTrainer, self).on_optimization_end(log)
+
+
diff --git a/sklearn_crfsuite/utils.py b/sklearn_crfsuite/utils.py
new file mode 100644
index 0000000..7999ba8
--- /dev/null
+++ b/sklearn_crfsuite/utils.py
@@ -0,0 +1,12 @@
+# -*- coding: utf-8 -*-
+from itertools import chain
+
+
+def flatten(y):
+    """
+    Concatenate the items of every inner sequence into one flat list.
+
+    >>> flatten([[1,2], [3,4]])
+    [1, 2, 3, 4]
+    """
+    return [item for sequence in y for item in sequence]
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 0000000..93f710a
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,24 @@
+# -*- coding: utf-8 -*-
+from __future__ import absolute_import
+import pytest
+
+
+@pytest.fixture()
+def xseq():
+    return [
+        {'walk': 1, 'shop': 0.5},
+        {'walk': 1},
+        {'walk': 1, 'clean': 0.5},
+        {u'shop': 0.5, u'clean': 0.5},
+        {'walk': 0.5, 'clean': 1},
+        {'clean': 1, u'shop': 0.1},
+        {'walk': 1, 'shop': 0.5},
+        {},
+        {'clean': 1},
+        {u'солнце': u'не светит'.encode('utf8'), 'clean': 1},
+    ]
+
+@pytest.fixture
+def yseq():
+    return ['sunny', 'sunny', u'sunny', 'rainy', 'rainy', 'rainy',
+            'sunny', 'sunny', 'rainy', 'rainy']
diff --git a/tests/test_crf.py b/tests/test_crf.py
new file mode 100644
index 0000000..803e689
--- /dev/null
+++ b/tests/test_crf.py
@@ -0,0 +1,120 @@
+# -*- coding: utf-8 -*-
+import os
+import pickle
+
+import pytest
+from sklearn.cross_validation import cross_val_score
+
+from sklearn_crfsuite import CRF
+
+
+ALGORITHMS =  ["lbfgs", "l2sgd", "pa", "ap", "arow"]
+
+
+@pytest.mark.parametrize("algorithm", ALGORITHMS)
+def test_crf(xseq, yseq, algorithm):
+    """Fit on one sequence and check that its labels are predicted back."""
+    model = CRF(algorithm).fit([xseq], [yseq])  # fit() returns the estimator
+    predicted = model.predict([xseq])
+    # Averaged Perceptron is regularized too much
+    if algorithm != 'ap':
+        assert predicted == [yseq]
+
+
+@pytest.mark.parametrize("algorithm", ALGORITHMS)
+@pytest.mark.parametrize("use_dev", [True, False])
+def test_crf_verbose(xseq, yseq, algorithm, use_dev):
+    """Verbose training works both with and without holdout data."""
+    # the holdout set is either the training sequence itself or absent
+    X_dev = [xseq] if use_dev else None
+    y_dev = [yseq] if use_dev else None
+
+    model = CRF(algorithm, verbose=True)
+    model.fit(
+        X=[xseq, xseq],
+        y=[yseq, yseq],
+        X_dev=X_dev,
+        y_dev=y_dev
+    )
+    predicted = model.predict([xseq])
+    # Averaged Perceptron is regularized too much
+    if algorithm != 'ap':
+        assert predicted == [yseq]
+
+
+@pytest.mark.parametrize("algorithm", ALGORITHMS)
+def test_crf_marginals(xseq, yseq, algorithm):
+    """Marginals come as one dict per position, over all labels, summing to 1."""
+    model = CRF(algorithm).fit([xseq], [yseq])
+
+    all_marginals = model.predict_marginals([xseq])
+    assert len(all_marginals) == 1
+    seq_marginals = all_marginals[0]
+    assert len(seq_marginals) == len(yseq)
+
+    expected_labels = set(model.tagger.labels())
+    for position in seq_marginals:
+        assert isinstance(position, dict)
+        assert set(position.keys()) == expected_labels
+        assert abs(sum(position.values()) - 1.0) < 1e-6
+
+
+@pytest.mark.parametrize("algorithm", ALGORITHMS)
+def test_predict_without_fit(xseq, algorithm):
+    untrained = CRF(algorithm)
+    with pytest.raises(Exception):  # predicting before fit() must fail
+        untrained.predict([xseq])
+
+
+@pytest.mark.parametrize("algorithm", ALGORITHMS)
+def test_crf_score(xseq, yseq, algorithm):
+    """Accuracy on the training sequence is perfect (above 0.8 for 'ap')."""
+    accuracy = CRF(algorithm).fit([xseq], [yseq]).score([xseq], [yseq])
+
+    if algorithm == 'ap':
+        # Averaged Perceptron is regularized too much
+        assert accuracy > 0.8
+    else:
+        assert accuracy == 1.0
+
+
+@pytest.mark.parametrize("algorithm", ALGORITHMS)
+def test_crf_pickling(xseq, yseq, algorithm):
+    """A fitted CRF still scores well after a pickle round trip."""
+    original = CRF(algorithm=algorithm).fit([xseq], [yseq])
+    payload = pickle.dumps(original, protocol=pickle.HIGHEST_PROTOCOL)
+
+    restored = pickle.loads(payload)
+    accuracy = restored.score([xseq], [yseq])
+    if algorithm == 'ap':  # Averaged Perceptron is regularized too much
+        assert accuracy > 0.8
+    else:
+        assert accuracy == 1.0
+    assert restored.algorithm == algorithm
+
+
+def test_crf_model_filename(xseq, yseq, tmpdir):
+    """An explicit ``model_filename`` is written by fit() and reusable later."""
+    model_path = os.path.join(str(tmpdir), "foo.crfsuite")
+    assert not os.path.exists(model_path)
+
+    # model file is created at a specified location
+    trained = CRF(model_filename=model_path).fit([xseq], [yseq])
+    assert os.path.exists(model_path)
+
+    # it is possible to load the model just by passing a file name
+    loaded = CRF(model_filename=model_path)
+    assert loaded.score([xseq], [yseq]) == 1.0
+
+    # crf is picklable
+    payload = pickle.dumps(trained, protocol=pickle.HIGHEST_PROTOCOL)
+    unpickled = pickle.loads(payload)
+    assert unpickled.score([xseq], [yseq]) == 1.0
+
+
+def test_cross_validation(xseq, yseq):
+    """CRF works as an estimator for scikit-learn's cross_val_score."""
+    n_docs = 20
+    X, y = [xseq] * n_docs, [yseq] * n_docs
+    fold_scores = cross_val_score(CRF(), X, y, n_jobs=5, cv=5)
+    assert fold_scores.mean() == 1.0
diff --git a/tox.ini b/tox.ini
new file mode 100644
index 0000000..d7fc4c5
--- /dev/null
+++ b/tox.ini
@@ -0,0 +1,15 @@
+[tox]
+envlist = py27,py33,py34,py35
+
+[testenv]
+deps=
+    pytest
+    pytest-cov
+    numpy
+
+commands=
+    pip install -U wheel pip
+    pip install scipy
+    pip install scikit-learn
+    pip install -e .
+    py.test --doctest-modules --cov=sklearn_crfsuite --cov-report= {posargs: sklearn_crfsuite tests}