diff --git a/pypln/backend/workers/__init__.py b/pypln/backend/workers/__init__.py
index eb8bb99..4ebdd00 100644
--- a/pypln/backend/workers/__init__.py
+++ b/pypln/backend/workers/__init__.py
@@ -23,7 +23,6 @@
from pos import POS
from statistics import Statistics
from bigrams import Bigrams
-from stanford_ner import StanfordNER
from palavras_raw import PalavrasRaw
from lemmatizer_pt import Lemmatizer
from palavras_noun_phrase import NounPhrase
@@ -31,5 +30,5 @@
__all__ = ['Extractor', 'Tokenizer', 'FreqDist', 'POS', 'Statistics',
- 'Bigrams', 'StanfordNER', 'PalavrasRaw', 'Lemmatizer',
- 'NounPhrase', 'SemanticTagger']
+ 'Bigrams', 'PalavrasRaw', 'Lemmatizer', 'NounPhrase',
+ 'SemanticTagger']
diff --git a/pypln/backend/workers/stanford_ner.py b/pypln/backend/workers/stanford_ner.py
deleted file mode 100644
index 563a716..0000000
--- a/pypln/backend/workers/stanford_ner.py
+++ /dev/null
@@ -1,57 +0,0 @@
-# coding: utf-8
-#
-# Copyright 2012 NAMD-EMAP-FGV
-#
-# This file is part of PyPLN. You can get more information at: http://pypln.org/.
-#
-# PyPLN is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# PyPLN is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with PyPLN. If not, see <http://www.gnu.org/licenses/>.
-
-from pypelinin import Worker
-import ner
-
-NER_HOST="localhost"
-NER_PORT=4242
-
-class NERWrapper(ner.SocketNER):
-
- def __slashTags_parse_entities(self, tagged_text):
- """Return a list of token tuples (entity_type, token) parsed
- from slashTags-format tagged text.
-
- :param tagged_text: slashTag-format entity tagged text
- """
- return (match.groups()[::-1] for match in
- ner.client.SLASHTAGS_EPATTERN.finditer(tagged_text))
-
- def get_entities_as_tuples(self, text):
- """
- """
- if self.oformat != 'slashTags':
- raise NotImplementedError("get_entities_as_tuples is not "
- "implemented for output formats other than slashTags")
- tagged_text = self.tag_text(text)
- entities = self.__slashTags_parse_entities(tagged_text)
- return entities
-
-class StanfordNER(Worker):
- requires = ['text']
-
- def process(self, document):
- text = document['text']
- tagger = NERWrapper(host=NER_HOST,
- port=NER_PORT, output_format="slashTags")
-
- entities = list(tagger.get_entities_as_tuples(text.encode('utf-8')))
-
- return {'named_entities': entities}
diff --git a/scripts/download_stanford_ner.sh b/scripts/download_stanford_ner.sh
deleted file mode 100755
index 5d72087..0000000
--- a/scripts/download_stanford_ner.sh
+++ /dev/null
@@ -1,43 +0,0 @@
-#!/bin/bash
-#
-# Copyright 2012 NAMD-EMAP-FGV
-#
-# This file is part of PyPLN. You can get more information at: http://pypln.org/.
-#
-# PyPLN is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# PyPLN is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with PyPLN. If not, see <http://www.gnu.org/licenses/>.
-
-
-SCRIPT_PATH=$(dirname $(readlink -f $0))/
-
-NER_DIRNAME="stanford-ner-2013-06-20"
-
-NER_DIR="$SCRIPT_PATH/$NER_DIRNAME"
-NER_SHA1SUM="1589ac1b477a7894ca98d783d27c5b5b73f51d3d stanford-ner-2013-06-20.zip"
-
-DOWNLOAD_URL="http://nlp.stanford.edu/software/$NER_DIRNAME.zip"
-ANSWER="Y"
-read -p "download Stanford NER? [Y/n] " ANSWER
-if [ "$ANSWER" = "y" -o "$ANSWER" = "Y" ]
-then
- cd "$SCRIPT_PATH"
- wget -c "$DOWNLOAD_URL"
- if [ "$(sha1sum $NER_DIRNAME.zip)" != "$NER_SHA1SUM" ]
- then
- echo "Something is wrong. NER zip file is different from expected."
- exit 1
- fi
- unzip -x $NER_DIRNAME.zip
-else
- exit 1
-fi
diff --git a/scripts/start_development_environment.sh b/scripts/start_development_environment.sh
index 83cd81a..239d6d4 100755
--- a/scripts/start_development_environment.sh
+++ b/scripts/start_development_environment.sh
@@ -28,20 +28,6 @@ echo "| This script is intended for development only. |"
echo "| Please do not use it to run a production environment. |"
echo "+-------------------------------------------------------+"
-echo "Starting Stanford NER..."
-NER_DIRNAME="stanford-ner-2013-06-20"
-NER_DIR="$SCRIPT_PATH/scripts/$NER_DIRNAME"
-if [ -d "$NER_DIR" ]
-then
- "$SCRIPT_PATH/scripts/start_stanford_ner_in_dev_environment.sh" &
- NER_PID=$!
- echo "Stanford NER has PID $NER_PID"
-else
- echo "Can't find Stanford NER."
- echo "Run $SCRIPT_PATH/scripts/download_stanford_ner.sh to download it."
- exit 0
-fi
-
echo "Starting router..."
"$SCRIPT_PATH/pypln/backend/router.py" &
ROUTER_PID=$!
diff --git a/scripts/start_stanford_ner_in_dev_environment.sh b/scripts/start_stanford_ner_in_dev_environment.sh
deleted file mode 100755
index 03a388f..0000000
--- a/scripts/start_stanford_ner_in_dev_environment.sh
+++ /dev/null
@@ -1,31 +0,0 @@
-#!/bin/bash
-#
-# Copyright 2012 NAMD-EMAP-FGV
-#
-# This file is part of PyPLN. You can get more information at: http://pypln.org/.
-#
-# PyPLN is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# PyPLN is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with PyPLN. If not, see <http://www.gnu.org/licenses/>.
-
-
-SCRIPT_PATH=$(dirname $(readlink -f $0))/
-
-NER_DIRNAME="stanford-ner-2013-06-20"
-
-NER_DIR="$SCRIPT_PATH/$NER_DIRNAME"
-
-NER_PORT=4242
-NER_CLASSIFIER="classifiers/english.muc.7class.distsim.crf.ser.gz"
-
-cd "$NER_DIR"
-exec java -mx500m -cp stanford-ner.jar edu.stanford.nlp.ie.NERServer -port $NER_PORT -loadClassifier "$NER_CLASSIFIER"
diff --git a/tests/test_worker_stanford_ner.py b/tests/test_worker_stanford_ner.py
deleted file mode 100644
index f096f64..0000000
--- a/tests/test_worker_stanford_ner.py
+++ /dev/null
@@ -1,63 +0,0 @@
-# coding: utf-8
-#
-# Copyright 2012 NAMD-EMAP-FGV
-#
-# This file is part of PyPLN. You can get more information at: http://pypln.org/.
-#
-# PyPLN is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# PyPLN is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with PyPLN. If not, see <http://www.gnu.org/licenses/>.
-
-import unittest
-from pypln.backend.workers import StanfordNER
-
-
-class TestStanfordNERWorker(unittest.TestCase):
-
- def test_ner_should_return_marked_entities(self):
- text = 'The sky is blue, the sun is yellow.'
-
- # Sample text from https://en.wikipedia.org/wiki/Dijkstra%27s_algorithm
- text = (u"Dijkstra's algorithm, conceived by Dutch computer scientist "
- u"Edsger Dijkstra in 1956 and published in 1959,[1][2] is a graph "
- u"search algorithm that solves the single-source shortest path "
- u"problem for a graph with non-negative edge path costs, producing "
- u"a shortest path tree. This algorithm is often used in routing as "
- u"a subroutine in other graph algorithms, or in GPS Technology. "
- u"I'll add a unicode character here just for completion: Flávio."
- )
- self.maxDiff = None
- expected = [('O', 'Dijkstra'), ('O', "'s"), ('O', 'algorithm'),
- ('O', ','), ('O', 'conceived'), ('O', 'by'), ('O', 'Dutch'),
- ('O', 'computer'), ('O', 'scientist'), ('PERSON', 'Edsger'),
- ('PERSON', 'Dijkstra'), ('O', 'in'), ('DATE', '1956'), ('O', 'and'),
- ('O', 'published'), ('O', 'in'), ('DATE', '1959'), ('O', ','),
- ('O', '-LSB-'), ('O', '1'), ('O', '-RSB-'), ('O', '-LSB-'),
- ('O', '2'), ('O', '-RSB-'), ('O', 'is'), ('O', 'a'), ('O', 'graph'),
- ('O', 'search'), ('O', 'algorithm'), ('O', 'that'), ('O', 'solves'),
- ('O', 'the'), ('O', 'single-source'), ('O', 'shortest'),
- ('O', 'path'), ('O', 'problem'), ('O', 'for'), ('O', 'a'),
- ('O', 'graph'), ('O', 'with'), ('O', 'non-negative'), ('O', 'edge'),
- ('O', 'path'), ('O', 'costs'), ('O', ','), ('O', 'producing'),
- ('O', 'a'), ('O', 'shortest'), ('O', 'path'), ('O', 'tree'),
- ('O', '.'), ('O', 'This'), ('O', 'algorithm'), ('O', 'is'),
- ('O', 'often'), ('O', 'used'), ('O', 'in'), ('O', 'routing'),
- ('O', 'as'), ('O', 'a'), ('O', 'subroutine'), ('O', 'in'),
- ('O', 'other'), ('O', 'graph'), ('O', 'algorithms'), ('O', ','),
- ('O', 'or'), ('O', 'in'), ('O', 'GPS'), ('O', 'Technology'),
- ('O', '.'), ('O', 'I'), ('O', "'ll"), ('O', 'add'), ('O', 'a'),
- ('O', 'unicode'), ('O', 'character'), ('O', 'here'), ('O', 'just'),
- ('O', 'for'), ('O', 'completion'), ('O', ':'),
- ('O', 'Fl\xc3\xa1vio'), ('O', '.')]
-
- result = StanfordNER().process({'text': text})
- self.assertEqual(result, {'named_entities': expected})