diff --git a/.gitignore b/.gitignore
index 79912b364..b624a1e92 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,8 +7,5 @@ javadocs/*
 logs/
 
 # IntelliJ IDEA
-**/.idea/dictionaries/
-**/.idea/dataSources.*
-**/.idea/libraries/
-**/.idea/tasks.xml
-**/.idea/workspace.xml
+**/.idea
+*.iml
diff --git a/.idea/.name b/.idea/.name
deleted file mode 100644
index 3dd7124b0..000000000
--- a/.idea/.name
+++ /dev/null
@@ -1 +0,0 @@
-crawler4j
\ No newline at end of file
diff --git a/pom.xml b/pom.xml
index c41b8aa99..2798ff491 100644
--- a/pom.xml
+++ b/pom.xml
@@ -1,190 +1,188 @@
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
-	<modelVersion>4.0.0</modelVersion>
-	<groupId>edu.uci.ics</groupId>
-	<artifactId>crawler4j</artifactId>
-	<packaging>jar</packaging>
-	<name>crawler4j</name>
-	<version>4.3-SNAPSHOT</version>
-	<description>Open Source Web Crawler for Java</description>
-	<url>https://github.com/yasserg/crawler4j</url>
-	<licenses>
-		<license>
-			<name>The Apache Software License, Version 2.0</name>
-			<url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>
-			<distribution>repo</distribution>
-		</license>
-	</licenses>
-	<scm>
-		<url>https://github.com/yasserg/crawler4j</url>
-		<connection>scm:git:git@github.com:yasserg/crawler4j.git</connection>
-		<developerConnection>scm:git:git@github.com:yasserg/crawler4j.git</developerConnection>
-	</scm>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+    <groupId>edu.uci.ics</groupId>
+    <artifactId>crawler4j</artifactId>
+    <packaging>jar</packaging>
+    <name>crawler4j</name>
+    <version>4.3-SNAPSHOT</version>
+    <description>Open Source Web Crawler for Java</description>
+    <url>https://github.com/yasserg/crawler4j</url>
+    <licenses>
+        <license>
+            <name>The Apache Software License, Version 2.0</name>
+            <url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>
+            <distribution>repo</distribution>
+        </license>
+    </licenses>
+    <scm>
+        <url>https://github.com/yasserg/crawler4j</url>
+        <connection>scm:git:git@github.com:yasserg/crawler4j.git</connection>
+        <developerConnection>scm:git:git@github.com:yasserg/crawler4j.git</developerConnection>
+    </scm>
 
-	<parent>
-		<groupId>org.sonatype.oss</groupId>
-		<artifactId>oss-parent</artifactId>
-		<version>7</version>
-	</parent>
+    <parent>
+        <groupId>org.sonatype.oss</groupId>
+        <artifactId>oss-parent</artifactId>
+        <version>7</version>
+    </parent>
 
-	<build>
-		<plugins>
-			<plugin>
-				<groupId>org.apache.maven.plugins</groupId>
-				<artifactId>maven-compiler-plugin</artifactId>
-				<version>3.2</version>
-				<configuration>
-					<source>1.7</source>
-					<target>1.7</target>
-				</configuration>
-			</plugin>
-			<plugin>
-				<groupId>org.apache.maven.plugins</groupId>
-				<artifactId>maven-jar-plugin</artifactId>
-				<version>2.5</version>
-				<configuration>
-					<excludes>
-						<exclude>**/*.properties</exclude>
-					</excludes>
-				</configuration>
-			</plugin>
-			<!-- generate a source jar -->
-			<plugin>
-				<groupId>org.apache.maven.plugins</groupId>
-				<artifactId>maven-source-plugin</artifactId>
-				<version>2.4</version>
-				<executions>
-					<execution>
-						<id>attach-sources</id>
-						<goals>
-							<goal>jar</goal>
-						</goals>
-					</execution>
-				</executions>
-			</plugin>
-			<!-- generate a javadoc jar -->
-			<plugin>
-				<groupId>org.apache.maven.plugins</groupId>
-				<artifactId>maven-javadoc-plugin</artifactId>
-				<version>2.10.1</version>
-				<executions>
-					<execution>
-						<id>attach-javadocs</id>
-						<goals>
-							<goal>jar</goal>
-						</goals>
-					</execution>
-				</executions>
-			</plugin>
-			<plugin>
-				<artifactId>maven-assembly-plugin</artifactId>
-				<version>2.5.3</version>
-				<configuration>
-				  <descriptorRefs>
-					<descriptorRef>jar-with-dependencies</descriptorRef>
-				  </descriptorRefs>
-				</configuration>
-				<executions>
-				  <execution>
-					<id>make-fat-jar</id>
-					<phase>package</phase>
-					<goals>
-					  <goal>single</goal>
-					</goals>
-					<configuration>
-					  <finalName>crawler4j-${project.version}</finalName>
-					</configuration>
-				  </execution>
-				</executions>
-			</plugin>
-			<plugin>
-				<groupId>org.apache.maven.plugins</groupId>
-				<artifactId>maven-checkstyle-plugin</artifactId>
-				<version>2.17</version>
-				<executions>
-					<execution>
-						<id>compile</id>
-						<phase>compile</phase>
-						<configuration>
-							<configLocation>checkstyle.xml</configLocation>
-							<encoding>UTF-8</encoding>
-							<consoleOutput>true</consoleOutput>
-							<failsOnError>true</failsOnError>
-							<includeTestSourceDirectory>true</includeTestSourceDirectory>
-						</configuration>
-						<goals>
-							<goal>check</goal>
-						</goals>
-					</execution>
-				</executions>
-				<dependencies>
-					<dependency>
-						<groupId>com.puppycrawl.tools</groupId>
-						<artifactId>checkstyle</artifactId>
-						<version>7.1</version>
-					</dependency>
-				</dependencies>
-			</plugin>
-		</plugins>
-	</build>
+    <build>
+        <plugins>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-compiler-plugin</artifactId>
+                <version>3.2</version>
+                <configuration>
+                    <source>1.7</source>
+                    <target>1.7</target>
+                </configuration>
+            </plugin>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-jar-plugin</artifactId>
+                <version>2.5</version>
+                <configuration>
+                    <excludes>
+                        <exclude>**/*.properties</exclude>
+                    </excludes>
+                </configuration>
+            </plugin>
+            <!-- generate a source jar -->
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-source-plugin</artifactId>
+                <version>2.4</version>
+                <executions>
+                    <execution>
+                        <id>attach-sources</id>
+                        <goals>
+                            <goal>jar</goal>
+                        </goals>
+                    </execution>
+                </executions>
+            </plugin>
+            <!-- generate a javadoc jar -->
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-javadoc-plugin</artifactId>
+                <version>2.10.1</version>
+                <executions>
+                    <execution>
+                        <id>attach-javadocs</id>
+                        <goals>
+                            <goal>jar</goal>
+                        </goals>
+                    </execution>
+                </executions>
+            </plugin>
+            <plugin>
+                <artifactId>maven-assembly-plugin</artifactId>
+                <version>2.5.3</version>
+                <configuration>
+                    <descriptorRefs>
+                        <descriptorRef>jar-with-dependencies</descriptorRef>
+                    </descriptorRefs>
+                </configuration>
+                <executions>
+                    <execution>
+                        <id>make-fat-jar</id>
+                        <phase>package</phase>
+                        <goals>
+                            <goal>single</goal>
+                        </goals>
+                        <configuration>
+                            <finalName>crawler4j-${project.version}</finalName>
+                        </configuration>
+                    </execution>
+                </executions>
+            </plugin>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-checkstyle-plugin</artifactId>
+                <version>2.17</version>
+                <executions>
+                    <execution>
+                        <id>compile</id>
+                        <phase>compile</phase>
+                        <configuration>
+                            <configLocation>checkstyle.xml</configLocation>
+                            <encoding>UTF-8</encoding>
+                            <consoleOutput>true</consoleOutput>
+                            <failsOnError>true</failsOnError>
+                            <includeTestSourceDirectory>true</includeTestSourceDirectory>
+                        </configuration>
+                        <goals>
+                            <goal>check</goal>
+                        </goals>
+                    </execution>
+                </executions>
+                <dependencies>
+                    <dependency>
+                        <groupId>com.puppycrawl.tools</groupId>
+                        <artifactId>checkstyle</artifactId>
+                        <version>7.1</version>
+                    </dependency>
+                </dependencies>
+            </plugin>
+        </plugins>
+    </build>
 
-	<dependencies>
+    <dependencies>
+        <dependency>
+            <!-- Logging framework -->
+            <groupId>org.slf4j</groupId>
+            <artifactId>slf4j-api</artifactId>
+            <version>1.7.21</version>
+        </dependency>
+        <dependency>
+            <!-- Implementation of slf4j -->
+            <groupId>ch.qos.logback</groupId>
+            <artifactId>logback-classic</artifactId>
+            <version>1.1.7</version>
+            <scope>runtime</scope>
+        </dependency>
+        <dependency>
+            <!-- Google's core Java libraries -->
+            <groupId>com.google.guava</groupId>
+            <artifactId>guava</artifactId>
+            <version>19.0</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.httpcomponents</groupId>
+            <artifactId>httpclient</artifactId>
+            <version>4.4</version>
+            <scope>compile</scope>
+        </dependency>
 
-    <!-- Compile time Dependencies -->
+        <dependency>
+            <groupId>com.sleepycat</groupId>
+            <artifactId>je</artifactId>
+            <version>5.0.73</version>
+        </dependency>
 
-    <dependency>
-        <!-- Logging framework -->
-        <groupId>org.slf4j</groupId>
-        <artifactId>slf4j-api</artifactId>
-        <version>1.7.21</version>
-    </dependency>
-    <dependency>
-        <!-- Implementation of slf4j -->
-        <groupId>ch.qos.logback</groupId>
-        <artifactId>logback-classic</artifactId>
-        <version>1.1.7</version>
-        <scope>runtime</scope>
-    </dependency>
-    <dependency>
-        <!-- Google's core Java libraries -->
-        <groupId>com.google.guava</groupId>
-        <artifactId>guava</artifactId>
-        <version>19.0</version>
-    </dependency>
-	<dependency>
-		<groupId>org.apache.httpcomponents</groupId>
-		<artifactId>httpclient</artifactId>
-		<version>4.4</version>
-		<scope>compile</scope>
-	</dependency>
+        <dependency>
+            <groupId>org.apache.tika</groupId>
+            <artifactId>tika-parsers</artifactId>
+            <version>1.5</version>
+        </dependency>
 
-	<dependency>
-		<groupId>com.sleepycat</groupId>
-		<artifactId>je</artifactId>
-		<version>5.0.73</version>
-	</dependency>
+        <!-- Test Dependencies -->
+        <dependency>
+            <groupId>junit</groupId>
+            <artifactId>junit</artifactId>
+            <version>4.11</version>
+            <scope>test</scope>
+        </dependency>
+    </dependencies>
 
-	<dependency>
-		<groupId>org.apache.tika</groupId>
-		<artifactId>tika-parsers</artifactId>
-		<version>1.5</version>
-	</dependency>
-
-    <!-- Test Dependencies -->
-		<dependency>
-			<groupId>junit</groupId>
-			<artifactId>junit</artifactId>
-			<version>4.11</version>
-			<scope>test</scope>
-		</dependency>
-	</dependencies>
-
-	<repositories>
-		<repository>
-			<id>oracleReleases</id>
-			<name>Oracle Released Java Packages</name>
-			<url>http://download.oracle.com/maven</url>
-			<layout>default</layout>
-		</repository>
-	</repositories>
+    <repositories>
+        <repository>
+            <id>oracleReleases</id>
+            <name>Oracle Released Java Packages</name>
+            <url>http://download.oracle.com/maven</url>
+            <layout>default</layout>
+        </repository>
+    </repositories>
 
 </project>
diff --git a/src/main/java/edu/uci/ics/crawler4j/crawler/CrawlConfig.java b/src/main/java/edu/uci/ics/crawler4j/crawler/CrawlConfig.java
index 27ab1d3e4..cf2e68274 100644
--- a/src/main/java/edu/uci/ics/crawler4j/crawler/CrawlConfig.java
+++ b/src/main/java/edu/uci/ics/crawler4j/crawler/CrawlConfig.java
@@ -164,6 +164,14 @@ public class CrawlConfig {
      */
     private List<AuthInfo> authInfos;
 
+    /**
+     * Name of a custom HTML tag used to filter out certain parts of the html;
+     * links in between the filter tags will still be followed.
+     * E.g. with tag name "crawlerfilter", all the text between those tags will not be
+     * included in the extracted text.
+     */
+    private String htmlFilterTag = null;
+
     /**
      * Validates the configs specified by this instance.
      *
@@ -502,6 +510,26 @@ public void setAuthInfos(List<AuthInfo> authInfos) {
         this.authInfos = authInfos;
     }
 
+    /**
+     * Returns the name of the HTML tag whose enclosed text is kept out of the page text.
+     * @return the HTML filter tag, or null if none has been configured
+     */
+    public String getHtmlFilterTag() {
+        return this.htmlFilterTag;
+    }
+
+    /**
+     * Sets the name of a custom HTML tag whose enclosed text is left out of the page text;
+     * links in between the filter tags are still followed.
+     * The name is stored lower-cased, independent of the default locale.
+     *
+     * @param htmlFilterTag the html filter tag name, eg "crawlerfilter"; null disables filtering
+     */
+    public void setHtmlFilterTag(String htmlFilterTag) {
+        this.htmlFilterTag =
+            (htmlFilterTag == null) ? null : htmlFilterTag.toLowerCase(java.util.Locale.ROOT);
+    }
+
     @Override
     public String toString() {
         StringBuilder sb = new StringBuilder();
@@ -523,6 +551,7 @@ public String toString() {
         sb.append("Proxy port: " + getProxyPort() + "\n");
         sb.append("Proxy username: " + getProxyUsername() + "\n");
         sb.append("Proxy password: " + getProxyPassword() + "\n");
+        sb.append("HTML filter tags: " + getHtmlFilterTag() + "\n");
         return sb.toString();
     }
 }
diff --git a/src/main/java/edu/uci/ics/crawler4j/parser/HTMLSchema.java b/src/main/java/edu/uci/ics/crawler4j/parser/HTMLSchema.java
new file mode 100644
index 000000000..c85a2c392
--- /dev/null
+++ b/src/main/java/edu/uci/ics/crawler4j/parser/HTMLSchema.java
@@ -0,0 +1,12 @@
+package edu.uci.ics.crawler4j.parser;
+
+/** TagSoup HTML schema that also registers a non-empty filter tag as an element under body. */
+public class HTMLSchema extends org.ccil.cowan.tagsoup.HTMLSchema {
+    public HTMLSchema(String htmlFilterTag) {
+        if (htmlFilterTag == null || htmlFilterTag.isEmpty()) {
+            return; // nothing to add: keep the stock TagSoup schema
+        }
+        elementType(htmlFilterTag, M_PCDATA | M_INLINE | M_BLOCK, M_BLOCK, 0);
+        parent(htmlFilterTag, "body");
+    }
+}
diff --git a/src/main/java/edu/uci/ics/crawler4j/parser/HtmlContentHandler.java b/src/main/java/edu/uci/ics/crawler4j/parser/HtmlContentHandler.java
index 54cb5d74d..3e1726e10 100644
--- a/src/main/java/edu/uci/ics/crawler4j/parser/HtmlContentHandler.java
+++ b/src/main/java/edu/uci/ics/crawler4j/parser/HtmlContentHandler.java
@@ -22,12 +22,16 @@
 import java.util.List;
 import java.util.Map;
 
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 import org.xml.sax.Attributes;
 import org.xml.sax.SAXException;
 import org.xml.sax.helpers.DefaultHandler;
 
 public class HtmlContentHandler extends DefaultHandler {
 
+    protected static final Logger logger = LoggerFactory.getLogger(HtmlContentHandler.class);
+
     private static final int MAX_ANCHOR_LENGTH = 100;
 
     private enum Element {
@@ -71,11 +75,14 @@ public static Element getElement(String name) {
     private ExtractedUrlAnchorPair curUrl = null;
     private boolean anchorFlag = false;
     private final StringBuilder anchorText = new StringBuilder();
+    private String htmlFilterTag = null;
+    private boolean isWithinFilteredHtml = false;
 
-    public HtmlContentHandler() {
+    public HtmlContentHandler(String htmlFilterTag) {
         isWithinBodyElement = false;
         bodyText = new StringBuilder();
         outgoingUrls = new ArrayList<>();
+        this.htmlFilterTag = htmlFilterTag;
     }
 
     @Override
@@ -83,6 +90,10 @@ public void startElement(String uri, String localName, String qName, Attributes
         throws SAXException {
         Element element = HtmlFactory.getElement(localName);
 
+        if (htmlFilterTag != null && localName.equals(htmlFilterTag)) {
+            isWithinFilteredHtml = true;
+        }
+
         if ((element == Element.A) || (element == Element.AREA) || (element == Element.LINK)) {
             String href = attributes.getValue("href");
             if (href != null) {
@@ -151,6 +162,10 @@ private void addToOutgoingUrls(String href, String tag) {
     @Override
     public void endElement(String uri, String localName, String qName) throws SAXException {
         Element element = HtmlFactory.getElement(localName);
+        if (htmlFilterTag != null && localName.equals(htmlFilterTag)) {
+            isWithinFilteredHtml = false;
+        }
+
         if ((element == Element.A) || (element == Element.AREA) || (element == Element.LINK)) {
             anchorFlag = false;
             if (curUrl != null) {
@@ -173,7 +188,7 @@ public void endElement(String uri, String localName, String qName) throws SAXExc
 
     @Override
     public void characters(char[] ch, int start, int length) throws SAXException {
-        if (isWithinBodyElement) {
+        if (isWithinBodyElement && !isWithinFilteredHtml) {
             if (bodyText.length() > 0) {
                 bodyText.append(' ');
             }
diff --git a/src/main/java/edu/uci/ics/crawler4j/parser/HtmlContentMapper.java b/src/main/java/edu/uci/ics/crawler4j/parser/HtmlContentMapper.java
new file mode 100644
index 000000000..40a4d009e
--- /dev/null
+++ b/src/main/java/edu/uci/ics/crawler4j/parser/HtmlContentMapper.java
@@ -0,0 +1,25 @@
+package edu.uci.ics.crawler4j.parser;
+
+import java.util.LinkedHashMap;
+import java.util.Map;
+
+import org.apache.tika.parser.html.DefaultHtmlMapper;
+
+/** Tika HTML mapper that additionally treats the configured filter tag as a safe element. */
+public class HtmlContentMapper extends DefaultHtmlMapper {
+    // Per instance, not static: a shared map would keep every tag ever passed to any mapper.
+    private final Map<String, String> customSafeElements = new LinkedHashMap<>();
+
+    public HtmlContentMapper(String htmlFilterTag) {
+        if (htmlFilterTag != null && !htmlFilterTag.isEmpty()) {
+            customSafeElements.put(htmlFilterTag.toUpperCase(), htmlFilterTag.toLowerCase());
+        }
+    }
+
+    /** Uses the default mapping; otherwise looks {@code name} up among the custom filter tags. */
+    @Override
+    public String mapSafeElement(String name) {
+        String mapped = super.mapSafeElement(name);
+        return (mapped != null) ? mapped : customSafeElements.get(name);
+    }
+}
diff --git a/src/main/java/edu/uci/ics/crawler4j/parser/Parser.java b/src/main/java/edu/uci/ics/crawler4j/parser/Parser.java
index fbc065bde..8a9842656 100644
--- a/src/main/java/edu/uci/ics/crawler4j/parser/Parser.java
+++ b/src/main/java/edu/uci/ics/crawler4j/parser/Parser.java
@@ -27,7 +27,9 @@
 import org.apache.tika.metadata.DublinCore;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.html.HtmlMapper;
 import org.apache.tika.parser.html.HtmlParser;
+import org.ccil.cowan.tagsoup.Schema;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -91,8 +93,11 @@ public void parse(Page page, String contextURL)
             }
         } else { // isHTML
             Metadata metadata = new Metadata();
-            HtmlContentHandler contentHandler = new HtmlContentHandler();
+            HtmlContentHandler contentHandler = new HtmlContentHandler(config.getHtmlFilterTag());
             try (InputStream inputStream = new ByteArrayInputStream(page.getContentData())) {
+                parseContext.set(Schema.class, new HTMLSchema(config.getHtmlFilterTag()));
+                parseContext.set(HtmlMapper.class,
+                                 new HtmlContentMapper(config.getHtmlFilterTag()));
                 htmlParser.parse(inputStream, contentHandler, metadata, parseContext);
             } catch (Exception e) {
                 logger.error("{}, while parsing: {}", e.getMessage(), page.getWebURL().getURL());
diff --git a/src/test/java/edu/uci/ics/crawler4j/examples/basic/BasicCrawlController.java b/src/test/java/edu/uci/ics/crawler4j/examples/basic/BasicCrawlController.java
index 7ffb34747..d3ecffca1 100644
--- a/src/test/java/edu/uci/ics/crawler4j/examples/basic/BasicCrawlController.java
+++ b/src/test/java/edu/uci/ics/crawler4j/examples/basic/BasicCrawlController.java
@@ -98,6 +98,17 @@ public static void main(String[] args) throws Exception {
      */
         config.setResumableCrawling(false);
 
+    /*
+     * This config parameter is used to filter out certain parts of the text.
+     * When text / links are between the <crawlerfilter>...</crawlerfilter> tags,
+     * the text will not be added to the extracted text.
+     * This will not affect the list of links; they will still be processed.
+     * Use case: exclude the content of the navigation or footer but still
+     * follow the links it contains.
+     * This will not work on the given seeds but is meant to give an idea for future projects.
+     */
+        config.setHtmlFilterTag("crawlerfilter");
+
     /*
      * Instantiate the controller for this crawl.
      */
diff --git a/src/test/java/edu/uci/ics/crawler4j/examples/basic/BasicCrawler.java b/src/test/java/edu/uci/ics/crawler4j/examples/basic/BasicCrawler.java
index a49c1f353..eb7e34e20 100644
--- a/src/test/java/edu/uci/ics/crawler4j/examples/basic/BasicCrawler.java
+++ b/src/test/java/edu/uci/ics/crawler4j/examples/basic/BasicCrawler.java
@@ -79,8 +79,13 @@ public void visit(Page page) {
             Set<WebURL> links = htmlParseData.getOutgoingUrls();
 
             logger.debug("Text length: {}", text.length());
+            logger.debug("Text : {}", text);
             logger.debug("Html length: {}", html.length());
+            logger.debug("Html : {}", html);
             logger.debug("Number of outgoing links: {}", links.size());
+            for (WebURL link : links) {
+                logger.debug("Link : {}", link.getURL());
+            }
         }
 
         Header[] responseHeaders = page.getFetchResponseHeaders();
diff --git a/src/test/java/edu/uci/ics/crawler4j/tests/HtmlContentHandlerTest.java b/src/test/java/edu/uci/ics/crawler4j/tests/HtmlContentHandlerTest.java
index 3c2a41827..34b6bd6a8 100644
--- a/src/test/java/edu/uci/ics/crawler4j/tests/HtmlContentHandlerTest.java
+++ b/src/test/java/edu/uci/ics/crawler4j/tests/HtmlContentHandlerTest.java
@@ -6,39 +6,46 @@
 
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.html.HtmlMapper;
 import org.apache.tika.parser.html.HtmlParser;
+import org.ccil.cowan.tagsoup.Schema;
 import org.junit.Test;
 
+import edu.uci.ics.crawler4j.parser.HTMLSchema;
 import edu.uci.ics.crawler4j.parser.HtmlContentHandler;
+import edu.uci.ics.crawler4j.parser.HtmlContentMapper;
 
 public class HtmlContentHandlerTest {
 
     private HtmlParser parser = new HtmlParser();
     private ParseContext parseContext = new ParseContext();
 
-    private HtmlContentHandler parseHtml(String html) throws Exception {
+    private HtmlContentHandler parseHtml(String html, String htmlFilterTag) throws Exception {
         ByteArrayInputStream bais = new ByteArrayInputStream(html.getBytes());
         Metadata metadata = new Metadata();
-        HtmlContentHandler contentHandler = new HtmlContentHandler();
+        parseContext.set(Schema.class, new HTMLSchema(htmlFilterTag));
+        parseContext.set(HtmlMapper.class, new HtmlContentMapper(htmlFilterTag));
+        HtmlContentHandler contentHandler = new HtmlContentHandler(htmlFilterTag);
         parser.parse(bais, contentHandler, metadata, parseContext);
         return contentHandler;
     }
 
     @Test
     public void testEmpty() throws Exception {
-        HtmlContentHandler parse = parseHtml("<html></html>");
+        HtmlContentHandler parse = parseHtml("<html></html>", null);
         assertEquals("", parse.getBodyText());
     }
 
     @Test
     public void testParaInBody() throws Exception {
-        HtmlContentHandler parse = parseHtml("<html><body><p>Hello there</p></html>");
+        HtmlContentHandler parse = parseHtml("<html><body><p>Hello there</p></html>", null);
         assertEquals("Hello there", parse.getBodyText());
     }
 
     @Test
     public void test2ParaInBody() throws Exception {
-        HtmlContentHandler parse = parseHtml("<html><body><p>Hello there</p><p>mr</p></html>");
+        HtmlContentHandler parse =
+            parseHtml("<html><body><p>Hello there</p><p>mr</p></html>", null);
         assertEquals("Hello there mr", parse.getBodyText());
     }
 
@@ -46,8 +53,16 @@ public void test2ParaInBody() throws Exception {
     public void testTableInBody() throws Exception {
         HtmlContentHandler parse = parseHtml(
             "<html><body><table><tr><th>Hello</th><th>there</th></tr>" +
-            "<tr><td>mr</td><td>bear</td></tr></html>");
+            "<tr><td>mr</td><td>bear</td></tr></html>", null);
         assertEquals("Hello there mr bear", parse.getBodyText());
     }
 
+    @Test
+    public void testFilterHtmlTagInBody() throws Exception {
+        // Text inside the configured filter tag must not reach the body text.
+        String html =
+            "<html><body><p>Hello there</p><crawlfilter>should not be in </crawlfilter></html>";
+        assertEquals("Hello there", parseHtml(html, "crawlfilter").getBodyText());
+    }
+
 }
diff --git a/src/test/resources/log4j.xml b/src/test/resources/log4j.xml
new file mode 100755
index 000000000..a4ece93a4
--- /dev/null
+++ b/src/test/resources/log4j.xml
@@ -0,0 +1,17 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!DOCTYPE log4j:configuration SYSTEM "log4j.dtd">
+<log4j:configuration debug="true"
+                     xmlns:log4j='http://jakarta.apache.org/log4j/'>
+
+    <appender name="console" class="org.apache.log4j.ConsoleAppender">
+        <layout class="org.apache.log4j.PatternLayout">
+            <param name="ConversionPattern" value="%d{yyyy-MM-dd HH:mm:ss} %-5p %c{1}:%L - %m%n"/>
+        </layout>
+    </appender>
+
+    <root>
+        <level value="DEBUG"/>
+        <appender-ref ref="console"/>
+    </root>
+
+</log4j:configuration>
\ No newline at end of file