-
Notifications
You must be signed in to change notification settings - Fork 25
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Reorganised tests for different models
- Loading branch information
Showing
4 changed files
with
249 additions
and
240 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
defmodule SimpleBayes.BernoulliTest do
  # Exercises SimpleBayes when initialised with `model: :bernoulli`.
  use ExUnit.Case, async: true

  describe "Bernoulli" do
    # http://nlp.stanford.edu/IR-book/html/htmledition/naive-bayes-text-classification-1.html
    test "China yes or no" do
      result =
        SimpleBayes.init(model: :bernoulli)
        |> SimpleBayes.train(:yes, "Chinese Beijing Chinese")
        |> SimpleBayes.train(:yes, "Chinese Chinese Shanghai")
        |> SimpleBayes.train(:yes, "Chinese Macao")
        |> SimpleBayes.train(:no, "Tokyo Japan Chinese")
        |> SimpleBayes.classify("Chinese Chinese Chinese Tokyo Japan")

      # Floats are compared with a tight tolerance rather than `==`, so the
      # test does not depend on the exact order of the multiplications.
      assert_in_delta result[:yes], 324 / 62500, 1.0e-12
      assert_in_delta result[:no], 64 / 2916, 1.0e-12
    end

    test "binary word counting" do
      result =
        SimpleBayes.init(model: :bernoulli)
        |> SimpleBayes.train(:apple, "red red green fruit")
        |> SimpleBayes.train(:banana, "yellow green fruit")
        |> SimpleBayes.train(:orange, "orange yellow fruit")
        |> SimpleBayes.classify("red yellow fruit")

      assert result[:apple] == result[:banana]
      assert result[:apple] == result[:orange]
    end

    test "stop words" do
      result =
        SimpleBayes.init(model: :bernoulli)
        |> SimpleBayes.train(:apple, "it is so red")
        |> SimpleBayes.train(:banana, "it is a bit yellow")
        |> SimpleBayes.classify_one("it is so much yellow")

      assert result == :banana
    end

    test "only stop words" do
      result =
        SimpleBayes.init(model: :bernoulli)
        |> SimpleBayes.train(:apple, "is so much")
        |> SimpleBayes.train(:banana, "it so much")
        |> SimpleBayes.classify("it is so much yellow")

      assert result[:apple] == result[:banana]
    end

    test "smoothing - should be ignored" do
      result =
        SimpleBayes.init(model: :bernoulli, smoothing: 1)
        |> SimpleBayes.train("apple", "red")
        |> SimpleBayes.train("banana", "yellow")
        |> SimpleBayes.train("orange", "orange")
        |> SimpleBayes.classify("red")
        |> categories()

      assert result == ["apple", "orange", "banana"]
    end

    test "ordering" do
      result =
        SimpleBayes.init(model: :bernoulli)
        |> SimpleBayes.train(:apple, "red green orange")
        |> SimpleBayes.train(:banana, "red green")
        |> SimpleBayes.train(:orange, "red orange")
        |> SimpleBayes.classify("red green orange")
        |> categories()

      assert result == [:apple, :orange, :banana]
    end

    test "keywords weighting - should be ignored" do
      result =
        SimpleBayes.init(model: :bernoulli)
        |> SimpleBayes.train(:apple, "red", weight: 100)
        |> SimpleBayes.train(:banana, "red", weight: 0.01)
        |> SimpleBayes.train(:orange, "red", weight: 10)
        |> SimpleBayes.classify("red")
        |> categories()

      # Only rules out the order the weights would produce (100 > 10 > 0.01);
      # the actual order is deliberately not pinned here.
      assert result != [:apple, :orange, :banana]
    end

    test "IDF (Inverse Document Frequency) - should be ignored" do
      result =
        SimpleBayes.init(model: :bernoulli)
        |> SimpleBayes.train(:apple, "red red fruit")
        |> SimpleBayes.train(:banana, "yellow yellow fruit")
        |> SimpleBayes.train(:orange, "orange yellow fruit")
        |> SimpleBayes.classify("red yellow fruit")

      assert result[:apple] == result[:banana]
    end

    test "stemming" do
      result =
        SimpleBayes.init(model: :bernoulli, stem: true)
        |> SimpleBayes.train(:apple, "buying apple")
        |> SimpleBayes.train(:banana, "buy banana")
        |> SimpleBayes.classify("buy apple")

      assert result[:apple] > result[:banana]
    end
  end

  # Extracts the categories from a classification result, preserving the order
  # in which `SimpleBayes.classify/2` returned them.
  #
  # Categories may be strings (see the smoothing test). `Keyword.keys/1` raises
  # ArgumentError for non-atom keys on newer Elixir releases, so the
  # `{category, value}` tuples are unpacked directly instead.
  defp categories(result) do
    Enum.map(result, fn {category, _value} -> category end)
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
defmodule SimpleBayes.BinarizedMultinomialTest do
  # Exercises SimpleBayes when initialised with `model: :binarized_multinomial`.
  use ExUnit.Case, async: true

  describe "Binarized multinomial" do
    test "binary word counting" do
      result =
        SimpleBayes.init(model: :binarized_multinomial)
        |> SimpleBayes.train(:apple, "red red green fruit")
        |> SimpleBayes.train(:banana, "yellow green fruit")
        |> SimpleBayes.train(:orange, "orange yellow fruit")
        |> SimpleBayes.classify("red yellow fruit")

      assert result[:apple] == result[:banana]
      assert result[:apple] == result[:orange]
    end

    test "stop words" do
      result =
        SimpleBayes.init(model: :binarized_multinomial)
        |> SimpleBayes.train(:apple, "it is so red")
        |> SimpleBayes.train(:banana, "it is a bit yellow")
        |> SimpleBayes.classify_one("it is so much yellow")

      assert result == :banana
    end

    test "only stop words" do
      result =
        SimpleBayes.init(model: :binarized_multinomial)
        |> SimpleBayes.train(:apple, "is so much")
        |> SimpleBayes.train(:banana, "it so much")
        |> SimpleBayes.classify("it is so much yellow")

      assert result[:apple] == result[:banana]
    end

    test "smoothing - should be ignored" do
      result =
        SimpleBayes.init(model: :binarized_multinomial, smoothing: 1)
        |> SimpleBayes.train("apple", "red")
        |> SimpleBayes.train("banana", "yellow")
        |> SimpleBayes.train("orange", "orange")
        |> SimpleBayes.classify("red")
        |> categories()

      assert result == ["apple", "orange", "banana"]
    end

    test "ordering" do
      result =
        SimpleBayes.init(model: :binarized_multinomial)
        |> SimpleBayes.train(:apple, "red green orange")
        |> SimpleBayes.train(:banana, "red green")
        |> SimpleBayes.train(:orange, "red orange")
        |> SimpleBayes.classify("red green orange")
        |> categories()

      assert result == [:apple, :orange, :banana]
    end

    test "keywords weighting - should be ignored" do
      result =
        SimpleBayes.init(model: :binarized_multinomial)
        |> SimpleBayes.train(:apple, "red", weight: 100)
        |> SimpleBayes.train(:banana, "red", weight: 0.01)
        |> SimpleBayes.train(:orange, "red", weight: 10)
        |> SimpleBayes.classify("red")
        |> categories()

      # Only rules out the order the weights would produce (100 > 10 > 0.01);
      # the actual order is deliberately not pinned here.
      assert result != [:apple, :orange, :banana]
    end

    test "IDF (Inverse Document Frequency) - should be ignored" do
      result =
        SimpleBayes.init(model: :binarized_multinomial)
        |> SimpleBayes.train(:apple, "red red fruit")
        |> SimpleBayes.train(:banana, "yellow yellow fruit")
        |> SimpleBayes.train(:orange, "orange yellow fruit")
        |> SimpleBayes.classify("red yellow fruit")

      assert result[:apple] == result[:banana]
    end

    test "stemming" do
      result =
        SimpleBayes.init(model: :binarized_multinomial, stem: true)
        |> SimpleBayes.train(:apple, "buying apple")
        |> SimpleBayes.train(:banana, "buy banana")
        |> SimpleBayes.classify("buy apple")

      assert result[:apple] > result[:banana]
    end
  end

  # Extracts the categories from a classification result, preserving the order
  # in which `SimpleBayes.classify/2` returned them.
  #
  # Categories may be strings (see the smoothing test). `Keyword.keys/1` raises
  # ArgumentError for non-atom keys on newer Elixir releases, so the
  # `{category, value}` tuples are unpacked directly instead.
  defp categories(result) do
    Enum.map(result, fn {category, _value} -> category end)
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
defmodule SimpleBayes.MultinomialTest do
  # Exercises SimpleBayes initialised without a `:model` option.
  use ExUnit.Case, async: true

  describe "multinomial" do
    test "stop words" do
      result =
        SimpleBayes.init()
        |> SimpleBayes.train(:apple, "it is so red")
        |> SimpleBayes.train(:banana, "it is a bit yellow")
        |> SimpleBayes.classify_one("it is so much yellow")

      assert result == :banana
    end

    test "only stop words" do
      result =
        SimpleBayes.init()
        |> SimpleBayes.train(:apple, "is so much")
        |> SimpleBayes.train(:banana, "it so much")
        |> SimpleBayes.classify("it is so much yellow")

      assert result[:apple] == result[:banana]
    end

    test "smoothing" do
      result =
        SimpleBayes.init(smoothing: 1)
        |> SimpleBayes.train("apple", "red")
        |> SimpleBayes.train("banana", "yellow")
        |> SimpleBayes.train("orange", "orange")
        |> SimpleBayes.classify("red")
        |> categories()

      assert result == ["orange", "banana", "apple"]
    end

    # Ordering here is driven by the per-document `:weight` option.
    test "ordering" do
      result =
        SimpleBayes.init()
        |> SimpleBayes.train(:apple, "red", weight: 100)
        |> SimpleBayes.train(:banana, "red", weight: 0.01)
        |> SimpleBayes.train(:orange, "red", weight: 10)
        |> SimpleBayes.classify("red")
        |> categories()

      assert result == [:apple, :orange, :banana]
    end

    test "IDF (Inverse Document Frequency)" do
      result =
        SimpleBayes.init()
        |> SimpleBayes.train(:apple, "red red fruit")
        |> SimpleBayes.train(:banana, "yellow yellow fruit")
        |> SimpleBayes.train(:orange, "orange yellow fruit")
        |> SimpleBayes.classify("red yellow fruit")

      assert result[:apple] > result[:banana]
    end

    test "stemming" do
      result =
        SimpleBayes.init(stem: true)
        |> SimpleBayes.train(:apple, "buying apple")
        |> SimpleBayes.train(:banana, "buy banana")
        |> SimpleBayes.classify("buy apple")

      assert result[:apple] > result[:banana]
    end
  end

  # Extracts the categories from a classification result, preserving the order
  # in which `SimpleBayes.classify/2` returned them.
  #
  # Categories may be strings (see the smoothing test). `Keyword.keys/1` raises
  # ArgumentError for non-atom keys on newer Elixir releases, so the
  # `{category, value}` tuples are unpacked directly instead.
  defp categories(result) do
    Enum.map(result, fn {category, _value} -> category end)
  end
end
Oops, something went wrong.