Skip to content

Commit

Permalink
Implemented file system storage
Browse files Browse the repository at this point in the history
  • Loading branch information
fredwu committed Jul 25, 2016
1 parent 5a01aba commit 0f58444
Show file tree
Hide file tree
Showing 13 changed files with 210 additions and 24 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,5 @@ erl_crash.dump

# Also ignore archive artifacts (built via "mix archive.build").
*.ez

/test/temp/*.txt
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
# Simple Bayes Changelog

## master

- Introduced the storage mechanism (defaults to `:memory`)
- Implemented file system storage

## v0.7.1 [2016-07-23]

- Adjusted the `:smoothing` option
Expand Down
35 changes: 34 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ For application wide configuration, in your application's `config/config.exs`:

```elixir
config :simple_bayes, model: :multinomial
config :simple_bayes, storage: :memory
config :simple_bayes, default_weight: 1
config :simple_bayes, smoothing: 0
config :simple_bayes, stem: false
Expand All @@ -109,19 +110,51 @@ Alternatively, you may pass in the configuration options when you initialise:
```elixir
SimpleBayes.init(
model: :multinomial,
storage: :memory,
default_weight: 1,
smoothing: 0,
stem: false,
stop_words: []
)
```

Available options for `:model` are:
#### Available options for `:model` are:

- `:multinomial` (default)
- `:binarized_multinomial`
- `:bernoulli`

#### Available options for `:storage` are:

- `:memory` (default)
- `:file_system`

Some storage options have extra configurations available via `:storage_config`:

- `:memory`
- `:namespace` - optional, it's only useful when you want to `load` by the namespace
- `:file_system`
- `:file_path`

Please use `:storage_config` when setting application-wide configuration; otherwise you may pass the various configuration options directly. See below for some examples.

```elixir
config :simple_bayes, storage: :file_system
config :simple_bayes, storage_config: [file_path: "path/to/the/file.txt"]

SimpleBayes.init(
storage: :file_system,
storage_config: [
file_path: "path/to/the/file.txt"
]
)

SimpleBayes.init(
storage: :file_system,
file_path: "path/to/the/file.txt"
)
```

## Changelog

Please see [CHANGELOG.md](CHANGELOG.md).
Expand Down
6 changes: 6 additions & 0 deletions lib/simple_bayes.ex
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,12 @@ defmodule SimpleBayes do
@storages[opts[:storage]].init(struct, opts)
end

# Persists the classifier held by the agent `pid`.
#
# Reads the agent's current state, looks up the storage backend under the
# state's `:storage` option in `@storages`, and hands both the pid and the
# state to that backend's `save/2`. Returns whatever the backend returns.
def save(pid) do
  state = Agent.get(pid, fn current -> current end)
  backend = @storages[state.opts[:storage]]

  backend.save(pid, state)
end

# Loads a previously stored classifier.
#
# The backend is looked up in `@storages` by `opts[:storage]` and its
# `load/1` is called with the same options.
#
# NOTE(review): with the default `[]`, `opts[:storage]` is nil here - confirm
# whether callers are always expected to pass `:storage` explicitly.
def load(opts \\ []) do
  backend = @storages[opts[:storage]]

  backend.load(opts)
end
Expand Down
15 changes: 15 additions & 0 deletions lib/simple_bayes/data.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
defmodule SimpleBayes.Data do
  @moduledoc """
  Converts Elixir terms to a Base64 text representation and back.

  Encoding prints the term with `Kernel.inspect/2` and Base64-encodes the
  resulting source text; decoding reverses the Base64 step and evaluates the
  text as Elixir code to rebuild the term.
  """

  @doc """
  Returns `value` printed via `Kernel.inspect/2` and Base64-encoded.

  `opts` are forwarded to `Base.encode64/2`.
  """
  def encode(value, opts \\ []) do
    # The very large :limit keeps inspect from truncating big collections,
    # which would make the printed text impossible to evaluate back.
    printed = Kernel.inspect(value, limit: 1_000_000_000)

    Base.encode64(printed, opts)
  end

  @doc """
  Rebuilds the term from a string produced by `encode/2`.

  `opts` are forwarded to `Base.decode64!/2`, which raises on invalid input.
  """
  def decode(value, opts \\ []) do
    source = Base.decode64!(value, opts)

    # SECURITY: the decoded text is executed as Elixir code. Only decode data
    # from a trusted origin; a tampered payload can run arbitrary code.
    {term, _bindings} = Code.eval_string(source)

    term
  end
end
3 changes: 2 additions & 1 deletion lib/simple_bayes/storage/behaviour.ex
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
defmodule SimpleBayes.Storage.Behaviour do
  @moduledoc """
  Contract every storage backend implements.

  * `init/2` receives the model struct and the options and returns a pid.
  * `save/2` receives the pid and the model struct currently held by it, and
    returns the pid.
  * `load/1` receives the options and returns a pid.
  """

  @callback init(%SimpleBayes{}, Keyword.t) :: pid
  # `SimpleBayes.save/1` passes the model struct (not a keyword list) as the
  # second argument, and the backends return the bare pid.
  @callback save(pid, %SimpleBayes{}) :: pid
  @callback load(Keyword.t) :: pid
end
29 changes: 29 additions & 0 deletions lib/simple_bayes/storage/file_system.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
defmodule SimpleBayes.Storage.FileSystem do
  @moduledoc """
  Storage backend that persists the model to a file.

  The file location is read from the `:file_path` option, or from
  `:file_path` nested under `:storage_config` when the former is absent.
  """

  @behaviour SimpleBayes.Storage.Behaviour

  alias SimpleBayes.Data

  @doc """
  Starts an agent holding `struct` and returns its pid.
  """
  def init(struct, _opts) do
    {:ok, pid} = Agent.start_link(fn -> struct end)

    pid
  end

  @doc """
  Writes the encoded `struct` to the file configured in `struct.opts`.

  Returns `pid` unchanged. Raises `ArgumentError` when no file path is
  configured, and `File.Error` when the file cannot be written.
  """
  def save(pid, struct) do
    struct.opts
    |> file_path()
    |> File.write!(Data.encode(struct))

    pid
  end

  @doc """
  Reads and decodes the file configured in `opts`, then starts an agent
  holding the decoded value and returns its pid.

  Raises `ArgumentError` when no file path is configured, and `File.Error`
  when the file cannot be read.

  NOTE(review): the file content goes through `Data.decode/1`, which evaluates
  it as code - only load files from a trusted location.
  """
  def load(opts) do
    struct =
      opts
      |> file_path()
      |> File.read!()
      |> Data.decode()

    init(struct, opts)
  end

  # Resolves the target path, preferring the flat `:file_path` option over the
  # one nested in `:storage_config`. Fails early with a clear message instead
  # of handing `nil` to the File functions.
  defp file_path(opts) do
    case opts[:file_path] || opts[:storage_config][:file_path] do
      nil ->
        raise(
          ArgumentError,
          "file system storage requires :file_path, either directly or under :storage_config"
        )

      path ->
        path
    end
  end
end
4 changes: 4 additions & 0 deletions lib/simple_bayes/storage/memory.ex
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@ defmodule SimpleBayes.Storage.Memory do
pid
end

# Nothing is written for in-memory storage; the pid is returned unchanged.
def save(pid, _data), do: pid

# Returns the result of `namespace/1` for the given options.
def load(opts), do: namespace(opts)
Expand Down
4 changes: 2 additions & 2 deletions mix.exs
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ defmodule SimpleBayes.Mixfile do

# Project dependencies. Each package is listed exactly once:
# ex_doc is only needed in :dev; stemmer is a runtime dependency.
defp deps do
  [
    {:ex_doc, ">= 0.0.0", only: :dev},
    {:stemmer, "~> 1.0.0-beta.1"}
  ]
end

Expand Down
4 changes: 2 additions & 2 deletions mix.lock
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
%{"earmark": {:hex, :earmark, "0.2.1", "ba6d26ceb16106d069b289df66751734802777a3cbb6787026dd800ffeb850f3", [:mix], []},
"ex_doc": {:hex, :ex_doc, "0.12.0", "b774aabfede4af31c0301aece12371cbd25995a21bb3d71d66f5c2fe074c603f", [:mix], [{:earmark, "~> 0.2", [hex: :earmark, optional: false]}]},
%{"earmark": {:hex, :earmark, "1.0.1", "2c2cd903bfdc3de3f189bd9a8d4569a075b88a8981ded9a0d95672f6e2b63141", [:mix], []},
"ex_doc": {:hex, :ex_doc, "0.13.0", "aa2f8fe4c6136a2f7cfc0a7e06805f82530e91df00e2bff4b4362002b43ada65", [:mix], [{:earmark, "~> 1.0", [hex: :earmark, optional: false]}]},
"stemmer": {:hex, :stemmer, "1.0.0-beta.1", "17112a69e72953e80985068ce68a77413de7e9adfe17204f23df1a1dc69fa7de", [:mix], []}}
87 changes: 87 additions & 0 deletions test/simple_bayes/storage/file_system_test.exs
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
defmodule SimpleBayes.Storage.FileSystemTest do
  use ExUnit.Case, async: true

  doctest SimpleBayes.Storage.FileSystem

  # Fixture written by every setup; *.txt files under test/temp are git-ignored.
  @file_path "test/temp/file_system_test.txt"
  @sentence "Maybe green maybe red but definitely round and sweet"

  describe "file system storage" do
    setup do
      train_and_save(
        storage: :file_system,
        storage_config: [file_path: @file_path]
      )
    end

    test "README example on .classify", meta do
      result =
        meta.opts
        |> SimpleBayes.load()
        |> SimpleBayes.classify(@sentence)
        |> Keyword.keys()

      assert result == [:apple, :orange, :banana]
    end

    test "README example on .classify_one", meta do
      result =
        meta.opts
        |> SimpleBayes.load()
        |> SimpleBayes.classify_one(@sentence)

      assert result == :apple
    end
  end

  describe "file system storage with flattened configuration options" do
    setup do
      train_and_save(
        storage: :file_system,
        file_path: @file_path
      )
    end

    test "README example on .classify", meta do
      result =
        meta.opts
        |> SimpleBayes.load()
        |> SimpleBayes.classify(@sentence)
        |> Keyword.keys()

      assert result == [:apple, :orange, :banana]
    end

    test "README example on .classify_one", meta do
      result =
        meta.opts
        |> SimpleBayes.load()
        |> SimpleBayes.classify_one(@sentence)

      assert result == :apple
    end
  end

  # Trains in two rounds with a save/load cycle in between, so the second
  # round (and every test) works on a model restored from the file.
  # Returns the setup context carrying the options used.
  defp train_and_save(opts) do
    opts
    |> SimpleBayes.init()
    |> SimpleBayes.train(:apple, "red sweet")
    |> SimpleBayes.train(:apple, "green", weight: 0.5)
    |> SimpleBayes.train(:apple, "round", weight: 2)
    |> SimpleBayes.train(:banana, "sweet")
    |> SimpleBayes.save()

    opts
    |> SimpleBayes.load()
    |> SimpleBayes.train(:banana, "green", weight: 0.5)
    |> SimpleBayes.train(:banana, "yellow long", weight: 2)
    |> SimpleBayes.train(:orange, "red")
    |> SimpleBayes.train(:orange, "yellow sweet", weight: 0.5)
    |> SimpleBayes.train(:orange, "round", weight: 2)
    |> SimpleBayes.save()

    {:ok, opts: opts}
  end
end
40 changes: 22 additions & 18 deletions test/simple_bayes/storage/memory_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -11,17 +11,19 @@ defmodule SimpleBayes.Storage.MemoryTest do
]

SimpleBayes.init(opts)
|> SimpleBayes.train(:apple, "red sweet")
|> SimpleBayes.train(:apple, "green", weight: 0.5)
|> SimpleBayes.train(:apple, "round", weight: 2)
|> SimpleBayes.train(:banana, "sweet")
|> SimpleBayes.train(:apple, "red sweet")
|> SimpleBayes.train(:apple, "green", weight: 0.5)
|> SimpleBayes.train(:apple, "round", weight: 2)
|> SimpleBayes.train(:banana, "sweet")
|> SimpleBayes.save()

SimpleBayes.load(opts)
|> SimpleBayes.train(:banana, "green", weight: 0.5)
|> SimpleBayes.train(:banana, "yellow long", weight: 2)
|> SimpleBayes.train(:orange, "red")
|> SimpleBayes.train(:orange, "yellow sweet", weight: 0.5)
|> SimpleBayes.train(:orange, "round", weight: 2)
|> SimpleBayes.train(:banana, "green", weight: 0.5)
|> SimpleBayes.train(:banana, "yellow long", weight: 2)
|> SimpleBayes.train(:orange, "red")
|> SimpleBayes.train(:orange, "yellow sweet", weight: 0.5)
|> SimpleBayes.train(:orange, "round", weight: 2)
|> SimpleBayes.save()

{:ok, opts: opts}
end
Expand Down Expand Up @@ -50,17 +52,19 @@ defmodule SimpleBayes.Storage.MemoryTest do
]

SimpleBayes.init(opts)
|> SimpleBayes.train(:apple, "red sweet")
|> SimpleBayes.train(:apple, "green", weight: 0.5)
|> SimpleBayes.train(:apple, "round", weight: 2)
|> SimpleBayes.train(:banana, "sweet")
|> SimpleBayes.train(:apple, "red sweet")
|> SimpleBayes.train(:apple, "green", weight: 0.5)
|> SimpleBayes.train(:apple, "round", weight: 2)
|> SimpleBayes.train(:banana, "sweet")
|> SimpleBayes.save()

SimpleBayes.load(opts)
|> SimpleBayes.train(:banana, "green", weight: 0.5)
|> SimpleBayes.train(:banana, "yellow long", weight: 2)
|> SimpleBayes.train(:orange, "red")
|> SimpleBayes.train(:orange, "yellow sweet", weight: 0.5)
|> SimpleBayes.train(:orange, "round", weight: 2)
|> SimpleBayes.train(:banana, "green", weight: 0.5)
|> SimpleBayes.train(:banana, "yellow long", weight: 2)
|> SimpleBayes.train(:orange, "red")
|> SimpleBayes.train(:orange, "yellow sweet", weight: 0.5)
|> SimpleBayes.train(:orange, "round", weight: 2)
|> SimpleBayes.save()

{:ok, opts: opts}
end
Expand Down
Empty file added test/temp/.gitkeep
Empty file.

0 comments on commit 0f58444

Please sign in to comment.