Skip to content
1 change: 1 addition & 0 deletions src/SOM.jl
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ using Distances
using ProgressMeter
using StatsBase
using Distributions
using NearestNeighbors
#using TensorToolbox
using LinearAlgebra
# if VERSION < v"0.7.0-DEV.5183"
Expand Down
8 changes: 8 additions & 0 deletions src/helpers.jl
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,14 @@ function findWinner(cod, sampl)
return winner
end

function findWinner(kd_tree::KDTree, sampl)
idxs, _ = knn(kd_tree, sampl, 1)
idxs[1]
end

function buildKDTree(codes)
permutedims(codes,(2,1)) |> KDTree
end

"""
normTrainData(x, normParams)
Expand Down
9 changes: 7 additions & 2 deletions src/soms.jl
Original file line number Diff line number Diff line change
Expand Up @@ -76,16 +76,21 @@ end
Return the index of the winner neuron for each training pattern
in x (row-wise).
"""
function visual(codes, x)

function visualBruteForce(codes, x)

vis = zeros(Int, nrow(x))
for i in 1:nrow(x)
@time for i in 1:nrow(x)
vis[i] = findWinner(codes, [x[i, col] for col in 1:size(x, 2)])
end

return(vis)
end

function visual(codes, x)
kd_tree = buildKDTree(codes)
visualBruteForce(kd_tree, x)
end

"""
makePopulation(nCodes, vis)
Expand Down
88 changes: 88 additions & 0 deletions test/benchmark.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
using Primes

function generate_integer_hash_function(a,b,cardinality)
# cardinality must be prime number
# a,b: 1 ≤ a ≤ p−1 , 0 ≤ b ≤ p−1
@assert a < cardinality
@assert a > 0
@assert b < cardinality
@assert b >= 0
@assert isprime(cardinality)
m = cardinality
x -> (x^a%m + b)%m #TODO use memory-efficient modular exponentiation for better performance
end

"""
# function generate_vector_deterministic_pseudorandom

Generates a pseudorandom vector. For the same hash function it always generates the same vector.
If the hash function is an integer hash from a universal family, the elements of the vector will
obey a continuous uniform distribution.
# Arguments:
- `seed`: A random number chosen by the user to ensure the result is deterministic
- `hash_func`: An integer hash function sampled from a universal family
- `dimension`: number of elements in the vector
"""
function generate_vector_deterministic_pseudorandom(seed,hash_func,dimension)
[hash_func(x+seed) for x in 1:dimension]
end


"""
# function generate_dataset

Generates a pseudorandom dataset that can be used for benchmarking SOMs.
The elements in the vectors are distributed uniformly between 0 and `max_value`.

# Arguments:
- `number`: Number of rows/entries/vectors/individual iris plants in the dataset
- `dimension`: Number of dimensions/elements in each vector
- `max_value`: The maximum value that an element in a vector can have
- `y`: an optional function to map the uniformly distributed elements to a desired non-uniform distribution
- `params`: Randomness parameters chosen by user to ensure deterministic result
"""
function generate_dataset(number, dimension, max_value, y=(x->x), params = Dict(:a => 13, :b => 17, :hash_seed => 23, :prime => 1013))
hash_func = generate_integer_hash_function(params[:a],params[:b],params[:prime])

seed = params[:hash_seed]
matrix = zeros((number,dimension))
for i in 1:number
vector = generate_vector_deterministic_pseudorandom(seed,hash_func,dimension)
vector = map((x->x*max_value/params[:prime]),vector)
vector = map((x->(x+max_value)/2),vector)
vector = map(y,vector)
seed = hash_func(seed+i)
matrix[i,:] = vector
end
convert(DataFrame,matrix)
end



# Benchmarking functions
# ======================

function benchmark_init(train, topol; toroidal = false,
normType = :zscore)

xdim = 10
ydim = 10

initSOM(train, xdim, ydim, norm = normType,
topol = topol, toroidal = toroidal)
end

function benchmark_train(som,train, kernel = gaussianKernel)
trainSOM(som, train, 100000, kernelFun = kernel)
end


# Example
# =======

# iris = dataset("datasets", "iris")
# train = iris[:,1:4]

# train = generate_dataset(1000,100,1)
# som = benchmark_init(train, :rectangular, toroidal = false)
# som = benchmark_train(som,train)