Skip to content

Commit 42b54c1

Browse files
committed
Merge pull request #146 from tomerk/dag-construction-v2
Dag construction v2
2 parents a8cbf14 + 882811d commit 42b54c1

File tree

76 files changed

+990
-258
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

76 files changed

+990
-258
lines changed

src/main/resources/log4j.properties

+1
Original file line numberDiff line numberDiff line change
@@ -6,5 +6,6 @@ log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}:
66

77
# Only pay attention to INFO messages from Keystone.
88
log4j.logger.pipelines=INFO
9+
log4j.logger.workflow=INFO
910
log4j.logger.nodes=INFO
1011
log4j.logger.utils=INFO

src/main/scala/loaders/VOCLoader.scala

+2-14
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,9 @@
11
package loaders
22

3-
import java.awt.image.BufferedImage
4-
import java.io.{ByteArrayInputStream, InputStream}
5-
import java.net.URI
6-
import java.util.zip.GZIPInputStream
7-
import javax.imageio.ImageIO
8-
9-
import org.apache.commons.compress.archivers.ArchiveStreamFactory
10-
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream
11-
import org.apache.hadoop.conf.Configuration
12-
import org.apache.hadoop.fs.{FileSystem, Path}
133
import org.apache.spark.SparkContext
144
import org.apache.spark.rdd.RDD
15-
import pipelines.{Logging, Transformer}
16-
import utils.{MultiLabeledImage, Image, ImageConversions}
17-
18-
import scala.collection.mutable.ArrayBuffer
5+
import pipelines.Logging
6+
import utils.MultiLabeledImage
197

208

219
case class VOCDataPath(imagesDirName: String, namePrefix: String, numParts: Option[Int])

src/main/scala/nodes/images/Convolver.scala

+1
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import org.apache.spark.SparkContext
66
import org.apache.spark.rdd.RDD
77
import pipelines._
88
import utils.{ChannelMajorArrayVectorizedImage, ImageMetadata, _}
9+
import workflow.Transformer
910

1011
/**
1112
* Convolves images with a bank of convolution filters. Convolution filters must be square.

src/main/scala/nodes/images/DaisyExtractor.scala

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ package nodes.images
33
import breeze.linalg._
44
import breeze.numerics._
55

6-
import pipelines.Transformer
6+
import workflow.Transformer
77
import utils.Image
88
import utils.ImageUtils
99

src/main/scala/nodes/images/FisherVector.scala

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
package nodes.images
22

33
import breeze.linalg.DenseMatrix
4-
import pipelines.Transformer
4+
import workflow.Transformer
55

66
/**
77
* Abstract interface for Fisher Vector.

src/main/scala/nodes/images/GrayScaler.scala

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
package nodes.images
22

3-
import pipelines.Transformer
3+
import workflow.Transformer
44
import utils.{ImageUtils, Image}
55

66
/**

src/main/scala/nodes/images/HogExtractor.scala

+1-1
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ package nodes.images
1818

1919
import breeze.linalg._
2020

21-
import pipelines.Transformer
21+
import workflow.Transformer
2222
import utils.ChannelMajorArrayVectorizedImage
2323
import utils.Image
2424
import utils.ImageUtils

src/main/scala/nodes/images/ImageVectorizer.scala

+1
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import breeze.linalg.DenseVector
44
import org.apache.spark.rdd.RDD
55
import pipelines._
66
import utils.Image
7+
import workflow.Transformer
78

89
/**
910
* Takes an image and converts it to a dense vector.

src/main/scala/nodes/images/LCSExtractor.scala

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ package nodes.images
22

33
import breeze.linalg._
44

5-
import pipelines.Transformer
5+
import workflow.Transformer
66
import utils.ChannelMajorArrayVectorizedImage
77
import utils.Image
88
import utils.ImageUtils

src/main/scala/nodes/images/LabeledImageExtractors.scala

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
package nodes.images
22

3-
import pipelines._
43
import utils.{MultiLabeledImage, Image, LabeledImage}
4+
import workflow.Transformer
55

66
/**
77
* Extracts a label from a labeled image.

src/main/scala/nodes/images/PixelScaler.scala

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
package nodes.images
22

3-
import pipelines.Transformer
3+
import workflow.Transformer
44
import utils.{ImageUtils, Image}
55

66

src/main/scala/nodes/images/Pooler.scala

+1
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package nodes.images
33
import breeze.linalg.DenseVector
44
import pipelines._
55
import utils.{ImageMetadata, ChannelMajorArrayVectorizedImage, Image}
6+
import workflow.Transformer
67

78
/**
89
* This node takes an image and performs pooling on regions of the image.

src/main/scala/nodes/images/SIFTExtractor.scala

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
package nodes.images
22

33
import breeze.linalg.DenseMatrix
4-
import pipelines.Transformer
4+
import workflow.Transformer
55
import utils.Image
66

77
/**

src/main/scala/nodes/images/SymmetricRectifier.scala

+1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package nodes.images
22

33
import pipelines._
44
import utils.{ChannelMajorArrayVectorizedImage, Image}
5+
import workflow.Transformer
56

67
case class SymmetricRectifier(maxVal: Double = 0.0, alpha: Double = 0.0)
78
extends Transformer[Image, Image] {

src/main/scala/nodes/images/Windower.scala

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ package nodes.images
22

33
import breeze.linalg.DenseVector
44
import org.apache.spark.rdd.RDD
5-
import pipelines.{FunctionNode, Transformer}
5+
import pipelines.FunctionNode
66
import utils.{ImageMetadata, ChannelMajorArrayVectorizedImage, Image}
77

88

src/main/scala/nodes/learning/BlockLinearMapper.scala

+1-1
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@ import edu.berkeley.cs.amplab.mlmatrix.{RowPartition, NormalEquations, BlockCoor
55
import nodes.stats.{StandardScalerModel, StandardScaler}
66
import org.apache.spark.rdd.RDD
77
import nodes.util.{VectorSplitter, Identity}
8-
import pipelines.{Transformer, LabelEstimator}
98
import utils.{MatrixUtils, Stats}
9+
import workflow.{Transformer, LabelEstimator}
1010

1111

1212
/**

src/main/scala/nodes/learning/BlockWeightedLeastSquares.scala

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package nodes.learning
22

33
import nodes.util.VectorSplitter
4+
import workflow.LabelEstimator
45

56
import scala.collection.mutable.ArrayBuffer
67

@@ -16,7 +17,7 @@ import edu.berkeley.cs.amplab.mlmatrix.{RowPartition, NormalEquations, BlockCoor
1617
import edu.berkeley.cs.amplab.mlmatrix.util.{Utils => MLMatrixUtils}
1718

1819
import nodes.stats.StandardScaler
19-
import pipelines.{Transformer, LabelEstimator, Logging}
20+
import pipelines.Logging
2021
import utils.{MatrixUtils, Stats}
2122

2223
// Utility class that holds statistics related to each block we solve

src/main/scala/nodes/learning/GaussianMixtureModel.scala

+1
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import org.apache.spark.rdd.RDD
77
import pipelines._
88
import utils.MatrixUtils
99
import utils.external.EncEval
10+
import workflow.{Transformer, Estimator}
1011

1112
/**
1213
* A Mixture of Gaussians, usually computed via some clustering process.

src/main/scala/nodes/learning/LinearDiscriminantAnalysis.scala

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@ package nodes.learning
33
import breeze.linalg._
44
import breeze.stats._
55
import org.apache.spark.rdd.RDD
6-
import pipelines.LabelEstimator
76
import utils.MatrixUtils
7+
import workflow.LabelEstimator
88

99
/**
1010
* An Estimator that fits Linear Discriminant Analysis (currently not calculated in a distributed fashion),

src/main/scala/nodes/learning/LinearMapper.scala

+2-3
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,10 @@ package nodes.learning
22

33
import breeze.linalg._
44
import edu.berkeley.cs.amplab.mlmatrix.{NormalEquations, RowPartitionedMatrix}
5-
import nodes.stats.{StandardScalerModel, StandardScaler}
5+
import nodes.stats.{StandardScaler, StandardScalerModel}
66
import org.apache.spark.rdd.RDD
7-
8-
import pipelines.{LabelEstimator, Transformer}
97
import utils.MatrixUtils
8+
import workflow.{LabelEstimator, Transformer}
109

1110
/**
1211
* Computes A * x + b i.e. a linear map of data using a trained model.

src/main/scala/nodes/learning/NaiveBayesModel.scala

+2-3
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,11 @@
11
package nodes.learning
22

3-
import breeze.linalg.{argmax, DenseMatrix, DenseVector, Vector}
3+
import breeze.linalg.{DenseMatrix, DenseVector, Vector}
44
import org.apache.spark.mllib.classification.NaiveBayes
55
import org.apache.spark.mllib.regression.LabeledPoint
66
import org.apache.spark.rdd.RDD
7-
import pipelines.LabelEstimator
8-
import pipelines.Transformer
97
import utils.MLlibUtils.breezeVectorToMLlib
8+
import workflow.{Transformer, LabelEstimator}
109

1110
import scala.reflect.ClassTag
1211

src/main/scala/nodes/learning/PCA.scala

+1
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import com.github.fommil.netlib.LAPACK.{getInstance => lapack}
77
import org.apache.spark.rdd.RDD
88
import org.netlib.util.intW
99
import pipelines._
10+
import workflow.{Transformer, Estimator}
1011

1112

1213
/**

src/main/scala/nodes/learning/ZCAWhitener.scala

+1
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import com.github.fommil.netlib.LAPACK.{getInstance => lapack}
77
import org.apache.spark.rdd.RDD
88
import org.netlib.util.intW
99
import pipelines._
10+
import workflow.{Transformer, Estimator}
1011

1112
class ZCAWhitener(val whitener: DenseMatrix[Double], val means: DenseVector[Double])
1213
extends Transformer[DenseMatrix[Double],DenseMatrix[Double]] {

src/main/scala/nodes/nlp/CoreNLPFeatureExtractor.scala

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ package nodes.nlp
33
import edu.arizona.sista.processors.Processor
44
import edu.arizona.sista.processors.fastnlp.FastNLPProcessor
55
import org.apache.spark.rdd.RDD
6-
import pipelines.Transformer
6+
import workflow.Transformer
77

88
/**
99
* Transformer that uses CoreNLP to (in order):

src/main/scala/nodes/nlp/StringUtils.scala

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ package nodes.nlp
33
import java.util.Locale
44

55
import org.apache.spark.rdd.RDD
6-
import pipelines.Transformer
6+
import workflow.Transformer
77

88
/**
99
* Transformer that tokenizes a String into a Seq[String] by splitting on a regular expression.

src/main/scala/nodes/nlp/StupidBackoff.scala

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
package nodes.nlp
22

3-
import pipelines.{Transformer, Estimator}
4-
53
import org.apache.spark.Partitioner
64
import org.apache.spark.rdd.RDD
75

86
import java.util.{HashMap => JHashMap}
97

8+
import workflow.{Transformer, Estimator}
9+
1010
import scala.annotation.tailrec
1111
import scala.collection.JavaConverters._
1212
import scala.reflect.ClassTag

src/main/scala/nodes/nlp/WordFrequencyEncoder.scala

+1-2
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,8 @@
11
package nodes.nlp
22

33
import org.apache.spark.broadcast.Broadcast
4-
import pipelines.{Estimator, Transformer}
5-
64
import org.apache.spark.rdd.RDD
5+
import workflow.{Estimator, Transformer}
76

87
object WordFrequencyEncoder extends Estimator[Seq[String], Seq[Int]] {
98
private[this] def makeUnigrams(data: RDD[Seq[String]]) =

src/main/scala/nodes/nlp/ngrams.scala

+3-1
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
11
package nodes.nlp
22

3-
import pipelines.{FunctionNode, Transformer}
3+
import pipelines.FunctionNode
44

55
import org.apache.spark.rdd.RDD
66

77
import java.util.{HashMap => JHashMap}
88

9+
import workflow.Transformer
10+
911
import scala.collection.JavaConverters._
1012
import scala.collection.mutable.ArrayBuffer
1113
import scala.reflect.ClassTag

src/main/scala/nodes/stats/CosineRandomFeatures.scala

+1
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import breeze.stats.distributions.Rand
66
import org.apache.spark.rdd.RDD
77
import pipelines._
88
import utils.MatrixUtils
9+
import workflow.Transformer
910

1011
/**
1112
* Transformer that extracts random cosine features from a feature vector

src/main/scala/nodes/stats/LinearRectifier.scala

+1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package nodes.stats
22

33
import breeze.linalg.DenseVector
44
import pipelines._
5+
import workflow.Transformer
56

67
/**
78
* This transformer applies a Linear Rectifier,

src/main/scala/nodes/stats/NormalizeRows.scala

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ package nodes.stats
22

33
import breeze.linalg.{max, sum, DenseVector}
44
import breeze.numerics._
5-
import pipelines.Transformer
5+
import workflow.Transformer
66

77
/**
88
* Divides each row by the max of its two-norm and 2.2e-16.

src/main/scala/nodes/stats/PaddedFFT.scala

+2-2
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,15 @@ package nodes.stats
22

33
import breeze.linalg.DenseVector
44
import breeze.math.Complex
5-
import pipelines.Transformer
5+
import workflow.Transformer
66

77
/**
88
* This transformer pads input vectors to the nearest power of two,
99
* then returns the real values of the first half of the fourier transform on the padded vectors.
1010
*
1111
* Goes from vectors of size n to vectors of size nextPositivePowerOfTwo(n)/2
1212
*/
13-
object PaddedFFT extends Transformer[DenseVector[Double], DenseVector[Double]] {
13+
case class PaddedFFT() extends Transformer[DenseVector[Double], DenseVector[Double]] {
1414
override def apply(in: DenseVector[Double]): DenseVector[Double] = {
1515
val paddedSize = nextPositivePowerOfTwo(in.length)
1616
val fft: DenseVector[Complex] = breeze.signal.fourierTr(in.padTo(paddedSize, 0.0).toDenseVector)

src/main/scala/nodes/stats/RandomSignNode.scala

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ package nodes.stats
22

33
import breeze.linalg._
44
import breeze.stats.distributions._
5-
import pipelines.Transformer
5+
import workflow.Transformer
66

77
/**
88
* A node that takes in DenseVector[Double] and randomly flips

src/main/scala/nodes/stats/Sampling.scala

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ package nodes.stats
22

33
import breeze.linalg.{DenseVector, DenseMatrix}
44
import org.apache.spark.rdd.RDD
5-
import pipelines.{FunctionNode, Transformer}
5+
import pipelines.FunctionNode
66

77
/**
88
* Given a collection of Dense Matrices, this will generate a sample of `numSamples` columns from the entire set.

src/main/scala/nodes/stats/SignedHellingerMapper.scala

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ package nodes.stats
22

33
import breeze.linalg.{DenseVector, DenseMatrix}
44
import breeze.numerics._
5-
import pipelines.Transformer
5+
import workflow.Transformer
66

77
/**
88
* Apply power normalization: z <- sign(z)|z|^{\rho}

src/main/scala/nodes/stats/StandardScaler.scala

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@ import breeze.linalg.DenseVector
44
import breeze.numerics.sqrt
55
import org.apache.spark.mllib.stat.MultivariateOnlineSummarizer
66
import org.apache.spark.rdd.RDD
7-
import pipelines.{Estimator, Transformer}
87
import utils.MLlibUtils
8+
import workflow.{Transformer, Estimator}
99

1010
/**
1111
* Represents a StandardScaler model that can transform dense vectors.

src/main/scala/nodes/stats/TermFrequency.scala

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
package nodes.stats
22

3-
import pipelines.Transformer
3+
import workflow.Transformer
44

55
/**
66
* Transformer that maps a Seq[Any] of objects to a Seq[(Any, Double)] of (unique object, weighting_scheme(tf)),

src/main/scala/nodes/util/AllSparseFeatures.scala

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ package nodes.util
22

33
import breeze.linalg.SparseVector
44
import org.apache.spark.rdd.RDD
5-
import pipelines.Estimator
5+
import workflow.Estimator
66

77
import scala.reflect.ClassTag
88

src/main/scala/nodes/util/Cacher.scala

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
package nodes.util
22

33
import org.apache.spark.rdd.RDD
4-
import pipelines.{Logging, Transformer}
4+
import pipelines.Logging
5+
import workflow.Transformer
56

67
import scala.reflect.ClassTag
78

0 commit comments

Comments
 (0)