diff --git a/src/FPGrowth.php b/src/FPGrowth.php
index c832aa2..5446acf 100644
--- a/src/FPGrowth.php
+++ b/src/FPGrowth.php
@@ -10,8 +10,10 @@ class FPGrowth
 {
     protected int $support = 3;
     protected float $confidence = 0.7;
-
+    private int $maxLength = 0;
+    private string $itemsetSeparator;
     private $patterns;
+
     private $rules;
 
     /**
@@ -50,6 +52,24 @@ public function setConfidence(float $confidence): self
         return $this;
     }
 
+    /**
+     * @return int maximum itemset length to mine; 0 means no limit
+     */
+    public function getMaxLength(): int
+    {
+        return $this->maxLength;
+    }
+
+    /**
+     * @param int $maxLength maximum itemset length to mine; 0 disables the limit
+     * @return self
+     */
+    public function setMaxLength(int $maxLength): self
+    {
+        $this->maxLength = $maxLength;
+        return $this;
+    }
+
     /**
      * @return mixed
      */
@@ -71,10 +91,14 @@ public function getRules()
      * @param int $support 1, 2, 3 ...
      * @param float $confidence 0 ... 1
+     * @param int $maxLength maximum itemset length, 0 = no limit
+     * @param string $itemsetSeparator delimiter used to join items into pattern keys
      */
-    public function __construct(int $support, float $confidence)
+    public function __construct(int $support, float $confidence, int $maxLength = 0, string $itemsetSeparator = "\0")
     {
         $this->setSupport($support);
         $this->setConfidence($confidence);
+        $this->setMaxLength($maxLength);
+        $this->itemsetSeparator = $itemsetSeparator;
     }
 
     /**
@@ -93,7 +117,7 @@ public function run(array $transactions)
      */
     protected function findFrequentPatterns(array $transactions): array
     {
-        $tree = new FPTree($transactions, $this->support, null, 0);
+        $tree = new FPTree($transactions, $this->support, null, 0, $this->maxLength, $this->itemsetSeparator);
         return $tree->minePatterns($this->support);
     }
 
@@ -105,16 +129,16 @@ protected function generateAssociationRules(array $patterns): array
     {
         $rules = [];
         foreach (array_keys($patterns) as $pattern) {
-            $itemSet = explode(',', $pattern);
+            $itemSet = explode($this->itemsetSeparator, $pattern);
             $upperSupport = $patterns[$pattern];
             for ($i = 1; $i < count($itemSet); $i++) {
                 $combinations = new Combinations($itemSet, $i);
                 foreach ($combinations->generator() as $antecedent) {
                     sort($antecedent);
-                    $antecedentStr = implode(',', $antecedent);
+                    $antecedentStr = implode($this->itemsetSeparator, $antecedent);
                     $consequent = array_diff($itemSet, $antecedent);
                     sort($consequent);
-                    $consequentStr = implode(',', $consequent);
+                    $consequentStr = implode($this->itemsetSeparator, $consequent);
                     if (isset($patterns[$antecedentStr])) {
                         $lowerSupport = $patterns[$antecedentStr];
                         $confidence = floatval($upperSupport) / $lowerSupport;
diff --git a/src/FPTree.php b/src/FPTree.php
index dfa04f5..3763818 100644
--- a/src/FPTree.php
+++ b/src/FPTree.php
@@ -16,6 +16,12 @@ class FPTree
 
     private FPNode $root;
 
+    private int $maxLength = 0;
+
+    private string $itemsetSeparator;
+
+    private int $depth = 0;
+
     /**
      * Initialize the tree.
      * @param array $transactions
@@ -23,11 +29,15 @@ class FPTree
      * @param $rootValue
      * @param int $rootCount
+     * @param int $maxLength maximum itemset length, 0 = no limit
+     * @param string $itemsetSeparator delimiter used to join items into pattern keys
      */
-    public function __construct(array $transactions, int $threshold, $rootValue, int $rootCount)
+    public function __construct(array $transactions, int $threshold, $rootValue, int $rootCount, int $maxLength = 0, string $itemsetSeparator = "\0")
     {
         $this->frequent = $this->findFrequentItems($transactions, $threshold);
         $this->headers = $this->buildHeaderTable();
         $this->root = $this->buildFPTree($transactions, $rootValue, $rootCount, $this->frequent);
+        $this->maxLength = $maxLength;
+        $this->itemsetSeparator = $itemsetSeparator;
     }
 
     /**
@@ -168,6 +178,8 @@ public function minePatterns(int $threshold): array
     {
         if ($this->treeHasSinglePath($this->root)) {
             return $this->generatePatternList();
+        } elseif ($this->maxLength && $this->maxLength <= $this->getDepth()) {
+            return [];
         }
 
         return $this->zipPatterns($this->mineSubTrees($threshold));
@@ -188,10 +200,10 @@ protected function zipPatterns(array $patterns): array
         // We are in a conditional tree.
         $newPatterns = [];
         foreach (array_keys($patterns) as $strKey) {
-            $key = explode(',', $strKey);
+            $key = explode($this->itemsetSeparator, $strKey);
             $key[] = $this->root->value;
             sort($key);
-            $newPatterns[implode(',', $key)] = $patterns[$strKey];
+            $newPatterns[implode($this->itemsetSeparator, $key)] = $patterns[$strKey];
         }
 
         return $newPatterns;
@@ -211,7 +223,13 @@ protected function generatePatternList(): array
             $patterns[$this->root->value] = $this->root->count;
         }
 
-        for ($i = 1; $i <= count($items); $i++) {
+        // limit length of combinations to remaining length
+        $count = count($items);
+        if ($this->maxLength) {
+            $count = min($count, $this->maxLength - $this->getDepth());
+        }
+
+        for ($i = 1; $i <= $count; $i++) {
             $combinations = new Combinations($items,$i);
             foreach ($combinations->generator() as $subset) {
                 $pattern = $this->root->value !== null ? array_merge($subset, [$this->root->value]) : $subset;
@@ -223,7 +241,7 @@ protected function generatePatternList(): array
                         $min = $this->frequent[$x];
                     }
                 }
-                $patterns[implode(',', $pattern)] = $min;
+                $patterns[implode($this->itemsetSeparator, $pattern)] = $min;
             }
         }
 
@@ -270,7 +288,8 @@ protected function mineSubTrees(int $threshold): array
             }
 
             // Now we have the input for a subtree, so construct it and grab the patterns.
-            $subtree = new FPTree($conditionalTreeInput, $threshold, $item, $this->frequent[$item]);
+            $subtree = new FPTree($conditionalTreeInput, $threshold, $item, $this->frequent[$item], $this->maxLength, $this->itemsetSeparator);
+            $subtree->depth = $this->depth + 1;
             $subtreePatterns = $subtree->minePatterns($threshold);
 
             // Insert subtree patterns into main patterns dictionary.
@@ -285,4 +304,9 @@ protected function mineSubTrees(int $threshold): array
 
         return $patterns;
     }
+
+    private function getDepth(): int
+    {
+        return $this->depth;
+    }
 }