Skip to content

Commit ad08210

Browse files
committed
master check in
1 parent e520cf2 commit ad08210

File tree

2 files changed

+98
-0
lines changed

2 files changed

+98
-0
lines changed

src/Analysis/Summarize/Simple.php

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
<?php
2+
3+
namespace TextAnalysis\Analysis\Summarize;
4+
5+
use TextAnalysis\Models\ScoreKeeper;
6+
7+
/**
 * A simple algorithm based on frequency counts for finding the best
 * sentence to summarize the text.
 * @author yooper
 */
class Simple
{
    /**
     * Returns each sentence scored.
     *
     * Every sentence is wrapped in a ScoreKeeper together with its position
     * in the input. For each distinct word token, the number of times the
     * token occurs in $wordTokens is added to the score of every sentence
     * that contains the token as a substring. The keepers are then ordered
     * with the score_keeper_sort comparator.
     *
     * @param array $wordTokens the word tokens of the whole text
     * @param array $sentenceTokens the sentences to score
     * @return array the ScoreKeeper instances, one per sentence, sorted
     */
    public function summarize(array $wordTokens, array $sentenceTokens) : array
    {
        $tokenCounts = array_count_values($wordTokens);

        // Re-index first so sentence arrays with gaps or string keys
        // (e.g. the result of array_filter) are still walked completely.
        $scoreKeepers = [];
        foreach (array_values($sentenceTokens) as $index => $sentence)
        {
            $scoreKeepers[] = new ScoreKeeper($sentence, $index);
        }

        foreach ($tokenCounts as $token => $freq)
        {
            // array_count_values() turns numeric tokens such as "4" into
            // integer keys. Before PHP 8 strpos() treats an integer needle as
            // a character ordinal, so the needle must be forced back to a string.
            $needle = (string) $token;

            // An empty needle raises a warning on PHP 7 and matches every
            // sentence on PHP 8; it cannot change the ranking either way.
            if ($needle === '')
            {
                continue;
            }

            foreach ($scoreKeepers as $sentenceKeeper)
            {
                if (strpos($sentenceKeeper->getToken(), $needle) !== false)
                {
                    $sentenceKeeper->addToScore($freq);
                }
            }
        }

        // score_keeper_sort is a global helper defined outside this file;
        // presumably it orders the highest score first - TODO confirm.
        usort($scoreKeepers, 'score_keeper_sort');
        return $scoreKeepers;
    }
}
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
<?php
2+
3+
namespace Tests\TextAnalysis\Analysis\Summarize;
4+
5+
/**
 * Test out the simple summary algorithm
 * @author yooper
 */
class SimpleTest extends \PHPUnit_Framework_TestCase
{
    /**
     * Summarizes the fixture article with the English stop word list and
     * checks the number of results and the top ranked sentence.
     */
    public function testSimpleWithStopwords()
    {
        $stopwords = get_stop_words(VENDOR_DIR."yooper/stop-words/data/stop-words_english_1_en.txt");
        // Each stop word is padded with a space on both sides; presumably so
        // that only whole words are matched by summary_simple - TODO confirm.
        $stopwords = array_map(function($word){ return " {$word} ";}, $stopwords);
        $bestSentences = summary_simple($this->getArticle(), $stopwords);
        $this->assertCount(13, $bestSentences);
        $this->assertEquals($this->getTopSentence(), $bestSentences[0]);
    }

    /**
     * Summarizes the fixture article without passing any stop words.
     */
    public function testSimpleWithoutStopwords()
    {
        $bestSentences = summary_simple($this->getArticle());
        $this->assertCount(13, $bestSentences);
        // NOTE(review): this asserts the top result is NOT the sentence
        // returned by getTopSentenceWithoutStopwords() - confirm that
        // assertNotEquals (rather than assertEquals) is intended.
        $this->assertNotEquals($this->getTopSentenceWithoutStopwords(), $bestSentences[0]);
    }

    /**
     * The article used as the fixture for both tests.
     * @return string
     */
    public function getArticle() : string
    {
        return <<<TEXT
According to a Tuesday news release, Houghton County leaders are asking for a slowing of supply donations.

Volunteers and financial donations are still needed, along with dehumidifiers, box fans or large equipment that can be used for excavating, demolition or reconstruction.

"The response to our recent flood disaster has been overwhelming, and the Copper Country cannot be thankful enough for the support that’s been received," said Michael Babcock, the director of marketing and communications at Finlandia University. "However, as of now, volunteers have reached a point where enough normal supplies are on hand. Officials are now asking that the flow of general donations be reduced or stopped with a few exceptions. We know of several additional semi loads coming, but we’re now asking that any additional large deliveries that are planned please be put on hold, unless the items being donated are dehumidifiers, box fans or large equipment that can be used for excavating, demolition or reconstruction."

Volunteers are still needed. The recovery effort is transitioning from initial clean-up to the rehab and reconstruction phase, and additional volunteers are a vital part of that effort.

To donate money, please go to coppercountrystrong.com/donate.

On Friday at 4 p.m. the Flood Relief Supply Distribution at Dee Stadium will be closing. Those in need of supplies are asked to stop by before it closes to get what’s needed for the weekend. Next steps for the distribution center are being evaluated and will be announced as soon as possible.
TEXT;
    }

    /**
     * The sentence expected to rank first when stop words are supplied.
     *
     * NOTE(review): the expected text is lower case and uses an ASCII
     * apostrophe in "that's", while the article uses a typographic one;
     * presumably summary_simple normalizes both - TODO confirm.
     * @return string
     */
    public function getTopSentence() : string
    {
        return '"the response to our recent flood disaster has been overwhelming, and the copper country cannot be thankful enough for the support that\'s been received," said michael babcock, the director of marketing and communications at finlandia university.';
    }

    /**
     * The sentence the no-stop-words test compares the top result against.
     * @return string
     */
    public function getTopSentenceWithoutStopwords() : string
    {
        return 'we know of several additional semi loads coming, but we’re now asking that any additional large deliveries that are planned please be put on hold, unless the items being donated are dehumidifiers, box fans or large equipment that can be used for excavating, demolition or reconstruction."';
    }
}

0 commit comments

Comments
 (0)