Skip to content

Commit 4253706

Browse files
committed
Add in name corpus support
1 parent d9077c7 commit 4253706

File tree

4 files changed

+117
-4
lines changed

4 files changed

+117
-4
lines changed

src/Corpus/NameCorpus.php

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
<?php
2+
3+
namespace TextAnalysis\Corpus;
4+
5+
use PDO;
6+
7+
/**
 * Corpus reader backed by the US names sqlite database.
 *
 * Answers whether a string is a known first name or surname by querying the
 * us_names.sqlite3 file found in the corpus directory.
 *
 * @author yooper
 */
class NameCorpus extends ReadCorpusAbstract
{
    /**
     * Connection to the names database, created on first use by getPdo()
     * @var PDO|null
     */
    protected $pdo;

    /**
     * @param string|null $dir directory holding the sqlite file; when empty,
     * the value of get_storage_path('corpora') is used instead
     * @param string $lang language code forwarded to the parent constructor
     */
    public function __construct($dir = null, $lang = 'eng')
    {
        if(!$dir) {
            $dir = get_storage_path('corpora');
        }
        parent::__construct($dir, $lang);
    }

    /**
     * @return array the single sqlite file this corpus reads from
     */
    public function getFileNames(): array
    {
        return ['us_names.sqlite3'];
    }

    /**
     * Check the name against the names_by_state_and_year table
     * @param string $name
     * @return boolean
     */
    public function isFirstName($name) : bool
    {
        return $this->isName('names_by_state_and_year', $name);
    }

    /**
     * Check the name against the surnames table
     * @param string $name
     * @return boolean
     */
    public function isLastName($name) : bool
    {
        return $this->isName('surnames', $name);
    }

    /**
     * Check if the name exists in the given table. The value is lower cased
     * by the query itself before it is compared with the name column.
     * @param string $tableName table to search; interpolated into the SQL
     * because identifiers cannot be bound, so only pass trusted table names
     * @param string $name
     * @return boolean
     */
    protected function isName($tableName, $name) : bool
    {
        $stmt = $this->getPdo()->prepare("SELECT name FROM $tableName WHERE name = LOWER(:name) LIMIT 1");
        // bindValue: the value is only read, no by-reference binding is needed
        $stmt->bindValue(':name', $name);
        $stmt->execute();
        // fetchColumn() returns false when there is no row; comparing against
        // false avoids treating a matched '0' or '' as "not found"
        return $stmt->fetchColumn() !== false;
    }

    /**
     * Open the sqlite database on first call and reuse the connection after
     * @return PDO
     */
    public function getPdo() : PDO
    {
        if(!$this->pdo instanceof PDO) {
            // Normalise the directory so the path is correct whether or not
            // it was given with a trailing separator
            $dir = rtrim($this->getDir(), '/'.DIRECTORY_SEPARATOR);
            $this->pdo = new PDO("sqlite:".$dir.DIRECTORY_SEPARATOR.$this->getFileNames()[0]);
            // Report query failures as PDOException instead of letting
            // prepare() return false (the silent default before PHP 8)
            $this->pdo->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION);
        }
        return $this->pdo;
    }
}
76+
77+

src/Corpus/ReadCorpusAbstract.php

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22

33
namespace TextAnalysis\Corpus;
44

5-
65
/**
76
* Abstract class for making corpus readers
87
*/
@@ -29,7 +28,7 @@ abstract class ReadCorpusAbstract
2928
public function __construct($dir, $lang = 'eng')
3029
{
3130
$this->dir = $dir;
32-
$this->lang = $lang;
31+
$this->lang = $lang;
3332
}
3433

3534
/**

src/Downloaders/DownloadPackageFactory.php

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,8 +54,7 @@ public function verifyChecksum()
5454
if(empty($this->getPackage()->getChecksum())) {
5555
return true;
5656
}
57-
58-
return $this->getPackage()->getChecksum() === md5($this->getDownloadFullPath());
57+
return $this->getPackage()->getChecksum() === md5_file($this->getDownloadFullPath());
5958
}
6059

6160
/**
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
<?php
2+
3+
namespace Tests\TextAnalysis\Corpus;
4+
5+
use TextAnalysis\Corpus\NameCorpus;
6+
use Mockery;
7+
use TextAnalysis\Corpus\ImportCorpus;
8+
9+
/**
 * Test out the name corpus
 *
 * Both tests query the names database through NameCorpus and are reported as
 * skipped when the SKIP_TEST environment variable is set.
 *
 * @author yooper
 */
class NameCorpusTest extends \PHPUnit_Framework_TestCase
{
    public function testFirstNames()
    {
        $this->skipWhenRequested();

        $corpus = new NameCorpus();
        $this->assertTrue($corpus->isFirstName('Dan'));
        $this->assertFalse($corpus->isFirstName('very'));
    }

    public function testLastNames()
    {
        $this->skipWhenRequested();

        $corpus = new NameCorpus();
        $this->assertTrue($corpus->isLastName('Williamson'));
        $this->assertFalse($corpus->isLastName('baggins'));
    }

    /**
     * Mark the running test as skipped when SKIP_TEST is set. A bare return
     * would report the test as passed although nothing was asserted.
     */
    private function skipWhenRequested()
    {
        if(getenv('SKIP_TEST')) {
            $this->markTestSkipped('SKIP_TEST is set');
        }
    }
}

0 commit comments

Comments
 (0)