diff options
Diffstat (limited to 'plugins/Irc/extlib/phergie/Phergie/Plugin/Tea/db.php')
-rw-r--r-- | plugins/Irc/extlib/phergie/Phergie/Plugin/Tea/db.php | 49 |
1 files changed, 49 insertions, 0 deletions
diff --git a/plugins/Irc/extlib/phergie/Phergie/Plugin/Tea/db.php b/plugins/Irc/extlib/phergie/Phergie/Plugin/Tea/db.php new file mode 100644 index 000000000..21fe1950d --- /dev/null +++ b/plugins/Irc/extlib/phergie/Phergie/Plugin/Tea/db.php @@ -0,0 +1,49 @@ +<?php + +if (!defined('__DIR__')) { + define('__DIR__', dirname(__FILE__)); +} + +// Create database schema +echo 'Creating database', PHP_EOL; +$file = __DIR__ . '/tea.db'; +if (file_exists($file)) { + unlink($file); +} +$db = new PDO('sqlite:' . $file); +$db->exec('CREATE TABLE tea (name VARCHAR(255), link VARCHAR(255))'); +$db->exec('CREATE UNIQUE INDEX tea_name ON tea (name)'); +$insert = $db->prepare('INSERT INTO tea (name, link) VALUES (:name, :link)'); + +// Get raw teacuppa.com data set +echo 'Downloading teacuppa.com data set', PHP_EOL; +$file = __DIR__ . '/tea-list.html'; +if (!file_exists($file)) { + copy('http://www.teacuppa.com/tea-list.asp', $file); +} +$contents = file_get_contents($file); + +// Extract data from data set +echo 'Processing teacuppa.com data', PHP_EOL; +$contents = tidy_repair_string($contents); +libxml_use_internal_errors(true); +$doc = new DOMDocument; +$doc->loadHTML($contents); +libxml_clear_errors(); +$xpath = new DOMXPath($doc); +$teas = $xpath->query('//p[@class="page_title"]/following-sibling::table//a'); +$db->beginTransaction(); +foreach ($teas as $tea) { + $name = preg_replace( + array('/\s*\v+\s*/', '/\s+tea\s*$/i'), + array(' ', ''), + $tea->textContent + ); + $link = 'http://teacuppa.com/' . $tea->getAttribute('href'); + $insert->execute(array($name, $link)); +} +$db->commit(); + +// Clean up +echo 'Cleaning up', PHP_EOL; +unlink($file); |