From 17c50f338ceb574780476f6b788f48e2d7d06017 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Thu, 18 Mar 2010 20:52:00 -0500 Subject: Remove hkit and do our own hcard parsing Parsing hcards for the data we need wasn't hard enough to justify using hkit. It was dependent on a number of external systems (something to run tidy), and only could handle XHTML. We now parse HTML with the PHP dom libraries used elsewhere, and scrape out our own hcards. Seems to work nicer and faster and most of all works with Google Buzz profile URLs. --- plugins/OStatus/extlib/hkit/hcard.profile.php | 105 ------ plugins/OStatus/extlib/hkit/hkit.class.php | 475 -------------------------- 2 files changed, 580 deletions(-) delete mode 100644 plugins/OStatus/extlib/hkit/hcard.profile.php delete mode 100644 plugins/OStatus/extlib/hkit/hkit.class.php (limited to 'plugins/OStatus/extlib') diff --git a/plugins/OStatus/extlib/hkit/hcard.profile.php b/plugins/OStatus/extlib/hkit/hcard.profile.php deleted file mode 100644 index 6ec0dc890..000000000 --- a/plugins/OStatus/extlib/hkit/hcard.profile.php +++ /dev/null @@ -1,105 +0,0 @@ -root_class = 'vcard'; - - $this->classes = array( - 'fn', array('honorific-prefix', 'given-name', 'additional-name', 'family-name', 'honorific-suffix'), - 'n', array('honorific-prefix', 'given-name', 'additional-name', 'family-name', 'honorific-suffix'), - 'adr', array('post-office-box', 'extended-address', 'street-address', 'postal-code', 'country-name', 'type', 'region', 'locality'), - 'label', 'bday', 'agent', 'nickname', 'photo', 'class', - 'email', array('type', 'value'), - 'category', 'key', 'logo', 'mailer', 'note', - 'org', array('organization-name', 'organization-unit'), - 'tel', array('type', 'value'), - 'geo', array('latitude', 'longitude'), - 'tz', 'uid', 'url', 'rev', 'role', 'sort-string', 'sound', 'title' - ); - - // classes that must only appear once per card - $this->singles = array( - 'fn' - ); - - // classes that are required (not strictly enforced - give at least one!) - $this->required = array( - 'fn' - ); - - $this->att_map = array( - 'fn' => array('IMG|alt'), - 'url' => array('A|href', 'IMG|src', 'AREA|href'), - 'photo' => array('IMG|src'), - 'bday' => array('ABBR|title'), - 'logo' => array('IMG|src'), - 'email' => array('A|href'), - 'geo' => array('ABBR|title') - ); - - - $this->callbacks = array( - 'url' => array($this, 'resolvePath'), - 'photo' => array($this, 'resolvePath'), - 'logo' => array($this, 'resolvePath'), - 'email' => array($this, 'resolveEmail') - ); - - - - function hKit_hcard_post($a) - { - - foreach ($a as &$vcard){ - - hKit_implied_n_optimization($vcard); - hKit_implied_n_from_fn($vcard); - - } - - return $a; - - } - - - function hKit_implied_n_optimization(&$vcard) - { - if (array_key_exists('fn', $vcard) && !is_array($vcard['fn']) && - !array_key_exists('n', $vcard) && (!array_key_exists('org', $vcard) || $vcard['fn'] != $vcard['org'])){ - - if (sizeof(explode(' ', $vcard['fn'])) == 2){ - $patterns = array(); - $patterns[] = array('/^(\S+),\s*(\S{1})$/', 2, 1); // Lastname, Initial - $patterns[] = array('/^(\S+)\s*(\S{1})\.*$/', 2, 1); // Lastname Initial(.) - $patterns[] = array('/^(\S+),\s*(\S+)$/', 2, 1); // Lastname, Firstname - $patterns[] = array('/^(\S+)\s*(\S+)$/', 1, 2); // Firstname Lastname - - foreach ($patterns as $pattern){ - if (preg_match($pattern[0], $vcard['fn'], $matches) === 1){ - $n = array(); - $n['given-name'] = $matches[$pattern[1]]; - $n['family-name'] = $matches[$pattern[2]]; - $vcard['n'] = $n; - - - break; - } - } - } - } - } - - - function hKit_implied_n_from_fn(&$vcard) - { - if (array_key_exists('fn', $vcard) && is_array($vcard['fn']) - && !array_key_exists('n', $vcard) && (!array_key_exists('org', $vcard) || $vcard['fn'] != $vcard['org'])){ - - $vcard['n'] = $vcard['fn']; - } - - if (array_key_exists('fn', $vcard) && is_array($vcard['fn'])){ - $vcard['fn'] = $vcard['fn']['text']; - } - } - -?> \ No newline at end of file diff --git a/plugins/OStatus/extlib/hkit/hkit.class.php b/plugins/OStatus/extlib/hkit/hkit.class.php deleted file mode 100644 index c3a54cff6..000000000 --- a/plugins/OStatus/extlib/hkit/hkit.class.php +++ /dev/null @@ -1,475 +0,0 @@ -' . implode(', ', $missing) . ''); - - } - - - public function getByURL($profile='', $url='') - { - - if ($profile=='' || $url == '') return false; - - $this->loadProfile($profile); - - $source = $this->loadURL($url); - - if ($source){ - $tidy_xhtml = $this->tidyThis($source); - - $fragment = false; - - if (strrchr($url, '#')) - $fragment = array_pop(explode('#', $url)); - - $doc = $this->loadDoc($tidy_xhtml, $fragment); - $s = $this->processNodes($doc, $this->classes); - $s = $this->postProcess($profile, $s); - - return $s; - }else{ - return false; - } - } - - public function getByString($profile='', $input_xml='') - { - if ($profile=='' || $input_xml == '') return false; - - $this->loadProfile($profile); - - $doc = $this->loadDoc($input_xml); - $s = $this->processNodes($doc, $this->classes); - $s = $this->postProcess($profile, $s); - - return $s; - - } - - private function processNodes($items, $classes, $allow_includes=true){ - - $out = array(); - - foreach($items as $item){ - $data = array(); - - for ($i=0; $ixpath($xpath); - - if ($results){ - foreach ($results as $result){ - if (isset($classes[$i+1]) && is_array($classes[$i+1])){ - $nodes = $this->processNodes($results, $classes[$i+1]); - if (sizeof($nodes) > 0){ - $nodes = array_merge(array('text'=>$this->getNodeValue($result, $classes[$i])), $nodes); - $data[$classes[$i]] = $nodes; - }else{ - $data[$classes[$i]] = $this->getNodeValue($result, $classes[$i]); - } - - }else{ - if (isset($data[$classes[$i]])){ - if (is_array($data[$classes[$i]])){ - // is already an array - append - $data[$classes[$i]][] = $this->getNodeValue($result, $classes[$i]); - - }else{ - // make it an array - if ($classes[$i] == 'value'){ // unless it's the 'value' of a type/value pattern - $data[$classes[$i]] .= $this->getNodeValue($result, $classes[$i]); - }else{ - $old_val = $data[$classes[$i]]; - $data[$classes[$i]] = array($old_val, $this->getNodeValue($result, $classes[$i])); - $old_val = false; - } - } - }else{ - // set as normal value - $data[$classes[$i]] = $this->getNodeValue($result, $classes[$i]); - - } - } - - // td@headers pattern - if (strtoupper(dom_import_simplexml($result)->tagName)== "TD" && $result['headers']){ - $include_ids = explode(' ', $result['headers']); - $doc = $this->doc; - foreach ($include_ids as $id){ - $xpath = "//*[@id='$id']/.."; - $includes = $doc->xpath($xpath); - foreach ($includes as $include){ - $tmp = $this->processNodes($include, $this->classes); - if (is_array($tmp)) $data = array_merge($data, $tmp); - } - } - } - } - } - } - $result = false; - } - - // include-pattern - if ($allow_includes){ - $xpath = ".//*[contains(concat(' ',normalize-space(@class),' '),' include ')]"; - $results = $item->xpath($xpath); - - if ($results){ - foreach ($results as $result){ - $tagName = strtoupper(dom_import_simplexml($result)->tagName); - if ((($tagName == "OBJECT" && $result['data']) || ($tagName == "A" && $result['href'])) - && preg_match('/\binclude\b/', $result['class'])){ - $att = ($tagName == "OBJECT" ? 'data' : 'href'); - $id = str_replace('#', '', $result[$att]); - $doc = $this->doc; - $xpath = "//*[@id='$id']"; - $includes = $doc->xpath($xpath); - foreach ($includes as $include){ - $include = simplexml_load_string(''.$include->asXML().''); // don't ask. - $tmp = $this->processNodes($include, $this->classes, false); - if (is_array($tmp)) $data = array_merge($data, $tmp); - } - } - } - } - } - $out[] = $data; - } - - if (sizeof($out) > 1){ - return $out; - }else if (isset($data)){ - return $data; - }else{ - return array(); - } - } - - - private function getNodeValue($node, $className) - { - - $tag_name = strtoupper(dom_import_simplexml($node)->tagName); - $s = false; - - // ignore DEL tags - if ($tag_name == 'DEL') return $s; - - // look up att map values - if (array_key_exists($className, $this->att_map)){ - - foreach ($this->att_map[$className] as $map){ - if (preg_match("/$tag_name\|/", $map)){ - $s = ''.$node[array_pop($foo = explode('|', $map))]; - } - } - } - - // if nothing and OBJ, try data. - if (!$s && $tag_name=='OBJECT' && $node['data']) $s = ''.$node['data']; - - // if nothing and IMG, try alt. - if (!$s && $tag_name=='IMG' && $node['alt']) $s = ''.$node['alt']; - - // if nothing and AREA, try alt. - if (!$s && $tag_name=='AREA' && $node['alt']) $s = ''.$node['alt']; - - //if nothing and not A, try title. - if (!$s && $tag_name!='A' && $node['title']) $s = ''.$node['title']; - - - // if nothing found, go with node text - $s = ($s ? $s : implode(array_filter($node->xpath('child::node()'), array(&$this, "filterBlankValues")), ' ')); - - // callbacks - if (array_key_exists($className, $this->callbacks)){ - $s = preg_replace_callback('/.*/', $this->callbacks[$className], $s, 1); - } - - // trim and remove line breaks - if ($tag_name != 'PRE'){ - $s = trim(preg_replace('/[\r\n\t]+/', '', $s)); - $s = trim(preg_replace('/(\s{2})+/', ' ', $s)); - } - - return $s; - } - - private function filterBlankValues($s){ - return preg_match("/\w+/", $s); - } - - - private function tidyThis($source) - { - switch ( $this->tidy_mode ) - { - case 'exec': - $tmp_file = $this->tmp_dir.md5($source).'.txt'; - file_put_contents($tmp_file, $source); - exec("tidy -utf8 -indent -asxhtml -numeric -bare -quiet $tmp_file", $tidy); - unlink($tmp_file); - return implode("\n", $tidy); - break; - - case 'php': - $tidy = tidy_parse_string($source); - return tidy_clean_repair($tidy); - break; - - default: - return $source; - break; - } - - } - - - private function loadProfile($profile) - { - require_once("$profile.profile.php"); - } - - - private function loadDoc($input_xml, $fragment=false) - { - $xml = simplexml_load_string($input_xml); - - $this->doc = $xml; - - if ($fragment){ - $doc = $xml->xpath("//*[@id='$fragment']"); - $xml = simplexml_load_string($doc[0]->asXML()); - $doc = null; - } - - // base tag - if ($xml->head->base['href']) $this->base = $xml->head->base['href']; - - // xml:base attribute - PITA with SimpleXML - preg_match('/xml:base="(.*)"/', $xml->asXML(), $matches); - if (is_array($matches) && sizeof($matches)>1) $this->base = $matches[1]; - - return $xml->xpath("//*[contains(concat(' ',normalize-space(@class),' '),' $this->root_class ')]"); - - } - - - private function loadURL($url) - { - $this->url = $url; - - if ($this->tidy_mode == 'proxy' && $this->tidy_proxy != ''){ - $url = $this->tidy_proxy . $url; - } - - return @file_get_contents($url); - - } - - - private function postProcess($profile, $s) - { - $required = $this->required; - - if (is_array($s) && array_key_exists($required[0], $s)){ - $s = array($s); - } - - $s = $this->dedupeSingles($s); - - if (function_exists('hKit_'.$profile.'_post')){ - $s = call_user_func('hKit_'.$profile.'_post', $s); - } - - $s = $this->removeTextVals($s); - - return $s; - } - - - private function resolvePath($filepath) - { // ugly code ahoy: needs a serious tidy up - - $filepath = $filepath[0]; - - $base = $this->base; - $url = $this->url; - - if ($base != '' && strpos($base, '://') !== false) - $url = $base; - - $r = parse_url($url); - $domain = $r['scheme'] . '://' . $r['host']; - - if (!isset($r['path'])) $r['path'] = '/'; - $path = explode('/', $r['path']); - $file = explode('/', $filepath); - $new = array(''); - - if (strpos($filepath, '://') !== false || strpos($filepath, 'data:') !== false){ - return $filepath; - } - - if ($file[0] == ''){ - // absolute path - return ''.$domain . implode('/', $file); - }else{ - // relative path - if ($path[sizeof($path)-1] == '') array_pop($path); - if (strpos($path[sizeof($path)-1], '.') !== false) array_pop($path); - - foreach ($file as $segment){ - if ($segment == '..'){ - array_pop($path); - }else{ - $new[] = $segment; - } - } - return ''.$domain . implode('/', $path) . implode('/', $new); - } - } - - private function resolveEmail($v) - { - $parts = parse_url($v[0]); - return ($parts['path']); - } - - - private function dedupeSingles($s) - { - $singles = $this->singles; - - foreach ($s as &$item){ - foreach ($singles as $classname){ - if (array_key_exists($classname, $item) && is_array($item[$classname])){ - if (isset($item[$classname][0])) $item[$classname] = $item[$classname][0]; - } - } - } - - return $s; - } - - private function removeTextVals($s) - { - foreach ($s as $key => &$val){ - if ($key){ - $k = $key; - }else{ - $k = ''; - } - - if (is_array($val)){ - $val = $this->removeTextVals($val); - }else{ - if ($k == 'text'){ - $val = ''; - } - } - } - - return array_filter($s); - } - - } - - -?> \ No newline at end of file -- cgit v1.2.3-54-g00ecf