From 6911e1c7972c3adec53d0fe04ebdd7da0fbd8b12 Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Mon, 4 Jan 2010 11:55:27 -0800 Subject: Ticket 2141: bugs with weighted popularity lists across year boundary. Consolidated several separate implementations of the same weighting algorithm into common_sql_weight() and fixed some bugs... For MySQL, now using timestampdiff() instead of subtraction for the comparison, so we get sane results when the year doesn't match, and utc_timestamp() rather than now() so we don't get negative ages for recent items with local server timezone. Unknown whether the same problems affect PostgreSQL, but note that it lacks the timestampdiff() SQL function. --- actions/favorited.php | 8 ++------ actions/publictagcloud.php | 11 ++++------- lib/grouptagcloudsection.php | 7 +------ lib/personaltagcloudsection.php | 11 +++-------- lib/popularnoticesection.php | 6 +++--- lib/util.php | 20 ++++++++++++++++++++ 6 files changed, 33 insertions(+), 30 deletions(-) diff --git a/actions/favorited.php b/actions/favorited.php index 150b67b0b..9ffa5b844 100644 --- a/actions/favorited.php +++ b/actions/favorited.php @@ -185,11 +185,7 @@ class FavoritedAction extends Action function showContent() { - if (common_config('db', 'type') == 'pgsql') { - $weightexpr='sum(exp(-extract(epoch from (now() - fave.modified)) / %s))'; - } else { - $weightexpr='sum(exp(-(now() - fave.modified) / %s))'; - } + $weightexpr = common_sql_weight('fave.modified', common_config('popular', 'dropoff')); $qry = 'SELECT notice.*, '. $weightexpr . ' as weight ' . @@ -207,7 +203,7 @@ class FavoritedAction extends Action } $notice = Memcached_DataObject::cachedQuery('Notice', - sprintf($qry, common_config('popular', 'dropoff')), + $qry, 600); $nl = new NoticeList($notice, $this); diff --git a/actions/publictagcloud.php b/actions/publictagcloud.php index 5c7074029..b5b474f13 100644 --- a/actions/publictagcloud.php +++ b/actions/publictagcloud.php @@ -105,12 +105,8 @@ class PublictagcloudAction extends Action #Add the aggregated columns... $tags->selectAdd('max(notice_id) as last_notice_id'); - if(common_config('db','type')=='pgsql') { - $calc='sum(exp(-extract(epoch from (now()-created))/%s)) as weight'; - } else { - $calc='sum(exp(-(now() - created)/%s)) as weight'; - } - $tags->selectAdd(sprintf($calc, common_config('tag', 'dropoff'))); + $calc = common_sql_weight('created', common_config('tag', 'dropoff')); + $tags->selectAdd($calc . ' as weight'); $tags->groupBy('tag'); $tags->orderBy('weight DESC'); @@ -136,10 +132,11 @@ class PublictagcloudAction extends Action $this->elementStart('dd'); $this->elementStart('ul', 'tags xoxo tag-cloud'); foreach ($tw as $tag => $weight) { + common_log(LOG_DEBUG, "$weight/$sum"); if ($sum) { $weightedSum = $weight/$sum; } else { - $weightedSum = 1; + $weightedSum = 0.5; } $this->showTag($tag, $weight, $weightedSum); } diff --git a/lib/grouptagcloudsection.php b/lib/grouptagcloudsection.php index 091cf4845..14ceda085 100644 --- a/lib/grouptagcloudsection.php +++ b/lib/grouptagcloudsection.php @@ -58,11 +58,7 @@ class GroupTagCloudSection extends TagCloudSection function getTags() { - if (common_config('db', 'type') == 'pgsql') { - $weightexpr='sum(exp(-extract(epoch from (now() - notice_tag.created)) / %s))'; - } else { - $weightexpr='sum(exp(-(now() - notice_tag.created) / %s))'; - } + $weightexpr = common_sql_weight('notice_tag.created', common_config('tag', 'dropoff')); $names = $this->group->getAliases(); @@ -99,7 +95,6 @@ class GroupTagCloudSection extends TagCloudSection $tag = Memcached_DataObject::cachedQuery('Notice_tag', sprintf($qry, - common_config('tag', 'dropoff'), $this->group->id, $namestring), 3600); diff --git a/lib/personaltagcloudsection.php b/lib/personaltagcloudsection.php index 0b29d58ca..091425f92 100644 --- a/lib/personaltagcloudsection.php +++ b/lib/personaltagcloudsection.php @@ -58,13 +58,9 @@ class PersonalTagCloudSection extends TagCloudSection function getTags() { - if (common_config('db', 'type') == 'pgsql') { - $weightexpr='sum(exp(-extract(epoch from (now() - notice_tag.created)) / %s))'; - } else { - $weightexpr='sum(exp(-(now() - notice_tag.created) / %s))'; - } - - $qry = 'SELECT notice_tag.tag, '. + $weightexpr = common_sql_weight('notice_tag.created', common_config('tag', 'dropoff')); + + $qry = 'SELECT notice_tag.tag, '. $weightexpr . ' as weight ' . 'FROM notice_tag JOIN notice ' . 'ON notice_tag.notice_id = notice.id ' . @@ -83,7 +79,6 @@ class PersonalTagCloudSection extends TagCloudSection $tag = Memcached_DataObject::cachedQuery('Notice_tag', sprintf($qry, - common_config('tag', 'dropoff'), $this->user->id), 3600); return $tag; diff --git a/lib/popularnoticesection.php b/lib/popularnoticesection.php index 9fbc9d2dd..fbf9a60ab 100644 --- a/lib/popularnoticesection.php +++ b/lib/popularnoticesection.php @@ -48,17 +48,17 @@ class PopularNoticeSection extends NoticeSection { function getNotices() { + // @fixme there should be a common func for this if (common_config('db', 'type') == 'pgsql') { - $weightexpr='sum(exp(-extract(epoch from (now() - fave.modified)) / %s))'; if (!empty($this->out->tag)) { $tag = pg_escape_string($this->out->tag); } } else { - $weightexpr='sum(exp(-(now() - fave.modified) / %s))'; if (!empty($this->out->tag)) { $tag = mysql_escape_string($this->out->tag); } } + $weightexpr = common_sql_weight('fave.modified', common_config('popular', 'dropoff')); $qry = "SELECT notice.*, $weightexpr as weight "; if(isset($tag)) { $qry .= 'FROM notice_tag, notice JOIN fave ON notice.id = fave.notice_id ' . @@ -78,7 +78,7 @@ class PopularNoticeSection extends NoticeSection $qry .= ' LIMIT ' . $limit . ' OFFSET ' . $offset; $notice = Memcached_DataObject::cachedQuery('Notice', - sprintf($qry, common_config('popular', 'dropoff')), + $qry, 1200); return $notice; } diff --git a/lib/util.php b/lib/util.php index 63656b604..50bd0e2ac 100644 --- a/lib/util.php +++ b/lib/util.php @@ -908,6 +908,26 @@ function common_sql_date($datetime) return strftime('%Y-%m-%d %H:%M:%S', $datetime); } +/** + * Return an SQL fragment to calculate an age-based weight from a given + * timestamp or datetime column. + * + * @param string $column name of field we're comparing against current time + * @param integer $dropoff divisor for age in seconds before exponentiation + * @return string SQL fragment + */ +function common_sql_weight($column, $dropoff) +{ + if (common_config('db', 'type') == 'pgsql') { + // PostgreSQL doesn't support timestampdiff function. + // @fixme will this use the right time zone? + // @fixme does this handle cross-year subtraction correctly? + return "sum(exp(-extract(epoch from (now() - $column)) / $dropoff))"; + } else { + return "sum(exp(timestampdiff(second, utc_timestamp(), $column) / $dropoff))"; + } +} + function common_redirect($url, $code=307) { static $status = array(301 => "Moved Permanently", -- cgit v1.2.3-54-g00ecf