From 95159112b2331ee832a4cf1e711cb8f1f0193c44 Mon Sep 17 00:00:00 2001
From: Brion Vibber <brion@pobox.com>
Date: Tue, 25 May 2010 13:09:21 -0700
Subject: Hotpatch for infinite redirection-following loop seen processing URLs
 to http://clojure.org/ -- if we end up with an unstable redirect target
 (final item in a redirect chain ends up redirecting us somewhere else when we
 visit it again), just save the last version we saw instead of trying to start
 over.

Pretty much everything in File and File_redirection initial processing needs to be rewritten to be non-awful; this code is very hard to follow and very easy to make huge bugs. A fair amount of the complication is probably obsoleted by the redirection following being built into HTTPClient now.
---
 classes/File.php | 22 +++++++++++++++++++---
 1 file changed, 19 insertions(+), 3 deletions(-)

diff --git a/classes/File.php b/classes/File.php
index 33273bbdc..8297e5091 100644
--- a/classes/File.php
+++ b/classes/File.php
@@ -116,7 +116,11 @@ class File extends Memcached_DataObject
         return false;
     }
 
-    function processNew($given_url, $notice_id=null) {
+    /**
+     * @fixme refactor this mess, it's gotten pretty scary.
+     * @param bool $followRedirects
+     */
+    function processNew($given_url, $notice_id=null, $followRedirects=true) {
         if (empty($given_url)) return -1;   // error, no url to process
         $given_url = File_redirection::_canonUrl($given_url);
         if (empty($given_url)) return -1;   // error, no url to process
@@ -124,6 +128,10 @@ class File extends Memcached_DataObject
         if (empty($file)) {
             $file_redir = File_redirection::staticGet('url', $given_url);
             if (empty($file_redir)) {
+                // @fixme for new URLs this also looks up non-redirect data
+                // such as target content type, size, etc, which we need
+                // for File::saveNew(); so we call it even if not following
+                // new redirects.
                 $redir_data = File_redirection::where($given_url);
                 if (is_array($redir_data)) {
                     $redir_url = $redir_data['url'];
@@ -134,11 +142,19 @@ class File extends Memcached_DataObject
                     throw new ServerException("Can't process url '$given_url'");
                 }
                 // TODO: max field length
-                if ($redir_url === $given_url || strlen($redir_url) > 255) {
+                if ($redir_url === $given_url || strlen($redir_url) > 255 || !$followRedirects) {
                     $x = File::saveNew($redir_data, $given_url);
                     $file_id = $x->id;
                 } else {
-                    $x = File::processNew($redir_url, $notice_id);
+                    // This seems kind of messed up... for now skipping this part
+                    // if we're already under a redirect, so we don't go into
+                    // horrible infinite loops if we've been given an unstable
+                    // redirect (where the final destination of the first request
+                    // doesn't match what we get when we ask for it again).
+                    //
+                    // Seen in the wild with clojure.org, which redirects through
+                    // wikispaces for auth and appends session data in the URL params.
+                    $x = File::processNew($redir_url, $notice_id, /*followRedirects*/false);
                     $file_id = $x->id;
                     File_redirection::saveNew($redir_data, $file_id, $given_url);
                 }
-- 
cgit v1.2.3-54-g00ecf