1 files changed, 535 insertions, 0 deletions
diff --git a/testing/mesa/gnome-shell-shader-fix.patch b/testing/mesa/gnome-shell-shader-fix.patch
new file mode 100644
index 000000000..3b3f37591
--- /dev/null
+++ b/testing/mesa/gnome-shell-shader-fix.patch
@@ -0,0 +1,535 @@
+From 3f625689acd570e4f14cc2ebaa43a425d13954ff Mon Sep 17 00:00:00 2001
+From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
+Date: Thu, 31 Mar 2011 13:49:33 +0000
+Subject: nv50: copy regalloc fixes from nvc0
+
+Should fix gnome-shell's fade shader.
+
+Unification of the shader backend which is supposed to remove the
+code duplication is still WIP.
+---
+diff --git a/src/gallium/drivers/nv50/nv50_pc.h b/src/gallium/drivers/nv50/nv50_pc.h
+index e6f3815..a9a3248 100644
+--- a/src/gallium/drivers/nv50/nv50_pc.h
++++ b/src/gallium/drivers/nv50/nv50_pc.h
+@@ -228,6 +228,8 @@ struct nv_ref {
+    ubyte flags; /* not used yet */
+ };
+ 
++#define NV_REF_FLAG_REGALLOC_PRIV (1 << 0)
++
+ struct nv_basic_block;
+ 
+ struct nv_instruction {
+@@ -263,6 +265,15 @@ struct nv_instruction {
+    ubyte quadop;
+ };
+ 
++static INLINE int
++nvi_vector_size(struct nv_instruction *nvi)
++{
++   int i;
++   assert(nvi);
++   for (i = 0; i < 4 && nvi->def[i]; ++i);
++   return i;
++}
++
+ #define CFG_EDGE_FORWARD     0
+ #define CFG_EDGE_BACK        1
+ #define CFG_EDGE_LOOP_ENTER  2
+diff --git a/src/gallium/drivers/nv50/nv50_pc_regalloc.c b/src/gallium/drivers/nv50/nv50_pc_regalloc.c
+index 39ae366..657df2c 100644
+--- a/src/gallium/drivers/nv50/nv50_pc_regalloc.c
++++ b/src/gallium/drivers/nv50/nv50_pc_regalloc.c
+@@ -32,14 +32,39 @@
+ #include "util/u_simple_list.h"
+ 
+ #define NUM_REGISTER_FILES 4
++#define MAX_REGISTER_COUNT 256
+ 
+ struct register_set {
+    struct nv_pc *pc;
+ 
+    uint32_t last[NUM_REGISTER_FILES];
+-   uint32_t bits[NUM_REGISTER_FILES][8];
++   uint32_t bits[NUM_REGISTER_FILES][(MAX_REGISTER_COUNT + 31) / 32];
+ };
+ 
++/* using OR because a set bit means occupied/unavailable, aliasing is allowed */
++static void
++intersect_register_sets(struct register_set *dst,
++                        struct register_set *src1, struct register_set *src2)
++{
++   int i, j;
++
++   for (i = 0; i < NUM_REGISTER_FILES; ++i) {
++      for (j = 0; j < (MAX_REGISTER_COUNT + 31) / 32; ++j)
++         dst->bits[i][j] = src1->bits[i][j] | src2->bits[i][j];
++   }
++}
++
++static void
++mask_register_set(struct register_set *set, uint32_t mask, uint32_t umask)
++{
++   int i, j;
++
++   for (i = 0; i < NUM_REGISTER_FILES; ++i) {
++      for (j = 0; j < (MAX_REGISTER_COUNT + 31) / 32; ++j)
++         set->bits[i][j] = (set->bits[i][j] | mask) & umask;
++   }
++}
++
+ struct nv_pc_pass {
+    struct nv_pc *pc;
+ 
+@@ -61,11 +86,15 @@ ranges_coalesce(struct nv_range *range)
+    }
+ }
+ 
++/* @return: TRUE if @new_range can be freed (i.e. was not reused) */
+ static boolean
+ add_range_ex(struct nv_value *val, int bgn, int end, struct nv_range *new_range)
+ {
+    struct nv_range *range, **nextp = &val->livei;
+ 
++   if (bgn == end) /* [a, a) is invalid / empty */
++      return TRUE;
++
+    for (range = val->livei; range; range = range->next) {
+       if (end < range->bgn)
+          break; /* insert before */
+@@ -251,6 +280,8 @@ reg_occupy(struct register_set *set, struct nv_value *val)
+    id <<= s;
+    m = (1 << (1 << s)) - 1;
+ 
++   assert(s >= 0); /* XXX: remove me */
++
+    set->bits[f][id / 32] |= m << (id % 32);
+ 
+    if (set->pc->max_reg[f] < id)
+@@ -286,15 +317,12 @@ join_allowed(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b)
+    if (a->join->reg.id == b->join->reg.id)
+       return TRUE;
+ 
+-#if 1
+    /* either a or b or both have been assigned */
+ 
+    if (a->join->reg.id >= 0 && b->join->reg.id >= 0)
+       return FALSE;
+    else
+    if (b->join->reg.id >= 0) {
+-      if (a->join->reg.id >= 0)
+-         return FALSE;
+       val = a;
+       a = b;
+       b = val;
+@@ -309,8 +337,6 @@ join_allowed(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b)
+          return FALSE;
+    }
+    return TRUE;
+-#endif
+-   return FALSE;
+ }
+ 
+ static INLINE void
+@@ -336,14 +362,14 @@ do_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b)
+    assert(b->join == a->join);
+ }
+ 
+-static INLINE void
++static INLINE boolean
+ try_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b)
+ {
+    if (!join_allowed(ctx, a, b)) {
+ #ifdef NV50_RA_DEBUG_JOIN
+       debug_printf("cannot join %i to %i: not allowed\n", b->n, a->n);
+ #endif
+-      return;
++      return FALSE;
+    }
+    if (livei_have_overlap(a->join, b->join)) {
+ #ifdef NV50_RA_DEBUG_JOIN
+@@ -351,10 +377,27 @@ try_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b)
+       livei_print(a);
+       livei_print(b);
+ #endif
+-      return;
++      return FALSE;
+    }
+ 
+    do_join_values(ctx, a, b);
++
++   return TRUE;
++}
++
++static void
++join_values_nofail(struct nv_pc_pass *ctx,
++                   struct nv_value *a, struct nv_value *b, boolean type_only)
++{
++   if (type_only) {
++      assert(join_allowed(ctx, a, b));
++      do_join_values(ctx, a, b);
++   } else {
++      boolean ok = try_join_values(ctx, a, b);
++      if (!ok) {
++         NOUVEAU_ERR("failed to coalesce values\n");
++      }
++   }
+ }
+ 
+ static INLINE boolean
+@@ -369,20 +412,32 @@ need_new_else_block(struct nv_basic_block *b, struct nv_basic_block *p)
+    return (b->num_in > 1) && (n == 2);
+ }
+ 
++/* Look for the @phi's operand whose definition reaches @b. */
+ static int
+ phi_opnd_for_bb(struct nv_instruction *phi, struct nv_basic_block *b,
+                 struct nv_basic_block *tb)
+ {
++   struct nv_ref *srci, *srcj;
+    int i, j;
+ 
+-   for (j = -1, i = 0; i < 4 && phi->src[i]; ++i) {
+-      if (!nvbb_reachable_by(b, phi->src[i]->value->insn->bb, tb))
++   for (j = -1, i = 0; i < 6 && phi->src[i]; ++i) {
++      srci = phi->src[i];
++      /* if already replaced, check with original source first */
++      if (srci->flags & NV_REF_FLAG_REGALLOC_PRIV)
++         srci = srci->value->insn->src[0];
++      if (!nvbb_reachable_by(b, srci->value->insn->bb, NULL))
+          continue;
+       /* NOTE: back-edges are ignored by the reachable-by check */
+-      if (j < 0 || !nvbb_reachable_by(phi->src[j]->value->insn->bb,
+-                                      phi->src[i]->value->insn->bb, tb))
++      if (j < 0 || !nvbb_reachable_by(srcj->value->insn->bb,
++                                      srci->value->insn->bb, NULL)) {
+          j = i;
++         srcj = srci;
++      }
+    }
++   if (j >= 0 && nvbb_reachable_by(b, phi->def[0]->insn->bb, NULL))
++      if (!nvbb_reachable_by(srcj->value->insn->bb,
++                             phi->def[0]->insn->bb, NULL))
++         j = -1;
+    return j;
+ }
+ 
+@@ -429,16 +484,21 @@ pass_generate_phi_movs(struct nv_pc_pass *ctx, struct nv_basic_block *b)
+       ctx->pc->current_block = pn;
+ 
+       for (i = b->phi; i && i->opcode == NV_OP_PHI; i = i->next) {
+-         if ((j = phi_opnd_for_bb(i, p, b)) < 0)
+-            continue;
+-         val = i->src[j]->value;
+-
+-         if (i->src[j]->flags) {
+-            val = val->insn->src[0]->value;
+-            while (j < 4 && i->src[j])
+-               ++j;
+-            assert(j < 4);
++         j = phi_opnd_for_bb(i, p, b);
++
++         if (j < 0) {
++            val = i->def[0];
++         } else {
++            val = i->src[j]->value;
++            if (i->src[j]->flags & NV_REF_FLAG_REGALLOC_PRIV) {
++               j = -1;
++               /* use original value, we already encountered & replaced it */
++               val = val->insn->src[0]->value;
++            }
+          }
++         if (j < 0) /* need an additional source ? */
++            for (j = 0; j < 5 && i->src[j] && i->src[j]->value != val; ++j);
++         assert(j < 5);
+ 
+          ni = new_instruction(ctx->pc, NV_OP_MOV);
+ 
+@@ -452,7 +512,7 @@ pass_generate_phi_movs(struct nv_pc_pass *ctx, struct nv_basic_block *b)
+ 
+          nv_reference(ctx->pc, &i->src[j], ni->def[0]);
+ 
+-         i->src[j]->flags = 1;
++         i->src[j]->flags |= NV_REF_FLAG_REGALLOC_PRIV;
+       }
+ 
+       if (pn != p && pn->exit) {
+@@ -470,45 +530,50 @@ pass_generate_phi_movs(struct nv_pc_pass *ctx, struct nv_basic_block *b)
+    return 0;
+ }
+ 
++#define JOIN_MASK_PHI    (1 << 0)
++#define JOIN_MASK_SELECT (1 << 1)
++#define JOIN_MASK_MOV    (1 << 2)
++#define JOIN_MASK_TEX    (1 << 3)
++
+ static int
+-pass_join_values(struct nv_pc_pass *ctx, int iter)
++pass_join_values(struct nv_pc_pass *ctx, unsigned mask)
+ {
+    int c, n;
+ 
+    for (n = 0; n < ctx->num_insns; ++n) {
+-      struct nv_instruction *i = ctx->insns[n];
++      struct nv_instruction *nvi, *i = ctx->insns[n];
+ 
+       switch (i->opcode) {
+       case NV_OP_PHI:
+-         if (iter != 2)
++         if (!(mask & JOIN_MASK_PHI))
+             break;
+-         for (c = 0; c < 4 && i->src[c]; ++c)
+-            try_join_values(ctx, i->def[0], i->src[c]->value);
++         for (c = 0; c < 5 && i->src[c]; ++c)
++            join_values_nofail(ctx, i->def[0], i->src[c]->value, FALSE);
+          break;
+       case NV_OP_MOV:
+-         if ((iter == 2) && i->src[0]->value->insn &&
+-             !nv_is_vector_op(i->src[0]->value->join->insn->opcode))
++         if (!(mask & JOIN_MASK_MOV))
++            break;
++         nvi = i->src[0]->value->join->insn;
++         if (nvi && !nv_is_vector_op(nvi->opcode))
+             try_join_values(ctx, i->def[0], i->src[0]->value);
+          break;
+       case NV_OP_SELECT:
+-         if (iter != 1)
++         if (!(mask & JOIN_MASK_SELECT))
+             break;
+-         for (c = 0; c < 4 && i->src[c]; ++c) {
+-            assert(join_allowed(ctx, i->def[0], i->src[c]->value));
+-            do_join_values(ctx, i->def[0], i->src[c]->value);
+-         }
++         for (c = 0; c < 5 && i->src[c]; ++c)
++            join_values_nofail(ctx, i->def[0], i->src[c]->value, TRUE);
+          break;
+       case NV_OP_TEX:
+       case NV_OP_TXB:
+       case NV_OP_TXL:
+       case NV_OP_TXQ:
+-         if (iter)
++         if (!(mask & JOIN_MASK_TEX))
+             break;
+-         for (c = 0; c < 4; ++c) {
+-            if (!i->src[c])
+-               break;
+-            do_join_values(ctx, i->def[c], i->src[c]->value);
+-         }
++         /* This should work without conflicts because we always generate
++          * extra MOVs for the sources of a TEX.
++          */
++         for (c = 0; c < 4 && i->src[c]; ++c)
++            join_values_nofail(ctx, i->def[c], i->src[c]->value, TRUE);
+          break;
+       default:
+          break;
+@@ -643,15 +708,15 @@ static void collect_live_values(struct nv_basic_block *b, const int n)
+ {
+    int i;
+ 
+-   if (b->out[0]) {
+-      if (b->out[1]) { /* what to do about back-edges ? */
++   if (b->out[0] && b->out_kind[0] != CFG_EDGE_FAKE) {
++      if (b->out[1] && b->out_kind[1] != CFG_EDGE_FAKE) {
+          for (i = 0; i < n; ++i)
+             b->live_set[i] = b->out[0]->live_set[i] | b->out[1]->live_set[i];
+       } else {
+          memcpy(b->live_set, b->out[0]->live_set, n * sizeof(uint32_t));
+       }
+    } else
+-   if (b->out[1]) {
++   if (b->out[1] && b->out_kind[1] != CFG_EDGE_FAKE) {
+       memcpy(b->live_set, b->out[1]->live_set, n * sizeof(uint32_t));
+    } else {
+       memset(b->live_set, 0, n * sizeof(uint32_t));
+@@ -770,8 +835,8 @@ insert_ordered_tail(struct nv_value *list, struct nv_value *nval)
+    struct nv_value *elem;
+ 
+    for (elem = list->prev;
+-	elem != list && elem->livei->bgn > nval->livei->bgn;
+-	elem = elem->prev);
++        elem != list && elem->livei->bgn > nval->livei->bgn;
++        elem = elem->prev);
+    /* now elem begins before or at the same time as val */
+ 
+    nval->prev = elem;
+@@ -780,44 +845,49 @@ insert_ordered_tail(struct nv_value *list, struct nv_value *nval)
+    elem->next = nval;
+ }
+ 
+-static int
+-pass_linear_scan(struct nv_pc_pass *ctx, int iter)
++static void
++collect_register_values(struct nv_pc_pass *ctx, struct nv_value *head,
++                        boolean assigned_only)
+ {
+-   struct nv_instruction *i;
+-   struct register_set f, free;
++   struct nv_value *val;
+    int k, n;
+-   struct nv_value *cur, *val, *tmp[2];
+-   struct nv_value active, inactive, handled, unhandled;
+ 
+-   make_empty_list(&active);
+-   make_empty_list(&inactive);
+-   make_empty_list(&handled);
+-   make_empty_list(&unhandled);
+-
+-   nv50_ctor_register_set(ctx->pc, &free);
++   make_empty_list(head);
+ 
+-   /* joined values should have range = NULL and thus not be added;
+-    * also, fixed memory values won't be added because they're not
+-    * def'd, just used
+-    */
+    for (n = 0; n < ctx->num_insns; ++n) {
+-      i = ctx->insns[n];
++      struct nv_instruction *i = ctx->insns[n];
+ 
++      /* for joined values, only the representative will have livei != NULL */
+       for (k = 0; k < 4; ++k) {
+          if (i->def[k] && i->def[k]->livei)
+-            insert_ordered_tail(&unhandled, i->def[k]);
+-         else
+-         if (0 && i->def[k])
+-            debug_printf("skipping def'd value %i: no livei\n", i->def[k]->n);
++            if (!assigned_only || i->def[k]->reg.id >= 0)
++               insert_ordered_tail(head, i->def[k]);
+       }
+       if (i->flags_def && i->flags_def->livei)
+-         insert_ordered_tail(&unhandled, i->flags_def);
++         if (!assigned_only || i->flags_def->reg.id >= 0)
++            insert_ordered_tail(head, i->flags_def);
+    }
+ 
+-   for (val = unhandled.next; val != unhandled.prev; val = val->next) {
++   for (val = head->next; val != head->prev; val = val->next) {
+       assert(val->join == val);
+       assert(val->livei->bgn <= val->next->livei->bgn);
+    }
++}
++
++static int
++pass_linear_scan(struct nv_pc_pass *ctx, int iter)
++{
++   struct register_set f, free;
++   struct nv_value *cur, *val, *tmp[2];
++   struct nv_value active, inactive, handled, unhandled;
++
++   make_empty_list(&active);
++   make_empty_list(&inactive);
++   make_empty_list(&handled);
++
++   nv50_ctor_register_set(ctx->pc, &free);
++
++   collect_register_values(ctx, &unhandled, FALSE);
+ 
+    foreach_s(cur, tmp[0], &unhandled) {
+       remove_from_list(cur);
+@@ -854,13 +924,7 @@ pass_linear_scan(struct nv_pc_pass *ctx, int iter)
+             reg_occupy(&f, val);
+ 
+       if (cur->reg.id < 0) {
+-         boolean mem = FALSE;
+-
+-         if (nv_is_vector_op(cur->insn->opcode))
+-            mem = !reg_assign(&f, &cur->insn->def[0], 4);
+-         else
+-         if (iter)
+-            mem = !reg_assign(&f, &cur, 1);
++         boolean mem = !reg_assign(&f, &cur, 1);
+ 
+          if (mem) {
+             NOUVEAU_ERR("out of registers\n");
+@@ -874,6 +938,67 @@ pass_linear_scan(struct nv_pc_pass *ctx, int iter)
+    return 0;
+ }
+ 
++/* Allocate values defined by instructions such as TEX, which have to be
++ * assigned to consecutive registers.
++ * Linear scan doesn't really work here since the values can have different
++ * live intervals.
++ */
++static int
++pass_allocate_constrained_values(struct nv_pc_pass *ctx)
++{
++   struct nv_value regvals, *val;
++   struct nv_instruction *i;
++   struct nv_value *defs[4];
++   struct register_set regs[4];
++   int n, vsize, c;
++   uint32_t mask;
++   boolean mem;
++
++   collect_register_values(ctx, &regvals, TRUE);
++
++   for (n = 0; n < ctx->num_insns; ++n) {
++      i = ctx->insns[n];
++      vsize = nvi_vector_size(i);
++      if (!(vsize > 1))
++         continue;
++      assert(vsize <= 4);
++      for (c = 0; c < vsize; ++c)
++         defs[c] = i->def[c]->join;
++
++      if (defs[0]->reg.id >= 0) {
++         for (c = 1; c < vsize; ++c)
++            assert(defs[c]->reg.id >= 0);
++         continue;
++      }
++
++      for (c = 0; c < vsize; ++c) {
++         nv50_ctor_register_set(ctx->pc, &regs[c]);
++
++         foreach(val, &regvals) {
++            if (val->reg.id >= 0 && livei_have_overlap(val, defs[c]))
++               reg_occupy(&regs[c], val);
++         }
++         mask = 0x11111111;
++         if (vsize == 2) /* granularity is 2 and not 4 */
++            mask |= 0x11111111 << 2;
++         mask_register_set(&regs[c], 0, mask << c);
++
++         if (defs[c]->livei)
++            insert_ordered_tail(&regvals, defs[c]);
++      }
++      for (c = 1; c < vsize; ++c)
++         intersect_register_sets(&regs[0], &regs[0], &regs[c]);
++
++      mem = !reg_assign(&regs[0], &defs[0], vsize);
++
++      if (mem) {
++         NOUVEAU_ERR("out of registers\n");
++         abort();
++      }
++   }
++   return 0;
++}
++
+ static int
+ nv_pc_pass1(struct nv_pc *pc, struct nv_basic_block *root)
+ {
+@@ -923,16 +1048,16 @@ nv_pc_pass1(struct nv_pc *pc, struct nv_basic_block *root)
+       livei_print(&pc->values[i]);
+ #endif
+ 
+-   ret = pass_join_values(ctx, 0);
++   ret = pass_join_values(ctx, JOIN_MASK_PHI);
+    if (ret)
+       goto out;
+-   ret = pass_linear_scan(ctx, 0);
++   ret = pass_join_values(ctx, JOIN_MASK_SELECT | JOIN_MASK_TEX);
+    if (ret)
+       goto out;
+-   ret = pass_join_values(ctx, 1);
++   ret = pass_join_values(ctx, JOIN_MASK_MOV);
+    if (ret)
+       goto out;
+-   ret = pass_join_values(ctx, 2);
++   ret = pass_allocate_constrained_values(ctx);
+    if (ret)
+       goto out;
+    ret = pass_linear_scan(ctx, 1);
+--
+cgit v0.8.3-6-g21f6