summaryrefslogtreecommitdiff
path: root/testing/mesa/gnome-shell-shader-fix.patch
diff options
context:
space:
mode:
Diffstat (limited to 'testing/mesa/gnome-shell-shader-fix.patch')
-rw-r--r--testing/mesa/gnome-shell-shader-fix.patch535
1 files changed, 0 insertions, 535 deletions
diff --git a/testing/mesa/gnome-shell-shader-fix.patch b/testing/mesa/gnome-shell-shader-fix.patch
deleted file mode 100644
index 3b3f37591..000000000
--- a/testing/mesa/gnome-shell-shader-fix.patch
+++ /dev/null
@@ -1,535 +0,0 @@
-From 3f625689acd570e4f14cc2ebaa43a425d13954ff Mon Sep 17 00:00:00 2001
-From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
-Date: Thu, 31 Mar 2011 13:49:33 +0000
-Subject: nv50: copy regalloc fixes from nvc0
-
-Should fix gnome-shell's fade shader.
-
-Unification of the shader backend which is supposed to remove the
-code duplication is still WIP.
----
-diff --git a/src/gallium/drivers/nv50/nv50_pc.h b/src/gallium/drivers/nv50/nv50_pc.h
-index e6f3815..a9a3248 100644
---- a/src/gallium/drivers/nv50/nv50_pc.h
-+++ b/src/gallium/drivers/nv50/nv50_pc.h
-@@ -228,6 +228,8 @@ struct nv_ref {
- ubyte flags; /* not used yet */
- };
-
-+#define NV_REF_FLAG_REGALLOC_PRIV (1 << 0)
-+
- struct nv_basic_block;
-
- struct nv_instruction {
-@@ -263,6 +265,15 @@ struct nv_instruction {
- ubyte quadop;
- };
-
-+static INLINE int
-+nvi_vector_size(struct nv_instruction *nvi)
-+{
-+ int i;
-+ assert(nvi);
-+ for (i = 0; i < 4 && nvi->def[i]; ++i);
-+ return i;
-+}
-+
- #define CFG_EDGE_FORWARD 0
- #define CFG_EDGE_BACK 1
- #define CFG_EDGE_LOOP_ENTER 2
-diff --git a/src/gallium/drivers/nv50/nv50_pc_regalloc.c b/src/gallium/drivers/nv50/nv50_pc_regalloc.c
-index 39ae366..657df2c 100644
---- a/src/gallium/drivers/nv50/nv50_pc_regalloc.c
-+++ b/src/gallium/drivers/nv50/nv50_pc_regalloc.c
-@@ -32,14 +32,39 @@
- #include "util/u_simple_list.h"
-
- #define NUM_REGISTER_FILES 4
-+#define MAX_REGISTER_COUNT 256
-
- struct register_set {
- struct nv_pc *pc;
-
- uint32_t last[NUM_REGISTER_FILES];
-- uint32_t bits[NUM_REGISTER_FILES][8];
-+ uint32_t bits[NUM_REGISTER_FILES][(MAX_REGISTER_COUNT + 31) / 32];
- };
-
-+/* using OR because a set bit means occupied/unavailable, aliasing is allowed */
-+static void
-+intersect_register_sets(struct register_set *dst,
-+ struct register_set *src1, struct register_set *src2)
-+{
-+ int i, j;
-+
-+ for (i = 0; i < NUM_REGISTER_FILES; ++i) {
-+ for (j = 0; j < (MAX_REGISTER_COUNT + 31) / 32; ++j)
-+ dst->bits[i][j] = src1->bits[i][j] | src2->bits[i][j];
-+ }
-+}
-+
-+static void
-+mask_register_set(struct register_set *set, uint32_t mask, uint32_t umask)
-+{
-+ int i, j;
-+
-+ for (i = 0; i < NUM_REGISTER_FILES; ++i) {
-+ for (j = 0; j < (MAX_REGISTER_COUNT + 31) / 32; ++j)
-+ set->bits[i][j] = (set->bits[i][j] | mask) & umask;
-+ }
-+}
-+
- struct nv_pc_pass {
- struct nv_pc *pc;
-
-@@ -61,11 +86,15 @@ ranges_coalesce(struct nv_range *range)
- }
- }
-
-+/* @return: TRUE if @new_range can be freed (i.e. was not reused) */
- static boolean
- add_range_ex(struct nv_value *val, int bgn, int end, struct nv_range *new_range)
- {
- struct nv_range *range, **nextp = &val->livei;
-
-+ if (bgn == end) /* [a, a) is invalid / empty */
-+ return TRUE;
-+
- for (range = val->livei; range; range = range->next) {
- if (end < range->bgn)
- break; /* insert before */
-@@ -251,6 +280,8 @@ reg_occupy(struct register_set *set, struct nv_value *val)
- id <<= s;
- m = (1 << (1 << s)) - 1;
-
-+ assert(s >= 0); /* XXX: remove me */
-+
- set->bits[f][id / 32] |= m << (id % 32);
-
- if (set->pc->max_reg[f] < id)
-@@ -286,15 +317,12 @@ join_allowed(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b)
- if (a->join->reg.id == b->join->reg.id)
- return TRUE;
-
--#if 1
- /* either a or b or both have been assigned */
-
- if (a->join->reg.id >= 0 && b->join->reg.id >= 0)
- return FALSE;
- else
- if (b->join->reg.id >= 0) {
-- if (a->join->reg.id >= 0)
-- return FALSE;
- val = a;
- a = b;
- b = val;
-@@ -309,8 +337,6 @@ join_allowed(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b)
- return FALSE;
- }
- return TRUE;
--#endif
-- return FALSE;
- }
-
- static INLINE void
-@@ -336,14 +362,14 @@ do_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b)
- assert(b->join == a->join);
- }
-
--static INLINE void
-+static INLINE boolean
- try_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b)
- {
- if (!join_allowed(ctx, a, b)) {
- #ifdef NV50_RA_DEBUG_JOIN
- debug_printf("cannot join %i to %i: not allowed\n", b->n, a->n);
- #endif
-- return;
-+ return FALSE;
- }
- if (livei_have_overlap(a->join, b->join)) {
- #ifdef NV50_RA_DEBUG_JOIN
-@@ -351,10 +377,27 @@ try_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b)
- livei_print(a);
- livei_print(b);
- #endif
-- return;
-+ return FALSE;
- }
-
- do_join_values(ctx, a, b);
-+
-+ return TRUE;
-+}
-+
-+static void
-+join_values_nofail(struct nv_pc_pass *ctx,
-+ struct nv_value *a, struct nv_value *b, boolean type_only)
-+{
-+ if (type_only) {
-+ assert(join_allowed(ctx, a, b));
-+ do_join_values(ctx, a, b);
-+ } else {
-+ boolean ok = try_join_values(ctx, a, b);
-+ if (!ok) {
-+ NOUVEAU_ERR("failed to coalesce values\n");
-+ }
-+ }
- }
-
- static INLINE boolean
-@@ -369,20 +412,32 @@ need_new_else_block(struct nv_basic_block *b, struct nv_basic_block *p)
- return (b->num_in > 1) && (n == 2);
- }
-
-+/* Look for the @phi's operand whose definition reaches @b. */
- static int
- phi_opnd_for_bb(struct nv_instruction *phi, struct nv_basic_block *b,
- struct nv_basic_block *tb)
- {
-+ struct nv_ref *srci, *srcj;
- int i, j;
-
-- for (j = -1, i = 0; i < 4 && phi->src[i]; ++i) {
-- if (!nvbb_reachable_by(b, phi->src[i]->value->insn->bb, tb))
-+ for (j = -1, i = 0; i < 6 && phi->src[i]; ++i) {
-+ srci = phi->src[i];
-+ /* if already replaced, check with original source first */
-+ if (srci->flags & NV_REF_FLAG_REGALLOC_PRIV)
-+ srci = srci->value->insn->src[0];
-+ if (!nvbb_reachable_by(b, srci->value->insn->bb, NULL))
- continue;
- /* NOTE: back-edges are ignored by the reachable-by check */
-- if (j < 0 || !nvbb_reachable_by(phi->src[j]->value->insn->bb,
-- phi->src[i]->value->insn->bb, tb))
-+ if (j < 0 || !nvbb_reachable_by(srcj->value->insn->bb,
-+ srci->value->insn->bb, NULL)) {
- j = i;
-+ srcj = srci;
-+ }
- }
-+ if (j >= 0 && nvbb_reachable_by(b, phi->def[0]->insn->bb, NULL))
-+ if (!nvbb_reachable_by(srcj->value->insn->bb,
-+ phi->def[0]->insn->bb, NULL))
-+ j = -1;
- return j;
- }
-
-@@ -429,16 +484,21 @@ pass_generate_phi_movs(struct nv_pc_pass *ctx, struct nv_basic_block *b)
- ctx->pc->current_block = pn;
-
- for (i = b->phi; i && i->opcode == NV_OP_PHI; i = i->next) {
-- if ((j = phi_opnd_for_bb(i, p, b)) < 0)
-- continue;
-- val = i->src[j]->value;
--
-- if (i->src[j]->flags) {
-- val = val->insn->src[0]->value;
-- while (j < 4 && i->src[j])
-- ++j;
-- assert(j < 4);
-+ j = phi_opnd_for_bb(i, p, b);
-+
-+ if (j < 0) {
-+ val = i->def[0];
-+ } else {
-+ val = i->src[j]->value;
-+ if (i->src[j]->flags & NV_REF_FLAG_REGALLOC_PRIV) {
-+ j = -1;
-+ /* use original value, we already encountered & replaced it */
-+ val = val->insn->src[0]->value;
-+ }
- }
-+ if (j < 0) /* need an additional source ? */
-+ for (j = 0; j < 5 && i->src[j] && i->src[j]->value != val; ++j);
-+ assert(j < 5);
-
- ni = new_instruction(ctx->pc, NV_OP_MOV);
-
-@@ -452,7 +512,7 @@ pass_generate_phi_movs(struct nv_pc_pass *ctx, struct nv_basic_block *b)
-
- nv_reference(ctx->pc, &i->src[j], ni->def[0]);
-
-- i->src[j]->flags = 1;
-+ i->src[j]->flags |= NV_REF_FLAG_REGALLOC_PRIV;
- }
-
- if (pn != p && pn->exit) {
-@@ -470,45 +530,50 @@ pass_generate_phi_movs(struct nv_pc_pass *ctx, struct nv_basic_block *b)
- return 0;
- }
-
-+#define JOIN_MASK_PHI (1 << 0)
-+#define JOIN_MASK_SELECT (1 << 1)
-+#define JOIN_MASK_MOV (1 << 2)
-+#define JOIN_MASK_TEX (1 << 3)
-+
- static int
--pass_join_values(struct nv_pc_pass *ctx, int iter)
-+pass_join_values(struct nv_pc_pass *ctx, unsigned mask)
- {
- int c, n;
-
- for (n = 0; n < ctx->num_insns; ++n) {
-- struct nv_instruction *i = ctx->insns[n];
-+ struct nv_instruction *nvi, *i = ctx->insns[n];
-
- switch (i->opcode) {
- case NV_OP_PHI:
-- if (iter != 2)
-+ if (!(mask & JOIN_MASK_PHI))
- break;
-- for (c = 0; c < 4 && i->src[c]; ++c)
-- try_join_values(ctx, i->def[0], i->src[c]->value);
-+ for (c = 0; c < 5 && i->src[c]; ++c)
-+ join_values_nofail(ctx, i->def[0], i->src[c]->value, FALSE);
- break;
- case NV_OP_MOV:
-- if ((iter == 2) && i->src[0]->value->insn &&
-- !nv_is_vector_op(i->src[0]->value->join->insn->opcode))
-+ if (!(mask & JOIN_MASK_MOV))
-+ break;
-+ nvi = i->src[0]->value->join->insn;
-+ if (nvi && !nv_is_vector_op(nvi->opcode))
- try_join_values(ctx, i->def[0], i->src[0]->value);
- break;
- case NV_OP_SELECT:
-- if (iter != 1)
-+ if (!(mask & JOIN_MASK_SELECT))
- break;
-- for (c = 0; c < 4 && i->src[c]; ++c) {
-- assert(join_allowed(ctx, i->def[0], i->src[c]->value));
-- do_join_values(ctx, i->def[0], i->src[c]->value);
-- }
-+ for (c = 0; c < 5 && i->src[c]; ++c)
-+ join_values_nofail(ctx, i->def[0], i->src[c]->value, TRUE);
- break;
- case NV_OP_TEX:
- case NV_OP_TXB:
- case NV_OP_TXL:
- case NV_OP_TXQ:
-- if (iter)
-+ if (!(mask & JOIN_MASK_TEX))
- break;
-- for (c = 0; c < 4; ++c) {
-- if (!i->src[c])
-- break;
-- do_join_values(ctx, i->def[c], i->src[c]->value);
-- }
-+ /* This should work without conflicts because we always generate
-+ * extra MOVs for the sources of a TEX.
-+ */
-+ for (c = 0; c < 4 && i->src[c]; ++c)
-+ join_values_nofail(ctx, i->def[c], i->src[c]->value, TRUE);
- break;
- default:
- break;
-@@ -643,15 +708,15 @@ static void collect_live_values(struct nv_basic_block *b, const int n)
- {
- int i;
-
-- if (b->out[0]) {
-- if (b->out[1]) { /* what to do about back-edges ? */
-+ if (b->out[0] && b->out_kind[0] != CFG_EDGE_FAKE) {
-+ if (b->out[1] && b->out_kind[1] != CFG_EDGE_FAKE) {
- for (i = 0; i < n; ++i)
- b->live_set[i] = b->out[0]->live_set[i] | b->out[1]->live_set[i];
- } else {
- memcpy(b->live_set, b->out[0]->live_set, n * sizeof(uint32_t));
- }
- } else
-- if (b->out[1]) {
-+ if (b->out[1] && b->out_kind[1] != CFG_EDGE_FAKE) {
- memcpy(b->live_set, b->out[1]->live_set, n * sizeof(uint32_t));
- } else {
- memset(b->live_set, 0, n * sizeof(uint32_t));
-@@ -770,8 +835,8 @@ insert_ordered_tail(struct nv_value *list, struct nv_value *nval)
- struct nv_value *elem;
-
- for (elem = list->prev;
-- elem != list && elem->livei->bgn > nval->livei->bgn;
-- elem = elem->prev);
-+ elem != list && elem->livei->bgn > nval->livei->bgn;
-+ elem = elem->prev);
- /* now elem begins before or at the same time as val */
-
- nval->prev = elem;
-@@ -780,44 +845,49 @@ insert_ordered_tail(struct nv_value *list, struct nv_value *nval)
- elem->next = nval;
- }
-
--static int
--pass_linear_scan(struct nv_pc_pass *ctx, int iter)
-+static void
-+collect_register_values(struct nv_pc_pass *ctx, struct nv_value *head,
-+ boolean assigned_only)
- {
-- struct nv_instruction *i;
-- struct register_set f, free;
-+ struct nv_value *val;
- int k, n;
-- struct nv_value *cur, *val, *tmp[2];
-- struct nv_value active, inactive, handled, unhandled;
-
-- make_empty_list(&active);
-- make_empty_list(&inactive);
-- make_empty_list(&handled);
-- make_empty_list(&unhandled);
--
-- nv50_ctor_register_set(ctx->pc, &free);
-+ make_empty_list(head);
-
-- /* joined values should have range = NULL and thus not be added;
-- * also, fixed memory values won't be added because they're not
-- * def'd, just used
-- */
- for (n = 0; n < ctx->num_insns; ++n) {
-- i = ctx->insns[n];
-+ struct nv_instruction *i = ctx->insns[n];
-
-+ /* for joined values, only the representative will have livei != NULL */
- for (k = 0; k < 4; ++k) {
- if (i->def[k] && i->def[k]->livei)
-- insert_ordered_tail(&unhandled, i->def[k]);
-- else
-- if (0 && i->def[k])
-- debug_printf("skipping def'd value %i: no livei\n", i->def[k]->n);
-+ if (!assigned_only || i->def[k]->reg.id >= 0)
-+ insert_ordered_tail(head, i->def[k]);
- }
- if (i->flags_def && i->flags_def->livei)
-- insert_ordered_tail(&unhandled, i->flags_def);
-+ if (!assigned_only || i->flags_def->reg.id >= 0)
-+ insert_ordered_tail(head, i->flags_def);
- }
-
-- for (val = unhandled.next; val != unhandled.prev; val = val->next) {
-+ for (val = head->next; val != head->prev; val = val->next) {
- assert(val->join == val);
- assert(val->livei->bgn <= val->next->livei->bgn);
- }
-+}
-+
-+static int
-+pass_linear_scan(struct nv_pc_pass *ctx, int iter)
-+{
-+ struct register_set f, free;
-+ struct nv_value *cur, *val, *tmp[2];
-+ struct nv_value active, inactive, handled, unhandled;
-+
-+ make_empty_list(&active);
-+ make_empty_list(&inactive);
-+ make_empty_list(&handled);
-+
-+ nv50_ctor_register_set(ctx->pc, &free);
-+
-+ collect_register_values(ctx, &unhandled, FALSE);
-
- foreach_s(cur, tmp[0], &unhandled) {
- remove_from_list(cur);
-@@ -854,13 +924,7 @@ pass_linear_scan(struct nv_pc_pass *ctx, int iter)
- reg_occupy(&f, val);
-
- if (cur->reg.id < 0) {
-- boolean mem = FALSE;
--
-- if (nv_is_vector_op(cur->insn->opcode))
-- mem = !reg_assign(&f, &cur->insn->def[0], 4);
-- else
-- if (iter)
-- mem = !reg_assign(&f, &cur, 1);
-+ boolean mem = !reg_assign(&f, &cur, 1);
-
- if (mem) {
- NOUVEAU_ERR("out of registers\n");
-@@ -874,6 +938,67 @@ pass_linear_scan(struct nv_pc_pass *ctx, int iter)
- return 0;
- }
-
-+/* Allocate values defined by instructions such as TEX, which have to be
-+ * assigned to consecutive registers.
-+ * Linear scan doesn't really work here since the values can have different
-+ * live intervals.
-+ */
-+static int
-+pass_allocate_constrained_values(struct nv_pc_pass *ctx)
-+{
-+ struct nv_value regvals, *val;
-+ struct nv_instruction *i;
-+ struct nv_value *defs[4];
-+ struct register_set regs[4];
-+ int n, vsize, c;
-+ uint32_t mask;
-+ boolean mem;
-+
-+ collect_register_values(ctx, &regvals, TRUE);
-+
-+ for (n = 0; n < ctx->num_insns; ++n) {
-+ i = ctx->insns[n];
-+ vsize = nvi_vector_size(i);
-+ if (!(vsize > 1))
-+ continue;
-+ assert(vsize <= 4);
-+ for (c = 0; c < vsize; ++c)
-+ defs[c] = i->def[c]->join;
-+
-+ if (defs[0]->reg.id >= 0) {
-+ for (c = 1; c < vsize; ++c)
-+ assert(defs[c]->reg.id >= 0);
-+ continue;
-+ }
-+
-+ for (c = 0; c < vsize; ++c) {
-+ nv50_ctor_register_set(ctx->pc, &regs[c]);
-+
-+ foreach(val, &regvals) {
-+ if (val->reg.id >= 0 && livei_have_overlap(val, defs[c]))
-+ reg_occupy(&regs[c], val);
-+ }
-+ mask = 0x11111111;
-+ if (vsize == 2) /* granularity is 2 and not 4 */
-+ mask |= 0x11111111 << 2;
-+ mask_register_set(&regs[c], 0, mask << c);
-+
-+ if (defs[c]->livei)
-+ insert_ordered_tail(&regvals, defs[c]);
-+ }
-+ for (c = 1; c < vsize; ++c)
-+ intersect_register_sets(&regs[0], &regs[0], &regs[c]);
-+
-+ mem = !reg_assign(&regs[0], &defs[0], vsize);
-+
-+ if (mem) {
-+ NOUVEAU_ERR("out of registers\n");
-+ abort();
-+ }
-+ }
-+ return 0;
-+}
-+
- static int
- nv_pc_pass1(struct nv_pc *pc, struct nv_basic_block *root)
- {
-@@ -923,16 +1048,16 @@ nv_pc_pass1(struct nv_pc *pc, struct nv_basic_block *root)
- livei_print(&pc->values[i]);
- #endif
-
-- ret = pass_join_values(ctx, 0);
-+ ret = pass_join_values(ctx, JOIN_MASK_PHI);
- if (ret)
- goto out;
-- ret = pass_linear_scan(ctx, 0);
-+ ret = pass_join_values(ctx, JOIN_MASK_SELECT | JOIN_MASK_TEX);
- if (ret)
- goto out;
-- ret = pass_join_values(ctx, 1);
-+ ret = pass_join_values(ctx, JOIN_MASK_MOV);
- if (ret)
- goto out;
-- ret = pass_join_values(ctx, 2);
-+ ret = pass_allocate_constrained_values(ctx);
- if (ret)
- goto out;
- ret = pass_linear_scan(ctx, 1);
---
-cgit v0.8.3-6-g21f6