diff options
Diffstat (limited to 'testing/mesa/gnome-shell-shader-fix.patch')
-rw-r--r-- | testing/mesa/gnome-shell-shader-fix.patch | 535 |
1 files changed, 0 insertions, 535 deletions
diff --git a/testing/mesa/gnome-shell-shader-fix.patch b/testing/mesa/gnome-shell-shader-fix.patch deleted file mode 100644 index 3b3f37591..000000000 --- a/testing/mesa/gnome-shell-shader-fix.patch +++ /dev/null @@ -1,535 +0,0 @@ -From 3f625689acd570e4f14cc2ebaa43a425d13954ff Mon Sep 17 00:00:00 2001 -From: Christoph Bumiller <e0425955@student.tuwien.ac.at> -Date: Thu, 31 Mar 2011 13:49:33 +0000 -Subject: nv50: copy regalloc fixes from nvc0 - -Should fix gnome-shell's fade shader. - -Unification of the shader backend which is supposed to remove the -code duplication is still WIP. ---- -diff --git a/src/gallium/drivers/nv50/nv50_pc.h b/src/gallium/drivers/nv50/nv50_pc.h -index e6f3815..a9a3248 100644 ---- a/src/gallium/drivers/nv50/nv50_pc.h -+++ b/src/gallium/drivers/nv50/nv50_pc.h -@@ -228,6 +228,8 @@ struct nv_ref { - ubyte flags; /* not used yet */ - }; - -+#define NV_REF_FLAG_REGALLOC_PRIV (1 << 0) -+ - struct nv_basic_block; - - struct nv_instruction { -@@ -263,6 +265,15 @@ struct nv_instruction { - ubyte quadop; - }; - -+static INLINE int -+nvi_vector_size(struct nv_instruction *nvi) -+{ -+ int i; -+ assert(nvi); -+ for (i = 0; i < 4 && nvi->def[i]; ++i); -+ return i; -+} -+ - #define CFG_EDGE_FORWARD 0 - #define CFG_EDGE_BACK 1 - #define CFG_EDGE_LOOP_ENTER 2 -diff --git a/src/gallium/drivers/nv50/nv50_pc_regalloc.c b/src/gallium/drivers/nv50/nv50_pc_regalloc.c -index 39ae366..657df2c 100644 ---- a/src/gallium/drivers/nv50/nv50_pc_regalloc.c -+++ b/src/gallium/drivers/nv50/nv50_pc_regalloc.c -@@ -32,14 +32,39 @@ - #include "util/u_simple_list.h" - - #define NUM_REGISTER_FILES 4 -+#define MAX_REGISTER_COUNT 256 - - struct register_set { - struct nv_pc *pc; - - uint32_t last[NUM_REGISTER_FILES]; -- uint32_t bits[NUM_REGISTER_FILES][8]; -+ uint32_t bits[NUM_REGISTER_FILES][(MAX_REGISTER_COUNT + 31) / 32]; - }; - -+/* using OR because a set bit means occupied/unavailable, aliasing is allowed */ -+static void -+intersect_register_sets(struct register_set *dst, 
-+ struct register_set *src1, struct register_set *src2) -+{ -+ int i, j; -+ -+ for (i = 0; i < NUM_REGISTER_FILES; ++i) { -+ for (j = 0; j < (MAX_REGISTER_COUNT + 31) / 32; ++j) -+ dst->bits[i][j] = src1->bits[i][j] | src2->bits[i][j]; -+ } -+} -+ -+static void -+mask_register_set(struct register_set *set, uint32_t mask, uint32_t umask) -+{ -+ int i, j; -+ -+ for (i = 0; i < NUM_REGISTER_FILES; ++i) { -+ for (j = 0; j < (MAX_REGISTER_COUNT + 31) / 32; ++j) -+ set->bits[i][j] = (set->bits[i][j] | mask) & umask; -+ } -+} -+ - struct nv_pc_pass { - struct nv_pc *pc; - -@@ -61,11 +86,15 @@ ranges_coalesce(struct nv_range *range) - } - } - -+/* @return: TRUE if @new_range can be freed (i.e. was not reused) */ - static boolean - add_range_ex(struct nv_value *val, int bgn, int end, struct nv_range *new_range) - { - struct nv_range *range, **nextp = &val->livei; - -+ if (bgn == end) /* [a, a) is invalid / empty */ -+ return TRUE; -+ - for (range = val->livei; range; range = range->next) { - if (end < range->bgn) - break; /* insert before */ -@@ -251,6 +280,8 @@ reg_occupy(struct register_set *set, struct nv_value *val) - id <<= s; - m = (1 << (1 << s)) - 1; - -+ assert(s >= 0); /* XXX: remove me */ -+ - set->bits[f][id / 32] |= m << (id % 32); - - if (set->pc->max_reg[f] < id) -@@ -286,15 +317,12 @@ join_allowed(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b) - if (a->join->reg.id == b->join->reg.id) - return TRUE; - --#if 1 - /* either a or b or both have been assigned */ - - if (a->join->reg.id >= 0 && b->join->reg.id >= 0) - return FALSE; - else - if (b->join->reg.id >= 0) { -- if (a->join->reg.id >= 0) -- return FALSE; - val = a; - a = b; - b = val; -@@ -309,8 +337,6 @@ join_allowed(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b) - return FALSE; - } - return TRUE; --#endif -- return FALSE; - } - - static INLINE void -@@ -336,14 +362,14 @@ do_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b) - assert(b->join == 
a->join); - } - --static INLINE void -+static INLINE boolean - try_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b) - { - if (!join_allowed(ctx, a, b)) { - #ifdef NV50_RA_DEBUG_JOIN - debug_printf("cannot join %i to %i: not allowed\n", b->n, a->n); - #endif -- return; -+ return FALSE; - } - if (livei_have_overlap(a->join, b->join)) { - #ifdef NV50_RA_DEBUG_JOIN -@@ -351,10 +377,27 @@ try_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b) - livei_print(a); - livei_print(b); - #endif -- return; -+ return FALSE; - } - - do_join_values(ctx, a, b); -+ -+ return TRUE; -+} -+ -+static void -+join_values_nofail(struct nv_pc_pass *ctx, -+ struct nv_value *a, struct nv_value *b, boolean type_only) -+{ -+ if (type_only) { -+ assert(join_allowed(ctx, a, b)); -+ do_join_values(ctx, a, b); -+ } else { -+ boolean ok = try_join_values(ctx, a, b); -+ if (!ok) { -+ NOUVEAU_ERR("failed to coalesce values\n"); -+ } -+ } - } - - static INLINE boolean -@@ -369,20 +412,32 @@ need_new_else_block(struct nv_basic_block *b, struct nv_basic_block *p) - return (b->num_in > 1) && (n == 2); - } - -+/* Look for the @phi's operand whose definition reaches @b. 
*/ - static int - phi_opnd_for_bb(struct nv_instruction *phi, struct nv_basic_block *b, - struct nv_basic_block *tb) - { -+ struct nv_ref *srci, *srcj; - int i, j; - -- for (j = -1, i = 0; i < 4 && phi->src[i]; ++i) { -- if (!nvbb_reachable_by(b, phi->src[i]->value->insn->bb, tb)) -+ for (j = -1, i = 0; i < 6 && phi->src[i]; ++i) { -+ srci = phi->src[i]; -+ /* if already replaced, check with original source first */ -+ if (srci->flags & NV_REF_FLAG_REGALLOC_PRIV) -+ srci = srci->value->insn->src[0]; -+ if (!nvbb_reachable_by(b, srci->value->insn->bb, NULL)) - continue; - /* NOTE: back-edges are ignored by the reachable-by check */ -- if (j < 0 || !nvbb_reachable_by(phi->src[j]->value->insn->bb, -- phi->src[i]->value->insn->bb, tb)) -+ if (j < 0 || !nvbb_reachable_by(srcj->value->insn->bb, -+ srci->value->insn->bb, NULL)) { - j = i; -+ srcj = srci; -+ } - } -+ if (j >= 0 && nvbb_reachable_by(b, phi->def[0]->insn->bb, NULL)) -+ if (!nvbb_reachable_by(srcj->value->insn->bb, -+ phi->def[0]->insn->bb, NULL)) -+ j = -1; - return j; - } - -@@ -429,16 +484,21 @@ pass_generate_phi_movs(struct nv_pc_pass *ctx, struct nv_basic_block *b) - ctx->pc->current_block = pn; - - for (i = b->phi; i && i->opcode == NV_OP_PHI; i = i->next) { -- if ((j = phi_opnd_for_bb(i, p, b)) < 0) -- continue; -- val = i->src[j]->value; -- -- if (i->src[j]->flags) { -- val = val->insn->src[0]->value; -- while (j < 4 && i->src[j]) -- ++j; -- assert(j < 4); -+ j = phi_opnd_for_bb(i, p, b); -+ -+ if (j < 0) { -+ val = i->def[0]; -+ } else { -+ val = i->src[j]->value; -+ if (i->src[j]->flags & NV_REF_FLAG_REGALLOC_PRIV) { -+ j = -1; -+ /* use original value, we already encountered & replaced it */ -+ val = val->insn->src[0]->value; -+ } - } -+ if (j < 0) /* need an additional source ? 
*/ -+ for (j = 0; j < 5 && i->src[j] && i->src[j]->value != val; ++j); -+ assert(j < 5); - - ni = new_instruction(ctx->pc, NV_OP_MOV); - -@@ -452,7 +512,7 @@ pass_generate_phi_movs(struct nv_pc_pass *ctx, struct nv_basic_block *b) - - nv_reference(ctx->pc, &i->src[j], ni->def[0]); - -- i->src[j]->flags = 1; -+ i->src[j]->flags |= NV_REF_FLAG_REGALLOC_PRIV; - } - - if (pn != p && pn->exit) { -@@ -470,45 +530,50 @@ pass_generate_phi_movs(struct nv_pc_pass *ctx, struct nv_basic_block *b) - return 0; - } - -+#define JOIN_MASK_PHI (1 << 0) -+#define JOIN_MASK_SELECT (1 << 1) -+#define JOIN_MASK_MOV (1 << 2) -+#define JOIN_MASK_TEX (1 << 3) -+ - static int --pass_join_values(struct nv_pc_pass *ctx, int iter) -+pass_join_values(struct nv_pc_pass *ctx, unsigned mask) - { - int c, n; - - for (n = 0; n < ctx->num_insns; ++n) { -- struct nv_instruction *i = ctx->insns[n]; -+ struct nv_instruction *nvi, *i = ctx->insns[n]; - - switch (i->opcode) { - case NV_OP_PHI: -- if (iter != 2) -+ if (!(mask & JOIN_MASK_PHI)) - break; -- for (c = 0; c < 4 && i->src[c]; ++c) -- try_join_values(ctx, i->def[0], i->src[c]->value); -+ for (c = 0; c < 5 && i->src[c]; ++c) -+ join_values_nofail(ctx, i->def[0], i->src[c]->value, FALSE); - break; - case NV_OP_MOV: -- if ((iter == 2) && i->src[0]->value->insn && -- !nv_is_vector_op(i->src[0]->value->join->insn->opcode)) -+ if (!(mask & JOIN_MASK_MOV)) -+ break; -+ nvi = i->src[0]->value->join->insn; -+ if (nvi && !nv_is_vector_op(nvi->opcode)) - try_join_values(ctx, i->def[0], i->src[0]->value); - break; - case NV_OP_SELECT: -- if (iter != 1) -+ if (!(mask & JOIN_MASK_SELECT)) - break; -- for (c = 0; c < 4 && i->src[c]; ++c) { -- assert(join_allowed(ctx, i->def[0], i->src[c]->value)); -- do_join_values(ctx, i->def[0], i->src[c]->value); -- } -+ for (c = 0; c < 5 && i->src[c]; ++c) -+ join_values_nofail(ctx, i->def[0], i->src[c]->value, TRUE); - break; - case NV_OP_TEX: - case NV_OP_TXB: - case NV_OP_TXL: - case NV_OP_TXQ: -- if (iter) -+ if (!(mask 
& JOIN_MASK_TEX)) - break; -- for (c = 0; c < 4; ++c) { -- if (!i->src[c]) -- break; -- do_join_values(ctx, i->def[c], i->src[c]->value); -- } -+ /* This should work without conflicts because we always generate -+ * extra MOVs for the sources of a TEX. -+ */ -+ for (c = 0; c < 4 && i->src[c]; ++c) -+ join_values_nofail(ctx, i->def[c], i->src[c]->value, TRUE); - break; - default: - break; -@@ -643,15 +708,15 @@ static void collect_live_values(struct nv_basic_block *b, const int n) - { - int i; - -- if (b->out[0]) { -- if (b->out[1]) { /* what to do about back-edges ? */ -+ if (b->out[0] && b->out_kind[0] != CFG_EDGE_FAKE) { -+ if (b->out[1] && b->out_kind[1] != CFG_EDGE_FAKE) { - for (i = 0; i < n; ++i) - b->live_set[i] = b->out[0]->live_set[i] | b->out[1]->live_set[i]; - } else { - memcpy(b->live_set, b->out[0]->live_set, n * sizeof(uint32_t)); - } - } else -- if (b->out[1]) { -+ if (b->out[1] && b->out_kind[1] != CFG_EDGE_FAKE) { - memcpy(b->live_set, b->out[1]->live_set, n * sizeof(uint32_t)); - } else { - memset(b->live_set, 0, n * sizeof(uint32_t)); -@@ -770,8 +835,8 @@ insert_ordered_tail(struct nv_value *list, struct nv_value *nval) - struct nv_value *elem; - - for (elem = list->prev; -- elem != list && elem->livei->bgn > nval->livei->bgn; -- elem = elem->prev); -+ elem != list && elem->livei->bgn > nval->livei->bgn; -+ elem = elem->prev); - /* now elem begins before or at the same time as val */ - - nval->prev = elem; -@@ -780,44 +845,49 @@ insert_ordered_tail(struct nv_value *list, struct nv_value *nval) - elem->next = nval; - } - --static int --pass_linear_scan(struct nv_pc_pass *ctx, int iter) -+static void -+collect_register_values(struct nv_pc_pass *ctx, struct nv_value *head, -+ boolean assigned_only) - { -- struct nv_instruction *i; -- struct register_set f, free; -+ struct nv_value *val; - int k, n; -- struct nv_value *cur, *val, *tmp[2]; -- struct nv_value active, inactive, handled, unhandled; - -- make_empty_list(&active); -- 
make_empty_list(&inactive); -- make_empty_list(&handled); -- make_empty_list(&unhandled); -- -- nv50_ctor_register_set(ctx->pc, &free); -+ make_empty_list(head); - -- /* joined values should have range = NULL and thus not be added; -- * also, fixed memory values won't be added because they're not -- * def'd, just used -- */ - for (n = 0; n < ctx->num_insns; ++n) { -- i = ctx->insns[n]; -+ struct nv_instruction *i = ctx->insns[n]; - -+ /* for joined values, only the representative will have livei != NULL */ - for (k = 0; k < 4; ++k) { - if (i->def[k] && i->def[k]->livei) -- insert_ordered_tail(&unhandled, i->def[k]); -- else -- if (0 && i->def[k]) -- debug_printf("skipping def'd value %i: no livei\n", i->def[k]->n); -+ if (!assigned_only || i->def[k]->reg.id >= 0) -+ insert_ordered_tail(head, i->def[k]); - } - if (i->flags_def && i->flags_def->livei) -- insert_ordered_tail(&unhandled, i->flags_def); -+ if (!assigned_only || i->flags_def->reg.id >= 0) -+ insert_ordered_tail(head, i->flags_def); - } - -- for (val = unhandled.next; val != unhandled.prev; val = val->next) { -+ for (val = head->next; val != head->prev; val = val->next) { - assert(val->join == val); - assert(val->livei->bgn <= val->next->livei->bgn); - } -+} -+ -+static int -+pass_linear_scan(struct nv_pc_pass *ctx, int iter) -+{ -+ struct register_set f, free; -+ struct nv_value *cur, *val, *tmp[2]; -+ struct nv_value active, inactive, handled, unhandled; -+ -+ make_empty_list(&active); -+ make_empty_list(&inactive); -+ make_empty_list(&handled); -+ -+ nv50_ctor_register_set(ctx->pc, &free); -+ -+ collect_register_values(ctx, &unhandled, FALSE); - - foreach_s(cur, tmp[0], &unhandled) { - remove_from_list(cur); -@@ -854,13 +924,7 @@ pass_linear_scan(struct nv_pc_pass *ctx, int iter) - reg_occupy(&f, val); - - if (cur->reg.id < 0) { -- boolean mem = FALSE; -- -- if (nv_is_vector_op(cur->insn->opcode)) -- mem = !reg_assign(&f, &cur->insn->def[0], 4); -- else -- if (iter) -- mem = !reg_assign(&f, &cur, 1); 
-+ boolean mem = !reg_assign(&f, &cur, 1); - if (mem) { - NOUVEAU_ERR("out of registers\n"); -@@ -874,6 +938,67 @@ pass_linear_scan(struct nv_pc_pass *ctx, int iter) - return 0; - } - -+/* Allocate values defined by instructions such as TEX, which have to be -+ * assigned to consecutive registers. -+ * Linear scan doesn't really work here since the values can have different -+ * live intervals. -+ */ -+static int -+pass_allocate_constrained_values(struct nv_pc_pass *ctx) -+{ -+ struct nv_value regvals, *val; -+ struct nv_instruction *i; -+ struct nv_value *defs[4]; -+ struct register_set regs[4]; -+ int n, vsize, c; -+ uint32_t mask; -+ boolean mem; -+ -+ collect_register_values(ctx, &regvals, TRUE); -+ -+ for (n = 0; n < ctx->num_insns; ++n) { -+ i = ctx->insns[n]; -+ vsize = nvi_vector_size(i); -+ if (!(vsize > 1)) -+ continue; -+ assert(vsize <= 4); -+ for (c = 0; c < vsize; ++c) -+ defs[c] = i->def[c]->join; -+ -+ if (defs[0]->reg.id >= 0) { -+ for (c = 1; c < vsize; ++c) -+ assert(defs[c]->reg.id >= 0); -+ continue; -+ } -+ -+ for (c = 0; c < vsize; ++c) { -+ nv50_ctor_register_set(ctx->pc, &regs[c]); -+ -+ foreach(val, &regvals) { -+ if (val->reg.id >= 0 && livei_have_overlap(val, defs[c])) -+ reg_occupy(&regs[c], val); -+ } -+ mask = 0x11111111; -+ if (vsize == 2) /* granularity is 2 and not 4 */ -+ mask |= 0x11111111 << 2; -+ mask_register_set(&regs[c], 0, mask << c); -+ -+ if (defs[c]->livei) -+ insert_ordered_tail(&regvals, defs[c]); -+ } -+ for (c = 1; c < vsize; ++c) -+ intersect_register_sets(&regs[0], &regs[0], &regs[c]); -+ -+ mem = !reg_assign(&regs[0], &defs[0], vsize); -+ -+ if (mem) { -+ NOUVEAU_ERR("out of registers\n"); -+ abort(); -+ } -+ } -+ return 0; -+} -+ - static int - nv_pc_pass1(struct nv_pc *pc, struct nv_basic_block *root) - { -@@ -923,16 +1048,16 @@ nv_pc_pass1(struct nv_pc *pc, struct nv_basic_block *root) - livei_print(&pc->values[i]); - #endif - -- ret = pass_join_values(ctx, 0); -+ ret = pass_join_values(ctx, JOIN_MASK_PHI); - if (ret) - goto out; -- ret = 
pass_linear_scan(ctx, 0); -+ ret = pass_join_values(ctx, JOIN_MASK_SELECT | JOIN_MASK_TEX); - if (ret) - goto out; -- ret = pass_join_values(ctx, 1); -+ ret = pass_join_values(ctx, JOIN_MASK_MOV); - if (ret) - goto out; -- ret = pass_join_values(ctx, 2); -+ ret = pass_allocate_constrained_values(ctx); - if (ret) - goto out; - ret = pass_linear_scan(ctx, 1); --- -cgit v0.8.3-6-g21f6 |