summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Dan McGee <dan@archlinux.org> 2012-01-07 11:25:04 -0600
committer: Dan McGee <dan@archlinux.org> 2012-01-07 11:28:37 -0600
commit: 2d794cc031b634c55c77ec17b2b7864f7150ab34 (patch)
tree: 649fd32bac9dc93624dd5d64b7c7e148f77c8a74
parent: b7ecff160ee541904e7b75c9ba88a455c9502409 (diff)
Update sync database size estimation values
As the comment states, this is more like a dartboard than science.

Signed-off-by: Dan McGee <dan@archlinux.org>
-rw-r--r-- lib/libalpm/be_sync.c 49
1 files changed, 13 insertions, 36 deletions
diff --git a/lib/libalpm/be_sync.c b/lib/libalpm/be_sync.c
index 76c31f5a..b16271bc 100644
--- a/lib/libalpm/be_sync.c
+++ b/lib/libalpm/be_sync.c
@@ -351,61 +351,38 @@ static alpm_pkg_t *load_pkg_for_entry(alpm_db_t *db, const char *entryname,
return pkg;
}
-/*
- * This is the data table used to generate the estimating function below.
- * "Weighted Avg" means averaging the bottom table values; thus each repo, big
- * or small, will have equal influence. "Unweighted Avg" means averaging the
- * sums of the top table columns, thus each package has equal influence. The
- * final values are calculated by (surprise) averaging the averages, because
- * why the hell not.
- *
- * Database Pkgs tar bz2 gz xz
- * community 2096 5294080 256391 421227 301296
- * core 180 460800 25257 36850 29356
- * extra 2606 6635520 294647 470818 339392
- * multilib 126 327680 16120 23261 18732
- * testing 76 204800 10902 14348 12100
- *
- * Bytes Per Package
- * community 2096 2525.80 122.32 200.97 143.75
- * core 180 2560.00 140.32 204.72 163.09
- * extra 2606 2546.25 113.06 180.67 130.23
- * multilib 126 2600.63 127.94 184.61 148.67
- * testing 76 2694.74 143.45 188.79 159.21
-
- * Weighted Avg 2585.48 129.42 191.95 148.99
- * Unweighted Avg 2543.39 118.74 190.16 137.93
- * Average of Avgs 2564.44 124.08 191.06 143.46
- */
+/* This function doesn't work as well as one might think, as size of database
+ * entries varies considerably. Adding signatures nearly doubles the size of a
+ * single entry; deltas also can make for large variations in size. These
+ * current values are heavily influenced by Arch Linux; databases with no
+ * deltas and a single signature per package. */
static size_t estimate_package_count(struct stat *st, struct archive *archive)
{
- unsigned int per_package;
+ int per_package;
switch(archive_compression(archive)) {
case ARCHIVE_COMPRESSION_NONE:
- per_package = 2564;
+ per_package = 3015;
break;
case ARCHIVE_COMPRESSION_GZIP:
- per_package = 191;
+ case ARCHIVE_COMPRESSION_COMPRESS:
+ per_package = 464;
break;
case ARCHIVE_COMPRESSION_BZIP2:
- per_package = 124;
- break;
- case ARCHIVE_COMPRESSION_COMPRESS:
- per_package = 193;
+ per_package = 394;
break;
case ARCHIVE_COMPRESSION_LZMA:
case ARCHIVE_COMPRESSION_XZ:
- per_package = 143;
+ per_package = 400;
break;
#ifdef ARCHIVE_COMPRESSION_UU
case ARCHIVE_COMPRESSION_UU:
- per_package = 3543;
+ per_package = 3015 * 4 / 3;
break;
#endif
default:
/* assume it is at least somewhat compressed */
- per_package = 200;
+ per_package = 500;
}
return (size_t)((st->st_size / per_package) + 1);
}