Re: [PATCH v2 07/67] fscache: Implement a hash function

From: David Howells
Date: Fri Dec 10 2021 - 09:41:52 EST


Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx> wrote:

> But as mentioned for the other patches, you should then also be a lot
> more careful about always using the end result as an 'unsigned int'
> (or maybe 'u32') too, and when comparing hashes for binary search or
> other, you should always do th4e compare in some stable format.
>
> Because doing
>
> return (long)hash_a - (long)hash_b;
>
> and looking at the sign doesn't actually result in a stable ordering
> on 32-bit architectures. You don't get a transitive ordering (ie a < b
> and b < c doesn't imply a < c).
>
> And presumably if the hashes are meaningful across machines, then hash
> comparisons should also be meaningful across machines.
>
> So when comparing hashes, you need to compare them either in a truly
> bigger signed type (and make sure that doesn't get truncated) - kind
> of like how a lot of 'memcmp()' functions do 'unsigned char'
> subtractions in an 'int' - or you need to compare them _as_ 'unsigned
> int'.
>
> Otherwise the comparisons will be all kinds of messed up.

I don't think it should matter in this case, as the in-memory hash tables are
an independent of what's on disk (and not even necessarily the same size).
They're only used for duplicate detection. The idea was to be able to shorten
the scanning of a hash bucket by half on average, but I didn't make it do
that. It's just that I use the same hash value as a quick check first.

However, since the comparator functions are only used to see if they're the
same or different, the attached change makes them return bool instead, no
cast or subtraction required.

David
---
diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c
index 65cf2ae22a70..ca36b598d6b5 100644
--- a/fs/fscache/cookie.c
+++ b/fs/fscache/cookie.c
@@ -289,17 +289,15 @@ static int fscache_set_key(struct fscache_cookie *cookie,
return 0;
}

-static long fscache_compare_cookie(const struct fscache_cookie *a,
- const struct fscache_cookie *b)
+static bool fscache_cookie_same(const struct fscache_cookie *a,
+ const struct fscache_cookie *b)
{
const void *ka, *kb;

- if (a->key_hash != b->key_hash)
- return (long)a->key_hash - (long)b->key_hash;
- if (a->volume != b->volume)
- return (long)a->volume - (long)b->volume;
- if (a->key_len != b->key_len)
- return (long)a->key_len - (long)b->key_len;
+ if (a->key_hash != b->key_hash ||
+ a->volume != b->volume ||
+ a->key_len != b->key_len)
+ return false;

if (a->key_len <= sizeof(a->inline_key)) {
ka = &a->inline_key;
@@ -308,7 +306,7 @@ static long fscache_compare_cookie(const struct fscache_cookie *a,
ka = a->key;
kb = b->key;
}
- return memcmp(ka, kb, a->key_len);
+ return memcmp(ka, kb, a->key_len) == 0;
}

static atomic_t fscache_cookie_debug_id = ATOMIC_INIT(1);
@@ -399,7 +397,7 @@ static bool fscache_hash_cookie(struct fscache_cookie *candidate)

hlist_bl_lock(h);
hlist_bl_for_each_entry(cursor, p, h, hash_link) {
- if (fscache_compare_cookie(candidate, cursor) == 0) {
+ if (fscache_cookie_same(candidate, cursor)) {
if (!test_bit(FSCACHE_COOKIE_RELINQUISHED, &cursor->flags))
goto collision;
wait_for = fscache_get_cookie(cursor,
diff --git a/fs/fscache/volume.c b/fs/fscache/volume.c
index 26a6b8f315e1..0e80fd8fd14a 100644
--- a/fs/fscache/volume.c
+++ b/fs/fscache/volume.c
@@ -119,20 +119,18 @@ void fscache_end_volume_access(struct fscache_volume *volume,
}
EXPORT_SYMBOL(fscache_end_volume_access);

-static long fscache_compare_volume(const struct fscache_volume *a,
- const struct fscache_volume *b)
+static bool fscache_volume_same(const struct fscache_volume *a,
+ const struct fscache_volume *b)
{
size_t klen;

- if (a->key_hash != b->key_hash)
- return (long)a->key_hash - (long)b->key_hash;
- if (a->cache != b->cache)
- return (long)a->cache - (long)b->cache;
- if (a->key[0] != b->key[0])
- return (long)a->key[0] - (long)b->key[0];
+ if (a->key_hash != b->key_hash ||
+ a->cache != b->cache ||
+ a->key[0] != b->key[0])
+ return false;

klen = round_up(a->key[0] + 1, sizeof(__le32));
- return memcmp(a->key, b->key, klen);
+ return memcmp(a->key, b->key, klen) == 0;
}

static bool fscache_is_acquire_pending(struct fscache_volume *volume)
@@ -170,7 +168,7 @@ static bool fscache_hash_volume(struct fscache_volume *candidate)

hlist_bl_lock(h);
hlist_bl_for_each_entry(cursor, p, h, hash_link) {
- if (fscache_compare_volume(candidate, cursor) == 0) {
+ if (fscache_volume_same(candidate, cursor)) {
if (!test_bit(FSCACHE_VOLUME_RELINQUISHED, &cursor->flags))
goto collision;
fscache_see_volume(cursor, fscache_volume_get_hash_collision);
@@ -335,7 +333,7 @@ static void fscache_wake_pending_volume(struct fscache_volume *volume,
struct hlist_bl_node *p;

hlist_bl_for_each_entry(cursor, p, h, hash_link) {
- if (fscache_compare_volume(cursor, volume) == 0) {
+ if (fscache_volume_same(cursor, volume)) {
fscache_see_volume(cursor, fscache_volume_see_hash_wake);
clear_bit(FSCACHE_VOLUME_ACQUIRE_PENDING, &cursor->flags);
wake_up_bit(&cursor->flags, FSCACHE_VOLUME_ACQUIRE_PENDING);