[PATCH v7 1/8] unicode: Add utf8_casefold_iter

From: Daniel Rosenberg
Date: Fri Feb 07 2020 - 20:36:03 EST


This function will allow other uses of unicode to act upon a casefolded
string without needing to allocate their own copy of one.

The actor function can return a nonzero value to exit early.

Signed-off-by: Daniel Rosenberg <drosen@xxxxxxxxxx>
---
fs/unicode/utf8-core.c | 25 ++++++++++++++++++++++++-
include/linux/unicode.h | 10 ++++++++++
2 files changed, 34 insertions(+), 1 deletion(-)

diff --git a/fs/unicode/utf8-core.c b/fs/unicode/utf8-core.c
index 2a878b739115d..db050bf59a32b 100644
--- a/fs/unicode/utf8-core.c
+++ b/fs/unicode/utf8-core.c
@@ -122,9 +122,32 @@ int utf8_casefold(const struct unicode_map *um, const struct qstr *str,
}
return -EINVAL;
}
-
EXPORT_SYMBOL(utf8_casefold);

+int utf8_casefold_iter(const struct unicode_map *um, const struct qstr *str,
+ struct utf8_itr_context *ctx) /* ctx->actor is invoked once per byte */
+{
+ const struct utf8data *data = utf8nfdicf(um->version); /* table for this map's version */
+ struct utf8cursor cur;
+ int c;
+ int res = 0;
+ int pos = 0; /* index of the next byte handed to the actor */
+
+ if (utf8ncursor(&cur, data, str->name, str->len) < 0)
+ return -EINVAL; /* cursor could not be set up over str */
+
+ while ((c = utf8byte(&cur))) { /* a 0 from utf8byte() ends the walk */
+ if (c < 0)
+ return c; /* negative value from utf8byte() is passed back unchanged */
+ res = ctx->actor(ctx, c, pos);
+ pos++;
+ if (res)
+ return res; /* nonzero actor result stops early and is returned */
+ }
+ return res; /* 0 here: the actor never returned nonzero */
+}
+EXPORT_SYMBOL(utf8_casefold_iter);
+
int utf8_normalize(const struct unicode_map *um, const struct qstr *str,
unsigned char *dest, size_t dlen)
{
diff --git a/include/linux/unicode.h b/include/linux/unicode.h
index 990aa97d80496..2ae12f8710ae2 100644
--- a/include/linux/unicode.h
+++ b/include/linux/unicode.h
@@ -10,6 +10,13 @@ struct unicode_map {
int version;
};

+struct utf8_itr_context; /* forward declaration, needed by the typedef below */
+typedef int (*utf8_itr_actor_t)(struct utf8_itr_context *, int byte, int pos);
+
+struct utf8_itr_context {
+ utf8_itr_actor_t actor; /* utf8_casefold_iter() calls this per byte; nonzero return stops it */
+};
+
int utf8_validate(const struct unicode_map *um, const struct qstr *str);

int utf8_strncmp(const struct unicode_map *um,
@@ -27,6 +34,9 @@ int utf8_normalize(const struct unicode_map *um, const struct qstr *str,
int utf8_casefold(const struct unicode_map *um, const struct qstr *str,
unsigned char *dest, size_t dlen);

+int utf8_casefold_iter(const struct unicode_map *um, const struct qstr *str,
+ struct utf8_itr_context *ctx);
+
struct unicode_map *utf8_load(const char *version);
void utf8_unload(struct unicode_map *um);

--
2.25.0.341.g760bfbb309-goog