|  | /* | 
|  | * QEMU Xen emulation: The actual implementation of XenStore | 
|  | * | 
|  | * Copyright © 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. | 
|  | * | 
|  | * Authors: David Woodhouse <dwmw2@infradead.org>, Paul Durrant <paul@xen.org> | 
|  | * | 
|  | * This work is licensed under the terms of the GNU GPL, version 2 or later. | 
|  | * See the COPYING file in the top-level directory. | 
|  | */ | 
|  |  | 
|  | #include "qemu/osdep.h" | 
|  | #include "qom/object.h" | 
|  |  | 
|  | #include "hw/xen/xen.h" | 
|  |  | 
|  | #include "xen_xenstore.h" | 
|  | #include "xenstore_impl.h" | 
|  |  | 
|  | #include "hw/xen/interface/io/xs_wire.h" | 
|  |  | 
|  | #define XS_MAX_WATCHES          128 | 
|  | #define XS_MAX_DOMAIN_NODES     1000 | 
|  | #define XS_MAX_NODE_SIZE        2048 | 
|  | #define XS_MAX_TRANSACTIONS     10 | 
|  | #define XS_MAX_PERMS_PER_NODE   5 | 
|  |  | 
|  | #define XS_VALID_CHARS "abcdefghijklmnopqrstuvwxyz" \ | 
|  | "ABCDEFGHIJKLMNOPQRSTUVWXYZ" \ | 
|  | "0123456789-/_" | 
|  |  | 
|  | typedef struct XsNode { | 
|  | uint32_t ref; | 
|  | GByteArray *content; | 
|  | GList *perms; | 
|  | GHashTable *children; | 
|  | uint64_t gencnt; | 
|  | bool deleted_in_tx; | 
|  | bool modified_in_tx; | 
|  | unsigned int serialized_tx; | 
|  | #ifdef XS_NODE_UNIT_TEST | 
|  | gchar *name; /* debug only */ | 
|  | #endif | 
|  | } XsNode; | 
|  |  | 
|  | typedef struct XsWatch { | 
|  | struct XsWatch *next; | 
|  | xs_impl_watch_fn *cb; | 
|  | void *cb_opaque; | 
|  | char *token; | 
|  | unsigned int dom_id; | 
|  | int rel_prefix; | 
|  | } XsWatch; | 
|  |  | 
|  | typedef struct XsTransaction { | 
|  | XsNode *root; | 
|  | unsigned int nr_nodes; | 
|  | unsigned int base_tx; | 
|  | unsigned int tx_id; | 
|  | unsigned int dom_id; | 
|  | } XsTransaction; | 
|  |  | 
|  | struct XenstoreImplState { | 
|  | XsNode *root; | 
|  | unsigned int nr_nodes; | 
|  | GHashTable *watches; | 
|  | unsigned int nr_domu_watches; | 
|  | GHashTable *transactions; | 
|  | unsigned int nr_domu_transactions; | 
|  | unsigned int root_tx; | 
|  | unsigned int last_tx; | 
|  | bool serialized; | 
|  | }; | 
|  |  | 
|  |  | 
|  | static void nobble_tx(gpointer key, gpointer value, gpointer user_data) | 
|  | { | 
|  | unsigned int *new_tx_id = user_data; | 
|  | XsTransaction *tx = value; | 
|  |  | 
|  | if (tx->base_tx == *new_tx_id) { | 
|  | /* Transactions based on XBT_NULL will always fail */ | 
|  | tx->base_tx = XBT_NULL; | 
|  | } | 
|  | } | 
|  |  | 
|  | static inline unsigned int next_tx(struct XenstoreImplState *s) | 
|  | { | 
|  | unsigned int tx_id; | 
|  |  | 
|  | /* Find the next TX id which isn't either XBT_NULL or in use. */ | 
|  | do { | 
|  | tx_id = ++s->last_tx; | 
|  | } while (tx_id == XBT_NULL || tx_id == s->root_tx || | 
|  | g_hash_table_lookup(s->transactions, GINT_TO_POINTER(tx_id))); | 
|  |  | 
|  | /* | 
|  | * It is vanishingly unlikely, but ensure that no outstanding transaction | 
|  | * is based on the (previous incarnation of the) newly-allocated TX id. | 
|  | */ | 
|  | g_hash_table_foreach(s->transactions, nobble_tx, &tx_id); | 
|  |  | 
|  | return tx_id; | 
|  | } | 
|  |  | 
|  | static inline XsNode *xs_node_new(void) | 
|  | { | 
|  | XsNode *n = g_new0(XsNode, 1); | 
|  | n->ref = 1; | 
|  |  | 
|  | #ifdef XS_NODE_UNIT_TEST | 
|  | nr_xs_nodes++; | 
|  | xs_node_list = g_list_prepend(xs_node_list, n); | 
|  | #endif | 
|  | return n; | 
|  | } | 
|  |  | 
|  | static inline XsNode *xs_node_ref(XsNode *n) | 
|  | { | 
|  | /* With just 10 transactions, it can never get anywhere near this. */ | 
|  | g_assert(n->ref < INT_MAX); | 
|  |  | 
|  | g_assert(n->ref); | 
|  | n->ref++; | 
|  | return n; | 
|  | } | 
|  |  | 
|  | static inline void xs_node_unref(XsNode *n) | 
|  | { | 
|  | if (!n) { | 
|  | return; | 
|  | } | 
|  | g_assert(n->ref); | 
|  | if (--n->ref) { | 
|  | return; | 
|  | } | 
|  |  | 
|  | if (n->content) { | 
|  | g_byte_array_unref(n->content); | 
|  | } | 
|  | if (n->perms) { | 
|  | g_list_free_full(n->perms, g_free); | 
|  | } | 
|  | if (n->children) { | 
|  | g_hash_table_unref(n->children); | 
|  | } | 
|  | #ifdef XS_NODE_UNIT_TEST | 
|  | g_free(n->name); | 
|  | nr_xs_nodes--; | 
|  | xs_node_list = g_list_remove(xs_node_list, n); | 
|  | #endif | 
|  | g_free(n); | 
|  | } | 
|  |  | 
|  | char *xs_perm_as_string(unsigned int perm, unsigned int domid) | 
|  | { | 
|  | char letter; | 
|  |  | 
|  | switch (perm) { | 
|  | case XS_PERM_READ | XS_PERM_WRITE: | 
|  | letter = 'b'; | 
|  | break; | 
|  | case XS_PERM_READ: | 
|  | letter = 'r'; | 
|  | break; | 
|  | case XS_PERM_WRITE: | 
|  | letter = 'w'; | 
|  | break; | 
|  | case XS_PERM_NONE: | 
|  | default: | 
|  | letter = 'n'; | 
|  | break; | 
|  | } | 
|  |  | 
|  | return g_strdup_printf("%c%u", letter, domid); | 
|  | } | 
|  |  | 
|  | static gpointer do_perm_copy(gconstpointer src, gpointer user_data) | 
|  | { | 
|  | return g_strdup(src); | 
|  | } | 
|  |  | 
|  | static XsNode *xs_node_create(const char *name, GList *perms) | 
|  | { | 
|  | XsNode *n = xs_node_new(); | 
|  |  | 
|  | #ifdef XS_NODE_UNIT_TEST | 
|  | if (name) { | 
|  | n->name = g_strdup(name); | 
|  | } | 
|  | #endif | 
|  |  | 
|  | n->perms = g_list_copy_deep(perms, do_perm_copy, NULL); | 
|  |  | 
|  | return n; | 
|  | } | 
|  |  | 
|  | /* For copying from one hash table to another using g_hash_table_foreach() */ | 
|  | static void do_child_insert(gpointer key, gpointer value, gpointer user_data) | 
|  | { | 
|  | g_hash_table_insert(user_data, g_strdup(key), xs_node_ref(value)); | 
|  | } | 
|  |  | 
|  | static XsNode *xs_node_copy(XsNode *old) | 
|  | { | 
|  | XsNode *n = xs_node_new(); | 
|  |  | 
|  | n->gencnt = old->gencnt; | 
|  |  | 
|  | #ifdef XS_NODE_UNIT_TEST | 
|  | if (n->name) { | 
|  | n->name = g_strdup(old->name); | 
|  | } | 
|  | #endif | 
|  |  | 
|  | assert(old); | 
|  | if (old->children) { | 
|  | n->children = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, | 
|  | (GDestroyNotify)xs_node_unref); | 
|  | g_hash_table_foreach(old->children, do_child_insert, n->children); | 
|  | } | 
|  | if (old->perms) { | 
|  | n->perms = g_list_copy_deep(old->perms, do_perm_copy, NULL); | 
|  | } | 
|  | if (old->content) { | 
|  | n->content = g_byte_array_ref(old->content); | 
|  | } | 
|  | return n; | 
|  | } | 
|  |  | 
|  | /* Returns true if it made a change to the hash table */ | 
|  | static bool xs_node_add_child(XsNode *n, const char *path_elem, XsNode *child) | 
|  | { | 
|  | assert(!strchr(path_elem, '/')); | 
|  |  | 
|  | if (!child) { | 
|  | assert(n->children); | 
|  | return g_hash_table_remove(n->children, path_elem); | 
|  | } | 
|  |  | 
|  | #ifdef XS_NODE_UNIT_TEST | 
|  | g_free(child->name); | 
|  | child->name = g_strdup(path_elem); | 
|  | #endif | 
|  | if (!n->children) { | 
|  | n->children = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, | 
|  | (GDestroyNotify)xs_node_unref); | 
|  | } | 
|  |  | 
|  | /* | 
|  | * The documentation for g_hash_table_insert() says that it "returns a | 
|  | * boolean value to indicate whether the newly added value was already | 
|  | * in the hash table or not." | 
|  | * | 
|  | * It could perhaps be clearer that returning TRUE means it wasn't, | 
|  | */ | 
|  | return g_hash_table_insert(n->children, g_strdup(path_elem), child); | 
|  | } | 
|  |  | 
|  | struct walk_op { | 
|  | struct XenstoreImplState *s; | 
|  | char path[XENSTORE_ABS_PATH_MAX + 2]; /* Two NUL terminators */ | 
|  | int (*op_fn)(XsNode **n, struct walk_op *op); | 
|  | void *op_opaque; | 
|  | void *op_opaque2; | 
|  |  | 
|  | GList *watches; | 
|  | unsigned int dom_id; | 
|  | unsigned int tx_id; | 
|  |  | 
|  | /* The number of nodes which will exist in the tree if this op succeeds. */ | 
|  | unsigned int new_nr_nodes; | 
|  |  | 
|  | /* | 
|  | * This is maintained on the way *down* the walk to indicate | 
|  | * whether nodes can be modified in place or whether COW is | 
|  | * required. It starts off being true, as we're always going to | 
|  | * replace the root node. If we walk into a shared subtree it | 
|  | * becomes false. If we start *creating* new nodes for a write, | 
|  | * it becomes true again. | 
|  | * | 
|  | * Do not use it on the way back up. | 
|  | */ | 
|  | bool inplace; | 
|  | bool mutating; | 
|  | bool create_dirs; | 
|  | bool in_transaction; | 
|  |  | 
|  | /* Tracking during recursion so we know which is first. */ | 
|  | bool deleted_in_tx; | 
|  | }; | 
|  |  | 
|  | static void fire_watches(struct walk_op *op, bool parents) | 
|  | { | 
|  | GList *l = NULL; | 
|  | XsWatch *w; | 
|  |  | 
|  | if (!op->mutating || op->in_transaction) { | 
|  | return; | 
|  | } | 
|  |  | 
|  | if (parents) { | 
|  | l = op->watches; | 
|  | } | 
|  |  | 
|  | w = g_hash_table_lookup(op->s->watches, op->path); | 
|  | while (w || l) { | 
|  | if (!w) { | 
|  | /* Fire the parent nodes from 'op' if asked to */ | 
|  | w = l->data; | 
|  | l = l->next; | 
|  | continue; | 
|  | } | 
|  |  | 
|  | assert(strlen(op->path) > w->rel_prefix); | 
|  | w->cb(w->cb_opaque, op->path + w->rel_prefix, w->token); | 
|  |  | 
|  | w = w->next; | 
|  | } | 
|  | } | 
|  |  | 
|  | static int xs_node_add_content(XsNode **n, struct walk_op *op) | 
|  | { | 
|  | GByteArray *data = op->op_opaque; | 
|  |  | 
|  | if (op->dom_id) { | 
|  | /* | 
|  | * The real XenStored includes permissions and names of child nodes | 
|  | * in the calculated datasize but life's too short. For a single | 
|  | * tenant internal XenStore, we don't have to be quite as pedantic. | 
|  | */ | 
|  | if (data->len > XS_MAX_NODE_SIZE) { | 
|  | return E2BIG; | 
|  | } | 
|  | } | 
|  | /* We *are* the node to be written. Either this or a copy. */ | 
|  | if (!op->inplace) { | 
|  | XsNode *old = *n; | 
|  | *n = xs_node_copy(old); | 
|  | xs_node_unref(old); | 
|  | } | 
|  |  | 
|  | if ((*n)->content) { | 
|  | g_byte_array_unref((*n)->content); | 
|  | } | 
|  | (*n)->content = g_byte_array_ref(data); | 
|  | if (op->tx_id != XBT_NULL) { | 
|  | (*n)->modified_in_tx = true; | 
|  | } | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | static int xs_node_get_content(XsNode **n, struct walk_op *op) | 
|  | { | 
|  | GByteArray *data = op->op_opaque; | 
|  | GByteArray *node_data; | 
|  |  | 
|  | assert(op->inplace); | 
|  | assert(*n); | 
|  |  | 
|  | node_data = (*n)->content; | 
|  | if (node_data) { | 
|  | g_byte_array_append(data, node_data->data, node_data->len); | 
|  | } | 
|  |  | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | static int node_rm_recurse(gpointer key, gpointer value, gpointer user_data) | 
|  | { | 
|  | struct walk_op *op = user_data; | 
|  | int path_len = strlen(op->path); | 
|  | int key_len = strlen(key); | 
|  | XsNode *n = value; | 
|  | bool this_inplace = op->inplace; | 
|  |  | 
|  | if (n->ref != 1) { | 
|  | op->inplace = 0; | 
|  | } | 
|  |  | 
|  | assert(key_len + path_len + 2 <= sizeof(op->path)); | 
|  | op->path[path_len] = '/'; | 
|  | memcpy(op->path + path_len + 1, key, key_len + 1); | 
|  |  | 
|  | if (n->children) { | 
|  | g_hash_table_foreach_remove(n->children, node_rm_recurse, op); | 
|  | } | 
|  | op->new_nr_nodes--; | 
|  |  | 
|  | /* | 
|  | * Fire watches on *this* node but not the parents because they are | 
|  | * going to be deleted too, so the watch will fire for them anyway. | 
|  | */ | 
|  | fire_watches(op, false); | 
|  | op->path[path_len] = '\0'; | 
|  |  | 
|  | /* | 
|  | * Actually deleting the child here is just an optimisation; if we | 
|  | * don't then the final unref on the topmost victim will just have | 
|  | * to cascade down again repeating all the g_hash_table_foreach() | 
|  | * calls. | 
|  | */ | 
|  | return this_inplace; | 
|  | } | 
|  |  | 
|  | static XsNode *xs_node_copy_deleted(XsNode *old, struct walk_op *op); | 
|  | static void copy_deleted_recurse(gpointer key, gpointer value, | 
|  | gpointer user_data) | 
|  | { | 
|  | struct walk_op *op = user_data; | 
|  | GHashTable *siblings = op->op_opaque2; | 
|  | XsNode *n = xs_node_copy_deleted(value, op); | 
|  |  | 
|  | /* | 
|  | * Reinsert the deleted_in_tx copy of the node into the parent's | 
|  | * 'children' hash table. Having stashed it from op->op_opaque2 | 
|  | * before the recursive call to xs_node_copy_deleted() scribbled | 
|  | * over it. | 
|  | */ | 
|  | g_hash_table_insert(siblings, g_strdup(key), n); | 
|  | } | 
|  |  | 
|  | static XsNode *xs_node_copy_deleted(XsNode *old, struct walk_op *op) | 
|  | { | 
|  | XsNode *n = xs_node_new(); | 
|  |  | 
|  | n->gencnt = old->gencnt; | 
|  |  | 
|  | #ifdef XS_NODE_UNIT_TEST | 
|  | if (old->name) { | 
|  | n->name = g_strdup(old->name); | 
|  | } | 
|  | #endif | 
|  |  | 
|  | if (old->children) { | 
|  | n->children = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, | 
|  | (GDestroyNotify)xs_node_unref); | 
|  | op->op_opaque2 = n->children; | 
|  | g_hash_table_foreach(old->children, copy_deleted_recurse, op); | 
|  | } | 
|  | if (old->perms) { | 
|  | n->perms = g_list_copy_deep(old->perms, do_perm_copy, NULL); | 
|  | } | 
|  | n->deleted_in_tx = true; | 
|  | /* If it gets resurrected we only fire a watch if it lost its content */ | 
|  | if (old->content) { | 
|  | n->modified_in_tx = true; | 
|  | } | 
|  | op->new_nr_nodes--; | 
|  | return n; | 
|  | } | 
|  |  | 
|  | static int xs_node_rm(XsNode **n, struct walk_op *op) | 
|  | { | 
|  | bool this_inplace = op->inplace; | 
|  |  | 
|  | if (op->tx_id != XBT_NULL) { | 
|  | /* It's not trivial to do inplace handling for this one */ | 
|  | XsNode *old = *n; | 
|  | *n = xs_node_copy_deleted(old, op); | 
|  | xs_node_unref(old); | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | /* Fire watches for, and count, nodes in the subtree which get deleted */ | 
|  | if ((*n)->children) { | 
|  | g_hash_table_foreach_remove((*n)->children, node_rm_recurse, op); | 
|  | } | 
|  | op->new_nr_nodes--; | 
|  |  | 
|  | if (this_inplace) { | 
|  | xs_node_unref(*n); | 
|  | } | 
|  | *n = NULL; | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | static int xs_node_get_perms(XsNode **n, struct walk_op *op) | 
|  | { | 
|  | GList **perms = op->op_opaque; | 
|  |  | 
|  | assert(op->inplace); | 
|  | assert(*n); | 
|  |  | 
|  | *perms = g_list_copy_deep((*n)->perms, do_perm_copy, NULL); | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | static void parse_perm(const char *perm, char *letter, unsigned int *dom_id) | 
|  | { | 
|  | unsigned int n = sscanf(perm, "%c%u", letter, dom_id); | 
|  |  | 
|  | assert(n == 2); | 
|  | } | 
|  |  | 
|  | static bool can_access(unsigned int dom_id, GList *perms, const char *letters) | 
|  | { | 
|  | unsigned int i, n; | 
|  | char perm_letter; | 
|  | unsigned int perm_dom_id; | 
|  | bool access; | 
|  |  | 
|  | if (dom_id == 0) { | 
|  | return true; | 
|  | } | 
|  |  | 
|  | n = g_list_length(perms); | 
|  | assert(n >= 1); | 
|  |  | 
|  | /* | 
|  | * The dom_id of the first perm is the owner, and the owner always has | 
|  | * read-write access. | 
|  | */ | 
|  | parse_perm(g_list_nth_data(perms, 0), &perm_letter, &perm_dom_id); | 
|  | if (dom_id == perm_dom_id) { | 
|  | return true; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * The letter of the first perm specified the default access for all other | 
|  | * domains. | 
|  | */ | 
|  | access = !!strchr(letters, perm_letter); | 
|  | for (i = 1; i < n; i++) { | 
|  | parse_perm(g_list_nth_data(perms, i), &perm_letter, &perm_dom_id); | 
|  | if (dom_id != perm_dom_id) { | 
|  | continue; | 
|  | } | 
|  | access = !!strchr(letters, perm_letter); | 
|  | } | 
|  |  | 
|  | return access; | 
|  | } | 
|  |  | 
|  | static int xs_node_set_perms(XsNode **n, struct walk_op *op) | 
|  | { | 
|  | GList *perms = op->op_opaque; | 
|  |  | 
|  | if (op->dom_id) { | 
|  | unsigned int perm_dom_id; | 
|  | char perm_letter; | 
|  |  | 
|  | /* A guest may not change permissions on nodes it does not own */ | 
|  | if (!can_access(op->dom_id, (*n)->perms, "")) { | 
|  | return EPERM; | 
|  | } | 
|  |  | 
|  | /* A guest may not change the owner of a node it owns. */ | 
|  | parse_perm(perms->data, &perm_letter, &perm_dom_id); | 
|  | if (perm_dom_id != op->dom_id) { | 
|  | return EPERM; | 
|  | } | 
|  |  | 
|  | if (g_list_length(perms) > XS_MAX_PERMS_PER_NODE) { | 
|  | return ENOSPC; | 
|  | } | 
|  | } | 
|  |  | 
|  | /* We *are* the node to be written. Either this or a copy. */ | 
|  | if (!op->inplace) { | 
|  | XsNode *old = *n; | 
|  | *n = xs_node_copy(old); | 
|  | xs_node_unref(old); | 
|  | } | 
|  |  | 
|  | if ((*n)->perms) { | 
|  | g_list_free_full((*n)->perms, g_free); | 
|  | } | 
|  | (*n)->perms = g_list_copy_deep(perms, do_perm_copy, NULL); | 
|  | if (op->tx_id != XBT_NULL) { | 
|  | (*n)->modified_in_tx = true; | 
|  | } | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Passed a full reference in *n which it may free if it needs to COW. | 
|  | * | 
|  | * When changing the tree, the op->inplace flag indicates whether this | 
|  | * node may be modified in place (i.e. it and all its parents had a | 
|  | * refcount of one). If walking down the tree we find a node whose | 
|  | * refcount is higher, we must clear op->inplace and COW from there | 
|  | * down. Unless we are creating new nodes as scaffolding for a write | 
|  | * (which works like 'mkdir -p' does). In which case those newly | 
|  | * created nodes can (and must) be modified in place again. | 
|  | */ | 
|  | static int xs_node_walk(XsNode **n, struct walk_op *op) | 
|  | { | 
|  | char *child_name = NULL; | 
|  | size_t namelen; | 
|  | XsNode *old = *n, *child = NULL; | 
|  | bool stole_child = false; | 
|  | bool this_inplace; | 
|  | XsWatch *watch; | 
|  | int err; | 
|  |  | 
|  | namelen = strlen(op->path); | 
|  | watch = g_hash_table_lookup(op->s->watches, op->path); | 
|  |  | 
|  | /* Is there a child, or do we hit the double-NUL termination? */ | 
|  | if (op->path[namelen + 1]) { | 
|  | char *slash; | 
|  | child_name = op->path + namelen + 1; | 
|  | slash = strchr(child_name, '/'); | 
|  | if (slash) { | 
|  | *slash = '\0'; | 
|  | } | 
|  | op->path[namelen] = '/'; | 
|  | } | 
|  |  | 
|  | /* If we walk into a subtree which is shared, we must COW */ | 
|  | if (op->mutating && old->ref != 1) { | 
|  | op->inplace = false; | 
|  | } | 
|  |  | 
|  | if (!child_name) { | 
|  | const char *letters = op->mutating ? "wb" : "rb"; | 
|  |  | 
|  | if (!can_access(op->dom_id, old->perms, letters)) { | 
|  | err = EACCES; | 
|  | goto out; | 
|  | } | 
|  |  | 
|  | /* This is the actual node on which the operation shall be performed */ | 
|  | err = op->op_fn(n, op); | 
|  | if (!err) { | 
|  | fire_watches(op, true); | 
|  | } | 
|  | goto out; | 
|  | } | 
|  |  | 
|  | /* op->inplace will be further modified during the recursion */ | 
|  | this_inplace = op->inplace; | 
|  |  | 
|  | if (old && old->children) { | 
|  | child = g_hash_table_lookup(old->children, child_name); | 
|  | /* This is a *weak* reference to 'child', owned by the hash table */ | 
|  | } | 
|  |  | 
|  | if (child) { | 
|  | if (child->deleted_in_tx) { | 
|  | assert(child->ref == 1); | 
|  | /* Cannot actually set child->deleted_in_tx = false until later */ | 
|  | } | 
|  | xs_node_ref(child); | 
|  | /* | 
|  | * Now we own it too. But if we can modify inplace, that's going to | 
|  | * foil the check and force it to COW. We want to be the *only* owner | 
|  | * so that it can be modified in place, so remove it from the hash | 
|  | * table in that case. We'll add it (or its replacement) back later. | 
|  | */ | 
|  | if (op->mutating && this_inplace) { | 
|  | g_hash_table_remove(old->children, child_name); | 
|  | stole_child = true; | 
|  | } | 
|  | } else if (op->create_dirs) { | 
|  | assert(op->mutating); | 
|  |  | 
|  | if (!can_access(op->dom_id, old->perms, "wb")) { | 
|  | err = EACCES; | 
|  | goto out; | 
|  | } | 
|  |  | 
|  | if (op->dom_id && op->new_nr_nodes >= XS_MAX_DOMAIN_NODES) { | 
|  | err = ENOSPC; | 
|  | goto out; | 
|  | } | 
|  |  | 
|  | child = xs_node_create(child_name, old->perms); | 
|  | op->new_nr_nodes++; | 
|  |  | 
|  | /* | 
|  | * If we're creating a new child, we can clearly modify it (and its | 
|  | * children) in place from here on down. | 
|  | */ | 
|  | op->inplace = true; | 
|  | } else { | 
|  | err = ENOENT; | 
|  | goto out; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * If there's a watch on this node, add it to the list to be fired | 
|  | * (with the correct full pathname for the modified node) at the end. | 
|  | */ | 
|  | if (watch) { | 
|  | op->watches = g_list_append(op->watches, watch); | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Except for the temporary child-stealing as noted, our node has not | 
|  | * changed yet. We don't yet know the overall operation will complete. | 
|  | */ | 
|  | err = xs_node_walk(&child, op); | 
|  |  | 
|  | if (watch) { | 
|  | op->watches = g_list_remove(op->watches, watch); | 
|  | } | 
|  |  | 
|  | if (err || !op->mutating) { | 
|  | if (stole_child) { | 
|  | /* Put it back as it was. */ | 
|  | g_hash_table_replace(old->children, g_strdup(child_name), child); | 
|  | } else { | 
|  | xs_node_unref(child); | 
|  | } | 
|  | goto out; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Now we know the operation has completed successfully and we're on | 
|  | * the way back up. Make the change, substituting 'child' in the | 
|  | * node at our level. | 
|  | */ | 
|  | if (!this_inplace) { | 
|  | *n = xs_node_copy(old); | 
|  | xs_node_unref(old); | 
|  | } | 
|  |  | 
|  | /* | 
|  | * If we resurrected a deleted_in_tx node, we can mark it as no longer | 
|  | * deleted now that we know the overall operation has succeeded. | 
|  | */ | 
|  | if (op->create_dirs && child && child->deleted_in_tx) { | 
|  | op->new_nr_nodes++; | 
|  | child->deleted_in_tx = false; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * The child may be NULL here, for a remove operation. Either way, | 
|  | * xs_node_add_child() will do the right thing and return a value | 
|  | * indicating whether it changed the parent's hash table or not. | 
|  | * | 
|  | * We bump the parent gencnt if it adds a child that we *didn't* | 
|  | * steal from it in the first place, or if child==NULL and was | 
|  | * thus removed (whether we stole it earlier and didn't put it | 
|  | * back, or xs_node_add_child() actually removed it now). | 
|  | */ | 
|  | if ((xs_node_add_child(*n, child_name, child) && !stole_child) || !child) { | 
|  | (*n)->gencnt++; | 
|  | } | 
|  |  | 
|  | out: | 
|  | op->path[namelen] = '\0'; | 
|  | if (!namelen) { | 
|  | assert(!op->watches); | 
|  | /* | 
|  | * On completing the recursion back up the path walk and reaching the | 
|  | * top, assign the new node count if the operation was successful. If | 
|  | * the main tree was changed, bump its tx ID so that outstanding | 
|  | * transactions correctly fail. But don't bump it every time; only | 
|  | * if it makes a difference. | 
|  | */ | 
|  | if (!err && op->mutating) { | 
|  | if (!op->in_transaction) { | 
|  | if (op->s->root_tx != op->s->last_tx) { | 
|  | op->s->root_tx = next_tx(op->s); | 
|  | } | 
|  | op->s->nr_nodes = op->new_nr_nodes; | 
|  | } else { | 
|  | XsTransaction *tx = g_hash_table_lookup(op->s->transactions, | 
|  | GINT_TO_POINTER(op->tx_id)); | 
|  | assert(tx); | 
|  | tx->nr_nodes = op->new_nr_nodes; | 
|  | } | 
|  | } | 
|  | } | 
|  | return err; | 
|  | } | 
|  |  | 
|  | static void append_directory_item(gpointer key, gpointer value, | 
|  | gpointer user_data) | 
|  | { | 
|  | GList **items = user_data; | 
|  |  | 
|  | *items = g_list_insert_sorted(*items, g_strdup(key), (GCompareFunc)strcmp); | 
|  | } | 
|  |  | 
|  | /* Populates items with char * names which caller must free. */ | 
|  | static int xs_node_directory(XsNode **n, struct walk_op *op) | 
|  | { | 
|  | GList **items = op->op_opaque; | 
|  |  | 
|  | assert(op->inplace); | 
|  | assert(*n); | 
|  |  | 
|  | if ((*n)->children) { | 
|  | g_hash_table_foreach((*n)->children, append_directory_item, items); | 
|  | } | 
|  |  | 
|  | if (op->op_opaque2) { | 
|  | *(uint64_t *)op->op_opaque2 = (*n)->gencnt; | 
|  | } | 
|  |  | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | static int validate_path(char *outpath, const char *userpath, | 
|  | unsigned int dom_id) | 
|  | { | 
|  | size_t i, pathlen = strlen(userpath); | 
|  |  | 
|  | if (!pathlen || userpath[pathlen] == '/' || strstr(userpath, "//")) { | 
|  | return EINVAL; | 
|  | } | 
|  | for (i = 0; i < pathlen; i++) { | 
|  | if (!strchr(XS_VALID_CHARS, userpath[i])) { | 
|  | return EINVAL; | 
|  | } | 
|  | } | 
|  | if (userpath[0] == '/') { | 
|  | if (pathlen > XENSTORE_ABS_PATH_MAX) { | 
|  | return E2BIG; | 
|  | } | 
|  | memcpy(outpath, userpath, pathlen + 1); | 
|  | } else { | 
|  | if (pathlen > XENSTORE_REL_PATH_MAX) { | 
|  | return E2BIG; | 
|  | } | 
|  | snprintf(outpath, XENSTORE_ABS_PATH_MAX, "/local/domain/%u/%s", dom_id, | 
|  | userpath); | 
|  | } | 
|  | return 0; | 
|  | } | 
|  |  | 
|  |  | 
|  | static int init_walk_op(XenstoreImplState *s, struct walk_op *op, | 
|  | xs_transaction_t tx_id, unsigned int dom_id, | 
|  | const char *path, XsNode ***rootp) | 
|  | { | 
|  | int ret = validate_path(op->path, path, dom_id); | 
|  | if (ret) { | 
|  | return ret; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * We use *two* NUL terminators at the end of the path, as during the walk | 
|  | * we will temporarily turn each '/' into a NUL to allow us to use that | 
|  | * path element for the lookup. | 
|  | */ | 
|  | op->path[strlen(op->path) + 1] = '\0'; | 
|  | op->watches = NULL; | 
|  | op->path[0] = '\0'; | 
|  | op->inplace = true; | 
|  | op->mutating = false; | 
|  | op->create_dirs = false; | 
|  | op->in_transaction = false; | 
|  | op->dom_id = dom_id; | 
|  | op->tx_id = tx_id; | 
|  | op->s = s; | 
|  |  | 
|  | if (tx_id == XBT_NULL) { | 
|  | *rootp = &s->root; | 
|  | op->new_nr_nodes = s->nr_nodes; | 
|  | } else { | 
|  | XsTransaction *tx = g_hash_table_lookup(s->transactions, | 
|  | GINT_TO_POINTER(tx_id)); | 
|  | if (!tx) { | 
|  | return ENOENT; | 
|  | } | 
|  | *rootp = &tx->root; | 
|  | op->new_nr_nodes = tx->nr_nodes; | 
|  | op->in_transaction = true; | 
|  | } | 
|  |  | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | int xs_impl_read(XenstoreImplState *s, unsigned int dom_id, | 
|  | xs_transaction_t tx_id, const char *path, GByteArray *data) | 
|  | { | 
|  | /* | 
|  | * The data GByteArray shall exist, and will be freed by caller. | 
|  | * Just g_byte_array_append() to it. | 
|  | */ | 
|  | struct walk_op op; | 
|  | XsNode **n; | 
|  | int ret; | 
|  |  | 
|  | ret = init_walk_op(s, &op, tx_id, dom_id, path, &n); | 
|  | if (ret) { | 
|  | return ret; | 
|  | } | 
|  | op.op_fn = xs_node_get_content; | 
|  | op.op_opaque = data; | 
|  | return xs_node_walk(n, &op); | 
|  | } | 
|  |  | 
|  | int xs_impl_write(XenstoreImplState *s, unsigned int dom_id, | 
|  | xs_transaction_t tx_id, const char *path, GByteArray *data) | 
|  | { | 
|  | /* | 
|  | * The data GByteArray shall exist, will be freed by caller. You are | 
|  | * free to use g_byte_array_steal() and keep the data. Or just ref it. | 
|  | */ | 
|  | struct walk_op op; | 
|  | XsNode **n; | 
|  | int ret; | 
|  |  | 
|  | ret = init_walk_op(s, &op, tx_id, dom_id, path, &n); | 
|  | if (ret) { | 
|  | return ret; | 
|  | } | 
|  | op.op_fn = xs_node_add_content; | 
|  | op.op_opaque = data; | 
|  | op.mutating = true; | 
|  | op.create_dirs = true; | 
|  | return xs_node_walk(n, &op); | 
|  | } | 
|  |  | 
|  | int xs_impl_directory(XenstoreImplState *s, unsigned int dom_id, | 
|  | xs_transaction_t tx_id, const char *path, | 
|  | uint64_t *gencnt, GList **items) | 
|  | { | 
|  | /* | 
|  | * The items are (char *) to be freed by caller. Although it's consumed | 
|  | * immediately so if you want to change it to (const char *) and keep | 
|  | * them, go ahead and change the caller. | 
|  | */ | 
|  | struct walk_op op; | 
|  | XsNode **n; | 
|  | int ret; | 
|  |  | 
|  | ret = init_walk_op(s, &op, tx_id, dom_id, path, &n); | 
|  | if (ret) { | 
|  | return ret; | 
|  | } | 
|  | op.op_fn = xs_node_directory; | 
|  | op.op_opaque = items; | 
|  | op.op_opaque2 = gencnt; | 
|  | return xs_node_walk(n, &op); | 
|  | } | 
|  |  | 
|  | int xs_impl_transaction_start(XenstoreImplState *s, unsigned int dom_id, | 
|  | xs_transaction_t *tx_id) | 
|  | { | 
|  | XsTransaction *tx; | 
|  |  | 
|  | if (*tx_id != XBT_NULL) { | 
|  | return EINVAL; | 
|  | } | 
|  |  | 
|  | if (dom_id && s->nr_domu_transactions >= XS_MAX_TRANSACTIONS) { | 
|  | return ENOSPC; | 
|  | } | 
|  |  | 
|  | tx = g_new0(XsTransaction, 1); | 
|  |  | 
|  | tx->nr_nodes = s->nr_nodes; | 
|  | tx->tx_id = next_tx(s); | 
|  | tx->base_tx = s->root_tx; | 
|  | tx->root = xs_node_ref(s->root); | 
|  | tx->dom_id = dom_id; | 
|  |  | 
|  | g_hash_table_insert(s->transactions, GINT_TO_POINTER(tx->tx_id), tx); | 
|  | if (dom_id) { | 
|  | s->nr_domu_transactions++; | 
|  | } | 
|  | *tx_id = tx->tx_id; | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | static gboolean tx_commit_walk(gpointer key, gpointer value, | 
|  | gpointer user_data) | 
|  | { | 
|  | struct walk_op *op = user_data; | 
|  | int path_len = strlen(op->path); | 
|  | int key_len = strlen(key); | 
|  | bool fire_parents = true; | 
|  | XsWatch *watch; | 
|  | XsNode *n = value; | 
|  |  | 
|  | if (n->ref != 1) { | 
|  | return false; | 
|  | } | 
|  |  | 
|  | if (n->deleted_in_tx) { | 
|  | /* | 
|  | * We fire watches on our parents if we are the *first* node | 
|  | * to be deleted (the topmost one). This matches the behaviour | 
|  | * when deleting in the live tree. | 
|  | */ | 
|  | fire_parents = !op->deleted_in_tx; | 
|  |  | 
|  | /* Only used on the way down so no need to clear it later */ | 
|  | op->deleted_in_tx = true; | 
|  | } | 
|  |  | 
|  | assert(key_len + path_len + 2 <= sizeof(op->path)); | 
|  | op->path[path_len] = '/'; | 
|  | memcpy(op->path + path_len + 1, key, key_len + 1); | 
|  |  | 
|  | watch = g_hash_table_lookup(op->s->watches, op->path); | 
|  | if (watch) { | 
|  | op->watches = g_list_append(op->watches, watch); | 
|  | } | 
|  |  | 
|  | if (n->children) { | 
|  | g_hash_table_foreach_remove(n->children, tx_commit_walk, op); | 
|  | } | 
|  |  | 
|  | if (watch) { | 
|  | op->watches = g_list_remove(op->watches, watch); | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Don't fire watches if this node was only copied because a | 
|  | * descendent was changed. The modified_in_tx flag indicates the | 
|  | * ones which were really changed. | 
|  | */ | 
|  | if (n->modified_in_tx || n->deleted_in_tx) { | 
|  | fire_watches(op, fire_parents); | 
|  | n->modified_in_tx = false; | 
|  | } | 
|  | op->path[path_len] = '\0'; | 
|  |  | 
|  | /* Deleted nodes really do get expunged when we commit */ | 
|  | return n->deleted_in_tx; | 
|  | } | 
|  |  | 
|  | static int transaction_commit(XenstoreImplState *s, XsTransaction *tx) | 
|  | { | 
|  | struct walk_op op; | 
|  | XsNode **n; | 
|  | int ret; | 
|  |  | 
|  | if (s->root_tx != tx->base_tx) { | 
|  | return EAGAIN; | 
|  | } | 
|  | xs_node_unref(s->root); | 
|  | s->root = tx->root; | 
|  | tx->root = NULL; | 
|  | s->root_tx = tx->tx_id; | 
|  | s->nr_nodes = tx->nr_nodes; | 
|  |  | 
|  | ret = init_walk_op(s, &op, XBT_NULL, tx->dom_id, "/", &n); | 
|  | /* | 
|  | * There are two reasons why init_walk_op() may fail: an invalid tx_id, | 
|  | * or an invalid path. We pass XBT_NULL and "/", and it cannot fail. | 
|  | * If it does, the world is broken. And returning 'ret' would be weird | 
|  | * because the transaction *was* committed, and all this tree walk is | 
|  | * trying to do is fire the resulting watches on newly-committed nodes. | 
|  | */ | 
|  | g_assert(!ret); | 
|  |  | 
|  | op.deleted_in_tx = false; | 
|  | op.mutating = true; | 
|  |  | 
|  | /* | 
|  | * Walk the new root and fire watches on any node which has a | 
|  | * refcount of one (which is therefore unique to this transaction). | 
|  | */ | 
|  | if (s->root->children) { | 
|  | g_hash_table_foreach_remove(s->root->children, tx_commit_walk, &op); | 
|  | } | 
|  |  | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | int xs_impl_transaction_end(XenstoreImplState *s, unsigned int dom_id, | 
|  | xs_transaction_t tx_id, bool commit) | 
|  | { | 
|  | int ret = 0; | 
|  | XsTransaction *tx = g_hash_table_lookup(s->transactions, | 
|  | GINT_TO_POINTER(tx_id)); | 
|  |  | 
|  | if (!tx || tx->dom_id != dom_id) { | 
|  | return ENOENT; | 
|  | } | 
|  |  | 
|  | if (commit) { | 
|  | ret = transaction_commit(s, tx); | 
|  | } | 
|  |  | 
|  | g_hash_table_remove(s->transactions, GINT_TO_POINTER(tx_id)); | 
|  | if (dom_id) { | 
|  | assert(s->nr_domu_transactions); | 
|  | s->nr_domu_transactions--; | 
|  | } | 
|  | return ret; | 
|  | } | 
|  |  | 
|  | int xs_impl_rm(XenstoreImplState *s, unsigned int dom_id, | 
|  | xs_transaction_t tx_id, const char *path) | 
|  | { | 
|  | struct walk_op op; | 
|  | XsNode **n; | 
|  | int ret; | 
|  |  | 
|  | ret = init_walk_op(s, &op, tx_id, dom_id, path, &n); | 
|  | if (ret) { | 
|  | return ret; | 
|  | } | 
|  | op.op_fn = xs_node_rm; | 
|  | op.mutating = true; | 
|  | return xs_node_walk(n, &op); | 
|  | } | 
|  |  | 
|  | int xs_impl_get_perms(XenstoreImplState *s, unsigned int dom_id, | 
|  | xs_transaction_t tx_id, const char *path, GList **perms) | 
|  | { | 
|  | struct walk_op op; | 
|  | XsNode **n; | 
|  | int ret; | 
|  |  | 
|  | ret = init_walk_op(s, &op, tx_id, dom_id, path, &n); | 
|  | if (ret) { | 
|  | return ret; | 
|  | } | 
|  | op.op_fn = xs_node_get_perms; | 
|  | op.op_opaque = perms; | 
|  | return xs_node_walk(n, &op); | 
|  | } | 
|  |  | 
|  | static void is_valid_perm(gpointer data, gpointer user_data) | 
|  | { | 
|  | char *perm = data; | 
|  | bool *valid = user_data; | 
|  | char letter; | 
|  | unsigned int dom_id; | 
|  |  | 
|  | if (!*valid) { | 
|  | return; | 
|  | } | 
|  |  | 
|  | if (sscanf(perm, "%c%u", &letter, &dom_id) != 2) { | 
|  | *valid = false; | 
|  | return; | 
|  | } | 
|  |  | 
|  | switch (letter) { | 
|  | case 'n': | 
|  | case 'r': | 
|  | case 'w': | 
|  | case 'b': | 
|  | break; | 
|  |  | 
|  | default: | 
|  | *valid = false; | 
|  | break; | 
|  | } | 
|  | } | 
|  |  | 
|  | int xs_impl_set_perms(XenstoreImplState *s, unsigned int dom_id, | 
|  | xs_transaction_t tx_id, const char *path, GList *perms) | 
|  | { | 
|  | struct walk_op op; | 
|  | XsNode **n; | 
|  | bool valid = true; | 
|  | int ret; | 
|  |  | 
|  | if (!g_list_length(perms)) { | 
|  | return EINVAL; | 
|  | } | 
|  |  | 
|  | g_list_foreach(perms, is_valid_perm, &valid); | 
|  | if (!valid) { | 
|  | return EINVAL; | 
|  | } | 
|  |  | 
|  | ret = init_walk_op(s, &op, tx_id, dom_id, path, &n); | 
|  | if (ret) { | 
|  | return ret; | 
|  | } | 
|  | op.op_fn = xs_node_set_perms; | 
|  | op.op_opaque = perms; | 
|  | op.mutating = true; | 
|  | return xs_node_walk(n, &op); | 
|  | } | 
|  |  | 
|  | static int do_xs_impl_watch(XenstoreImplState *s, unsigned int dom_id, | 
|  | const char *path, const char *token, | 
|  | xs_impl_watch_fn fn, void *opaque) | 
|  |  | 
|  | { | 
|  | char abspath[XENSTORE_ABS_PATH_MAX + 1]; | 
|  | XsWatch *w, *l; | 
|  | int ret; | 
|  |  | 
|  | ret = validate_path(abspath, path, dom_id); | 
|  | if (ret) { | 
|  | return ret; | 
|  | } | 
|  |  | 
|  | /* Check for duplicates */ | 
|  | l = w = g_hash_table_lookup(s->watches, abspath); | 
|  | while (w) { | 
|  | if (!g_strcmp0(token, w->token) &&  opaque == w->cb_opaque && | 
|  | fn == w->cb && dom_id == w->dom_id) { | 
|  | return EEXIST; | 
|  | } | 
|  | w = w->next; | 
|  | } | 
|  |  | 
|  | if (dom_id && s->nr_domu_watches >= XS_MAX_WATCHES) { | 
|  | return E2BIG; | 
|  | } | 
|  |  | 
|  | w = g_new0(XsWatch, 1); | 
|  | w->token = g_strdup(token); | 
|  | w->cb = fn; | 
|  | w->cb_opaque = opaque; | 
|  | w->dom_id = dom_id; | 
|  | w->rel_prefix = strlen(abspath) - strlen(path); | 
|  |  | 
|  | /* l was looked up above when checking for duplicates */ | 
|  | if (l) { | 
|  | w->next = l->next; | 
|  | l->next = w; | 
|  | } else { | 
|  | g_hash_table_insert(s->watches, g_strdup(abspath), w); | 
|  | } | 
|  | if (dom_id) { | 
|  | s->nr_domu_watches++; | 
|  | } | 
|  |  | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | int xs_impl_watch(XenstoreImplState *s, unsigned int dom_id, const char *path, | 
|  | const char *token, xs_impl_watch_fn fn, void *opaque) | 
|  | { | 
|  | int ret = do_xs_impl_watch(s, dom_id, path, token, fn, opaque); | 
|  |  | 
|  | if (!ret) { | 
|  | /* A new watch should fire immediately */ | 
|  | fn(opaque, path, token); | 
|  | } | 
|  |  | 
|  | return ret; | 
|  | } | 
|  |  | 
|  | static XsWatch *free_watch(XenstoreImplState *s, XsWatch *w) | 
|  | { | 
|  | XsWatch *next = w->next; | 
|  |  | 
|  | if (w->dom_id) { | 
|  | assert(s->nr_domu_watches); | 
|  | s->nr_domu_watches--; | 
|  | } | 
|  |  | 
|  | g_free(w->token); | 
|  | g_free(w); | 
|  |  | 
|  | return next; | 
|  | } | 
|  |  | 
|  | int xs_impl_unwatch(XenstoreImplState *s, unsigned int dom_id, | 
|  | const char *path, const char *token, | 
|  | xs_impl_watch_fn fn, void *opaque) | 
|  | { | 
|  | char abspath[XENSTORE_ABS_PATH_MAX + 1]; | 
|  | XsWatch *w, **l; | 
|  | int ret; | 
|  |  | 
|  | ret = validate_path(abspath, path, dom_id); | 
|  | if (ret) { | 
|  | return ret; | 
|  | } | 
|  |  | 
|  | w = g_hash_table_lookup(s->watches, abspath); | 
|  | if (!w) { | 
|  | return ENOENT; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * The hash table contains the first element of a list of | 
|  | * watches. Removing the first element in the list is a | 
|  | * special case because we have to update the hash table to | 
|  | * point to the next (or remove it if there's nothing left). | 
|  | */ | 
|  | if (!g_strcmp0(token, w->token) && fn == w->cb && opaque == w->cb_opaque && | 
|  | dom_id == w->dom_id) { | 
|  | if (w->next) { | 
|  | /* Insert the previous 'next' into the hash table */ | 
|  | g_hash_table_insert(s->watches, g_strdup(abspath), w->next); | 
|  | } else { | 
|  | /* Nothing left; remove from hash table */ | 
|  | g_hash_table_remove(s->watches, abspath); | 
|  | } | 
|  | free_watch(s, w); | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * We're all done messing with the hash table because the element | 
|  | * it points to has survived the cull. Now it's just a simple | 
|  | * linked list removal operation. | 
|  | */ | 
|  | for (l = &w->next; *l; l = &w->next) { | 
|  | w = *l; | 
|  |  | 
|  | if (!g_strcmp0(token, w->token) && fn == w->cb && | 
|  | opaque != w->cb_opaque && dom_id == w->dom_id) { | 
|  | *l = free_watch(s, w); | 
|  | return 0; | 
|  | } | 
|  | } | 
|  |  | 
|  | return ENOENT; | 
|  | } | 
|  |  | 
|  | int xs_impl_reset_watches(XenstoreImplState *s, unsigned int dom_id) | 
|  | { | 
|  | char **watch_paths; | 
|  | guint nr_watch_paths; | 
|  | guint i; | 
|  |  | 
|  | watch_paths = (char **)g_hash_table_get_keys_as_array(s->watches, | 
|  | &nr_watch_paths); | 
|  |  | 
|  | for (i = 0; i < nr_watch_paths; i++) { | 
|  | XsWatch *w1 = g_hash_table_lookup(s->watches, watch_paths[i]); | 
|  | XsWatch *w2, *w, **l; | 
|  |  | 
|  | /* | 
|  | * w1 is the original list. The hash table has this pointer. | 
|  | * w2 is the head of our newly-filtered list. | 
|  | * w and l are temporary for processing. w is somewhat redundant | 
|  | * with *l but makes my eyes bleed less. | 
|  | */ | 
|  |  | 
|  | w = w2 = w1; | 
|  | l = &w; | 
|  | while (w) { | 
|  | if (w->dom_id == dom_id) { | 
|  | /* If we're freeing the head of the list, bump w2 */ | 
|  | if (w2 == w) { | 
|  | w2 = w->next; | 
|  | } | 
|  | *l = free_watch(s, w); | 
|  | } else { | 
|  | l = &w->next; | 
|  | } | 
|  | w = *l; | 
|  | } | 
|  | /* | 
|  | * If the head of the list survived the cull, we don't need to | 
|  | * touch the hash table and we're done with this path. Else... | 
|  | */ | 
|  | if (w1 != w2) { | 
|  | g_hash_table_steal(s->watches, watch_paths[i]); | 
|  |  | 
|  | /* | 
|  | * It was already freed. (Don't worry, this whole thing is | 
|  | * single-threaded and nobody saw it in the meantime). And | 
|  | * having *stolen* it, we now own the watch_paths[i] string | 
|  | * so if we don't give it back to the hash table, we need | 
|  | * to free it. | 
|  | */ | 
|  | if (w2) { | 
|  | g_hash_table_insert(s->watches, watch_paths[i], w2); | 
|  | } else { | 
|  | g_free(watch_paths[i]); | 
|  | } | 
|  | } | 
|  | } | 
|  | g_free(watch_paths); | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | static void xs_tx_free(void *_tx) | 
|  | { | 
|  | XsTransaction *tx = _tx; | 
|  | if (tx->root) { | 
|  | xs_node_unref(tx->root); | 
|  | } | 
|  | g_free(tx); | 
|  | } | 
|  |  | 
|  | XenstoreImplState *xs_impl_create(unsigned int dom_id) | 
|  | { | 
|  | XenstoreImplState *s = g_new0(XenstoreImplState, 1); | 
|  | GList *perms; | 
|  |  | 
|  | s->watches = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, NULL); | 
|  | s->transactions = g_hash_table_new_full(g_direct_hash, g_direct_equal, | 
|  | NULL, xs_tx_free); | 
|  |  | 
|  | perms = g_list_append(NULL, xs_perm_as_string(XS_PERM_NONE, 0)); | 
|  | s->root = xs_node_create("/", perms); | 
|  | g_list_free_full(perms, g_free); | 
|  | s->nr_nodes = 1; | 
|  |  | 
|  | s->root_tx = s->last_tx = 1; | 
|  | return s; | 
|  | } | 
|  |  | 
|  |  | 
|  | static void clear_serialized_tx(gpointer key, gpointer value, gpointer opaque) | 
|  | { | 
|  | XsNode *n = value; | 
|  |  | 
|  | n->serialized_tx = XBT_NULL; | 
|  | if (n->children) { | 
|  | g_hash_table_foreach(n->children, clear_serialized_tx, NULL); | 
|  | } | 
|  | } | 
|  |  | 
|  | static void clear_tx_serialized_tx(gpointer key, gpointer value, | 
|  | gpointer opaque) | 
|  | { | 
|  | XsTransaction *t = value; | 
|  |  | 
|  | clear_serialized_tx(NULL, t->root, NULL); | 
|  | } | 
|  |  | 
|  | static void write_be32(GByteArray *save, uint32_t val) | 
|  | { | 
|  | uint32_t be = htonl(val); | 
|  | g_byte_array_append(save, (void *)&be, sizeof(be)); | 
|  | } | 
|  |  | 
|  |  | 
|  | struct save_state { | 
|  | GByteArray *bytes; | 
|  | unsigned int tx_id; | 
|  | }; | 
|  |  | 
|  | #define MODIFIED_IN_TX  (1U << 0) | 
|  | #define DELETED_IN_TX   (1U << 1) | 
|  | #define NODE_REF        (1U << 2) | 
|  |  | 
|  | static void save_node(gpointer key, gpointer value, gpointer opaque) | 
|  | { | 
|  | struct save_state *ss = opaque; | 
|  | XsNode *n = value; | 
|  | char *name = key; | 
|  | uint8_t flag = 0; | 
|  |  | 
|  | /* Child nodes (i.e. anything but the root) have a name */ | 
|  | if (name) { | 
|  | g_byte_array_append(ss->bytes, key, strlen(key) + 1); | 
|  | } | 
|  |  | 
|  | /* | 
|  | * If we already wrote this node, refer to the previous copy. | 
|  | * There's no rename/move in XenStore, so all we need to find | 
|  | * it is the tx_id of the transaction in which it exists. Which | 
|  | * may be the root tx. | 
|  | */ | 
|  | if (n->serialized_tx != XBT_NULL) { | 
|  | flag = NODE_REF; | 
|  | g_byte_array_append(ss->bytes, &flag, 1); | 
|  | write_be32(ss->bytes, n->serialized_tx); | 
|  | } else { | 
|  | GList *l; | 
|  | n->serialized_tx = ss->tx_id; | 
|  |  | 
|  | if (n->modified_in_tx) { | 
|  | flag |= MODIFIED_IN_TX; | 
|  | } | 
|  | if (n->deleted_in_tx) { | 
|  | flag |= DELETED_IN_TX; | 
|  | } | 
|  | g_byte_array_append(ss->bytes, &flag, 1); | 
|  |  | 
|  | if (n->content) { | 
|  | write_be32(ss->bytes, n->content->len); | 
|  | g_byte_array_append(ss->bytes, n->content->data, n->content->len); | 
|  | } else { | 
|  | write_be32(ss->bytes, 0); | 
|  | } | 
|  |  | 
|  | for (l = n->perms; l; l = l->next) { | 
|  | g_byte_array_append(ss->bytes, l->data, strlen(l->data) + 1); | 
|  | } | 
|  | /* NUL termination after perms */ | 
|  | g_byte_array_append(ss->bytes, (void *)"", 1); | 
|  |  | 
|  | if (n->children) { | 
|  | g_hash_table_foreach(n->children, save_node, ss); | 
|  | } | 
|  | /* NUL termination after children (child name is NUL) */ | 
|  | g_byte_array_append(ss->bytes, (void *)"", 1); | 
|  | } | 
|  | } | 
|  |  | 
|  | static void save_tree(struct save_state *ss, uint32_t tx_id, XsNode *root) | 
|  | { | 
|  | write_be32(ss->bytes, tx_id); | 
|  | ss->tx_id = tx_id; | 
|  | save_node(NULL, root, ss); | 
|  | } | 
|  |  | 
|  | static void save_tx(gpointer key, gpointer value, gpointer opaque) | 
|  | { | 
|  | uint32_t tx_id = GPOINTER_TO_INT(key); | 
|  | struct save_state *ss = opaque; | 
|  | XsTransaction *n = value; | 
|  |  | 
|  | write_be32(ss->bytes, n->base_tx); | 
|  | write_be32(ss->bytes, n->dom_id); | 
|  |  | 
|  | save_tree(ss, tx_id, n->root); | 
|  | } | 
|  |  | 
|  | static void save_watch(gpointer key, gpointer value, gpointer opaque) | 
|  | { | 
|  | struct save_state *ss = opaque; | 
|  | XsWatch *w = value; | 
|  |  | 
|  | /* We only save the *guest* watches. */ | 
|  | if (w->dom_id) { | 
|  | gpointer relpath = key + w->rel_prefix; | 
|  | g_byte_array_append(ss->bytes, relpath, strlen(relpath) + 1); | 
|  | g_byte_array_append(ss->bytes, (void *)w->token, strlen(w->token) + 1); | 
|  | } | 
|  | } | 
|  |  | 
|  | GByteArray *xs_impl_serialize(XenstoreImplState *s) | 
|  | { | 
|  | struct save_state ss; | 
|  |  | 
|  | ss.bytes = g_byte_array_new(); | 
|  |  | 
|  | /* | 
|  | * node = flags [ real_node / node_ref ] | 
|  | *   flags = uint8_t (MODIFIED_IN_TX | DELETED_IN_TX | NODE_REF) | 
|  | *   node_ref = tx_id (in which the original version of this node exists) | 
|  | *   real_node = content perms child* NUL | 
|  | *     content = len data | 
|  | *       len = uint32_t | 
|  | *       data = uint8_t{len} | 
|  | *     perms = perm* NUL | 
|  | *       perm = asciiz | 
|  | *   child = name node | 
|  | *     name = asciiz | 
|  | * | 
|  | * tree = tx_id node | 
|  | *   tx_id = uint32_t | 
|  | * | 
|  | * transaction = base_tx_id dom_id tree | 
|  | *   base_tx_id = uint32_t | 
|  | *   dom_id = uint32_t | 
|  | * | 
|  | * tx_list = tree transaction* XBT_NULL | 
|  | * | 
|  | * watch = path token | 
|  | *   path = asciiz | 
|  | *   token = asciiz | 
|  | * | 
|  | * watch_list = watch* NUL | 
|  | * | 
|  | * xs_serialize_stream = last_tx tx_list watch_list | 
|  | *   last_tx = uint32_t | 
|  | */ | 
|  |  | 
|  | /* Clear serialized_tx in every node. */ | 
|  | if (s->serialized) { | 
|  | clear_serialized_tx(NULL, s->root, NULL); | 
|  | g_hash_table_foreach(s->transactions, clear_tx_serialized_tx, NULL); | 
|  | } | 
|  |  | 
|  | s->serialized = true; | 
|  |  | 
|  | write_be32(ss.bytes, s->last_tx); | 
|  | save_tree(&ss, s->root_tx, s->root); | 
|  | g_hash_table_foreach(s->transactions, save_tx, &ss); | 
|  |  | 
|  | write_be32(ss.bytes, XBT_NULL); | 
|  |  | 
|  | g_hash_table_foreach(s->watches, save_watch, &ss); | 
|  | g_byte_array_append(ss.bytes, (void *)"", 1); | 
|  |  | 
|  | return ss.bytes; | 
|  | } | 
|  |  | 
|  | struct unsave_state { | 
|  | char path[XENSTORE_ABS_PATH_MAX + 1]; | 
|  | XenstoreImplState *s; | 
|  | GByteArray *bytes; | 
|  | uint8_t *d; | 
|  | size_t l; | 
|  | bool root_walk; | 
|  | }; | 
|  |  | 
|  | static int consume_be32(struct unsave_state *us, unsigned int *val) | 
|  | { | 
|  | uint32_t d; | 
|  |  | 
|  | if (us->l < sizeof(d)) { | 
|  | return -EINVAL; | 
|  | } | 
|  | memcpy(&d, us->d, sizeof(d)); | 
|  | *val = ntohl(d); | 
|  | us->d += sizeof(d); | 
|  | us->l -= sizeof(d); | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | static int consume_string(struct unsave_state *us, char **str, size_t *len) | 
|  | { | 
|  | size_t l; | 
|  |  | 
|  | if (!us->l) { | 
|  | return -EINVAL; | 
|  | } | 
|  |  | 
|  | l = strnlen((void *)us->d, us->l); | 
|  | if (l == us->l) { | 
|  | return -EINVAL; | 
|  | } | 
|  |  | 
|  | if (str) { | 
|  | *str = (void *)us->d; | 
|  | } | 
|  | if (len) { | 
|  | *len = l; | 
|  | } | 
|  |  | 
|  | us->d += l + 1; | 
|  | us->l -= l + 1; | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | static XsNode *lookup_node(XsNode *n, char *path) | 
|  | { | 
|  | char *slash = strchr(path, '/'); | 
|  | XsNode *child; | 
|  |  | 
|  | if (path[0] == '\0') { | 
|  | return n; | 
|  | } | 
|  |  | 
|  | if (slash) { | 
|  | *slash = '\0'; | 
|  | } | 
|  |  | 
|  | if (!n->children) { | 
|  | return NULL; | 
|  | } | 
|  | child = g_hash_table_lookup(n->children, path); | 
|  | if (!slash) { | 
|  | return child; | 
|  | } | 
|  |  | 
|  | *slash = '/'; | 
|  | if (!child) { | 
|  | return NULL; | 
|  | } | 
|  | return lookup_node(child, slash + 1); | 
|  | } | 
|  |  | 
|  | static XsNode *lookup_tx_node(struct unsave_state *us, unsigned int tx_id) | 
|  | { | 
|  | XsTransaction *t; | 
|  | if (tx_id == us->s->root_tx) { | 
|  | return lookup_node(us->s->root, us->path + 1); | 
|  | } | 
|  |  | 
|  | t = g_hash_table_lookup(us->s->transactions, GINT_TO_POINTER(tx_id)); | 
|  | if (!t) { | 
|  | return NULL; | 
|  | } | 
|  | g_assert(t->root); | 
|  | return lookup_node(t->root, us->path + 1); | 
|  | } | 
|  |  | 
|  | static void count_child_nodes(gpointer key, gpointer value, gpointer user_data) | 
|  | { | 
|  | unsigned int *nr_nodes = user_data; | 
|  | XsNode *n = value; | 
|  |  | 
|  | (*nr_nodes)++; | 
|  |  | 
|  | if (n->children) { | 
|  | g_hash_table_foreach(n->children, count_child_nodes, nr_nodes); | 
|  | } | 
|  | } | 
|  |  | 
|  | static int consume_node(struct unsave_state *us, XsNode **nodep, | 
|  | unsigned int *nr_nodes) | 
|  | { | 
|  | XsNode *n = NULL; | 
|  | uint8_t flags; | 
|  | int ret; | 
|  |  | 
|  | if (us->l < 1) { | 
|  | return -EINVAL; | 
|  | } | 
|  | flags = us->d[0]; | 
|  | us->d++; | 
|  | us->l--; | 
|  |  | 
|  | if (flags == NODE_REF) { | 
|  | unsigned int tx; | 
|  |  | 
|  | ret = consume_be32(us, &tx); | 
|  | if (ret) { | 
|  | return ret; | 
|  | } | 
|  |  | 
|  | n = lookup_tx_node(us, tx); | 
|  | if (!n) { | 
|  | return -EINVAL; | 
|  | } | 
|  | n->ref++; | 
|  | if (n->children) { | 
|  | g_hash_table_foreach(n->children, count_child_nodes, nr_nodes); | 
|  | } | 
|  | } else { | 
|  | uint32_t datalen; | 
|  |  | 
|  | if (flags & ~(DELETED_IN_TX | MODIFIED_IN_TX)) { | 
|  | return -EINVAL; | 
|  | } | 
|  | n = xs_node_new(); | 
|  |  | 
|  | if (flags & DELETED_IN_TX) { | 
|  | n->deleted_in_tx = true; | 
|  | } | 
|  | if (flags & MODIFIED_IN_TX) { | 
|  | n->modified_in_tx = true; | 
|  | } | 
|  | ret = consume_be32(us, &datalen); | 
|  | if (ret) { | 
|  | xs_node_unref(n); | 
|  | return -EINVAL; | 
|  | } | 
|  | if (datalen) { | 
|  | if (datalen > us->l) { | 
|  | xs_node_unref(n); | 
|  | return -EINVAL; | 
|  | } | 
|  |  | 
|  | GByteArray *node_data = g_byte_array_new(); | 
|  | g_byte_array_append(node_data, us->d, datalen); | 
|  | us->d += datalen; | 
|  | us->l -= datalen; | 
|  | n->content = node_data; | 
|  |  | 
|  | if (us->root_walk) { | 
|  | n->modified_in_tx = true; | 
|  | } | 
|  | } | 
|  | while (1) { | 
|  | char *perm = NULL; | 
|  | size_t permlen = 0; | 
|  |  | 
|  | ret = consume_string(us, &perm, &permlen); | 
|  | if (ret) { | 
|  | xs_node_unref(n); | 
|  | return ret; | 
|  | } | 
|  |  | 
|  | if (!permlen) { | 
|  | break; | 
|  | } | 
|  |  | 
|  | n->perms = g_list_append(n->perms, g_strdup(perm)); | 
|  | } | 
|  |  | 
|  | /* Now children */ | 
|  | while (1) { | 
|  | size_t childlen; | 
|  | char *childname; | 
|  | char *pathend; | 
|  | XsNode *child = NULL; | 
|  |  | 
|  | ret = consume_string(us, &childname, &childlen); | 
|  | if (ret) { | 
|  | xs_node_unref(n); | 
|  | return ret; | 
|  | } | 
|  |  | 
|  | if (!childlen) { | 
|  | break; | 
|  | } | 
|  |  | 
|  | pathend = us->path + strlen(us->path); | 
|  | strncat(us->path, "/", sizeof(us->path) - 1); | 
|  | strncat(us->path, childname, sizeof(us->path) - 1); | 
|  |  | 
|  | ret = consume_node(us, &child, nr_nodes); | 
|  | *pathend = '\0'; | 
|  | if (ret) { | 
|  | xs_node_unref(n); | 
|  | return ret; | 
|  | } | 
|  | g_assert(child); | 
|  | xs_node_add_child(n, childname, child); | 
|  | } | 
|  |  | 
|  | /* | 
|  | * If the node has no data and no children we still want to fire | 
|  | * a watch on it. | 
|  | */ | 
|  | if (us->root_walk && !n->children) { | 
|  | n->modified_in_tx = true; | 
|  | } | 
|  | } | 
|  |  | 
|  | if (!n->deleted_in_tx) { | 
|  | (*nr_nodes)++; | 
|  | } | 
|  |  | 
|  | *nodep = n; | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | static int consume_tree(struct unsave_state *us, XsTransaction *t) | 
|  | { | 
|  | int ret; | 
|  |  | 
|  | ret = consume_be32(us, &t->tx_id); | 
|  | if (ret) { | 
|  | return ret; | 
|  | } | 
|  |  | 
|  | if (t->tx_id > us->s->last_tx) { | 
|  | return -EINVAL; | 
|  | } | 
|  |  | 
|  | us->path[0] = '\0'; | 
|  |  | 
|  | return consume_node(us, &t->root, &t->nr_nodes); | 
|  | } | 
|  |  | 
|  | int xs_impl_deserialize(XenstoreImplState *s, GByteArray *bytes, | 
|  | unsigned int dom_id, xs_impl_watch_fn watch_fn, | 
|  | void *watch_opaque) | 
|  | { | 
|  | struct unsave_state us; | 
|  | XsTransaction base_t = { 0 }; | 
|  | int ret; | 
|  |  | 
|  | us.s = s; | 
|  | us.bytes = bytes; | 
|  | us.d = bytes->data; | 
|  | us.l = bytes->len; | 
|  |  | 
|  | xs_impl_reset_watches(s, dom_id); | 
|  | g_hash_table_remove_all(s->transactions); | 
|  |  | 
|  | xs_node_unref(s->root); | 
|  | s->root = NULL; | 
|  | s->root_tx = s->last_tx = XBT_NULL; | 
|  |  | 
|  | ret = consume_be32(&us, &s->last_tx); | 
|  | if (ret) { | 
|  | return ret; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Consume the base tree into a transaction so that watches can be | 
|  | * fired as we commit it. By setting us.root_walk we cause the nodes | 
|  | * to be marked as 'modified_in_tx' as they are created, so that the | 
|  | * watches are triggered on them. | 
|  | */ | 
|  | base_t.dom_id = dom_id; | 
|  | base_t.base_tx = XBT_NULL; | 
|  | us.root_walk = true; | 
|  | ret = consume_tree(&us, &base_t); | 
|  | if (ret) { | 
|  | return ret; | 
|  | } | 
|  | us.root_walk = false; | 
|  |  | 
|  | /* | 
|  | * Commit the transaction now while the refcount on all nodes is 1. | 
|  | * Note that we haven't yet reinstated the *guest* watches but that's | 
|  | * OK because we don't want the guest to see any changes. Even any | 
|  | * backend nodes which get recreated should be *precisely* as they | 
|  | * were before the migration. Back ends may have been instantiated | 
|  | * already, and will see the frontend magically blink into existence | 
|  | * now (well, from the aio_bh which fires the watches). It's their | 
|  | * responsibility to rebuild everything precisely as it was before. | 
|  | */ | 
|  | ret = transaction_commit(s, &base_t); | 
|  | if (ret) { | 
|  | return ret; | 
|  | } | 
|  |  | 
|  | while (1) { | 
|  | unsigned int base_tx; | 
|  | XsTransaction *t; | 
|  |  | 
|  | ret = consume_be32(&us, &base_tx); | 
|  | if (ret) { | 
|  | return ret; | 
|  | } | 
|  | if (base_tx == XBT_NULL) { | 
|  | break; | 
|  | } | 
|  |  | 
|  | t = g_new0(XsTransaction, 1); | 
|  | t->base_tx = base_tx; | 
|  |  | 
|  | ret = consume_be32(&us, &t->dom_id); | 
|  | if (!ret) { | 
|  | ret = consume_tree(&us, t); | 
|  | } | 
|  | if (ret) { | 
|  | g_free(t); | 
|  | return ret; | 
|  | } | 
|  | g_assert(t->root); | 
|  | if (t->dom_id) { | 
|  | s->nr_domu_transactions++; | 
|  | } | 
|  | g_hash_table_insert(s->transactions, GINT_TO_POINTER(t->tx_id), t); | 
|  | } | 
|  |  | 
|  | while (1) { | 
|  | char *path, *token; | 
|  | size_t pathlen, toklen; | 
|  |  | 
|  | ret = consume_string(&us, &path, &pathlen); | 
|  | if (ret) { | 
|  | return ret; | 
|  | } | 
|  | if (!pathlen) { | 
|  | break; | 
|  | } | 
|  |  | 
|  | ret = consume_string(&us, &token, &toklen); | 
|  | if (ret) { | 
|  | return ret; | 
|  | } | 
|  |  | 
|  | if (!watch_fn) { | 
|  | continue; | 
|  | } | 
|  |  | 
|  | ret = do_xs_impl_watch(s, dom_id, path, token, watch_fn, watch_opaque); | 
|  | if (ret) { | 
|  | return ret; | 
|  | } | 
|  | } | 
|  |  | 
|  | if (us.l) { | 
|  | return -EINVAL; | 
|  | } | 
|  |  | 
|  | return 0; | 
|  | } |