On Thu, 2016-02-18 at 15:50 +0700, Duy Nguyen wrote: > Caveat: I did not study how to use lmdb. I just guessed what it does > based on function names. I don't know much about refs handling either > (especially since the transaction thing is introduced) > > > diff --git a/Documentation/technical/refs-lmdb-backend.txt > > b/Documentation/technical/refs-lmdb-backend.txt > > new file mode 100644 > > index 0000000..eb81465 > > --- /dev/null > > +++ b/Documentation/technical/refs-lmdb-backend.txt > > +Reflog values are in the same format as the original files-based > > +reflog, including the trailing LF. The date in the reflog value > > +matches the date in the timestamp field. > > ..except that SHA-1s are stored in raw values instead of hex strings. > > > diff --git a/Documentation/technical/repository-version.txt > > b/Documentation/technical/repository-version.txt > > index 00ad379..fca5ecd 100644 > > --- a/Documentation/technical/repository-version.txt > > +++ b/Documentation/technical/repository-version.txt > > @@ -86,3 +86,8 @@ for testing format-1 compatibility. > > When the config key `extensions.preciousObjects` is set to `true`, > > objects in the repository MUST NOT be deleted (e.g., by `git > > -prune` or > > `git repack -d`). > > + > > +`refStorage` > > +~~~~~~~~~~~~ > > +This extension allows the use of alternate ref storage backends. > > The > > +only defined value is `lmdb`. > > refStorage accepts empty string and `files` as well, should probably > be worth mentioning. > > > diff --git a/refs/lmdb-backend.c b/refs/lmdb-backend.c > > +#include "../cache.h" > > +#include <lmdb.h> > > +#include "../object.h" > > +#include "../refs.h" > > +#include "refs-internal.h" > > +#include "../tag.h" > > +#include "../lockfile.h" > > I'm quite sure we don't need "../". We have plenty of source files in > subdirs and many of them (haven't checked all) just go with > #include "cache.h". > > > +struct lmdb_transaction transaction; > > static? > > > + > > +static int in_write_transaction(void) > > +{ > > + return transaction.txn && !(transaction.flags & > > MDB_RDONLY); > > +} > > + > > +static void init_env(MDB_env **env, const char *path) > > +{ > > + int ret; > > + if (*env) > > + return; > > + > > + assert(path); > > + > > + ret = mdb_env_create(env); > > + if (ret) > > + die("BUG: mdb_env_create failed: %s", > > mdb_strerror(ret)); > > + ret = mdb_env_set_maxreaders(*env, 1000); > > + if (ret) > > + die("BUG: mdb_env_set_maxreaders failed: %s", > > mdb_strerror(ret)); > > + ret = mdb_env_set_mapsize(*env, (1<<30)); > > + if (ret) > > + die("BUG: mdb_set_mapsize failed: %s", > > mdb_strerror(ret)); > > + ret = mdb_env_open(*env, path, 0 , 0664); > > This permission makes me wonder if we need adjust_shared_perm() here > and some other places. > > > + if (ret) > > + die("BUG: mdb_env_open (%s) failed: %s", path, > > + mdb_strerror(ret)); > > +} > > + > > +static int lmdb_init_db(int shared, struct strbuf *err) > > +{ > > + /* > > + * To create a db, all we need to do is make a directory > > for > > + * it to live in; lmdb will do the rest. > > + */ > > + > > + if (!db_path) > > + db_path = > > xstrdup(real_path(git_path("refs.lmdb"))); > > + > > + if (mkdir(db_path, 0775) && errno != EEXIST) { > > + strbuf_addf(err, "%s", strerror(errno)); > > maybe strbuf_addstr, unless want to add something more, "mkdir > failed"? > > > +static int read_per_worktree_ref(const char *submodule, const char > > *refname, > > + struct MDB_val *val, int > > *needs_free) > > From what I read, I suspect these _per_worktree functions will be > identical for the next backend. Should we just hand over the job for > files backend? For all entry points that may deal with per-worktree > refs, e.g. lmdb_resolve_ref_unsafe, can we check ref_type() first > thing, if it's per-worktree we call > refs_be_files.resolve_ref_unsafe() > instead? It could even be done at frontend level, > e.g. refs.c:resolve_ref_unsafe(). > > Though I may be talking rubbish here because I don't know how whether > it has anything to do with transactions. > > > +{ > > + struct strbuf sb = STRBUF_INIT; > > + struct strbuf path = STRBUF_INIT; > > + struct stat st; > > + int ret = -1; > > + > > + submodule_path(&path, submodule, refname); > > + > > +#ifndef NO_SYMLINK_HEAD > > It started with the compiler warns about unused "st" when this macro > is defined. Which makes me wonder, should we do something like this > to > make sure this code compiles unconditionally? > > +#ifndef NO_SYMLINK_HEAD > + int no_symlink_head = 0; > +#else > + int no_symlink_head = 1; > +#endif > ... > + if (!no_symlink_head) { > ... > > > > +int lmdb_transaction_begin_flags(struct strbuf *err, unsigned int > > flags) > > static? > > > +#define MAXDEPTH 5 > > + > > +static const char *parse_ref_data(struct lmdb_transaction > > *transaction, > > + const char *refname, const char > > *ref_data, > > + unsigned char *sha1, int > > resolve_flags, > > + int *flags, int bad_name) > > +{ > > + int depth = MAXDEPTH; > > + const char *buf; > > + static struct strbuf refname_buffer = STRBUF_INIT; > > + static struct strbuf refdata_buffer = STRBUF_INIT; > > + MDB_val key, val; > > + int needs_free = 0; > > + > > + for (;;) { > > + if (--depth < 0) > > + return NULL; > > + > > + if (!starts_with(ref_data, "ref:")) { > > + if (get_sha1_hex(ref_data, sha1) || > > + (ref_data[40] != '\0' && > > !isspace(ref_data[40]))) { > > + if (flags) > > + *flags |= REF_ISBROKEN; > > + errno = EINVAL; > > + return NULL; > > + } > > + > > + if (bad_name) { > > + hashclr(sha1); > > + if (flags) > > + *flags |= REF_ISBROKEN; > > + } else if (is_null_sha1(sha1)) { > > + if (flags) > > + *flags |= REF_ISBROKEN; > > + } > > + return refname; > > + } > > + if (flags) > > + *flags |= REF_ISSYMREF; > > + buf = ref_data + 4; > > + while (isspace(*buf)) > > + buf++; > > + strbuf_reset(&refname_buffer); > > + strbuf_addstr(&refname_buffer, buf); > > + refname = refname_buffer.buf; > > + if (resolve_flags & RESOLVE_REF_NO_RECURSE) { > > + hashclr(sha1); > > + return refname; > > + } > > + if (check_refname_format(buf, > > REFNAME_ALLOW_ONELEVEL)) { > > + if (flags) > > + *flags |= REF_ISBROKEN; > > + > > + if (!(resolve_flags & > > RESOLVE_REF_ALLOW_BAD_NAME) || > > + !refname_is_safe(buf)) { > > + errno = EINVAL; > > + return NULL; > > + } > > + bad_name = 1; > > + } > > This code looks a lot like near the end of resolve_ref_1(). Maybe we > could share the code in refs/backend-common.c or something and call > here instead? Something like the following? commit aad6b84fd1869f6e1cf6ed15bcece0c2f6429e9d Author: David Turner <dturner@xxxxxxxxxxxxxxxx> Date: Thu Feb 18 17:09:29 2016 -0500 refs: break out some functions from resolve_ref_1 A bunch of resolve_ref_1 is not backend-specific, so we can break it out into separate internal functions that other backends can use. Signed-off-by: David Turner <dturner@xxxxxxxxxxxxxxxx> diff --git a/refs.c b/refs.c index c9fa34d..680c2a5 100644 --- a/refs.c +++ b/refs.c @@ -1221,6 +1221,66 @@ int for_each_rawref(each_ref_fn fn, void *cb_data) DO_FOR_EACH_INCLUDE_BROKEN, cb_data); } +int parse_simple_ref(const char *buf, unsigned char *sha1, unsigned int *flags, int bad_name) +{ + /* + * Please note that FETCH_HEAD has a second + * line containing other data. + */ + if (get_sha1_hex(buf, sha1) || + (buf[40] != '\0' && !isspace(buf[40]))) { + if (flags) + *flags |= REF_ISBROKEN; + errno = EINVAL; + return -1; + } + if (bad_name) { + hashclr(sha1); + if (flags) + *flags |= REF_ISBROKEN; + } + return 0; +} + +int check_bad_refname(const char *refname, int *flags, int resolve_flags) +{ + if (!check_refname_format(refname, REFNAME_ALLOW_ONELEVEL)) + return 0; + + if (flags) + *flags |= REF_BAD_NAME; + + if (!(resolve_flags & RESOLVE_REF_ALLOW_BAD_NAME) || + !refname_is_safe(refname)) { + errno = EINVAL; + return -1; + } + /* + * dwim_ref() uses REF_ISBROKEN to distinguish between + * missing refs and refs that were present but invalid, + * to complain about the latter to stderr. + * + * We don't know whether the ref exists, so don't set + * REF_ISBROKEN yet. + */ + return 1; +} + +/* + * Parse a refname out of the contents of a symref into a provided + * strbuf. Return a pointer to the strbuf's contents. + */ +char *parse_symref_data(const char *buf, struct strbuf *sb_refname) +{ + buf += 4; + while (isspace(*buf)) + buf++; + strbuf_reset(sb_refname); + strbuf_addstr(sb_refname, buf); + return sb_refname->buf; +} + + /* backend functions */ int refs_init_db(int shared, struct strbuf *err) { diff --git a/refs/files-backend.c b/refs/files-backend.c index da06408..52972e6 100644 --- a/refs/files-backend.c +++ b/refs/files-backend.c @@ -1427,25 +1427,9 @@ static const char *resolve_ref_1(const char *refname, if (flags) *flags = 0; - if (check_refname_format(refname, REFNAME_ALLOW_ONELEVEL)) { - if (flags) - *flags |= REF_BAD_NAME; - - if (!(resolve_flags & RESOLVE_REF_ALLOW_BAD_NAME) || - !refname_is_safe(refname)) { - errno = EINVAL; - return NULL; - } - /* - * dwim_ref() uses REF_ISBROKEN to distinguish between - * missing refs and refs that were present but invalid, - * to complain about the latter to stderr. - * - * We don't know whether the ref exists, so don't set - * REF_ISBROKEN yet. - */ - bad_name = 1; - } + bad_name = check_bad_refname(refname, flags, resolve_flags); + if (bad_name < 0) + return NULL; for (;;) { const char *path; struct stat st; @@ -1541,47 +1525,20 @@ static const char *resolve_ref_1(const char *refname, * Is it a symbolic ref? */ if (!starts_with(sb_contents->buf, "ref:")) { - /* - * Please note that FETCH_HEAD has a second - * line containing other data. - */ - if (get_sha1_hex(sb_contents->buf, sha1) || - (sb_contents->buf[40] != '\0' && !isspace(sb_contents->buf[40]))) { - if (flags) - *flags |= REF_ISBROKEN; - errno = EINVAL; - return NULL; - } - if (bad_name) { - hashclr(sha1); - if (flags) - *flags |= REF_ISBROKEN; - } + if (parse_simple_ref(sb_contents->buf, sha1, flags, bad_name)) + refname = NULL; return refname; } if (flags) *flags |= REF_ISSYMREF; - buf = sb_contents->buf + 4; - while (isspace(*buf)) - buf++; - strbuf_reset(sb_refname); - strbuf_addstr(sb_refname, buf); - refname = sb_refname->buf; + refname = parse_symref_data(sb_contents->buf, sb_refname); if (resolve_flags & RESOLVE_REF_NO_RECURSE) { hashclr(sha1); return refname; } - if (check_refname_format(buf, REFNAME_ALLOW_ONELEVEL)) { - if (flags) - *flags |= REF_ISBROKEN; - - if (!(resolve_flags & RESOLVE_REF_ALLOW_BAD_NAME) || - !refname_is_safe(buf)) { - errno = EINVAL; - return NULL; - } - bad_name = 1; - } + bad_name |= check_bad_refname(refname, flags, resolve_flags); + if (bad_name < 0) + return NULL; } } diff --git a/refs/refs-internal.h b/refs/refs-internal.h index efdde82..7cdfffe 100644 --- a/refs/refs-internal.h +++ b/refs/refs-internal.h @@ -218,6 +218,26 @@ int do_for_each_per_worktree_ref(const char *submodule, const char *base, int do_for_each_ref(const char *submodule, const char *base, each_ref_fn fn, int trim, int flags, void *cb_data); +/* + * Parse a non-symref -- a buf hopefully containing 40 hex characters. + * Set errno and flags appropriately. If the buf can be parsed but + * bad_name is set, the ref is broken: zero out sha1. + */ +int parse_simple_ref(const char *buf, unsigned char *sha1, unsigned int *flags, + int bad_name); +/* + * Parse a refname out of the contents of a symref into a provided + * strbuf. Return a pointer to the strbuf's contents. + */ +char *parse_symref_data(const char *buf, struct strbuf *sb_refname); + +/* + * Check the format of refname. Set flags and errno appropriately. + * Returns 0 if the refname is good, -1 if it is bad enough that we + * have to stop parsing and 1 if we just have to note that it is bad. + */ +int check_bad_refname(const char *refname, int *flags, int resolve_flags); + /* refs backends */ typedef int ref_init_db_fn(int shared, struct strbuf *err); typedef int ref_transaction_commit_fn(struct ref_transaction *transaction, followed by this version of parse_ref_data: static const char *parse_ref_data(struct lmdb_transaction *transaction, const char *refname, const char *ref_data, unsigned char *sha1, int resolve_flags, int *flags, int bad_name) { int depth = MAXDEPTH; const char *buf; static struct strbuf refname_buffer = STRBUF_INIT; static struct strbuf refdata_buffer = STRBUF_INIT; MDB_val key, val; int needs_free = 0; for (;;) { if (--depth < 0) return NULL; /* * Is it a symbolic ref? */ if (!starts_with(ref_data, "ref:")) { if (parse_simple_ref(ref_data, sha1, flags, bad_name)) refname = NULL; if (is_null_sha1(sha1) && flags) *flags |= REF_ISBROKEN; return refname; } if (flags) *flags |= REF_ISSYMREF; refname = parse_symref_data(ref_data, &refname_buffer); if (resolve_flags & RESOLVE_REF_NO_RECURSE) { hashclr(sha1); return refname; } bad_name |= check_bad_refname(refname, flags, resolve_flags); if (bad_name < 0) return NULL; key.mv_data = (char *)refname; key.mv_size = strlen(refname) + 1; if (mdb_get_or_die(transaction, &key, &val, &needs_free)) { hashclr(sha1); if (bad_name) { if (flags) *flags |= REF_ISBROKEN; } if (resolve_flags & RESOLVE_REF_READING) return NULL; return refname; } strbuf_reset(&refdata_buffer); strbuf_add(&refdata_buffer, val.mv_data, val.mv_size); if (needs_free) free(val.mv_data); ref_data = refdata_buffer.buf; } return refname; } ---------------- I'm not sure I like it, because it breaks out these weird tiny functions that take a lot of arguments. But maybe it's worth it? What do you think? -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html