On Fri, Mar 21, 2014 at 08:51:18AM +0700, Duy Nguyen wrote: > On Thu, Mar 20, 2014 at 11:08 PM, Stefan Zager <szager@xxxxxxxxxxxx> wrote: > > Duy, would you like to re-post your patch without the new pread implementation? > > I will but let me try out the sliding window idea first. My quick > tests on git.git show me we may only need 21k mmap instead of 177k > pread. That hints some potential performance improvement. The patch at the bottom reuses (un)use_pack() instead of pread(). The results on linux-2.6 do not look any different. I guess we can drop the idea. It makes me wonder, though, what's wrong a simple patch like this to make pread in index-pack thread-safe? It does not look any different either from the performance point of view, perhaps because unpack_data() reads small deltas most of the time -- 8< -- diff --git a/builtin/index-pack.c b/builtin/index-pack.c index a6b1c17..b91f4f8 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -40,11 +40,6 @@ struct base_data { int ofs_first, ofs_last; }; -#if !defined(NO_PTHREADS) && defined(NO_THREAD_SAFE_PREAD) -/* pread() emulation is not thread-safe. Disable threading. */ -#define NO_PTHREADS -#endif - struct thread_local { #ifndef NO_PTHREADS pthread_t thread; @@ -175,6 +170,22 @@ static void cleanup_thread(void) #endif +#if defined(NO_THREAD_SAFE_PREAD) +static inline ssize_t pread_safe(int fd, void *buf, size_t count, off_t off) +{ + int ret; + read_lock(); + ret = pread(fd, buf, count, off); + read_unlock(); + return ret; +} +#else +static inline ssize_t pread_safe(int fd, void *buf, size_t count, off_t off) +{ + return pread(fd, buf, count, off); +} +#endif + static int mark_link(struct object *obj, int type, void *data) { if (!obj) @@ -542,7 +553,7 @@ static void *unpack_data(struct object_entry *obj, do { ssize_t n = (len < 64*1024) ? len : 64*1024; - n = pread(pack_fd, inbuf, n, from); + n = pread_safe(pack_fd, inbuf, n, from); if (n < 0) die_errno(_("cannot pread pack file")); if (!n) -- 8< -- And the sliding window patch for the list archive -- 8< -- diff --git a/builtin/index-pack.c b/builtin/index-pack.c index a6b1c17..6f5c6d9 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -91,7 +91,8 @@ static off_t consumed_bytes; static unsigned deepest_delta; static git_SHA_CTX input_ctx; static uint32_t input_crc32; -static int input_fd, output_fd, pack_fd; +static int input_fd, output_fd; +static struct packed_git *pack; #ifndef NO_PTHREADS @@ -224,8 +225,10 @@ static unsigned check_objects(void) static void flush(void) { if (input_offset) { - if (output_fd >= 0) + if (output_fd >= 0) { write_or_die(output_fd, input_buffer, input_offset); + pack->pack_size += input_offset; + } git_SHA1_Update(&input_ctx, input_buffer, input_offset); memmove(input_buffer, input_buffer + input_offset, input_len); input_offset = 0; @@ -277,6 +280,10 @@ static void use(int bytes) static const char *open_pack_file(const char *pack_name) { + pack = xmalloc(sizeof(*pack) + 1); + memset(pack, 0, sizeof(*pack)); + pack->pack_name[0] = '\0'; + if (from_stdin) { input_fd = 0; if (!pack_name) { @@ -288,13 +295,17 @@ static const char *open_pack_file(const char *pack_name) output_fd = open(pack_name, O_CREAT|O_EXCL|O_RDWR, 0600); if (output_fd < 0) die_errno(_("unable to create '%s'"), pack_name); - pack_fd = output_fd; + pack->pack_fd = output_fd; } else { + struct stat st; input_fd = open(pack_name, O_RDONLY); if (input_fd < 0) die_errno(_("cannot open packfile '%s'"), pack_name); + if (lstat(pack_name, &st)) + die_errno(_("cannot stat packfile '%s'"), pack_name); output_fd = -1; - pack_fd = input_fd; + pack->pack_fd = input_fd; + pack->pack_size = st.st_size; } git_SHA1_Init(&input_ctx); return pack_name; @@ -531,9 +542,15 @@ static void *unpack_data(struct object_entry *obj, unsigned char *data, *inbuf; git_zstream stream; int status; + struct pack_window *w_cursor = NULL; + + if (from + len > pack->pack_size) + die(Q_("premature end of pack file, %lu byte missing", + "premature end of pack file, %lu bytes missing", + from + len - pack->pack_size), + (unsigned long)(from + len - pack->pack_size)); data = xmalloc(consume ? 64*1024 : obj->size); - inbuf = xmalloc((len < 64*1024) ? len : 64*1024); memset(&stream, 0, sizeof(stream)); git_inflate_init(&stream); @@ -541,15 +558,12 @@ static void *unpack_data(struct object_entry *obj, stream.avail_out = consume ? 64*1024 : obj->size; do { - ssize_t n = (len < 64*1024) ? len : 64*1024; - n = pread(pack_fd, inbuf, n, from); - if (n < 0) - die_errno(_("cannot pread pack file")); - if (!n) - die(Q_("premature end of pack file, %lu byte missing", - "premature end of pack file, %lu bytes missing", - len), - len); + ssize_t n; + unsigned long left; + read_lock(); + inbuf = use_pack(pack, &w_cursor, from, &left); + read_unlock(); + n = left > len ? len : left; from += n; len -= n; stream.next_in = inbuf; @@ -568,6 +582,9 @@ static void *unpack_data(struct object_entry *obj, stream.avail_out = 64*1024; } while (status == Z_OK && stream.avail_in); } + read_lock(); + unuse_pack(&w_cursor); + read_unlock(); } while (len && status == Z_OK && !stream.avail_in); /* This has been inflated OK when first encountered, so... */ @@ -575,7 +592,6 @@ static void *unpack_data(struct object_entry *obj, die(_("serious inflate inconsistency")); git_inflate_end(&stream); - free(inbuf); if (consume) { free(data); data = NULL; @@ -1657,6 +1673,8 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix) free(objects); free(index_name_buf); free(keep_name_buf); + close_pack_windows(pack); + free(pack); if (pack_name == NULL) free((void *) curr_pack); if (index_name == NULL) diff --git a/sha1_file.c b/sha1_file.c index 187f5a6..aa0b16d 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -977,7 +977,13 @@ unsigned char *use_pack(struct packed_git *p, */ if (!p->pack_size && p->pack_fd == -1 && open_packed_git(p)) die("packfile %s cannot be accessed", p->pack_name); - if (offset > (p->pack_size - 20)) + /* + * index-pack uses this function even if the pack is not + * complete yet (i.e. trailing SHA-1 missing). Loosen the + * check a bit in this case (pack_empty name uses as the + * indicator). + */ + if (offset > (p->pack_size - (*p->pack_name ? 20 : 0))) die("offset beyond end of packfile (truncated pack?)"); if (!win || !in_window(win, offset)) { -- 8< -- -- Duy -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html