Re: [PATCHv2] submodule--helper: initial clone learns retry logic

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, Jun 9, 2016 at 5:35 PM, Stefan Beller <sbeller@xxxxxxxxxx> wrote:
> Each submodule that is attempted to be cloned, will be retried once in
> case of failure after all other submodules were cloned. This helps to
> mitigate ephemeral server failures and increases chances of a reliable
> clone of a repo with hundreds of submodules immensely.
>
> Signed-off-by: Stefan Beller <sbeller@xxxxxxxxxx>
> ---
>
>  This patch doesn't do pointer forbidden magic, also we don't abuse the
>  prio queue.
>
>  The API for running parallel commands isn't quite designed for this
>  though as we need to pass around an int instead of a pointer, so we wrap
>  the int into a int*, which will create a xmalloc/free for each submodule.
>
>  This replaces the patch [1/2] in the series
>  "[PATCH 0/2] Dealing with a lot of submodules"
>
>  Thanks,
>  Stefan
>
>  builtin/submodule--helper.c | 66 ++++++++++++++++++++++++++++++++++++++++-----
>  1 file changed, 59 insertions(+), 7 deletions(-)
>
> diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c
> index c7deb55..6424b40 100644
> --- a/builtin/submodule--helper.c
> +++ b/builtin/submodule--helper.c
> @@ -590,10 +590,14 @@ struct submodule_update_clone {
>
>         /* If we want to stop as fast as possible and return an error */
>         unsigned quickstop : 1;
> +
> +       /* failed clones to be retried again */
> +       const struct cache_entry **failed_clones;
> +       int failed_clones_nr, failed_clones_alloc;
>  };
>  #define SUBMODULE_UPDATE_CLONE_INIT {0, MODULE_LIST_INIT, 0, \
>         SUBMODULE_UPDATE_STRATEGY_INIT, 0, -1, NULL, NULL, NULL, NULL, \
> -       STRING_LIST_INIT_DUP, 0}
> +       STRING_LIST_INIT_DUP, 0, NULL, 0, 0}
>
>
>  static void next_submodule_warn_missing(struct submodule_update_clone *suc,
> @@ -718,23 +722,47 @@ cleanup:
>  static int update_clone_get_next_task(struct child_process *child,
>                                       struct strbuf *err,
>                                       void *suc_cb,
> -                                     void **void_task_cb)
> +                                     void **idx_task_cb)
>  {
>         struct submodule_update_clone *suc = suc_cb;
> +       const struct cache_entry *ce;
> +       int index;
>
>         for (; suc->current < suc->list.nr; suc->current++) {
> -               const struct cache_entry *ce = suc->list.entries[suc->current];
> +               ce = suc->list.entries[suc->current];
>                 if (prepare_to_clone_next_submodule(ce, child, suc, err)) {
> +                       int *p = xmalloc(sizeof(*p));
> +                       *p = suc->current;
> +                       *idx_task_cb = p;
>                         suc->current++;
>                         return 1;
>                 }
>         }
> +
> +       /*
> +        * The loop above tried cloning each submodule once, now try the
> +        * stragglers again, which we can imagine as an extension of the
> +        * entry list.
> +        */
> +       index = suc->current - suc->list.nr;
> +       if (index < suc->failed_clones_nr) {
> +               int *p;
> +               ce = suc->failed_clones[index];
> +               if (!prepare_to_clone_next_submodule(ce, child, suc, err))
> +                       die("BUG: ce was a submodule before?");
> +               p = xmalloc(sizeof(*p));
> +               *p = suc->current;
> +               *idx_task_cb = p;
> +               suc->current ++;
> +               return 1;
> +       }
> +
>         return 0;
>  }
>
>  static int update_clone_start_failure(struct strbuf *err,
>                                       void *suc_cb,
> -                                     void *void_task_cb)
> +                                     void *idx_task_cb)
>  {

Here we would leak the pointer, but as we quickstop soon, this may not
be a problem.

>         struct submodule_update_clone *suc = suc_cb;
>         suc->quickstop = 1;
> @@ -744,15 +772,39 @@ static int update_clone_start_failure(struct strbuf *err,
>  static int update_clone_task_finished(int result,
>                                       struct strbuf *err,
>                                       void *suc_cb,
> -                                     void *void_task_cb)
> +                                     void *idx_task_cb)
>  {
> +       const struct cache_entry *ce;
>         struct submodule_update_clone *suc = suc_cb;
>
> +       int *idxP = *(int**)idx_task_cb;
> +       int idx = *idxP;
> +       free(idxP);
> +
>         if (!result)
>                 return 0;
>
> -       suc->quickstop = 1;
> -       return 1;
> +       if (idx < suc->list.nr) {
> +               ce  = suc->list.entries[idx];
> +               strbuf_addf(err, _("Failed to clone '%s'. Retry scheduled"),
> +                           ce->name);
> +               strbuf_addch(err, '\n');
> +               ALLOC_GROW(suc->failed_clones,
> +                          suc->failed_clones_nr + 1,
> +                          suc->failed_clones_alloc);
> +               suc->failed_clones[suc->failed_clones_nr++] = ce;
> +               return 0;
> +       } else {
> +               idx = suc->current - suc->list.nr;
> +               ce  = suc->failed_clones[idx];
> +               strbuf_addf(err, _("Failed to clone '%s' a second time, aborting"),
> +                           ce->name);
> +               strbuf_addch(err, '\n');
> +               suc->quickstop = 1;
> +               return 1;
> +       }
> +
> +       return 0;
>  }

I wonder how we can test this properly as the git binary we have here
is too reliable,
but I observed the correctness of this patch when cloning a repo with
lots of submodules
and one of them failing.

Thanks,
Stefan
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]