Re: [PATCH v3 1/2] compat: add a mkstemps() compatibility function

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi David

I think I see a few minor issues with the code dealing with random six
letters generation. Please correct me if I am wrong.

On Thu, May 28, 2009 at 3:13 PM, David Aguilar <davvid@xxxxxxxxx> wrote:
> mkstemps() is a BSD extension so provide an implementation
> for cross-platform use.
>
> Signed-off-by: David Aguilar <davvid@xxxxxxxxx>
> Tested-by: Johannes Sixt <j6t@xxxxxxxx> (Windows)
> ---
>  Makefile          |   19 +++++++++++++++
>  compat/mkstemps.c |   67 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>  config.mak.in     |    1 +
>  configure.ac      |    6 ++++
>  git-compat-util.h |    5 ++++
>  5 files changed, 98 insertions(+), 0 deletions(-)
>  create mode 100644 compat/mkstemps.c
>
> diff --git a/Makefile b/Makefile
> index eaae45d..a70b5f0 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -52,6 +52,8 @@ all::
>  #
>  # Define NO_MKDTEMP if you don't have mkdtemp in the C library.
>  #
> +# Define NO_MKSTEMPS if you don't have mkstemps in the C library.
> +#
>  # Define NO_SYS_SELECT_H if you don't have sys/select.h.
>  #
>  # Define NO_SYMLINK_HEAD if you never want .git/HEAD to be a symbolic link.
> @@ -636,10 +638,12 @@ EXTLIBS =
>
>  ifeq ($(uname_S),Linux)
>        NO_STRLCPY = YesPlease
> +       NO_MKSTEMPS = YesPlease
>        THREADED_DELTA_SEARCH = YesPlease
>  endif
>  ifeq ($(uname_S),GNU/kFreeBSD)
>        NO_STRLCPY = YesPlease
> +       NO_MKSTEMPS = YesPlease
>        THREADED_DELTA_SEARCH = YesPlease
>  endif
>  ifeq ($(uname_S),UnixWare)
> @@ -651,6 +655,7 @@ ifeq ($(uname_S),UnixWare)
>        SHELL_PATH = /usr/local/bin/bash
>        NO_IPV6 = YesPlease
>        NO_HSTRERROR = YesPlease
> +       NO_MKSTEMPS = YesPlease
>        BASIC_CFLAGS += -Kthread
>        BASIC_CFLAGS += -I/usr/local/include
>        BASIC_LDFLAGS += -L/usr/local/lib
> @@ -674,6 +679,7 @@ ifeq ($(uname_S),SCO_SV)
>        SHELL_PATH = /usr/bin/bash
>        NO_IPV6 = YesPlease
>        NO_HSTRERROR = YesPlease
> +       NO_MKSTEMPS = YesPlease
>        BASIC_CFLAGS += -I/usr/local/include
>        BASIC_LDFLAGS += -L/usr/local/lib
>        NO_STRCASESTR = YesPlease
> @@ -702,6 +708,7 @@ ifeq ($(uname_S),SunOS)
>        NO_MEMMEM = YesPlease
>        NO_HSTRERROR = YesPlease
>        NO_MKDTEMP = YesPlease
> +       NO_MKSTEMPS = YesPlease
>        OLD_ICONV = UnfortunatelyYes
>        ifeq ($(uname_R),5.8)
>                NO_UNSETENV = YesPlease
> @@ -724,6 +731,7 @@ ifeq ($(uname_O),Cygwin)
>        NO_D_INO_IN_DIRENT = YesPlease
>        NO_STRCASESTR = YesPlease
>        NO_MEMMEM = YesPlease
> +       NO_MKSTEMPS = YesPlease
>        NO_SYMLINK_HEAD = YesPlease
>        NEEDS_LIBICONV = YesPlease
>        NO_FAST_WORKING_DIRECTORY = UnfortunatelyYes
> @@ -767,11 +775,13 @@ ifeq ($(uname_S),NetBSD)
>        BASIC_LDFLAGS += -L/usr/pkg/lib $(CC_LD_DYNPATH)/usr/pkg/lib
>        THREADED_DELTA_SEARCH = YesPlease
>        USE_ST_TIMESPEC = YesPlease
> +       NO_MKSTEMPS = YesPlease
>  endif
>  ifeq ($(uname_S),AIX)
>        NO_STRCASESTR=YesPlease
>        NO_MEMMEM = YesPlease
>        NO_MKDTEMP = YesPlease
> +       NO_MKSTEMPS = YesPlease
>        NO_STRLCPY = YesPlease
>        NO_NSEC = YesPlease
>        FREAD_READS_DIRECTORIES = UnfortunatelyYes
> @@ -787,12 +797,14 @@ endif
>  ifeq ($(uname_S),GNU)
>        # GNU/Hurd
>        NO_STRLCPY=YesPlease
> +       NO_MKSTEMPS = YesPlease
>  endif
>  ifeq ($(uname_S),IRIX64)
>        NO_IPV6=YesPlease
>        NO_SETENV=YesPlease
>        NO_STRCASESTR=YesPlease
>        NO_MEMMEM = YesPlease
> +       NO_MKSTEMPS = YesPlease
>        NO_STRLCPY = YesPlease
>        NO_SOCKADDR_STORAGE=YesPlease
>        SHELL_PATH=/usr/gnu/bin/bash
> @@ -805,6 +817,7 @@ ifeq ($(uname_S),HP-UX)
>        NO_SETENV=YesPlease
>        NO_STRCASESTR=YesPlease
>        NO_MEMMEM = YesPlease
> +       NO_MKSTEMPS = YesPlease
>        NO_STRLCPY = YesPlease
>        NO_MKDTEMP = YesPlease
>        NO_UNSETENV = YesPlease
> @@ -834,6 +847,7 @@ ifneq (,$(findstring MINGW,$(uname_S)))
>        NO_C99_FORMAT = YesPlease
>        NO_STRTOUMAX = YesPlease
>        NO_MKDTEMP = YesPlease
> +       NO_MKSTEMPS = YesPlease
>        SNPRINTF_RETURNS_BOGUS = YesPlease
>        NO_SVN_TESTS = YesPlease
>        NO_PERL_MAKEMAKER = YesPlease
> @@ -853,6 +867,7 @@ ifneq (,$(findstring MINGW,$(uname_S)))
>  endif
>  ifneq (,$(findstring arm,$(uname_M)))
>        ARM_SHA1 = YesPlease
> +       NO_MKSTEMPS = YesPlease
>  endif
>
>  -include config.mak.autogen
> @@ -1011,6 +1026,10 @@ ifdef NO_MKDTEMP
>        COMPAT_CFLAGS += -DNO_MKDTEMP
>        COMPAT_OBJS += compat/mkdtemp.o
>  endif
> +ifdef NO_MKSTEMPS
> +       COMPAT_CFLAGS += -DNO_MKSTEMPS
> +       COMPAT_OBJS += compat/mkstemps.o
> +endif
>  ifdef NO_UNSETENV
>        COMPAT_CFLAGS += -DNO_UNSETENV
>        COMPAT_OBJS += compat/unsetenv.o
> diff --git a/compat/mkstemps.c b/compat/mkstemps.c
> new file mode 100644
> index 0000000..87ebc2a
> --- /dev/null
> +++ b/compat/mkstemps.c
> @@ -0,0 +1,67 @@
> +#include "../git-compat-util.h"
> +
> +#ifndef TMP_MAX
> +#define TMP_MAX 16384
> +#endif
> +
> +/* Adapted from libiberty's mkstemp.c. */
> +int gitmkstemps(char *pattern, int suffix_len)
> +{
> +       static const char letters[] =
> +               "abcdefghijklmnopqrstuvwxyz"
> +               "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
> +               "0123456789";
> +       static const int num_letters = 62;
> +       uint64_t value;
> +       struct timeval tv;
> +       char *template;
> +       size_t len;
> +       int fd, count;
> +
> +       len = strlen(pattern);
> +
> +       if (len < 6 + suffix_len) {
> +               errno = EINVAL;
> +               return -1;
> +       }
> +
> +       if (strncmp(&pattern[len - 6 - suffix_len], "XXXXXX", 6)) {
> +               errno = EINVAL;
> +               return -1;
> +       }
> +
> +       /* Replace pattern's XXXXXX characters with randomness.
> +        * Try TMP_MAX different filenames.
> +        */
> +       gettimeofday(&tv, NULL);
> +       value = ((size_t)(tv.tv_usec << 16)) ^ tv.tv_sec ^ getpid();
> +       template = &pattern[len - 6 - suffix_len];
> +       for (count = 0; count < TMP_MAX; ++count) {
> +               uint64_t v = value;
> +               /* Fill in the random bits. */
> +               template[0] = letters[v % num_letters]; v/= num_letters;
> +               template[1] = letters[v % num_letters]; v/= num_letters;
> +               template[2] = letters[v % num_letters]; v/= num_letters;
> +               template[3] = letters[v % num_letters]; v/= num_letters;
> +               template[4] = letters[v % num_letters]; v/= num_letters;
> +               template[5] = letters[v % num_letters]; v/= num_letters;

v is divided 6 times by 62, and a decent amount of randomness shall be
ensured only if 'value' is usually greater than 62^6.

If it is assumed to be ok for 'v' to become zero in the last one odd
steps sometimes (in fact, _frequently_), then you may ignore what I am
pointing out here.

1. On a couple of systems that I checked on, all variables/types on
the right hand side of
      value = ((size_t)(tv.tv_usec << 16)) ^ tv.tv_sec ^ getpid();
are 4 bytes long. So 'value' is ultimately going to be assigned a
number that fits in 4 bytes, ie value < 2^32 (at least on a few
systems).
The systems that I checked on have these `uname -a` outputs:
-   Linux host 2.6.9-55.ELsmp #1 SMP Fri Apr 20 17:03:35 EDT 2007 i686
i686 i386 GNU/Linux
-   Linux host 2.4.21-50.ELsmp #1 SMP Tue May 8 17:18:29 EDT 2007 i686
i686 i386 GNU/Linux

Also, even if size_t were 64 bits, typecasting _after_ the shift does
not help much. And given the uncertainty about sizeof (size_t), we
could use:
  ((uint64_t) tv.tv_usec) << 16)


2. tv_usec has a decimal value range of 0-999999  (10^6 usec make 1
sec). Which means that tv_usec fits completely in 20 bits (or less).
(tv_usec << 16) yields a number that fits completely in 36 bits (or
less). Max value of this number is 999999 * 2^16, or for convenience,
about M = 10^6 * 2^16. This number (in the range of 0 to M) goes on to
be divided by D=62^6. Also, M > D. Thus, there is about a  D / M * 100
~ 87 % probability of the division M / D working out to be zero. i.e,
in 87% cases, the last division (v/= num_letters) will cause 'v' to
become 0.
Therefore, I think an additional shift of 7 or more bits will help in
improving randomness of template[5]. I would suggest left shifting by
24 bits. This seemed ok in some tests I did.


Combining #1 and #2, I guess we could have the computation of 'value' as:
      value = (((uint64_t)tv.tv_usec) << 24) ^ tv.tv_sec ^ getpid();


- Antriksh
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]