Re: [mdadm PATCH] bcache: add bcache superblock

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi Dan,

So is this an alternative interface to the bcache tools, using mdadm to
manage bcache?
If so, could you give an example of how to use it?

Best regards.

Jack

2012/5/12 Dan Williams <dan.j.williams@xxxxxxxxx>:
> This is a hybrid proposal for supporting bcache as a md device.
> Somewhat similar to the v1.x metadata format, where array assembly is
> handled in userspace, but managed in the kernel.  In the bcache case it
> is an "external" metadata format, but then the expectation is that the
> kernel "bcache" personality takes over runtime maintenance of the
> metadata.
>
> The container id for bcache is the "cache_set".  The subvolume is the
> backing device identifier.
>
> This initial version only supports the runtime static portion of the
> superblock, it will need to grow the ability to read the journal to
> report the backing devices associated with a given cache set (i.e. in
> the superblock backing devices know their cache_set container, but cache
> devices need to look elsewhere to find their backing devices).
>
> Cc: Kent Overstreet <koverstreet@xxxxxxxxxx>
> Signed-off-by: Dan Williams <dan.j.williams@xxxxxxxxx>
> ---
>  Assemble.c     |    1
>  Makefile       |   11 +
>  bcache.h       |   98 +++++++++
>  crc64.c        |  129 +++++++++++
>  maps.c         |    2
>  mdadm.h        |    2
>  super-bcache.c |  634 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  util.c         |    2
>  8 files changed, 873 insertions(+), 6 deletions(-)
>  create mode 100644 bcache.h
>  create mode 100644 crc64.c
>  create mode 100644 super-bcache.c
>
> diff --git a/Assemble.c b/Assemble.c
> index fd94461..267a2ce 100644
> --- a/Assemble.c
> +++ b/Assemble.c
> @@ -1594,6 +1594,7 @@ int assemble_container_content(struct supertype *st, int mdfd,
>                } else switch(content->array.level) {
>                case LEVEL_LINEAR:
>                case LEVEL_MULTIPATH:
> +               case LEVEL_BCACHE:
>                case 0:
>                        err = sysfs_set_str(content, NULL, "array_state",
>                                            "active");
> diff --git a/Makefile b/Makefile
> index b8d363f..7886d13 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -103,8 +103,8 @@ OBJS =  mdadm.o config.o policy.o mdstat.o  ReadMe.o util.o maps.o lib.o \
>        Create.o Detail.o Examine.o Grow.o Monitor.o dlink.o Kill.o Query.o \
>        Incremental.o \
>        mdopen.o super0.o super1.o super-ddf.o super-intel.o bitmap.o \
> -       super-mbr.o super-gpt.o \
> -       restripe.o sysfs.o sha1.o mapfile.o crc32.o sg_io.o msg.o \
> +       super-mbr.o super-gpt.o super-bcache.o \
> +       restripe.o sysfs.o sha1.o mapfile.o crc32.o crc64.o sg_io.o msg.o \
>        platform-intel.o probe_roms.o
>
>  CHECK_OBJS = restripe.o sysfs.o maps.o lib.o
> @@ -116,8 +116,8 @@ INCL = mdadm.h part.h bitmap.h
>  MON_OBJS = mdmon.o monitor.o managemon.o util.o maps.o mdstat.o sysfs.o \
>        config.o policy.o lib.o \
>        Kill.o sg_io.o dlink.o ReadMe.o super0.o super1.o super-intel.o \
> -       super-mbr.o super-gpt.o \
> -       super-ddf.o sha1.o crc32.o msg.o bitmap.o \
> +       super-mbr.o super-gpt.o super-bcache.o \
> +       super-ddf.o sha1.o crc32.o crc64.o msg.o bitmap.o \
>        platform-intel.o probe_roms.o
>
>  MON_SRCS = $(patsubst %.o,%.c,$(MON_OBJS))
> @@ -128,7 +128,8 @@ STATICOBJS = pwgr.o
>  ASSEMBLE_SRCS := mdassemble.c Assemble.c Manage.c config.c policy.c dlink.c util.c \
>        maps.c lib.c \
>        super0.c super1.c super-ddf.c super-intel.c sha1.c crc32.c sg_io.c mdstat.c \
> -       platform-intel.c probe_roms.c sysfs.c super-mbr.c super-gpt.c
> +       platform-intel.c probe_roms.c sysfs.c super-mbr.c super-gpt.c \
> +       super-bcache.c crc64.c
>  ASSEMBLE_AUTO_SRCS := mdopen.c
>  ASSEMBLE_FLAGS:= $(CFLAGS) -DMDASSEMBLE
>  ifdef MDASSEMBLE_AUTO
> diff --git a/bcache.h b/bcache.h
> new file mode 100644
> index 0000000..765e369
> --- /dev/null
> +++ b/bcache.h
> @@ -0,0 +1,98 @@
> +#ifndef _BCACHE_H
> +#define _BCACHE_H
> +
> +#include <stdint.h>
> +
> +#define BITMASK(name, type, field, offset, size)               \
> +static inline uint64_t name(const type *k)                     \
> +{                                                              \
> +       uint64_t field = __le64_to_cpu(k->field);               \
> +       return (field >> offset) & ~(((uint64_t) ~0) << size);  \
> +}                                                              \
> +                                                               \
> +static inline void SET_##name(type *k, uint64_t v)             \
> +{                                                              \
> +       uint64_t field = __le64_to_cpu(k->field);               \
> +       field &= ~(~((uint64_t) ~0 << size) << offset);         \
> +       field |= v << offset;                                   \
> +       k->field = __cpu_to_le64(field);                        \
> +}
> +
> +static const char bcache_magic[] = {
> +       0xc6, 0x85, 0x73, 0xf6, 0x4e, 0x1a, 0x45, 0xca,
> +       0x82, 0x65, 0xf5, 0x7f, 0x48, 0xba, 0x6d, 0x81 };
> +
> +/* Version 1: Backing dev
> + * Version 2: Seed pointer into btree node checksum
> + * Version 3: Backing dev superblock has offset of start of data
> + */
> +
> +#define BCACHE_SB_BDEV_VERSION 3
> +#define BCACHE_SB_MAX_VERSION  3
> +
> +#define SB_SECTOR              8
> +#define SB_SIZE                        16 /* default data_offset in bcache-tools (?) */
> +#define SB_LABEL_SIZE          32
> +
> +struct cache_sb {
> +       uint64_t                csum;
> +       uint64_t                offset; /* sector where this sb was written */
> +       uint64_t                version;
> +#define CACHE_BACKING_DEV      1
> +
> +       uint8_t                 magic[16];
> +
> +       uint8_t                 uuid[16];
> +       union {
> +               uint8_t         set_uuid[16];
> +               uint64_t        set_magic;
> +       };
> +       uint8_t                 label[SB_LABEL_SIZE];
> +
> +       uint64_t                flags;
> +       uint64_t                seq;
> +       uint64_t                pad[8];
> +
> +       uint64_t                nbuckets;       /* device size */
> +       uint16_t                block_size;     /* sectors */
> +       uint16_t                bucket_size;    /* sectors */
> +
> +       uint16_t                nr_in_set;
> +       uint16_t                nr_this_dev;
> +
> +       uint32_t                last_mount;     /* time_t */
> +
> +       uint16_t                first_bucket;
> +       uint16_t                keys;           /* number of journal buckets */
> +       uint64_t                d[];            /* journal buckets */
> +};
> +
> +static inline int SB_BDEV(struct cache_sb *c)
> +{
> +       return __le64_to_cpu(c->version) == CACHE_BACKING_DEV;
> +}
> +
> +BITMASK(CACHE_SYNC,     struct cache_sb, flags, 0, 1);
> +BITMASK(CACHE_DISCARD, struct cache_sb, flags, 1, 1);
> +BITMASK(CACHE_REPLACEMENT, struct cache_sb, flags, 2, 3);
> +
> +BITMASK(BDEV_CACHE_MODE,        struct cache_sb, flags, 0, 4);
> +#define CACHE_MODE_WRITETHROUGH 0U
> +#define CACHE_MODE_WRITEBACK    1U
> +#define CACHE_MODE_WRITEAROUND  2U
> +#define CACHE_MODE_NONE         3U
> +BITMASK(BDEV_STATE,             struct cache_sb, flags, 61, 2);
> +#define BDEV_STATE_NONE         0U
> +#define BDEV_STATE_CLEAN        1U
> +#define BDEV_STATE_DIRTY        2U
> +#define BDEV_STATE_STALE        3U
> +
> +inline uint64_t crc64(const void *_data, size_t len);
> +
> +#define node(i, j)             ((void *) ((i)->d + (j)))
> +#define end(i)                 node(i, (i)->keys)
> +
> +#define csum_set(i)                                                    \
> +       crc64(((void *) (i)) + 8, ((void *) end(i)) - (((void *) (i)) + 8))
> +
> +#endif
> diff --git a/crc64.c b/crc64.c
> new file mode 100644
> index 0000000..8f37445
> --- /dev/null
> +++ b/crc64.c
> @@ -0,0 +1,129 @@
> +#define _GNU_SOURCE
> +
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <stdint.h>
> +#include <unistd.h>
> +
> +/*
> + * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group (Any
> + * use permitted, subject to terms of PostgreSQL license; see.)
> +
> + * If we have a 64-bit integer type, then a 64-bit CRC looks just like the
> + * usual sort of implementation. (See Ross Williams' excellent introduction
> + * A PAINLESS GUIDE TO CRC ERROR DETECTION ALGORITHMS, available from
> + * ftp://ftp.rocksoft.com/papers/crc_v3.txt or several other net sites.)
> + * If we have no working 64-bit type, then fake it with two 32-bit registers.
> + *
> + * The present implementation is a normal (not "reflected", in Williams'
> + * terms) 64-bit CRC, using initial all-ones register contents and a final
> + * bit inversion. The chosen polynomial is borrowed from the DLT1 spec
> + * (ECMA-182, available from http://www.ecma.ch/ecma1/STAND/ECMA-182.HTM):
> + *
> + * x^64 + x^62 + x^57 + x^55 + x^54 + x^53 + x^52 + x^47 + x^46 + x^45 +
> + * x^40 + x^39 + x^38 + x^37 + x^35 + x^33 + x^32 + x^31 + x^29 + x^27 +
> + * x^24 + x^23 + x^22 + x^21 + x^19 + x^17 + x^13 + x^12 + x^10 + x^9 +
> + * x^7 + x^4 + x + 1
> +*/
> +
> +static const uint64_t crc_table[256] = {
> +       0x0000000000000000ULL, 0x42F0E1EBA9EA3693ULL, 0x85E1C3D753D46D26ULL,
> +       0xC711223CFA3E5BB5ULL, 0x493366450E42ECDFULL, 0x0BC387AEA7A8DA4CULL,
> +       0xCCD2A5925D9681F9ULL, 0x8E224479F47CB76AULL, 0x9266CC8A1C85D9BEULL,
> +       0xD0962D61B56FEF2DULL, 0x17870F5D4F51B498ULL, 0x5577EEB6E6BB820BULL,
> +       0xDB55AACF12C73561ULL, 0x99A54B24BB2D03F2ULL, 0x5EB4691841135847ULL,
> +       0x1C4488F3E8F96ED4ULL, 0x663D78FF90E185EFULL, 0x24CD9914390BB37CULL,
> +       0xE3DCBB28C335E8C9ULL, 0xA12C5AC36ADFDE5AULL, 0x2F0E1EBA9EA36930ULL,
> +       0x6DFEFF5137495FA3ULL, 0xAAEFDD6DCD770416ULL, 0xE81F3C86649D3285ULL,
> +       0xF45BB4758C645C51ULL, 0xB6AB559E258E6AC2ULL, 0x71BA77A2DFB03177ULL,
> +       0x334A9649765A07E4ULL, 0xBD68D2308226B08EULL, 0xFF9833DB2BCC861DULL,
> +       0x388911E7D1F2DDA8ULL, 0x7A79F00C7818EB3BULL, 0xCC7AF1FF21C30BDEULL,
> +       0x8E8A101488293D4DULL, 0x499B3228721766F8ULL, 0x0B6BD3C3DBFD506BULL,
> +       0x854997BA2F81E701ULL, 0xC7B97651866BD192ULL, 0x00A8546D7C558A27ULL,
> +       0x4258B586D5BFBCB4ULL, 0x5E1C3D753D46D260ULL, 0x1CECDC9E94ACE4F3ULL,
> +       0xDBFDFEA26E92BF46ULL, 0x990D1F49C77889D5ULL, 0x172F5B3033043EBFULL,
> +       0x55DFBADB9AEE082CULL, 0x92CE98E760D05399ULL, 0xD03E790CC93A650AULL,
> +       0xAA478900B1228E31ULL, 0xE8B768EB18C8B8A2ULL, 0x2FA64AD7E2F6E317ULL,
> +       0x6D56AB3C4B1CD584ULL, 0xE374EF45BF6062EEULL, 0xA1840EAE168A547DULL,
> +       0x66952C92ECB40FC8ULL, 0x2465CD79455E395BULL, 0x3821458AADA7578FULL,
> +       0x7AD1A461044D611CULL, 0xBDC0865DFE733AA9ULL, 0xFF3067B657990C3AULL,
> +       0x711223CFA3E5BB50ULL, 0x33E2C2240A0F8DC3ULL, 0xF4F3E018F031D676ULL,
> +       0xB60301F359DBE0E5ULL, 0xDA050215EA6C212FULL, 0x98F5E3FE438617BCULL,
> +       0x5FE4C1C2B9B84C09ULL, 0x1D14202910527A9AULL, 0x93366450E42ECDF0ULL,
> +       0xD1C685BB4DC4FB63ULL, 0x16D7A787B7FAA0D6ULL, 0x5427466C1E109645ULL,
> +       0x4863CE9FF6E9F891ULL, 0x0A932F745F03CE02ULL, 0xCD820D48A53D95B7ULL,
> +       0x8F72ECA30CD7A324ULL, 0x0150A8DAF8AB144EULL, 0x43A04931514122DDULL,
> +       0x84B16B0DAB7F7968ULL, 0xC6418AE602954FFBULL, 0xBC387AEA7A8DA4C0ULL,
> +       0xFEC89B01D3679253ULL, 0x39D9B93D2959C9E6ULL, 0x7B2958D680B3FF75ULL,
> +       0xF50B1CAF74CF481FULL, 0xB7FBFD44DD257E8CULL, 0x70EADF78271B2539ULL,
> +       0x321A3E938EF113AAULL, 0x2E5EB66066087D7EULL, 0x6CAE578BCFE24BEDULL,
> +       0xABBF75B735DC1058ULL, 0xE94F945C9C3626CBULL, 0x676DD025684A91A1ULL,
> +       0x259D31CEC1A0A732ULL, 0xE28C13F23B9EFC87ULL, 0xA07CF2199274CA14ULL,
> +       0x167FF3EACBAF2AF1ULL, 0x548F120162451C62ULL, 0x939E303D987B47D7ULL,
> +       0xD16ED1D631917144ULL, 0x5F4C95AFC5EDC62EULL, 0x1DBC74446C07F0BDULL,
> +       0xDAAD56789639AB08ULL, 0x985DB7933FD39D9BULL, 0x84193F60D72AF34FULL,
> +       0xC6E9DE8B7EC0C5DCULL, 0x01F8FCB784FE9E69ULL, 0x43081D5C2D14A8FAULL,
> +       0xCD2A5925D9681F90ULL, 0x8FDAB8CE70822903ULL, 0x48CB9AF28ABC72B6ULL,
> +       0x0A3B7B1923564425ULL, 0x70428B155B4EAF1EULL, 0x32B26AFEF2A4998DULL,
> +       0xF5A348C2089AC238ULL, 0xB753A929A170F4ABULL, 0x3971ED50550C43C1ULL,
> +       0x7B810CBBFCE67552ULL, 0xBC902E8706D82EE7ULL, 0xFE60CF6CAF321874ULL,
> +       0xE224479F47CB76A0ULL, 0xA0D4A674EE214033ULL, 0x67C58448141F1B86ULL,
> +       0x253565A3BDF52D15ULL, 0xAB1721DA49899A7FULL, 0xE9E7C031E063ACECULL,
> +       0x2EF6E20D1A5DF759ULL, 0x6C0603E6B3B7C1CAULL, 0xF6FAE5C07D3274CDULL,
> +       0xB40A042BD4D8425EULL, 0x731B26172EE619EBULL, 0x31EBC7FC870C2F78ULL,
> +       0xBFC9838573709812ULL, 0xFD39626EDA9AAE81ULL, 0x3A28405220A4F534ULL,
> +       0x78D8A1B9894EC3A7ULL, 0x649C294A61B7AD73ULL, 0x266CC8A1C85D9BE0ULL,
> +       0xE17DEA9D3263C055ULL, 0xA38D0B769B89F6C6ULL, 0x2DAF4F0F6FF541ACULL,
> +       0x6F5FAEE4C61F773FULL, 0xA84E8CD83C212C8AULL, 0xEABE6D3395CB1A19ULL,
> +       0x90C79D3FEDD3F122ULL, 0xD2377CD44439C7B1ULL, 0x15265EE8BE079C04ULL,
> +       0x57D6BF0317EDAA97ULL, 0xD9F4FB7AE3911DFDULL, 0x9B041A914A7B2B6EULL,
> +       0x5C1538ADB04570DBULL, 0x1EE5D94619AF4648ULL, 0x02A151B5F156289CULL,
> +       0x4051B05E58BC1E0FULL, 0x87409262A28245BAULL, 0xC5B073890B687329ULL,
> +       0x4B9237F0FF14C443ULL, 0x0962D61B56FEF2D0ULL, 0xCE73F427ACC0A965ULL,
> +       0x8C8315CC052A9FF6ULL, 0x3A80143F5CF17F13ULL, 0x7870F5D4F51B4980ULL,
> +       0xBF61D7E80F251235ULL, 0xFD913603A6CF24A6ULL, 0x73B3727A52B393CCULL,
> +       0x31439391FB59A55FULL, 0xF652B1AD0167FEEAULL, 0xB4A25046A88DC879ULL,
> +       0xA8E6D8B54074A6ADULL, 0xEA16395EE99E903EULL, 0x2D071B6213A0CB8BULL,
> +       0x6FF7FA89BA4AFD18ULL, 0xE1D5BEF04E364A72ULL, 0xA3255F1BE7DC7CE1ULL,
> +       0x64347D271DE22754ULL, 0x26C49CCCB40811C7ULL, 0x5CBD6CC0CC10FAFCULL,
> +       0x1E4D8D2B65FACC6FULL, 0xD95CAF179FC497DAULL, 0x9BAC4EFC362EA149ULL,
> +       0x158E0A85C2521623ULL, 0x577EEB6E6BB820B0ULL, 0x906FC95291867B05ULL,
> +       0xD29F28B9386C4D96ULL, 0xCEDBA04AD0952342ULL, 0x8C2B41A1797F15D1ULL,
> +       0x4B3A639D83414E64ULL, 0x09CA82762AAB78F7ULL, 0x87E8C60FDED7CF9DULL,
> +       0xC51827E4773DF90EULL, 0x020905D88D03A2BBULL, 0x40F9E43324E99428ULL,
> +       0x2CFFE7D5975E55E2ULL, 0x6E0F063E3EB46371ULL, 0xA91E2402C48A38C4ULL,
> +       0xEBEEC5E96D600E57ULL, 0x65CC8190991CB93DULL, 0x273C607B30F68FAEULL,
> +       0xE02D4247CAC8D41BULL, 0xA2DDA3AC6322E288ULL, 0xBE992B5F8BDB8C5CULL,
> +       0xFC69CAB42231BACFULL, 0x3B78E888D80FE17AULL, 0x7988096371E5D7E9ULL,
> +       0xF7AA4D1A85996083ULL, 0xB55AACF12C735610ULL, 0x724B8ECDD64D0DA5ULL,
> +       0x30BB6F267FA73B36ULL, 0x4AC29F2A07BFD00DULL, 0x08327EC1AE55E69EULL,
> +       0xCF235CFD546BBD2BULL, 0x8DD3BD16FD818BB8ULL, 0x03F1F96F09FD3CD2ULL,
> +       0x41011884A0170A41ULL, 0x86103AB85A2951F4ULL, 0xC4E0DB53F3C36767ULL,
> +       0xD8A453A01B3A09B3ULL, 0x9A54B24BB2D03F20ULL, 0x5D45907748EE6495ULL,
> +       0x1FB5719CE1045206ULL, 0x919735E51578E56CULL, 0xD367D40EBC92D3FFULL,
> +       0x1476F63246AC884AULL, 0x568617D9EF46BED9ULL, 0xE085162AB69D5E3CULL,
> +       0xA275F7C11F7768AFULL, 0x6564D5FDE549331AULL, 0x279434164CA30589ULL,
> +       0xA9B6706FB8DFB2E3ULL, 0xEB46918411358470ULL, 0x2C57B3B8EB0BDFC5ULL,
> +       0x6EA7525342E1E956ULL, 0x72E3DAA0AA188782ULL, 0x30133B4B03F2B111ULL,
> +       0xF7021977F9CCEAA4ULL, 0xB5F2F89C5026DC37ULL, 0x3BD0BCE5A45A6B5DULL,
> +       0x79205D0E0DB05DCEULL, 0xBE317F32F78E067BULL, 0xFCC19ED95E6430E8ULL,
> +       0x86B86ED5267CDBD3ULL, 0xC4488F3E8F96ED40ULL, 0x0359AD0275A8B6F5ULL,
> +       0x41A94CE9DC428066ULL, 0xCF8B0890283E370CULL, 0x8D7BE97B81D4019FULL,
> +       0x4A6ACB477BEA5A2AULL, 0x089A2AACD2006CB9ULL, 0x14DEA25F3AF9026DULL,
> +       0x562E43B4931334FEULL, 0x913F6188692D6F4BULL, 0xD3CF8063C0C759D8ULL,
> +       0x5DEDC41A34BBEEB2ULL, 0x1F1D25F19D51D821ULL, 0xD80C07CD676F8394ULL,
> +       0x9AFCE626CE85B507ULL
> +};
> +
> +inline uint64_t crc64(const void *_data, size_t len)
> +{
> +       uint64_t crc = 0xFFFFFFFFFFFFFFFFULL;
> +       const unsigned char *data = _data;
> +
> +       while (len--) {
> +               int i = ((int) (crc >> 56) ^ *data++) & 0xFF;
> +               crc = crc_table[i] ^ (crc << 8);
> +       }
> +
> +       return crc ^ 0xFFFFFFFFFFFFFFFFULL;
> +}
> diff --git a/maps.c b/maps.c
> index f2ba9a7..cedf548 100644
> --- a/maps.c
> +++ b/maps.c
> @@ -94,6 +94,8 @@ mapping_t pers[] = {
>        { "10", 10},
>        { "faulty", LEVEL_FAULTY},
>        { "container", LEVEL_CONTAINER},
> +       { "bcache", LEVEL_BCACHE},
> +       { "11", LEVEL_BCACHE},
>        { NULL, 0}
>  };
>
> diff --git a/mdadm.h b/mdadm.h
> index 3bcd052..a0ccff6 100644
> --- a/mdadm.h
> +++ b/mdadm.h
> @@ -816,6 +816,7 @@ extern struct superswitch {
>  extern struct superswitch super0, super1;
>  extern struct superswitch super_imsm, super_ddf;
>  extern struct superswitch mbr, gpt;
> +extern struct superswitch super_bcache;
>
>  struct metadata_update {
>        int     len;
> @@ -1296,6 +1297,7 @@ static inline int xasprintf(char **strp, const char *fmt, ...) {
>  #define        LEVEL_MULTIPATH         (-4)
>  #define        LEVEL_LINEAR            (-1)
>  #define        LEVEL_FAULTY            (-5)
> +#define LEVEL_BCACHE           (0xb)
>
>  /* kernel module doesn't know about these */
>  #define LEVEL_CONTAINER                (-100)
> diff --git a/super-bcache.c b/super-bcache.c
> new file mode 100644
> index 0000000..ec8f3db
> --- /dev/null
> +++ b/super-bcache.c
> @@ -0,0 +1,634 @@
> +/*
> + * mdadm - bcache support
> + *
> + * Copyright (C) 2012 Intel Corporation
> + *
> + * bcache definitions copied from bcache-tools:
> + * git://evilpiepirate.org/~kent/bcache-tools.git
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> + * more details.
> + *
> + * You should have received a copy of the GNU General Public License along with
> + * this program; if not, write to the Free Software Foundation, Inc.,
> + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
> + */
> +#define HAVE_STDINT_H 1
> +#include "mdadm.h"
> +#include "bcache.h"
> +
> +struct bcache_super {
> +       union {
> +               struct cache_sb *sb;
> +               void *buf;
> +       };
> +       struct dl {
> +               int major, minor;
> +               char *devname;
> +               int fd;
> +       } *disk;
> +       int vol;
> +       struct bcache_super *next;
> +};
> +
> +enum {
> +       /* FIXME this is a function of the bucket size */
> +       BCACHE_MAX_DEVICES = 2,
> +};
> +
> +static int load_cache_sb(struct bcache_super *super, int keep_fd)
> +{
> +       struct dl *d = super->disk;
> +       int rc, fd = d->fd;
> +       struct cache_sb *c;
> +       struct stat s;
> +
> +       if (!keep_fd)
> +               d->fd = -1;
> +
> +       rc = fstat(fd, &s);
> +       if (rc)
> +               return rc;
> +       d->major = major(s.st_rdev);
> +       d->minor = minor(s.st_rdev);
> +
> +       rc = posix_memalign(&super->buf, 4096, 4096);
> +       if (rc)
> +               return rc;
> +       c = super->sb;
> +
> +       if (pread(fd, c, 4096, SB_SECTOR << 9) != 4096)
> +               return errno;
> +
> +       if (csum_set(c) != __le64_to_cpu(c->csum))
> +               return ENODEV;
> +
> +       if (memcmp(c->magic, bcache_magic, sizeof(bcache_magic)) != 0)
> +               return ENODEV;
> +
> +       return 0;
> +}
> +
> +static void __free_bcache(struct bcache_super *super)
> +{
> +       if (!super)
> +               return;
> +
> +       while (super) {
> +               struct bcache_super *next = super->next;
> +               struct dl *d = super->disk;
> +
> +               d = super->disk;
> +               if (d->fd >= 0)
> +                       close(d->fd);
> +               free(d->devname);
> +               free(d);
> +               free(super->sb);
> +               free(super);
> +               super = next;
> +       }
> +}
> +
> +static void free_bcache(struct supertype *st)
> +{
> +       struct bcache_super *super = st->sb;
> +
> +       __free_bcache(super);
> +       st->sb = NULL;
> +}
> +
> +#ifndef MDASSEMBLE
> +static void examine_bcache(struct supertype *st, char *homehost)
> +{
> +       const char *const cache_policies[] = { "lru", "fifo", "random", "" };
> +       const char *const bdev_states[] = { "none", "clean", "dirty", "stale" };
> +       const char *const bdev_modes[16] = { "writethrough", "writeback", "writearound", "none" };
> +       struct bcache_super *super = st->sb;
> +       uint16_t first_bucket, bucket_size;
> +       struct cache_sb *c = super->sb;
> +       uint64_t nbuckets, csum;
> +       unsigned long long sz;
> +       char nbuf[64];
> +
> +       printf("       Magic : %s\n",
> +              memcmp(bcache_magic, c->magic, 16) ? "<unknown>" : "<bcache>");
> +       printf("     Version : %d\n", (int) c->version);
> +       printf("        Role : %s\n", SB_BDEV(c) ? "backing-device" : "cache");
> +       __fname_from_uuid((int *) c->set_uuid, 0, nbuf, ':');
> +       printf("    Set UUID : %s\n", nbuf + 5);
> +       __fname_from_uuid((int *) c->uuid, 0, nbuf, ':');
> +       printf("  Cache Devs : %u\n", c->nr_in_set);
> +       /* FIXME: list all cache dev uuids in the load_container case */
> +       printf(" Device UUID : %s\n", nbuf + 5);
> +       printf("       Flags :%s%s\n", CACHE_DISCARD(c) ? " discard" : "",
> +                                      CACHE_SYNC(c) ? " sync" : "");
> +       if (SB_BDEV(c)) {
> +               printf("       State : %s\n", bdev_states[BDEV_STATE(c)]);
> +               printf("        Mode : %s\n", bdev_modes[BDEV_CACHE_MODE(c)]);
> +       } else {
> +               printf("      Policy : %s\n", cache_policies[CACHE_REPLACEMENT(c)]);
> +               /* FIXME: add reporting of backing device uuids in the cache caase */
> +       }
> +       printf("       Label : %.32s\n", c->label);
> +       csum = __le64_to_cpu(c->csum);
> +       nbuckets = __le64_to_cpu(c->nbuckets);
> +       bucket_size = __le16_to_cpu(c->bucket_size);
> +       first_bucket = __le16_to_cpu(c->first_bucket);
> +       sz = (nbuckets - first_bucket) * bucket_size;
> +       printf(" Device Size : %llu%s\n", sz, human_size(sz * 512));
> +       printf(" Bucket Size : %u\n", bucket_size);
> +       printf(" Num Buckets : %llu\n", (unsigned long long) nbuckets);
> +       printf("    this dev : %u\n", __le16_to_cpu(c->nr_this_dev));
> +       printf("First Bucket : %u\n", first_bucket);
> +       printf("    Checksum : %llx %s\n", (unsigned long long) csum,
> +              csum == csum_set(c) ? "correct" : "incorrect");
> +}
> +
> +static void brief_examine_bcache(struct supertype *st, int verbose)
> +{
> +       struct bcache_super *super = st->sb;
> +       struct cache_sb *c = super->sb;
> +       char nbuf[64];
> +
> +       __fname_from_uuid((int *) c->set_uuid, 0, nbuf, ':');
> +       printf("ARRAY metadata=bcache UUID=%s\n", nbuf + 5);
> +}
> +
> +static void brief_examine_subarrays_bcache(struct supertype *st, int verbose)
> +{
> +       struct bcache_super *super = st->sb;
> +       struct cache_sb *c = super->sb;
> +       char nbuf[64], nbuf1[64];
> +
> +       /* FIXME this needs to parse the cache device journal to find
> +        * and report the backing dev uuid list
> +        */
> +       if (!SB_BDEV(c))
> +               return;
> +
> +       __fname_from_uuid((int *) c->set_uuid, 0, nbuf, ':');
> +       __fname_from_uuid((int *) c->uuid, 0, nbuf1, ':');
> +
> +       printf("ARRAY container=%s UUID=%s\n", nbuf + 5, nbuf1 + 5);
> +}
> +
> +static void export_examine_bcache(struct supertype *st)
> +{
> +       struct bcache_super *super = st->sb;
> +       struct cache_sb *c = super->sb;
> +       char nbuf[64];
> +
> +       __fname_from_uuid((int *) c->set_uuid, 0, nbuf, ':');
> +       printf("MD_METADATA=bcache\n");
> +       printf("MD_LEVEL=container\n");
> +       printf("MD_UUID=%s\n", nbuf+5);
> +       printf("MD_DEVICES=%d\n", __le16_to_cpu(c->nr_in_set) + 1);
> +}
> +
> +static void detail_bcache(struct supertype *st, char *homehost)
> +{
> +       struct bcache_super *super = st->sb;
> +       struct cache_sb *c = super->sb;
> +       char nbuf[64];
> +
> +       __fname_from_uuid((int *) c->set_uuid, 0, nbuf, ':');
> +       printf("\n           UUID : %s\n", nbuf + 5);
> +}
> +
> +static void brief_detail_bcache(struct supertype *st)
> +{
> +       struct bcache_super *super = st->sb;
> +       struct cache_sb *c = super->sb;
> +       char nbuf[64];
> +
> +       __fname_from_uuid((int *) c->set_uuid, 0, nbuf, ':');
> +       printf(" UUID=%s", nbuf + 5);
> +}
> +
> +static struct bcache_super *alloc_super(const char *func)
> +{
> +       struct bcache_super *super = calloc(1, sizeof(*super));
> +       struct dl *d = calloc(1, sizeof(*d));
> +
> +       if (!super || !d) {
> +               fprintf(stderr, Name "%s: %s failed\n", func, __func__);
> +               free(super);
> +               free(d);
> +               return NULL;
> +       }
> +
> +       super->vol = -1;
> +       super->disk = d;
> +
> +       return super;
> +}
> +
> +static int load_container_bcache(struct supertype *st, int fd, char *devname)
> +{
> +       struct bcache_super *list = NULL;
> +       int rc, i, cdev = 0, bdev = 0;
> +       int devnum = fd2devnum(fd);
> +       struct mdinfo *sra, *sd;
> +
> +       sra = sysfs_read(fd, 0, GET_LEVEL|GET_VERSION|GET_DEVS|GET_STATE);
> +       if (!sra)
> +               return 1;
> +
> +       if (sra->array.major_version != -1 ||
> +           sra->array.minor_version != -2 ||
> +           strcmp(sra->text_version, "bcache") != 0) {
> +               rc = 1;
> +               goto error;
> +       }
> +
> +       for (sd = sra->devs, i = 0; sd; sd = sd->next, i++) {
> +               struct bcache_super *super = alloc_super(__func__);
> +               struct cache_sb *c;
> +               char nm[32];
> +               int fd;
> +
> +               rc = 1;
> +               if (!super)
> +                       goto error;
> +               super->next = list;
> +               list = super;
> +
> +               rc = 2;
> +               sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
> +               fd = dev_open(nm, O_RDWR);
> +               if (fd < 0)
> +                       goto error;
> +
> +               super->disk->fd = fd;
> +               rc = load_cache_sb(super, 1);
> +               if (rc)
> +                       goto error;
> +               c = super->sb;
> +               if (SB_BDEV(c))
> +                       bdev++;
> +               else
> +                       cdev++;
> +       }
> +       rc = 0;
> +
> +       /* FIXME disambiguate multiple bdevs per set, support multiple
> +        * cache devices
> +        */
> +       if (bdev > 1) {
> +               fprintf(stderr, Name ": %d backing devices detected\n", bdev);
> +               rc = 3;
> +       }
> +       if (cdev > 1) {
> +               fprintf(stderr, Name ": %d cache devices detected\n", cdev);
> +               rc = 3;
> +       }
> +       if (rc)
> +               goto error;
> +       st->sb = list;
> +       list = NULL;
> +
> +error:
> +       if (list)
> +               __free_bcache(list);
> +       sysfs_free(sra);
> +
> +       st->container_dev = devnum;
> +       if (rc == 0 && st->ss == NULL) {
> +               st->ss = &super_bcache;
> +               st->minor_version = 0;
> +               st->max_devs = BCACHE_MAX_DEVICES;
> +       }
> +       return rc;
> +}
> +#endif
> +
> +static int load_bcache(struct supertype *st, int fd, char *devname)
> +{
> +       struct bcache_super *super;
> +       struct dl *d;
> +       int rc;
> +
> +       free_bcache(st);
> +
> +       super = alloc_super(__func__);
> +       if (!super)
> +               return 1;
> +
> +       st->sb = super;
> +       d = super->disk;
> +       d->devname = devname ? strdup(devname) : NULL;
> +       d->fd = fd;
> +       rc = load_cache_sb(super, 0);
> +       if (rc) {
> +               free_bcache(st);
> +               if (!devname)
> +                       return rc;
> +               fprintf(stderr, Name ": %s failed on %s (%s)\n", __func__,
> +                       devname, strerror(rc));
> +               return rc;
> +       }
> +
> +       if (st->ss == NULL) {
> +               st->ss = &super_bcache;
> +               st->minor_version = 0;
> +               st->max_devs = BCACHE_MAX_DEVICES;
> +       }
> +
> +       return 0;
> +}
> +
> +static int store_bcache(struct supertype *st, int fd)
> +{
> +       struct bcache_super *super = st->sb;
> +       struct cache_sb *c = super->sb;
> +
> +       if (!c)
> +               return 1;
> +
> +       if (pwrite(fd, c, sizeof(*c), SB_SECTOR << 9) != sizeof(*c))
> +               return 1;
> +
> +       return 0;
> +}
> +
> +static int compare_bcache(struct supertype *st, struct supertype *tst)
> +{
> +       struct bcache_super *a = st->sb;
> +       struct bcache_super *b = tst->sb;
> +
> +        if (!st->sb) {
> +                st->sb = tst->sb;
> +                tst->sb = NULL;
> +                return 0;
> +        }
> +
> +       if (memcmp(a->sb->set_uuid, b->sb->set_uuid, sizeof(b->sb->set_uuid)) != 0)
> +               return 2;
> +
> +       return 0;
> +}
> +
> +static __u64 avail_size_bcache(struct supertype *st, __u64 devsize)
> +{
> +       /* 4k from start, 8k min data offset */
> +       const uint32_t reserved_sectors = (4+8) * 2;
> +
> +       if (devsize < reserved_sectors)
> +               return 0;
> +
> +       return devsize - reserved_sectors;
> +}
> +
> +static struct supertype *match_metadata_desc_bcache(char *arg)
> +{
> +       struct supertype *st;
> +
> +       if (strcmp(arg, "bcache") != 0 &&
> +           strcmp(arg, "default") != 0)
> +               return NULL;
> +
> +       st = calloc(1, sizeof(*st));
> +       if (!st)
> +               return NULL;
> +       st->container_dev = NoMdDev;
> +       st->ss = &super_bcache;
> +       st->max_devs = BCACHE_MAX_DEVICES;
> +       st->minor_version = 0;
> +       st->sb = NULL;
> +
> +       return st;
> +}
> +
> +static int match_home_bcache(struct supertype *st, char *homehost)
> +{
> +       /* the bcache superblock does not specify any host
> +        * identification information.  maybe it should...
> +        */
> +
> +       return -1;
> +}
> +
> +static void uuid_from_bcache(struct supertype *st, int uuid[4])
> +{
> +       struct bcache_super *super = st->sb;
> +       struct cache_sb *c = super->sb;
> +
> +       memcpy(uuid, c->set_uuid, sizeof(c->set_uuid));
> +}
> +
> +/* Describe the volume view of a loaded bcache superblock.
> + *
> + * @st:        supertype whose ->sb holds the loaded bcache superblock
> + * @info:      filled in; getinfo_bcache() zeroes it before calling here
> + * @map_disks: not used by this function
> + * @dmap:      not used by this function
> + *
> + * The array and component sizes are derived from the bucket geometry
> + * recorded in the superblock.
> + */
> +static void getinfo_bcache_volume(struct supertype *st, struct mdinfo *info, int map_disks, char *dmap)
> +{
> +       /* devnum2devname() returns a heap string that this function owns
> +        * and must free; it can be NULL if that allocation failed
> +        */
> +       char *name = devnum2devname(st->container_dev);
> +       struct bcache_super *super = st->sb;
> +       uint16_t bucket_size, first_bucket;
> +       struct cache_sb *c = super->sb;
> +       unsigned long long sz;
> +       uint64_t nbuckets;
> +
> +       nbuckets = __le64_to_cpu(c->nbuckets);
> +       bucket_size = __le16_to_cpu(c->bucket_size);
> +       first_bucket = __le16_to_cpu(c->first_bucket);
> +       /* buckets past first_bucket, times the size of one bucket;
> +        * bucket_size appears to be in 512-byte sectors (see chunk_size
> +        * below)
> +        */
> +       sz = (nbuckets - first_bucket) * bucket_size;
> +
> +       info->container_member    = super->vol;
> +       info->custom_array_size   = sz;
> +       info->component_size      = sz;
> +       info->recovery_start      = MaxSector;
> +       info->data_offset         = SB_SECTOR + SB_SIZE;
> +       /* Bounded write, matching the snprintf() used for info->name.
> +        * "(null)" is what glibc's %s printed for a NULL name, so the
> +        * resulting text is unchanged while the NULL is no longer passed
> +        * to the formatter.
> +        */
> +       snprintf(info->text_version, sizeof(info->text_version), "/%s/%d",
> +                name ? name : "(null)", super->vol);
> +       free(name);
> +       /* NOTE(review): assumes c->label is NUL-terminated - confirm */
> +       snprintf(info->name, sizeof(info->name), "%s", c->label);
> +       memcpy(info->uuid, c->uuid, sizeof(c->uuid));
> +
> +       info->array.raid_disks    = __le16_to_cpu(c->nr_in_set) + 1;
> +       info->array.level         = LEVEL_BCACHE;
> +       info->array.layout        = 0;
> +       info->array.md_minor      = -1;
> +       info->array.ctime         = 0;
> +       info->array.utime         = 0;
> +       info->array.chunk_size    = bucket_size * 512;
> +       info->array.major_version = -1;
> +       info->array.minor_version = -2;
> +
> +       info->disk.major = 0;
> +       info->disk.minor = 0;
> +       info->disk.raid_disk = SB_BDEV(c);
> +       info->disk.number = SB_BDEV(c);
> +       info->disk.state = 1 << MD_DISK_ACTIVE | 1 << MD_DISK_SYNC;
> +}
> +
> +/* Fill @info from the loaded superblock(s).
> + *
> + * When a volume has been selected (super->vol >= 0) the volume view is
> + * reported through getinfo_bcache_volume().  Otherwise @info describes
> + * the container, and container_enough records whether the number of
> + * loaded superblocks equals nr_in_set + 1.
> + *
> + * @dmap, if non-NULL, gets its first raid_disks entries set to 1, where
> + * raid_disks is the value @info carried on entry.
> + */
> +static void getinfo_bcache(struct supertype *st, struct mdinfo *info, char *dmap)
> +{
> +       /* map_disks has to be captured here: the memset below wipes info */
> +       int i, cset, bdev, map_disks = info->array.raid_disks;
> +       struct bcache_super *super = st->sb;
> +       struct cache_sb *c = super->sb;
> +
> +       memset(info, 0, sizeof(*info));
> +
> +       if (super->vol >= 0) {
> +               /* ISO C does not allow "return <expr>;" in a void
> +                * function, so call and return separately
> +                */
> +               getinfo_bcache_volume(st, info, map_disks, dmap);
> +               return;
> +       }
> +
> +       /* make Assemble choose the cache target */
> +       info->events = SB_BDEV(c);
> +       info->recovery_start = MaxSector;
> +       info->data_offset = SB_SECTOR;
> +       info->component_size = SB_SIZE;
> +       strcpy(info->text_version, "bcache");
> +       memcpy(info->uuid, c->set_uuid, sizeof(c->set_uuid));
> +
> +       info->array.raid_disks    = __le16_to_cpu(c->nr_in_set) + 1;
> +       info->array.level         = LEVEL_CONTAINER;
> +       info->array.layout        = 0;
> +       info->array.md_minor      = -1;
> +       info->array.ctime         = 0;
> +       info->array.utime         = 0;
> +       info->array.chunk_size    = __le16_to_cpu(c->bucket_size) * 512;
> +       info->array.major_version = -1;
> +       info->array.minor_version = -2;
> +
> +       info->disk.major = 0;
> +       info->disk.minor = 0;
> +       info->disk.raid_disk = SB_BDEV(c);
> +       info->disk.number = SB_BDEV(c);
> +       /* FIXME: need bcache superblock to identify failed devices */
> +       info->disk.state = 1 << MD_DISK_ACTIVE | 1 << MD_DISK_SYNC;
> +
> +       /* FIXME need to parse the journal uuid_bucket to understand
> +        * which cache devs are consistent with the set
> +        */
> +       for (i = 0; dmap && i < map_disks; i++)
> +               dmap[i] = 1;
> +
> +       /* count the loaded superblocks, split by SB_BDEV() */
> +       cset = 0;
> +       bdev = 0;
> +       while (super) {
> +               c = super->sb;
> +
> +               /* FIXME filter out-of-sync devices */
> +               if (SB_BDEV(c))
> +                       bdev++;
> +               else
> +                       cset++;
> +               super = super->next;
> +       }
> +
> +       /* c now refers to the last superblock on the list */
> +       if (cset + bdev == __le16_to_cpu(c->nr_in_set) + 1)
> +               info->container_enough = 1;
> +       else
> +               info->container_enough = -1;
> +}
> +
> +static int update_bcache(struct supertype *st, struct mdinfo *i, char *update,
> +                        char *devname, int verbose, int uuid_set, char *homehost)
> +{
> +       /* FIXME: none of these updates is implemented yet.  The ones
> +        * listed here are accepted as no-ops; everything else, including
> +        * "homehost" and "name", is refused.
> +        */
> +       static const char *const accepted[] = {
> +               "grow", "resync", "_reshape_progress", "assemble",
> +       };
> +       size_t n;
> +
> +       for (n = 0; n < sizeof(accepted) / sizeof(accepted[0]); n++)
> +               if (strcmp(update, accepted[n]) == 0)
> +                       return 0;
> +
> +       return -1;
> +}
> +
> +/* Build the mdinfo for the single volume of this container, with one
> + * entry on ->devs for every superblock on the loaded list.  A @subarray
> + * string that does not parse cleanly to 0 is rejected, since multiple
> + * backing disks per cache set are not supported.  Returns NULL on any
> + * failure.
> + */
> +static struct mdinfo *container_content_bcache(struct supertype *st, char *subarray)
> +{
> +       struct bcache_super *super = st->sb;
> +       struct bcache_super *member;
> +       struct mdinfo *vol, *dev;
> +       char *end;
> +
> +       vol = calloc(1, sizeof(*vol));
> +       if (vol == NULL) {
> +               fprintf(stderr, Name ": failed to allocate %zu bytes\n",
> +                       sizeof(*vol));
> +               return NULL;
> +       }
> +
> +       if (subarray) {
> +               unsigned long idx = strtoul(subarray, &end, 10);
> +
> +               if (idx > 0 || *end != '\0')
> +                       goto fail;
> +       }
> +
> +       /* select volume 0 so getinfo_bcache() reports the volume view */
> +       super->vol = 0;
> +       getinfo_bcache(st, vol, NULL);
> +
> +       member = super;
> +       while (member) {
> +               struct cache_sb *c = member->sb;
> +               struct dl *d = member->disk;
> +
> +               dev = calloc(1, sizeof(*dev));
> +               if (dev == NULL) {
> +                       fprintf(stderr, Name ": failed to allocate disk\n");
> +                       goto fail;
> +               }
> +
> +               dev->disk.major = d->major;
> +               dev->disk.minor = d->minor;
> +               dev->disk.number = SB_BDEV(c);
> +               dev->disk.raid_disk = SB_BDEV(c);
> +               dev->disk.state = 1 << MD_DISK_ACTIVE;
> +               dev->recovery_start = MaxSector;
> +               dev->data_offset = vol->data_offset;
> +               dev->component_size = vol->component_size;
> +
> +               /* push onto the front of the volume's device list */
> +               dev->next = vol->devs;
> +               vol->devs = dev;
> +               vol->array.working_disks++;
> +
> +               member = member->next;
> +       }
> +
> +       return vol;
> +
> + fail:
> +       /* release whatever device entries were linked so far */
> +       while (vol->devs) {
> +               dev = vol->devs;
> +               vol->devs = dev->next;
> +               free(dev);
> +       }
> +
> +       free(vol);
> +       return NULL;
> +}
> +
> +/* Handler table that plugs the bcache routines into the superswitch
> + * interface.  The examine/detail reporting hooks and load_container
> + * are left out of builds that define MDASSEMBLE.  The format is
> + * registered as external metadata under the name "bcache".
> + */
> +struct superswitch super_bcache = {
> +#ifndef        MDASSEMBLE
> +       .examine_super           = examine_bcache,
> +       .brief_examine_super     = brief_examine_bcache,
> +       .brief_examine_subarrays = brief_examine_subarrays_bcache,
> +       .export_examine_super    = export_examine_bcache,
> +       .detail_super            = detail_bcache,
> +       .brief_detail_super      = brief_detail_bcache,
> +       .load_container          = load_container_bcache,
> +#endif
> +       .match_home              = match_home_bcache,
> +       .uuid_from_super         = uuid_from_bcache,
> +       .getinfo_super           = getinfo_bcache,
> +       .update_super            = update_bcache,
> +
> +       .avail_size              = avail_size_bcache,
> +
> +       .compare_super           = compare_bcache,
> +
> +       .load_super              = load_bcache,
> +       .store_super             = store_bcache,
> +       .free_super              = free_bcache,
> +       .match_metadata_desc     = match_metadata_desc_bcache,
> +       .container_content       = container_content_bcache,
> +
> +       .external                = 1,
> +       .name                    = "bcache",
> +};
> diff --git a/util.c b/util.c
> index 6985a70..d9e49cf 100644
> --- a/util.c
> +++ b/util.c
> @@ -919,7 +919,7 @@ struct superswitch *superlist[] =
>  {
>        &super0, &super1,
>        &super_ddf, &super_imsm,
> -       &mbr, &gpt,
> +       &mbr, &gpt, &super_bcache,
>        NULL };
>
>  #if !defined(MDASSEMBLE) || defined(MDASSEMBLE) && defined(MDASSEMBLE_AUTO)
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-bcache" in
> the body of a message to majordomo@xxxxxxxxxxxxxxx
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-raid" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux RAID Wiki]     [ATA RAID]     [Linux SCSI Target Infrastructure]     [Linux Block]     [Linux IDE]     [Linux SCSI]     [Linux Hams]     [Device Mapper]     [Device Mapper Cryptographics]     [Kernel]     [Linux Admin]     [Linux Net]     [GFS]     [RPM]     [git]     [Yosemite Forum]


  Powered by Linux