summaryrefslogtreecommitdiff
path: root/drivers/staging/lustre/lustre/include/lustre_disk.h
blob: 5e1ac129a681e28f4620e81abff337bb3ca70cce (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
/*
 * GPL HEADER START
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 only,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License version 2 for more details (a copy is included
 * in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU General Public License
 * version 2 along with this program; If not, see
 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 *
 * GPL HEADER END
 */
/*
 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
 * Use is subject to license terms.
 *
 * Copyright (c) 2011, 2012, Intel Corporation.
 */
/*
 * This file is part of Lustre, http://www.lustre.org/
 * Lustre is a trademark of Sun Microsystems, Inc.
 *
 * lustre/include/lustre_disk.h
 *
 * Lustre disk format definitions.
 *
 * Author: Nathan Rutman <nathan@clusterfs.com>
 */

#ifndef _LUSTRE_DISK_H
#define _LUSTRE_DISK_H

/** \defgroup disk disk
 *
 * @{
 */

#include "../../include/linux/libcfs/libcfs.h"
#include "../../include/linux/lnet/types.h"
#include <linux/backing-dev.h>

/****************** persistent mount data *********************/

/*
 * Server-type flag bits.  LDD_F_SV_TYPE_MASK is the OR of the three type
 * bits, so (flags & LDD_F_SV_TYPE_MASK) isolates the server-type part of a
 * flags word.
 */
#define LDD_F_SV_TYPE_MDT   0x0001
#define LDD_F_SV_TYPE_OST   0x0002
#define LDD_F_SV_TYPE_MGS   0x0004
#define LDD_F_SV_TYPE_MASK (LDD_F_SV_TYPE_MDT  | \
			    LDD_F_SV_TYPE_OST  | \
			    LDD_F_SV_TYPE_MGS)
/* The next bit after the type bits; it is not covered by LDD_F_SV_TYPE_MASK.
 * NOTE(review): its exact meaning is not visible in this header - confirm. */
#define LDD_F_SV_ALL	0x0008

/****************** mount command *********************/

/* The lmd is only used internally by Lustre; mount simply passes
   everything as string options */

/* Magic number; presumably the expected value of lmd_magic - TODO confirm. */
#define LMD_MAGIC    0xbdacbd03

/*
 * Mount parameters gleaned from the mount command - no persistent info here.
 * Who allocates and frees the pointer members is not visible in this header.
 */
struct lustre_mount_data {
	__u32      lmd_magic;
	__u32      lmd_flags;	 /* lustre mount flags (LMD_FLG_* bits) */
	int	lmd_mgs_failnodes; /* mgs failover node count */
	int	lmd_exclude_count; /* presumably the number of entries in
				      lmd_exclude - TODO confirm */
	int	lmd_recovery_time_soft; /* NOTE(review): units not visible here */
	int	lmd_recovery_time_hard; /* NOTE(review): units not visible here */
	char      *lmd_dev;	   /* device name */
	char      *lmd_profile;       /* client only */
	char      *lmd_mgssec;	/* sptlrpc flavor to mgs */
	char      *lmd_opts;	  /* lustre mount options (as opposed to
					 _device_ mount options) */
	char      *lmd_params;	/* lustre params */
	__u32     *lmd_exclude;       /* array of OSTs to ignore */
	char	*lmd_mgs;	   /* MGS nid */
	char	*lmd_osd_type;      /* OSD type */
};

/*
 * Bit values for lustre_mount_data::lmd_flags.  Each flag is a distinct
 * single bit; 0x0004 is not assigned in this header.
 */
#define LMD_FLG_SERVER		0x0001	/* Mounting a server */
#define LMD_FLG_CLIENT		0x0002	/* Mounting a client */
#define LMD_FLG_ABORT_RECOV	0x0008	/* Abort recovery */
#define LMD_FLG_NOSVC		0x0010	/* Only start MGS/MGC for servers,
					   no other services */
#define LMD_FLG_NOMGS		0x0020	/* Only start target for servers, reusing
					   existing MGS services */
#define LMD_FLG_WRITECONF	0x0040	/* Rewrite config log */
#define LMD_FLG_NOIR		0x0080	/* NO imperative recovery */
#define LMD_FLG_NOSCRUB		0x0100	/* Do not trigger scrub automatically */
#define LMD_FLG_MGS		0x0200	/* Also start MGS along with server */
#define LMD_FLG_IAM		0x0400	/* IAM dir */
#define LMD_FLG_NO_PRIMNODE	0x0800	/* all nodes are service nodes */
#define LMD_FLG_VIRGIN		0x1000	/* the service registers first time */
#define LMD_FLG_UPDATE		0x2000	/* update parameters */
#define LMD_FLG_HSM		0x4000	/* Start coordinator */

/*
 * Nonzero when LMD_FLG_CLIENT is set in (x)->lmd_flags.  The result is the
 * masked bit itself (0 or LMD_FLG_CLIENT), not a normalized 0/1 value.
 */
#define lmd_is_client(x) ((x)->lmd_flags & LMD_FLG_CLIENT)

/****************** last_rcvd file *********************/

/** version recovery epoch */
#define LR_EPOCH_BITS   32
/* Shifts \a a right by LR_EPOCH_BITS, leaving the bits above bit 31.  \a a
 * must be wider than 32 bits: shifting a 32-bit value by 32 is undefined. */
#define lr_epoch(a) ((a) >> LR_EPOCH_BITS)
#define LR_EXPIRE_INTERVALS 16 /**< number of intervals to track transno */
#define ENOENT_VERSION 1 /**< 'virtual' version of non-existent object */

/* Byte size of struct lr_server_data including its trailing padding. */
#define LR_SERVER_SIZE   512
/* NOTE(review): presumably the offset of the per-client data area in the
 * last_rcvd file - confirm against the code that reads/writes the file. */
#define LR_CLIENT_START 8192
/* Byte size of struct lsd_client_data including its trailing padding. */
#define LR_CLIENT_SIZE   128
/* Build-time check: the client start value must not be below the server size. */
#if LR_CLIENT_START < LR_SERVER_SIZE
#error "Can't have LR_CLIENT_START < LR_SERVER_SIZE"
#endif

/*
 * LR_MAX_CLIENTS is the larger of 128 * 1024 and PAGE_CACHE_SIZE * 8; the
 * choice is made by the preprocessor, so the macro expands to a plain
 * constant expression.
 *
 * This limit is arbitrary (131072 clients on x86), but it is convenient to use
 * 2^n * PAGE_CACHE_SIZE * 8 for the number of bits that fit an order-n allocation.
 * If we need more than 131072 clients (order-2 allocation on x86) then this
 * should become an array of single-page pointers that are allocated on demand.
 */
#if (128 * 1024UL) > (PAGE_CACHE_SIZE * 8)
#define LR_MAX_CLIENTS (128 * 1024UL)
#else
#define LR_MAX_CLIENTS (PAGE_CACHE_SIZE * 8)
#endif

/*
 * Feature flag bits, in three families: OBD_COMPAT_*, OBD_ROCOMPAT_* and
 * OBD_INCOMPAT_*.  NOTE(review): presumably stored in the lsd_feature_compat,
 * lsd_feature_rocompat and lsd_feature_incompat fields of
 * struct lr_server_data respectively - confirm against users.
 */

/** COMPAT_146: this is an OST (temporary) */
#define OBD_COMPAT_OST	  0x00000002
/** COMPAT_146: this is an MDT (temporary) */
#define OBD_COMPAT_MDT	  0x00000004
/** 2.0 server, interop flag to show server version is changed */
#define OBD_COMPAT_20	   0x00000008

/** MDS handles LOV_OBJID file */
#define OBD_ROCOMPAT_LOVOBJID   0x00000001

/** OST handles group subdirs */
#define OBD_INCOMPAT_GROUPS     0x00000001
/** this is an OST */
#define OBD_INCOMPAT_OST	0x00000002
/** this is an MDT */
#define OBD_INCOMPAT_MDT	0x00000004
/** common last_rcvd format */
#define OBD_INCOMPAT_COMMON_LR  0x00000008
/** FID is enabled */
#define OBD_INCOMPAT_FID	0x00000010
/** Size-on-MDS is enabled */
#define OBD_INCOMPAT_SOM	0x00000020
/** filesystem using iam format to store directory entries */
#define OBD_INCOMPAT_IAM_DIR    0x00000040
/** LMA attribute contains per-inode incompatible flags */
#define OBD_INCOMPAT_LMA	0x00000080
/** lmm_stripe_count has been shrunk from __u32 to __u16 and the remaining 16
 * bits are now used to store a generation. Once we start changing the layout
 * and bumping the generation, old versions expecting a 32-bit lmm_stripe_count
 * will be confused by interpreting stripe_count | gen << 16 as the actual
 * stripe count */
#define OBD_INCOMPAT_LMM_VER    0x00000100
/** multiple OI files for MDT */
#define OBD_INCOMPAT_MULTI_OI   0x00000200

/* Data stored per server at the head of the last_rcvd file.  Multi-byte
   fields are little-endian in the stored form (16-, 32- and 64-bit fields
   alike); lsd_le_to_cpu() and lsd_cpu_to_le() convert between that form and
   CPU byte order.  Field order and sizes define the stored layout and must
   not change.
   This should be common to filter_internal.h, lustre_mds.h */
struct lr_server_data {
	__u8  lsd_uuid[40];	/* server UUID */
	__u64 lsd_last_transno;    /* last completed transaction ID */
	__u64 lsd_compat14;	/* reserved - compat with old last_rcvd */
	__u64 lsd_mount_count;     /* incarnation number */
	__u32 lsd_feature_compat;  /* compatible feature flags */
	__u32 lsd_feature_rocompat;/* read-only compatible feature flags */
	__u32 lsd_feature_incompat;/* incompatible feature flags */
	__u32 lsd_server_size;     /* size of server data area */
	__u32 lsd_client_start;    /* start of per-client data area */
	__u16 lsd_client_size;     /* size of per-client data area */
	__u16 lsd_subdir_count;    /* number of subdirectories for objects */
	__u64 lsd_catalog_oid;     /* recovery catalog object id */
	__u32 lsd_catalog_ogen;    /* recovery catalog inode generation */
	__u8  lsd_peeruuid[40];    /* UUID of MDS associated with this OST */
	__u32 lsd_osd_index;       /* index number of OST in LOV */
	__u32 lsd_padding1;	/* was lsd_mdt_index, unused in 2.4.0 */
	__u32 lsd_start_epoch;     /* VBR: start epoch from last boot */
	/** transaction values since lsd_trans_table_time */
	__u64 lsd_trans_table[LR_EXPIRE_INTERVALS];
	/** start point of transno table below */
	__u32 lsd_trans_table_time; /* time of first slot in table above */
	__u32 lsd_expire_intervals; /* LR_EXPIRE_INTERVALS */
	/* 288 is the byte size of the fields above (naturally aligned, no
	 * implicit holes), so this pads the struct to LR_SERVER_SIZE bytes;
	 * the literal must be updated if a field is ever added. */
	__u8  lsd_padding[LR_SERVER_SIZE - 288];
};

/* Data stored per client in the last_rcvd file.  Multi-byte fields are
 * little-endian in the stored form (both 32- and 64-bit fields);
 * lcd_le_to_cpu() and lcd_cpu_to_le() convert between that form and CPU
 * byte order.  Field order and sizes define the stored layout. */
struct lsd_client_data {
	__u8  lcd_uuid[40];      /* client UUID */
	__u64 lcd_last_transno; /* last completed transaction ID */
	__u64 lcd_last_xid;     /* xid for the last transaction */
	__u32 lcd_last_result;  /* result from last RPC */
	__u32 lcd_last_data;    /* per-op data (disposition for open &c.) */
	/* for MDS_CLOSE requests */
	__u64 lcd_last_close_transno; /* last completed transaction ID */
	__u64 lcd_last_close_xid;     /* xid for the last transaction */
	__u32 lcd_last_close_result;  /* result from last RPC */
	__u32 lcd_last_close_data;    /* per-op data */
	/* VBR: last versions */
	__u64 lcd_pre_versions[4];
	__u32 lcd_last_epoch;
	/** orphans handling for delayed export rely on that */
	__u32 lcd_first_epoch;
	/* The fields above already occupy 128 bytes, so with
	 * LR_CLIENT_SIZE == 128 this is a zero-length array (a GNU C
	 * extension) that adds no storage. */
	__u8  lcd_padding[LR_CLIENT_SIZE - 128];
};

/*
 * bug20354: the lcd_uuid for export of clients may be wrong.
 *
 * Verify that the client UUID held in \a lcd contains a NUL terminator.
 * When none is found within the whole lcd_uuid array, the last byte of the
 * array is overwritten with NUL (truncating the UUID in place) and an error
 * naming \a obd_name and \a index is printed on the console.
 */
static inline void check_lcd(char *obd_name, int index,
			     struct lsd_client_data *lcd)
{
	int uuid_size = sizeof(lcd->lcd_uuid);

	/* A terminator somewhere inside the array: nothing to repair. */
	if (strnlen((char *)lcd->lcd_uuid, uuid_size) != uuid_size)
		return;

	/* Terminate first so the "%s" below cannot run past the array. */
	lcd->lcd_uuid[uuid_size - 1] = '\0';
	LCONSOLE_ERROR("the client UUID (%s) on %s for exports stored in last_rcvd(index = %d) is bad!\n",
		       lcd->lcd_uuid, obd_name, index);
}

/* last_rcvd handling */
static inline void lsd_le_to_cpu(struct lr_server_data *buf,
				 struct lr_server_data *lsd)
{
	int i;

	memcpy(lsd->lsd_uuid, buf->lsd_uuid, sizeof(lsd->lsd_uuid));
	lsd->lsd_last_transno     = le64_to_cpu(buf->lsd_last_transno);
	lsd->lsd_compat14	 = le64_to_cpu(buf->lsd_compat14);
	lsd->lsd_mount_count      = le64_to_cpu(buf->lsd_mount_count);
	lsd->lsd_feature_compat   = le32_to_cpu(buf->lsd_feature_compat);
	lsd->lsd_feature_rocompat = le32_to_cpu(buf->lsd_feature_rocompat);
	lsd->lsd_feature_incompat = le32_to_cpu(buf->lsd_feature_incompat);
	lsd->lsd_server_size      = le32_to_cpu(buf->lsd_server_size);
	lsd->lsd_client_start     = le32_to_cpu(buf->lsd_client_start);
	lsd->lsd_client_size      = le16_to_cpu(buf->lsd_client_size);
	lsd->lsd_subdir_count     = le16_to_cpu(buf->lsd_subdir_count);
	lsd->lsd_catalog_oid      = le64_to_cpu(buf->lsd_catalog_oid);
	lsd->lsd_catalog_ogen     = le32_to_cpu(buf->lsd_catalog_ogen);
	memcpy(lsd->lsd_peeruuid, buf->lsd_peeruuid, sizeof(lsd->lsd_peeruuid));
	lsd->lsd_osd_index	= le32_to_cpu(buf->lsd_osd_index);
	lsd->lsd_padding1	= le32_to_cpu(buf->lsd_padding1);
	lsd->lsd_start_epoch      = le32_to_cpu(buf->lsd_start_epoch);
	for (i = 0; i < LR_EXPIRE_INTERVALS; i++)
		lsd->lsd_trans_table[i] = le64_to_cpu(buf->lsd_trans_table[i]);
	lsd->lsd_trans_table_time = le32_to_cpu(buf->lsd_trans_table_time);
	lsd->lsd_expire_intervals = le32_to_cpu(buf->lsd_expire_intervals);
}

static inline void lsd_cpu_to_le(struct lr_server_data *lsd,
				 struct lr_server_data *buf)
{
	int i;

	memcpy(buf->lsd_uuid, lsd->lsd_uuid, sizeof(buf->lsd_uuid));
	buf->lsd_last_transno     = cpu_to_le64(lsd->lsd_last_transno);
	buf->lsd_compat14	 = cpu_to_le64(lsd->lsd_compat14);
	buf->lsd_mount_count      = cpu_to_le64(lsd->lsd_mount_count);
	buf->lsd_feature_compat   = cpu_to_le32(lsd->lsd_feature_compat);
	buf->lsd_feature_rocompat = cpu_to_le32(lsd->lsd_feature_rocompat);
	buf->lsd_feature_incompat = cpu_to_le32(lsd->lsd_feature_incompat);
	buf->lsd_server_size      = cpu_to_le32(lsd->lsd_server_size);
	buf->lsd_client_start     = cpu_to_le32(lsd->lsd_client_start);
	buf->lsd_client_size      = cpu_to_le16(lsd->lsd_client_size);
	buf->lsd_subdir_count     = cpu_to_le16(lsd->lsd_subdir_count);
	buf->lsd_catalog_oid      = cpu_to_le64(lsd->lsd_catalog_oid);
	buf->lsd_catalog_ogen     = cpu_to_le32(lsd->lsd_catalog_ogen);
	memcpy(buf->lsd_peeruuid, lsd->lsd_peeruuid, sizeof(buf->lsd_peeruuid));
	buf->lsd_osd_index	  = cpu_to_le32(lsd->lsd_osd_index);
	buf->lsd_padding1	  = cpu_to_le32(lsd->lsd_padding1);
	buf->lsd_start_epoch      = cpu_to_le32(lsd->lsd_start_epoch);
	for (i = 0; i < LR_EXPIRE_INTERVALS; i++)
		buf->lsd_trans_table[i] = cpu_to_le64(lsd->lsd_trans_table[i]);
	buf->lsd_trans_table_time = cpu_to_le32(lsd->lsd_trans_table_time);
	buf->lsd_expire_intervals = cpu_to_le32(lsd->lsd_expire_intervals);
}

static inline void lcd_le_to_cpu(struct lsd_client_data *buf,
				 struct lsd_client_data *lcd)
{
	memcpy(lcd->lcd_uuid, buf->lcd_uuid, sizeof (lcd->lcd_uuid));
	lcd->lcd_last_transno       = le64_to_cpu(buf->lcd_last_transno);
	lcd->lcd_last_xid	   = le64_to_cpu(buf->lcd_last_xid);
	lcd->lcd_last_result	= le32_to_cpu(buf->lcd_last_result);
	lcd->lcd_last_data	  = le32_to_cpu(buf->lcd_last_data);
	lcd->lcd_last_close_transno = le64_to_cpu(buf->lcd_last_close_transno);
	lcd->lcd_last_close_xid     = le64_to_cpu(buf->lcd_last_close_xid);
	lcd->lcd_last_close_result  = le32_to_cpu(buf->lcd_last_close_result);
	lcd->lcd_last_close_data    = le32_to_cpu(buf->lcd_last_close_data);
	lcd->lcd_pre_versions[0]    = le64_to_cpu(buf->lcd_pre_versions[0]);
	lcd->lcd_pre_versions[1]    = le64_to_cpu(buf->lcd_pre_versions[1]);
	lcd->lcd_pre_versions[2]    = le64_to_cpu(buf->lcd_pre_versions[2]);
	lcd->lcd_pre_versions[3]    = le64_to_cpu(buf->lcd_pre_versions[3]);
	lcd->lcd_last_epoch	 = le32_to_cpu(buf->lcd_last_epoch);
	lcd->lcd_first_epoch	= le32_to_cpu(buf->lcd_first_epoch);
}

static inline void lcd_cpu_to_le(struct lsd_client_data *lcd,
				 struct lsd_client_data *buf)
{
	memcpy(buf->lcd_uuid, lcd->lcd_uuid, sizeof (lcd->lcd_uuid));
	buf->lcd_last_transno       = cpu_to_le64(lcd->lcd_last_transno);
	buf->lcd_last_xid	   = cpu_to_le64(lcd->lcd_last_xid);
	buf->lcd_last_result	= cpu_to_le32(lcd->lcd_last_result);
	buf->lcd_last_data	  = cpu_to_le32(lcd->lcd_last_data);
	buf->lcd_last_close_transno = cpu_to_le64(lcd->lcd_last_close_transno);
	buf->lcd_last_close_xid     = cpu_to_le64(lcd->lcd_last_close_xid);
	buf->lcd_last_close_result  = cpu_to_le32(lcd->lcd_last_close_result);
	buf->lcd_last_close_data    = cpu_to_le32(lcd->lcd_last_close_data);
	buf->lcd_pre_versions[0]    = cpu_to_le64(lcd->lcd_pre_versions[0]);
	buf->lcd_pre_versions[1]    = cpu_to_le64(lcd->lcd_pre_versions[1]);
	buf->lcd_pre_versions[2]    = cpu_to_le64(lcd->lcd_pre_versions[2]);
	buf->lcd_pre_versions[3]    = cpu_to_le64(lcd->lcd_pre_versions[3]);
	buf->lcd_last_epoch	 = cpu_to_le32(lcd->lcd_last_epoch);
	buf->lcd_first_epoch	= cpu_to_le32(lcd->lcd_first_epoch);
}

/**
 * Return the larger of lcd_last_transno and lcd_last_close_transno.
 *
 * \param lcd	per-client record, fields in CPU byte order
 */
static inline __u64 lcd_last_transno(struct lsd_client_data *lcd)
{
	__u64 newest = lcd->lcd_last_close_transno;

	if (lcd->lcd_last_transno > newest)
		newest = lcd->lcd_last_transno;

	return newest;
}

/**
 * Return the larger of lcd_last_xid and lcd_last_close_xid.
 *
 * \param lcd	per-client record, fields in CPU byte order
 */
static inline __u64 lcd_last_xid(struct lsd_client_data *lcd)
{
	__u64 newest = lcd->lcd_last_close_xid;

	if (lcd->lcd_last_xid > newest)
		newest = lcd->lcd_last_xid;

	return newest;
}

/****************** superblock additional info *********************/

/* Only pointers to ll_sb_info are used here, so a forward declaration
 * is enough. */
struct ll_sb_info;

/*
 * Lustre-private superblock data; s2lsi() obtains it from a super_block's
 * s_fs_info pointer.
 */
struct lustre_sb_info {
	int		       lsi_flags;  /* NOTE(review): presumably LSI_*
						  bits - confirm */
	struct obd_device	*lsi_mgc;     /* mgc obd */
	struct lustre_mount_data *lsi_lmd;     /* mount command info */
	struct ll_sb_info	*lsi_llsbi;   /* add'l client sbi info */
	struct dt_device	 *lsi_dt_dev;  /* dt device to access disk fs*/
	struct vfsmount	  *lsi_srv_mnt; /* the one server mount */
	atomic_t	      lsi_mounts;  /* references to the srv_mnt */
	/* fixed-size name buffers follow */
	char			  lsi_svname[MTI_NAME_MAXLEN];
	char			  lsi_osd_obdname[64];
	char			  lsi_osd_uuid[64];
	struct obd_export	 *lsi_osd_exp;
	char			  lsi_osd_type[16];
	char			  lsi_fstype[16];
	struct backing_dev_info   lsi_bdi;     /* each client mountpoint needs
						  own backing_dev_info */
};

/* NOTE(review): presumably bits for lustre_sb_info::lsi_flags - confirm. */
#define LSI_UMOUNT_FAILOVER	      0x00200000
#define LSI_BDI_INITIALIZED	      0x00400000

/* sb->s_fs_info cast to struct lustre_sb_info *. */
#define     s2lsi(sb)	((struct lustre_sb_info *)((sb)->s_fs_info))
/* The s_fs_info member itself, uncast, so it can also be assigned to. */
#define     s2lsi_nocast(sb) ((sb)->s_fs_info)

/*
 * Accessors for the mount data reached through the superblock.  They
 * dereference s2lsi(sb) and its lsi_lmd without any NULL check, so both
 * must be valid when these are used.
 */
#define     get_profile_name(sb)   (s2lsi(sb)->lsi_lmd->lmd_profile)
#define	    get_mount_flags(sb)	   (s2lsi(sb)->lsi_lmd->lmd_flags)
#define	    get_mntdev_name(sb)	   (s2lsi(sb)->lsi_lmd->lmd_dev)

/****************** mount lookup info *********************/

/*
 * One mount lookup entry tying a name to a super_block and a vfsmount.
 * NOTE(review): the list these entries are linked on, and who owns
 * lmi_name, are not visible in this header - confirm in obd_mount.c.
 */
struct lustre_mount_info {
	char		 *lmi_name;
	struct super_block   *lmi_sb;
	struct vfsmount      *lmi_mnt;
	struct list_head	    lmi_list_chain; /* list linkage */
};

/****************** prototypes *********************/

/* obd_mount.c */

/* NOTE(review): the contracts below (return conventions, locking, required
 * superblock state) are not visible in this header; check the definitions. */
int lustre_start_mgc(struct super_block *sb);
/* Takes a callback that receives the super_block and a vfsmount. */
void lustre_register_client_fill_super(int (*cfs)(struct super_block *sb,
						  struct vfsmount *mnt));
/* Takes a callback that receives the super_block. */
void lustre_register_kill_super_cb(void (*cfs)(struct super_block *sb));
int lustre_common_put_super(struct super_block *sb);

/* NOTE(review): presumably fills *res_id from fsname and type - confirm. */
int mgc_fsname2resid(char *fsname, struct ldlm_res_id *res_id, int type);

/** @} disk */

#endif /* _LUSTRE_DISK_H */